In [1]:
%load_ext autoreload
%autoreload 2
import dpp
import numpy as np
import torch
from copy import deepcopy
# Create tensors on the GPU by default, but only when CUDA is actually usable:
# forcing CUDA tensors on a machine without a working GPU breaks the notebook.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases (torch.set_default_device / torch.set_default_dtype replace it); it is
# kept here so the notebook keeps working with the PyTorch version it targets.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
    print('CUDA is not available, falling back to CPU tensors.')

In [2]:
# Config
# One seed is shared by the NumPy and PyTorch global random number generators.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Dataset to train on. Other options:
#   'stack_overflow', 'lastfm',
#   'synth/poisson', 'synth/renewal', 'synth/self_correcting',
#   'synth/hawkes1', 'synth/hawkes2'
dataset_name = 'synth/hawkes1'

# Model config
# Size of the RNN hidden vector
context_size = 64
# Size of the mark embedding (used as RNN input)
mark_embedding_size = 32
# Number of components for a mixture model
num_mix_components = 64
# What RNN to use as an encoder {"RNN", "GRU", "LSTM"}
rnn_type = "GRU"

# Training config
# Number of sequences in a batch
batch_size = 64
# L2 regularization parameter
regularization = 1e-5
# Learning rate for Adam optimizer
learning_rate = 3e-4
# For how many epochs to train
max_epochs = 1000
# Display training statistics after every display_step
display_step = 5
# After how many consecutive epochs without improvement of val loss to stop training
patience = 50

In [3]:
# Load the data
# The dataset is selected by `dataset_name` and split with the configured seed.
dataset = dpp.data.load_dataset(dataset_name)
d_train, d_val, d_test = dataset.train_val_test_split(seed=seed)

# Only the training loader is shuffled; validation and test loaders are not.
dl_train = d_train.get_dataloader(batch_size=batch_size, shuffle=True)
dl_val, dl_test = (
    split.get_dataloader(batch_size=batch_size, shuffle=False)
    for split in (d_val, d_test)
)

In [4]:
# Define the model
print('Building model...')

# Inter-time statistics come from the training split and are handed to the model.
mean_log_inter_time, std_log_inter_time = d_train.get_inter_time_statistics()

model = dpp.models.LogNormMix(
    # Values derived from the training data
    num_marks=d_train.num_marks,
    mean_log_inter_time=mean_log_inter_time,
    std_log_inter_time=std_log_inter_time,
    # Values taken from the model config
    rnn_type=rnn_type,
    context_size=context_size,
    mark_embedding_size=mark_embedding_size,
    num_mix_components=num_mix_components,
)

# Adam over all model parameters; the L2 penalty is applied through weight_decay.
opt = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=regularization,
)

Building model...


In [5]:
def aggregate_loss_over_dataloader(dl):
    """Return the negative log-likelihood summed over `dl`, divided by the summed batch sizes.

    The notebook-level `model` is used for scoring, and gradient tracking is
    disabled while the batches are processed.

    Args:
        dl: Iterable of batches; each batch is passed to `model.log_prob` and
            must expose a `size` attribute.

    Returns:
        Sum of `-model.log_prob(batch).sum()` over all batches divided by the
        sum of `batch.size`. A `dl` that yields no batches ends in a division
        by zero.
    """
    nll_sum, size_sum = 0.0, 0
    with torch.no_grad():
        for chunk in dl:
            chunk_log_prob = model.log_prob(chunk).sum().item()
            nll_sum += -chunk_log_prob
            size_sum += chunk.size
    return nll_sum / size_sum

In [6]:
# Training
print('Starting training...')

# Smallest decrease of the validation loss that counts as a real improvement
# and resets the early-stopping patience counter.
min_improvement = 1e-4

impatient = 0                                # consecutive epochs without a real improvement
best_loss = np.inf                           # lowest validation loss seen so far
best_model = deepcopy(model.state_dict())    # weights that produced best_loss
training_val_losses = []                     # validation loss after every epoch

for epoch in range(max_epochs):
    # One pass of gradient updates over the training batches
    model.train()
    for batch in dl_train:
        opt.zero_grad()
        loss = -model.log_prob(batch).mean()
        loss.backward()
        opt.step()

    # Validation loss of the current weights
    model.eval()
    with torch.no_grad():
        loss_val = aggregate_loss_over_dataloader(dl_val)
        training_val_losses.append(loss_val)

    # Early-stopping bookkeeping. Both comparisons below evaluate to False when
    # loss_val is NaN (e.g. after training diverges), so a NaN epoch counts as
    # "no improvement": it neither resets the patience counter nor replaces the
    # best checkpoint with the diverged weights.
    if (best_loss - loss_val) >= min_improvement:
        best_loss = loss_val
        best_model = deepcopy(model.state_dict())
        impatient = 0
    else:
        impatient += 1
        if loss_val < best_loss:
            # Marginal improvement: keep the better weights, but do not reset patience.
            best_loss = loss_val
            best_model = deepcopy(model.state_dict())

    if impatient >= patience:
        print(f'Breaking due to early stopping at epoch {epoch}')
        break

    # `loss` is the training loss of the last batch of this epoch only
    if epoch % display_step == 0:
        print(f"Epoch {epoch:4d}: loss_train_last_batch = {loss.item():.1f}, loss_val = {loss_val:.1f}")

Starting training...
Epoch    0: loss_train_last_batch = 69.4, loss_val = 62.6
Epoch    5: loss_train_last_batch = 50.3, loss_val = 53.5
Epoch   10: loss_train_last_batch = 31.5, loss_val = 30.8
Epoch   15: loss_train_last_batch = -7.3, loss_val = -20.0
Epoch   20: loss_train_last_batch = -245.4, loss_val = -230.9
Epoch   25: loss_train_last_batch = -324.5, loss_val = -328.3
Epoch   30: loss_train_last_batch = -346.1, loss_val = -391.1
Epoch   35: loss_train_last_batch = -407.1, loss_val = -396.0
Epoch   40: loss_train_last_batch = -359.3, loss_val = -404.1
Epoch   45: loss_train_last_batch = -429.6, loss_val = -407.1
Epoch   50: loss_train_last_batch = -458.1, loss_val = -413.2
Epoch   55: loss_train_last_batch = -375.5, loss_val = -414.2
Epoch   60: loss_train_last_batch = -318.2, loss_val = -419.7
Epoch   65: loss_train_last_batch = -362.3, loss_val = -420.1
Epoch   70: loss_train_last_batch = -509.6, loss_val = -420.7
Epoch   75: loss_train_last_batch = -352.5, loss_val = -424.6
Ep

In [7]:
# Evaluation
# Put the best weights kept by the training loop back into the model and switch to eval mode.
model.load_state_dict(best_model)
model.eval()

# Loss of the restored model on each split, accumulated batch by batch over its dataloader
with torch.no_grad():
    final_loss_train, final_loss_val, final_loss_test = (
        aggregate_loss_over_dataloader(loader)
        for loader in (dl_train, dl_val, dl_test)
    )

In [9]:
# Report the final losses, one line per split, rounded to one decimal place.
print(
    'Negative log-likelihood:',
    f' - Train: {final_loss_train:.1f}',
    f' - Val:   {final_loss_val:.1f}',
    f' - Test:  {final_loss_test:.1f}',
    sep='\n',
)

Negative log-likelihood:
 - Train: -426.2
 - Val:   -426.4
 - Test:  -446.9
