In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import torch
from torch import optim
from torch.utils.data import DataLoader

from src.moglow import Moglow
from src.data import SeriesDataset

In [2]:
# reproducibility
# Seed every RNG the notebook relies on so that Restart & Run All gives the
# same travel sample, the same batch order and the same starting weights.
seed = 42
np.random.seed(seed)     # global numpy RNG: used by np.random.choice when sampling travels
torch.manual_seed(seed)  # global torch RNG: used by DataLoader shuffling (and presumably model init)

# flow setup
sequence_length = 5        # passed to SeriesDataset as `seqlen`
num_layers = 3             # forwarded to Moglow(num_layers=...)
coupling_flow = 'affine'   # forwarded to Moglow(coupling_flow=...)
coupling_network = 'lstm'  # forwarded to Moglow(coupling_network=...)
hidden_features = 64       # forwarded to Moglow(hidden_features=...)
num_blocks_per_layer = 2   # forwarded to Moglow(num_blocks_per_layer=...)

# experiment setup
batch_size = 128
learning_rate = 1e-3       # Adam step size
weight_decay = 1e-2        # Adam weight decay
max_epochs = 1000
# Prefer the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
log_interval = 100         # print the epoch loss every `log_interval` epochs

In [3]:
# Number of distinct travels (series) kept for training.
n_travels = 512
# Draw `n_travels` distinct travel ids, without replacement, from 0..5049.
samples = np.random.choice(np.arange(5050), size=n_travels, replace=False)

# Raw train / test tables.
df_train = pd.read_csv('../data/raw/data-train.csv')
df_test = pd.read_csv('../data/raw/data-test.csv')

# Keep only the training rows whose `travel` id was drawn above.
is_sampled_travel = df_train['travel'].isin(samples)
df_train_sampled = df_train[is_sampled_travel]

In [4]:
# Options for the training dataset: two data columns, one label column,
# series grouped by `travel`, window length taken from the config cell.
dataset_options = {
    'df': df_train_sampled,
    'data_columns': ['x1', 'x2'],
    'label_column': 'label',
    'serie_column': 'travel',
    'seqlen': sequence_length,
    'dropout': 0,
}
train_set = SeriesDataset(**dataset_options)

# Number of training items = leading dimension of the full 'x' array.
train_set_size = train_set[:]['x'].shape[0]

# Mini-batch iterator over the training set, reshuffled every epoch.
train_loader = DataLoader(dataset=train_set, shuffle=True, batch_size=batch_size)

autoreg:(512, 295, 10)
self.x:(512, 2, 295)
self.cond:(512, 10, 295)


In [5]:
# Infer the model dimensions from the first training item instead of
# hard-coding them.
#
# The second dimension of 'x' is bound to `series_length`, NOT to
# `sequence_length`: the latter is the config value handed to SeriesDataset as
# `seqlen`, and overwriting it here would make a re-run of the dataset cell
# build a different dataset (hidden-state bug).
features, series_length = train_set[0]['x'].shape
conditional_features, _ = train_set[0]['cond'].shape

# Build the flow and move its parameters to the selected device.
model = Moglow(
    features,
    conditional_features,
    series_length,
    num_layers=num_layers,
    coupling_flow=coupling_flow,
    coupling_network=coupling_network,
    hidden_features=hidden_features,
    num_blocks_per_layer=num_blocks_per_layer
).to(device)

In [None]:
# train
# Maximum-likelihood training: minimise the mean negative log-probability of
# each batch with Adam (learning rate and weight decay from the config cell).
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay
)
loss_list = []   # one entry per epoch: sample-weighted mean loss over the training set
batch_norm = []  # not used in this cell; kept in case other cells reference it
for epoch in range(1, max_epochs+1):
    running_loss = 0.0
    model.train()
    for data_batch in train_loader:
        # The model lives on `device`; move every tensor of the batch there as
        # well, otherwise the forward pass fails with a device mismatch on CUDA.
        # Non-tensor entries (if any) are passed through untouched.
        data_batch = {
            key: value.to(device) if torch.is_tensor(value) else value
            for key, value in data_batch.items()
        }
        # LSTM hidden state is (re)initialised from the batch only during the
        # first epoch; afterwards the existing state is reused.
        if epoch == 1:
            model.init_lstm_hidden(data_batch)
        # presumably detaches the hidden state from the previous graph so that
        # backprop does not reach into earlier batches - TODO confirm in Moglow
        model.repackage_lstm_hidden()
        optimizer.zero_grad()
        loss = -model.log_prob(inputs=data_batch['x'], conds=data_batch['cond']).mean()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        optimizer.step()
        # statistics: weight the batch mean by the batch size so the epoch
        # average stays correct even if the last batch is smaller
        running_loss += loss.item() * data_batch['x'].size(0)
    epoch_loss = running_loss / train_set_size
    loss_list.append(epoch_loss)
    if (epoch % log_interval == 0) or epoch == 1:
        print(f" - Epoch {epoch:3d}/{max_epochs:3d}: {epoch_loss:.3f}")

 - Epoch   1/1000: 563.626
 - Epoch 100/1000: -1172.877
 - Epoch 200/1000: -1633.884
 - Epoch 300/1000: -1816.725
 - Epoch 400/1000: -1903.165
 - Epoch 500/1000: -2051.062
 - Epoch 600/1000: -2124.366
 - Epoch 700/1000: -1837.935


In [None]:
# Training curve: one point per completed epoch. Each value is the epoch's
# mean negative log-probability as accumulated in `loss_list`.
fig, ax = plt.subplots()
# Epochs are numbered from 1 in the training loop, so use the same x-axis here.
ax.plot(range(1, len(loss_list) + 1), loss_list)
ax.set(
    title='Training loss',
    xlabel='epoch',
    ylabel='mean negative log-likelihood',
)
plt.show()

In [None]:
# Put the model in evaluation mode (counterpart of the model.train() call in
# the training loop).
model.eval()
# --- disabled scoring loop, kept for reference ---
# When enabled, it rebuilds the loaders without shuffling and collects, per
# batch of the training loader, the NEGATED model.log_prob(...) values in
# `log_probs` (so despite the name the list holds negative log-probabilities).
# NOTE(review): `test_set` is not defined in any visible cell, the loop
# re-binds `batch_size` and `train_loader`, and it does not run under
# torch.no_grad() - confirm all three before re-enabling.
# batch_size = 128
# test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
# train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
# log_probs = []
# for batch in train_loader:
#     inputs = batch['x']
#     conds = batch['cond']
#     log_probs.append(-model.log_prob(inputs=inputs, conds=conds))

In [None]:
# --- disabled cell, kept for reference ---
# When enabled, it concatenates `log_probs` (filled by the disabled scoring
# loop in the previous cell) into a numpy array and draws two histograms of
# those scores: label == 0 in red, label == 1 in blue.
# NOTE(review): requires `log_probs` to exist, i.e. the scoring loop must be
# re-enabled first; the boolean masks assume the scores are in the same order
# as train_set[:]['label'] - confirm.
# probs = torch.cat(log_probs).detach().numpy()

# plt.hist(probs[train_set[:]['label'] == 0], bins=50, color='red')
# plt.hist(probs[train_set[:]['label'] == 1], bins=50, color='blue')
# plt.show()

In [None]:
# --- disabled cell, kept for reference ---
# When enabled, for every item of `train_set` it repeats the item's 'x' and
# 'cond' `n_samples` times, initialises the LSTM hidden state from them, draws
# `n_samples` samples from the model conditioned on `conds`, and plots the
# first sampled row against the second as a faint blue line.
# NOTE(review): this re-binds `samples`, which the data-loading cell uses for
# the sampled travel ids; the tensors are cast to double and never moved to
# `device` - confirm both before re-enabling.
# n_samples = 1
# for travel in range(train_set[:]['cond'].shape[0]):
#     inputs = torch.Tensor(train_set[travel]['x']).repeat(n_samples, 1, 1).double()
#     conds = torch.Tensor(train_set[travel]['cond']).repeat(n_samples, 1, 1).double()
#     model.init_lstm_hidden({
#         'x': inputs,
#         'cond': conds
#     })
#     samples = model.sample(n_samples, conds)
#     for sample in samples.detach().numpy():
#         sample = sample.transpose()
#         plt.plot(sample[:, 0], sample[:, 1], color='blue', alpha=.05)
# plt.show()