# Baseline models

This notebook trains and evaluates two baseline models: a simple Gaussian mixture model (GMM) and a non-recurrent mixture density network (MDN).


In [18]:
# Imports

# Misc
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
import pickle 

# Visualization tools
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import animation, rc
from IPython.display import HTML, Image
from matplotlib import rc
# Plot settings, applied in a single rcParams update
plt.rcParams.update({
    # Large base font and high-resolution figures
    'font.size': 20,
    'figure.dpi': 300,
    # LaTeX text rendering with a Computer Modern serif font
    'text.usetex': True,
    'font.family': 'serif',
    'font.serif': ['Computer Modern Roman'],
})

# Pyro/PyTorch
import torch
from torch import nn
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, TraceEnum_ELBO, Predictive, NUTS, MCMC, config_enumerate
from pyro.infer.autoguide import AutoDelta, AutoDiagonalNormal, AutoMultivariateNormal
from pyro.optim import Adam, ClippedAdam
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from pyro.distributions import MultivariateNormal as MN
from pyro.ops.indexing import Vindex

# Implemented modules
from util import *
from model import RMDN, NonRMDN, RMDN2

### Load data

In [13]:
# Load data files (one file per split)

# X tensors for the train / validation / test splits
X_train_tensor = torch.load("./data/X_train_tensor_1h.pt")
X_val_tensor = torch.load("./data/X_val_tensor_1h.pt")
X_test_tensor = torch.load("./data/X_test_tensor_1h.pt")

# U tensors for the same splits
U_train_tensor = torch.load("./data/U_train_tensor_1h.pt")
U_val_tensor = torch.load("./data/U_val_tensor_1h.pt")
U_test_tensor = torch.load("./data/U_test_tensor_1h.pt")

# Per-step counts, used further down to slice the valid rows of X
N_t_train = np.load("./data/N_t_train_1h.npy")
N_t_valid = np.load("./data/N_t_val_1h.npy")
N_t_test = np.load("./data/N_t_test_1h.npy")

# Concatenation: join the training and validation splits along axis 1
X_train_val = torch.cat([X_train_tensor, X_val_tensor], dim=1)
U_train_val = torch.cat([U_train_tensor, U_val_tensor], dim=1)
N_train_val = np.hstack([N_t_train, N_t_valid])

## GMM baseline

In [14]:
def _flatten_observations(X_tensor, counts):
    """Stack the valid rows of every step of ``X_tensor`` into one 2-column array.

    Parameters
    ----------
    X_tensor : torch.Tensor
        Indexed as ``X_tensor[0, t, :count, :]``: only the first entry along
        the leading axis is used, and the last axis must have length 2.
    counts : array-like of int
        Number of leading rows to keep at each step ``t``.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(sum(counts), 2)`` holding the kept rows of
        step 0, then step 1, and so on.
    """
    counts = np.asarray(counts)
    # offsets[t]:offsets[t + 1] is the slice of the output owned by step t
    offsets = np.concatenate(([0], np.cumsum(counts)))
    flat = np.zeros((int(offsets[-1]), 2))
    for t, count in enumerate(counts):
        flat[offsets[t]:offsets[t + 1], :] = X_tensor[0, t, :count, :].numpy()
    return flat


# Create input for mixture model
X_train_full = _flatten_observations(X_train_tensor, N_t_train)

# Create validation set
X_val_full = _flatten_observations(X_val_tensor, N_t_valid)

# Create test set
X_test_full = _flatten_observations(X_test_tensor, N_t_test)

# Compute boundaries
# Rescale the bounding box with the mean/std constants (latMax, latmean, ...
# are not defined in this notebook; presumably exported by `util` - verify).
latmax = (latMax-latmean)/latstd
latmin = (latMin-latmean)/latstd
lonmax = (lonMax-lonmean)/lonstd
lonmin = (lonMin-lonmean)/lonstd

# Bin the training data: 33 edges per axis, i.e. 32 intervals per axis
bins_lat = np.linspace(latmin, latmax, 33)
bins_lon = np.linspace(lonmin, lonmax, 33)
# np.digitize returns len(bins) for values on or beyond the last edge, which
# would index past the end of the edge array. Clip so those points snap to
# the last edge instead of raising IndexError.
binidx_lat = np.clip(np.digitize(X_train_full[:, 1], bins=bins_lat), 0, len(bins_lat) - 1)
binidx_lon = np.clip(np.digitize(X_train_full[:, 0], bins=bins_lon), 0, len(bins_lon) - 1)
# Replace every point by the bin edges selected above (column 0 -> lon, column 1 -> lat)
X_train_base = np.vstack((bins_lon[binidx_lon], bins_lat[binidx_lat]))

# Initialise and fit model
# A fixed random_state makes the fit, and the numbers printed below, reproducible.
baseline = GaussianMixture(n_components=15, random_state=0).fit(X_train_base.T)

# Compute loglikelihood
# Total log-likelihood = sum of per-sample log-likelihoods on the un-binned data
baseline_ll_train = baseline.score_samples(X_train_full).sum()
baseline_ll_validation = baseline.score_samples(X_val_full).sum()
baseline_ll_test = baseline.score_samples(X_test_full).sum()
print(f'Baseline training LL:\t{baseline_ll_train:.4f}')
print(f'Baseline validation LL:\t{baseline_ll_validation:.4f}')
print(f'Baseline testing LL:\t{baseline_ll_test:.4f}')

Baseline training LL:	-27949.2627
Baseline validation LL:	-15853.4644
Baseline testing LL:	-13528.9642


## Non-recurrent MDN

The non-recurrent implementation of the mixture density network is trained here.

### Train model

In [15]:
# Serve the training tensors one item at a time, in a fixed order
dataset = RFNDataset(X_train_tensor, U_train_tensor)
dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

# The network input size is the length of U's fourth axis
grid = U_train_tensor.shape[3]
model = NonRMDN(
    input_dim=grid,
    hidden_dim=128,
    LSTM_input=32,
    output_dim=2,
    LSTM_dim=32,
    K=30,
)
guide = model.guide

# Adam with gradient-norm clipping and weight decay
optimizer = ClippedAdam({"lr": 3e-4, "clip_norm": 5., "weight_decay": 5e-2})

# Stochastic variational inference with an enumeration-aware ELBO
svi = SVI(
    model.model,
    guide,
    optimizer,
    loss=TraceEnum_ELBO(num_particles=1, max_plate_nesting=2),
)

In [16]:
# Start from an empty Pyro parameter store
pyro.clear_param_store()

num_epochs = 5000

# Histories collected during training
train_losses, val_losses = [], []
train_ll, validation_ll = [], []

# Best validation log-likelihood seen so far (drives checkpointing)
best_ll = -np.inf

for i in range(num_epochs):
    for X_i, U_i in dataloader:
        # Training mode for the optimisation step
        model.train()

        # One SVI step; the loss is divided by the sum of N_t_train
        loss = svi.step(X_i, U_i, N_t_train) / (N_t_train.sum())
        train_losses.append(loss)

        # Evaluate on the first epoch and on every 500th epoch
        if i == 0 or (i + 1) % 500 == 0:
            # Evaluation mode while computing log-likelihoods
            model.eval()

            train_ll_i = model.get_loglikelihood(
                X=X_i, U=U_i, mask=N_t_train
            ).sum()
            validation_ll_i = model.get_loglikelihood(
                X=X_val_tensor, U=U_val_tensor, mask=N_t_valid
            ).sum()

            train_ll.append(train_ll_i)
            validation_ll.append(validation_ll_i)

            print(f"Epoch: {i+1}, \tLoss: {loss:.3f}, \tTrain LL: {train_ll_i.numpy()}, \tValidation LL: {validation_ll_i.numpy()}")

            # Keep a checkpoint of the weights with the best validation LL
            if validation_ll_i > best_ll:
                best_ll = validation_ll_i
                torch.save(model.state_dict(), 'models/MDN_checkpoint')

Epoch: 1, 	Loss: 3.132, 	Train LL: -32036.158203125, 	Validation LL: -18191.49609375
Epoch: 500, 	Loss: 2.509, 	Train LL: -25048.259765625, 	Validation LL: -14396.16015625
Epoch: 1000, 	Loss: 2.416, 	Train LL: -24108.58984375, 	Validation LL: -14037.6806640625
Epoch: 1500, 	Loss: 2.395, 	Train LL: -23720.6640625, 	Validation LL: -13922.6943359375
Epoch: 2000, 	Loss: 2.373, 	Train LL: -23561.4375, 	Validation LL: -13869.677734375
Epoch: 2500, 	Loss: 2.336, 	Train LL: -23315.716796875, 	Validation LL: -13810.94140625
Epoch: 3000, 	Loss: 2.328, 	Train LL: -23151.396484375, 	Validation LL: -13843.171875
Epoch: 3500, 	Loss: 2.316, 	Train LL: -22960.201171875, 	Validation LL: -13854.18359375
Epoch: 4000, 	Loss: 2.305, 	Train LL: -22901.765625, 	Validation LL: -13884.369140625
Epoch: 4500, 	Loss: 2.284, 	Train LL: -22751.794921875, 	Validation LL: -13887.1083984375
Epoch: 5000, 	Loss: 2.264, 	Train LL: -22538.4296875, 	Validation LL: -13876.3564453125


In [24]:
# Persist the training histories and the final weights when `save` is True
save = True
if save:
    # Each history is pickled (binary), despite the .txt extension
    for out_file, history in (
        ("models/mdn_losses.txt", train_losses),
        ("models/mdn_validation.txt", validation_ll),
        ("models/mdn_train.txt", train_ll),
    ):
        with open(out_file, "wb") as fp:
            pickle.dump(history, fp)

    # Weights of the model as it stands at the end of training
    torch.save(model.state_dict(), 'models/MDN_trained')

In [25]:
# Show the model's repr (its layer structure) as the cell output
model

NonRMDN(
  (FeatureExtractor): FeatureExtractor(
    (input_to_hidden): Linear(in_features=1024, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_output): Linear(in_features=128, out_features=32, bias=True)
    (elu): ELU(alpha=1.0)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (MDN): MDN(
    (input_to_hidden): Linear(in_features=32, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_loc): Linear(in_features=128, out_features=60, bias=True)
    (hidden_to_sigma): Linear(in_features=128, out_features=60, bias=True)
    (hidden_to_off_diag): Linear(in_features=128, out_features=30, bias=True)
    (hidden_to_mix): Linear(in_features=128, out_features=30, bias=True)
    (elu): ELU(alpha=1.0)
    (softmax): Softmax(dim=2)
    (softplus): Softplus(beta=1, threshold=20)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.5, i

### Load trained model

This section loads a previously trained MDN from `models/MDN_trained`.

In [22]:
# Rebuild the network with the same hyperparameters that were used for training
grid = U_train_tensor.shape[3]
model = NonRMDN(
    input_dim=grid,
    hidden_dim=128,
    LSTM_input=32,
    output_dim=2,
    LSTM_dim=32,
    K=30,
)

# Restore the saved weights.
# NOTE(review): 'models/MDN_trained' holds the final-epoch weights; the
# best-validation checkpoint from training is written to
# 'models/MDN_checkpoint' - confirm which one should be evaluated.
model.load_state_dict(torch.load('models/MDN_trained'))

# Switch to evaluation mode (the returned module is shown as the cell output)
model.eval()

NonRMDN(
  (FeatureExtractor): FeatureExtractor(
    (input_to_hidden): Linear(in_features=1024, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_output): Linear(in_features=128, out_features=32, bias=True)
    (elu): ELU(alpha=1.0)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (MDN): MDN(
    (input_to_hidden): Linear(in_features=32, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_loc): Linear(in_features=128, out_features=60, bias=True)
    (hidden_to_sigma): Linear(in_features=128, out_features=60, bias=True)
    (hidden_to_off_diag): Linear(in_features=128, out_features=30, bias=True)
    (hidden_to_mix): Linear(in_features=128, out_features=30, bias=True)
    (elu): ELU(alpha=1.0)
    (softmax): Softmax(dim=2)
    (softplus): Softplus(beta=1, threshold=20)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.5, i

### Evaluation

Compute the log-likelihood on the test set.

In [21]:
# Total log-likelihood of the test split under the loaded model
test_ll = model.get_loglikelihood(
    X=X_test_tensor,
    U=U_test_tensor,
    mask=N_t_test,
).sum()
print(f'Non-recurrent MDN log-likelihood on the test set: {test_ll.numpy()}')

Non-recurrent MDN log-likelihood on the test set: -11376.287109375
