# Recurrent Mixture Density Networks

In this notebook, a recurrent mixture density network (RMDN) is trained and evaluated. A Gaussian mixture model (GMM) is also trained as a simple baseline.

### Imports

In [16]:
# Imports

# Misc
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
import pickle

# Visualisation tools
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import animation, rc
from IPython.display import HTML, Image

# Pyro/PyTorch
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import pyro
import pyro.distributions as dist
from pyro.infer import SVI, TraceEnum_ELBO, Predictive, NUTS, MCMC, config_enumerate
from pyro.infer.autoguide import AutoDelta, AutoDiagonalNormal, AutoMultivariateNormal
from pyro.optim import Adam, ClippedAdam
from pyro.distributions import MultivariateNormal as MN
from pyro.ops.indexing import Vindex

# Implemented modules
from util  import *
from model import RMDN

### Load data

In [17]:
# Load data files
# Every split (train / val / test) consists of a tensor X, a tensor U and a
# NumPy array N_t; all nine files are read from ./data.

X_train_tensor = torch.load("./data/X_train_tensor_1h.pt")
X_val_tensor   = torch.load("./data/X_val_tensor_1h.pt")
X_test_tensor  = torch.load("./data/X_test_tensor_1h.pt")

U_train_tensor = torch.load("./data/U_train_tensor_1h.pt")
U_val_tensor   = torch.load("./data/U_val_tensor_1h.pt")
U_test_tensor  = torch.load("./data/U_test_tensor_1h.pt")

N_t_train = np.load("./data/N_t_train_1h.npy")
N_t_valid = np.load("./data/N_t_val_1h.npy")
N_t_test  = np.load("./data/N_t_test_1h.npy")

# Concatenation
# Train and validation are joined (tensors along dimension 1) so that they can
# be passed as the initial context when the model is evaluated on the test set.
X_train_val = torch.cat([X_train_tensor, X_val_tensor], dim=1)
U_train_val = torch.cat([U_train_tensor, U_val_tensor], dim=1)
N_train_val = np.hstack([N_t_train, N_t_valid])

### Prepare and train baseline GMM 

A GMM will now be trained. For this purpose, the training data is concatenated along a single dimension to remove the temporal aspect. The data is then gridded at the same resolution as the one the RMDN will be trained on.

In [18]:
def _flatten_points(X_tensor, counts):
    """Stack the first ``counts[i]`` points of every step ``i`` into one array.

    Parameters
    ----------
    X_tensor : torch.Tensor
        Indexed as ``X_tensor[:, i, :count, :]``; only the first entry along
        dimension 0 is used. Rows beyond ``count`` are ignored
        (presumably padding -- TODO confirm).
    counts : array of int
        Number of points to keep for each step.

    Returns
    -------
    np.ndarray
        Array of shape ``(sum(counts), 2)``.
    """
    flat = np.zeros((np.sum(counts), 2))
    offset = 0
    for step, count in enumerate(counts):
        flat[offset:(offset + count), :] = X_tensor[:, step, :count, :].numpy()[0]
        offset += count
    return flat


# Create input for mixture model, plus the validation and test sets
X_train_full = _flatten_points(X_train_tensor, N_t_train)
X_val_full = _flatten_points(X_val_tensor, N_t_valid)
X_test_full = _flatten_points(X_test_tensor, N_t_test)

# Compute boundaries (the raw limits are standardised with the mean/std from util)
latmax = (latMax-latmean)/latstd
latmin = (latMin-latmean)/latstd
lonmax = (lonMax-lonmean)/lonstd
lonmin = (lonMin-lonmean)/lonstd

# Bin the training data (column 0 is binned as longitude, column 1 as latitude)
bins_lat = np.linspace(latmin, latmax, 33)
bins_lon = np.linspace(lonmin, lonmax, 33)
# np.digitize returns len(bins) for values at or above the last edge, which
# would index one past the end of the bin array; clip so that such points are
# assigned to the last edge instead of raising an IndexError.
binidx_lat = np.clip(np.digitize(X_train_full[:,1], bins=bins_lat), 0, len(bins_lat) - 1)
binidx_lon = np.clip(np.digitize(X_train_full[:,0], bins=bins_lon), 0, len(bins_lon) - 1)
X_train_base = np.vstack((bins_lon[binidx_lon], bins_lat[binidx_lat]))

# Initialise and fit model
# NOTE(review): no random_state is passed, so the fitted mixture (and the
# numbers printed below) can differ between runs.
baseline = GaussianMixture(n_components=15).fit(X_train_base.T)

# Compute loglikelihood
# score() is the average per-sample log-likelihood, so it is multiplied by the
# number of points to obtain the total.
baseline_ll_train = baseline.score(X_train_full) * np.sum(N_t_train)
baseline_ll_validation = baseline.score(X_val_full) * np.sum(N_t_valid)
baseline_ll_test = baseline.score(X_test_full) * np.sum(N_t_test)
print(f'Baseline training LL:\t{baseline_ll_train:.4f}')
print(f'Baseline validation LL:\t{baseline_ll_validation:.4f}')
print(f'Baseline testing LL:\t{baseline_ll_test:.4f}')

Baseline training LL:	-27707.0815
Baseline validation LL:	-15757.8908
Baseline testing LL:	-13415.9389


### Train model

#### Preparation for training a new model

In [19]:
# Training data: the (X, U) tensors wrapped in a dataset and served in
# shuffled batches of size 1
dataset = RFNDataset(X_train_tensor, U_train_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=1, num_workers=0)

# The model's input dimension is the size of dimension 3 of U
grid = U_train_tensor.size(3)

# Fresh model together with its guide
model = RMDN(
    input_dim=grid,
    hidden_dim=128,
    LSTM_input=32,
    output_dim=2,
    LSTM_dim=32,
    K=60,
    use_cuda=False,
)
guide = model.guide

# Optimiser and SVI object
optimizer = ClippedAdam({"lr": 3e-4, "clip_norm": 5., "weight_decay": 5e-2})
elbo = TraceEnum_ELBO(num_particles=1, max_plate_nesting=2)
svi = SVI(model.model, guide, optimizer, elbo)

# Empty training history; any validation LL is better than -inf
best_ll = -np.inf
train_losses, train_ll, validation_ll = [], [], []

#### Preparation for continued training of a model

In [20]:
# Training data: the (X, U) tensors wrapped in a dataset and served in
# shuffled batches of size 1
dataset = RFNDataset(X_train_tensor, U_train_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=1, num_workers=0)

# Rebuild the architecture, then restore the previously saved weights into it
grid = U_train_tensor.size(3)
model = RMDN(
    input_dim=grid,
    hidden_dim=128,
    LSTM_input=32,
    output_dim=2,
    LSTM_dim=32,
    K=60,
    use_cuda=False,
)
saved_state = torch.load('models/RMDN_trained_60')
model.load_state_dict(saved_state)

# New optimiser and SVI object for the restored model
optimizer = ClippedAdam({"lr": 3e-4, "clip_norm": 5., "weight_decay": 5e-2})
elbo = TraceEnum_ELBO(num_particles=1, max_plate_nesting=2)
svi = SVI(model.model, model.guide, optimizer, elbo)

In [21]:
# Restore the loss / log-likelihood histories written by the save cell.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that were produced by this notebook.
with open("data/rmdn_losses_60.txt", "rb") as fp:
    train_losses = pickle.load(fp)

with open("data/rmdn_validation_60.txt", "rb") as fp:
    validation_ll = pickle.load(fp)

with open("data/rmdn_train_60.txt", "rb") as fp:
    train_ll = pickle.load(fp)

# Best validation LL reached so far. An empty stored history falls back to
# -inf (the same starting value as for a fresh model) instead of raising a
# ValueError from max().
best_ll = max(validation_ll, default=-np.inf)

#### Initial training run:

In [20]:
# Start from an empty Pyro parameter store
pyro.clear_param_store()
num_epochs = 5000

for i in range(num_epochs):
    # Log-likelihoods are only evaluated in the first epoch and in every 500th epoch
    evaluate_now = (i == 0) or (i % 500 == 499)

    for X_i, U_i in dataloader:
        # One SVI update in training mode; the loss is divided by N_t_train.sum()
        model.train()
        loss = svi.step(X_i, U_i, N_t_train) / (N_t_train.sum())
        train_losses.append(loss)

        if not evaluate_now:
            continue

        # Switch to evaluation mode for the LL computations
        model.eval()
        # Training LL on the current batch, without any initial context
        train_ll_i = model.get_loglikelihood(
            X=X_i, U=U_i, mask=N_t_train,
            U_init=None, X_init=None, mask_init=None,
        ).sum()
        # Validation LL, with the current training batch passed as initial context
        validation_ll_i = model.get_loglikelihood(
            X=X_val_tensor, U=U_val_tensor, mask=N_t_valid,
            U_init=U_i, X_init=X_i, mask_init=N_t_train,
        ).sum()
        train_ll.append(train_ll_i)
        validation_ll.append(validation_ll_i)

        print(f"Epoch: {i+1}, \tLoss: {loss:.3f}, \tTrain LL: {train_ll_i.numpy()}, \tValidation LL: {validation_ll_i.numpy()}")

        # Checkpoint the model whenever the validation LL improves on the best so far
        if validation_ll_i > best_ll:
            best_ll = validation_ll_i
            torch.save(model.state_dict(), 'models/RMDN_checkpoint_5')
                

Epoch: 1, 	Loss: 3.113, 	Train LL: -31889.55078125, 	Validation LL: -18100.109375
Epoch: 500, 	Loss: 2.492, 	Train LL: -24885.326171875, 	Validation LL: -14282.859375
Epoch: 1000, 	Loss: 2.415, 	Train LL: -23993.26171875, 	Validation LL: -13901.84765625
Epoch: 1500, 	Loss: 2.387, 	Train LL: -23701.0703125, 	Validation LL: -13814.2626953125
Epoch: 2000, 	Loss: 2.332, 	Train LL: -23011.166015625, 	Validation LL: -13506.44921875
Epoch: 2500, 	Loss: 2.310, 	Train LL: -22717.974609375, 	Validation LL: -13405.0166015625
Epoch: 3000, 	Loss: 2.266, 	Train LL: -22280.1875, 	Validation LL: -13245.5927734375
Epoch: 3500, 	Loss: 2.237, 	Train LL: -22022.07421875, 	Validation LL: -13180.9775390625
Epoch: 4000, 	Loss: 2.213, 	Train LL: -21732.205078125, 	Validation LL: -13168.970703125
Epoch: 4500, 	Loss: 2.203, 	Train LL: -21412.51953125, 	Validation LL: -13046.4833984375
Epoch: 5000, 	Loss: 2.162, 	Train LL: -21075.673828125, 	Validation LL: -13022.6162109375


#### Continued training run

In [None]:
# Clear Pyro's parameter store before resuming optimisation
pyro.clear_param_store()
num_epochs = 5000

for i in range(num_epochs):
    # Log-likelihoods are only evaluated in the first epoch and in every 500th epoch
    evaluate_now = (i == 0) or (i % 500 == 499)

    for X_i, U_i in dataloader:
        # One SVI update in training mode; the loss is divided by N_t_train.sum()
        model.train()
        loss = svi.step(X_i, U_i, N_t_train) / (N_t_train.sum())
        train_losses.append(loss)

        if not evaluate_now:
            continue

        # Switch to evaluation mode for the LL computations
        model.eval()
        # Training LL on the current batch, without any initial context
        train_ll_i = model.get_loglikelihood(
            X=X_i, U=U_i, mask=N_t_train,
            U_init=None, X_init=None, mask_init=None,
        ).sum()
        # Validation LL, with the current training batch passed as initial context
        validation_ll_i = model.get_loglikelihood(
            X=X_val_tensor, U=U_val_tensor, mask=N_t_valid,
            U_init=U_i, X_init=X_i, mask_init=N_t_train,
        ).sum()
        train_ll.append(train_ll_i)
        validation_ll.append(validation_ll_i)

        # The printed epoch number is shifted by the 5000 epochs of the initial run
        print(f"Epoch: {i+1+5000}, \tLoss: {loss:.3f}, \tTrain LL: {train_ll_i.numpy()}, \tValidation LL: {validation_ll_i.numpy()}")

        # Checkpoint the model whenever the validation LL improves on the best so far
        if validation_ll_i > best_ll:
            best_ll = validation_ll_i
            torch.save(model.state_dict(), 'models/RMDN_checkpoint_5')
                

Epoch: 5001, 	Loss: 2.181, 	Train LL: -21394.423828125, 	Validation LL: -13101.984375
Epoch: 5500, 	Loss: 2.135, 	Train LL: -20915.01953125, 	Validation LL: -13088.349609375
Epoch: 6000, 	Loss: 2.122, 	Train LL: -20556.646484375, 	Validation LL: -13043.0068359375
Epoch: 6500, 	Loss: 2.130, 	Train LL: -20360.349609375, 	Validation LL: -13112.0146484375
Epoch: 7000, 	Loss: 2.094, 	Train LL: -20118.6640625, 	Validation LL: -13160.8017578125
Epoch: 7500, 	Loss: 2.083, 	Train LL: -19981.654296875, 	Validation LL: -13187.884765625


In [21]:
# Persist the training history and the model weights.
# Nothing is written unless `save` is switched to True.
save = False
if save:
    # (file path, object) pairs, pickled one after another
    history_files = (
        ("data/rmdn_losses_60.txt", train_losses),
        ("data/rmdn_validation_60.txt", validation_ll),
        ("data/rmdn_train_60.txt", train_ll),
    )
    for history_path, history in history_files:
        with open(history_path, "wb") as fp:
            pickle.dump(history, fp)

    torch.save(model.state_dict(), 'models/RMDN_trained_60')

### Load trained model

Assuming that a model has been trained, it is here loaded for evaluation.

In [4]:
# Rebuild the architecture and restore the trained weights into it
grid = U_train_tensor.size(3)
model = RMDN(
    input_dim=grid,
    hidden_dim=128,
    LSTM_input=32,
    output_dim=2,
    LSTM_dim=32,
    K=60,
    use_cuda=False,
)
trained_state = torch.load('models/RMDN_trained_60')
model.load_state_dict(trained_state)
# Switch to evaluation mode (kept as the last expression so the cell displays the model)
model.eval()

RMDN(
  (FeatureExtractor): FeatureExtractor(
    (input_to_hidden): Linear(in_features=1024, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_output): Linear(in_features=128, out_features=32, bias=True)
    (elu): ELU(alpha=1.0)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (MDN): MDN(
    (input_to_hidden): Linear(in_features=32, out_features=128, bias=True)
    (hidden_to_hidden): Linear(in_features=128, out_features=128, bias=True)
    (hidden_to_loc): Linear(in_features=128, out_features=120, bias=True)
    (hidden_to_sigma): Linear(in_features=128, out_features=120, bias=True)
    (hidden_to_off_diag): Linear(in_features=128, out_features=60, bias=True)
    (hidden_to_mix): Linear(in_features=128, out_features=60, bias=True)
    (elu): ELU(alpha=1.0)
    (softmax): Softmax(dim=2)
    (softplus): Softplus(beta=1, threshold=20)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.5, in

### Evaluation

The trained RMDN will now be evaluated on the test set.

In [5]:
# Test-set log-likelihood; the concatenated train+validation data is passed
# as the initial context.
test_ll_terms = model.get_loglikelihood(
    X=X_test_tensor,
    U=U_test_tensor,
    mask=N_t_test,
    U_init=U_train_val,
    X_init=X_train_val,
    mask_init=N_train_val,
)
test_ll = test_ll_terms.sum()
print(f'RMDN log-likelihood on the test set: {test_ll.numpy()}')

RMDN log-likelihood on the test set: -10561.7744140625
