## Notebook for training a neural process to predict simulated trajectories

This notebook demonstrates how to sample user trajectories with varying user parameters from the POMDP gridworld, and how to train a neural process to predict those trajectories.

In [2]:
import numpy as np
import torch

# Prefer the GPU whenever torch can see one; otherwise everything runs on the CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

# Switch used by later cells: True trains and saves a model, False loads one.
training = True

print(device)

cuda


### Constructing datasets

The `Sampler` class is used to extract the simulated user trajectories. Below is an example data-processing pipeline that uses it.

In [4]:
def build_context_and_target(trajectories):
  '''
  Turn a set of trajectories into neural-process context and target tensors.

  One task is built per trajectory: its context holds every other trajectory
  in full, followed by the first half of the trajectory itself, and its
  target is the remaining second half. Every trajectory point is unpacked as
  a (state, action) pair.

  Args:
    trajectories: indexable collection of trajectories. The split index is
      derived from the length of the first trajectory only, so all
      trajectories are presumably expected to have the same length.

  Returns:
    Tuple (xc, yc, xt, yt) of float32 tensors stacked over the tasks and
    moved to the module-level `device`: context states, context actions,
    target states and target actions.
  '''
  split = len(trajectories[0]) // 2

  def as_float(rows):
    return torch.tensor(rows, dtype=torch.float32)

  ctx_states, ctx_actions, tgt_states, tgt_actions = [], [], [], []

  for held_out, current in enumerate(trajectories):
    # Context: all other trajectories in full, then the observed first half
    # of the current one (alternative: all but its last step, current[:-1]).
    observed = [other for k, other in enumerate(trajectories) if k != held_out]
    observed.append(current[:split])
    states, actions = zip(*(point for segment in observed for point in segment))

    # Target: the unobserved second half
    # (alternative: only the final step, [current[-1]]).
    future_states, future_actions = zip(*current[split:])

    ctx_states.append(as_float(states))
    ctx_actions.append(as_float(actions))
    tgt_states.append(as_float(future_states))
    tgt_actions.append(as_float(future_actions))

  # One leading task dimension per tensor, all on the shared device.
  return tuple(
    torch.stack(group).to(device)
    for group in (ctx_states, ctx_actions, tgt_states, tgt_actions)
  )

In [6]:
def get_batch(sampler, device = device):
  '''
  Sample one training batch for a single simulated user.

  User parameters are drawn once from the sampler, a random number of
  trajectories (5 to 9) is generated with them, and the trajectories are
  converted to context/target tensors.

  Args:
    sampler: object providing generate_user_parameters() and
      generate_user_trajectories(n, user_params).
    device: device the returned tensors are moved to.

  Returns:
    (xc, yc, xt, yt, user_params), where each tensor has its last two axes
    swapped relative to the output of build_context_and_target.
  '''
  user_params = sampler.generate_user_parameters()
  n_trajectories = np.random.randint(low = 5, high = 10)
  trajectories = sampler.generate_user_trajectories(n_trajectories, user_params)

  # Swap the last two axes of every tensor before handing the batch out.
  xc, yc, xt, yt = (
    tensor.permute(0, 2, 1).to(device)
    for tensor in build_context_and_target(trajectories)
  )

  return xc, yc, xt, yt, user_params

The sampler can be used to define the parameters of GridWorld, which is then passed to
the data processing pipeline.

In [19]:
from src.utils.sampler import Sampler

# Grid and simulation settings passed to the Sampler.
GRID_SIZE = 10
AGENT_VIEW_SIZE = 3
TRAJ_LENGTH = 10

sampler = Sampler(grid_size = GRID_SIZE, agent_view_size = AGENT_VIEW_SIZE, traj_length = TRAJ_LENGTH, fixed_goal = True)
# One batch: user parameters are drawn once and shared by all trajectories in it.
xc, yc, xt, yt, user_params = get_batch(sampler)

print(user_params["goal_position"])

# Sanity-check the layout and placement of the tensors returned by get_batch.
print(f"Shape of xc: {xc.shape}, yc: {yc.shape}, xt: {xt.shape}, yt: {yt.shape}")
print(f"Device of xc: {xc.device}, yc: {yc.device}, xt: {xt.device}, yt: {yt.device}")

print(f"First sequence context:\n{xc[0]}")
# NOTE(review): the goal position was already printed above; this repeats it with a label.
print(f"Goal position: {user_params['goal_position']}")

(4, 1)
Shape of xc: torch.Size([8, 2, 75]), yc: torch.Size([8, 5, 75]), xt: torch.Size([8, 2, 5]), yt: torch.Size([8, 5, 5])
Device of xc: cuda:0, yc: cuda:0, xt: cuda:0, yt: cuda:0
First sequence context:
tensor([[ 8.,  7.,  6.,  5.,  4.,  4.,  4.,  4.,  4.,  4.,  7.,  6.,  5.,  4.,
          4.,  4.,  4.,  4.,  4.,  4.,  9.,  8.,  7.,  6.,  5.,  4.,  4.,  4.,
          4.,  4.,  4.,  5.,  4.,  4.,  4.,  3.,  4.,  4.,  4.,  4.,  4.,  4.,
          4.,  4.,  5.,  4.,  4.,  4.,  4.,  4.,  6.,  5.,  4.,  4.,  4.,  4.,
          4.,  4.,  4.,  4.,  1.,  2.,  3.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,
         10.,  9.,  9.,  8.,  7.],
        [ 9.,  9.,  9.,  9.,  9.,  8.,  7.,  6.,  5.,  4.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  7.,  7.,  7.,  7.,  7.,  7.,  6.,  5.,
          4.,  3.,  9.,  9.,  9.,  8.,  7.,  7.,  7.,  6.,  5.,  4.,  5.,  4.,
          3.,  2.,  2.,  2.,  1.,  1.,  1.,  1.,  7.,  7.,  7.,  6.,  5.,  6.,
          5.,  4.,  3.,  2.,  3.,  3.,  3.,  3.,

### Qualitative trajectory evaluation

The following functions help measure the quality of the predicted trajectory sequences.

Given the one-hot encoded actions, the trajectories are reconstructed starting either from (0,0) or from the last coordinates
of the context set. The predicted and true trajectories are then compared by their Manhattan distance at each time step
(the number of time steps is fixed in the simulation).

In [29]:
from minigrid.core.constants import DIR_TO_VEC
# Append a zero displacement so that the last action index means "stay in place" (stationary).
DIR_TO_VEC =  DIR_TO_VEC + [(0, 0)]

def construct_trajectory(actions, start_pos = None):
    """
    Roll a batch of action-index sequences out into positions.

    Every action index selects a row of DIR_TO_VEC; that displacement is
    added to the previous position to obtain the position after the step.

    Args:
        actions: 2-D tensor (batch, seq_length) of indices into DIR_TO_VEC
            (used for tensor indexing, so expected to be an integer tensor).
        start_pos: optional position the first displacement is added to;
            when omitted every sequence starts at the origin.

    Returns:
        Tensor of shape (batch, 2, seq_length) on actions.device holding the
        position reached after each step (the start itself is not included).
    """
    displacements = torch.tensor(DIR_TO_VEC, dtype = torch.float32, device = actions.device)

    n_sequences, n_steps = actions.shape
    path = torch.zeros(n_sequences, 2, n_steps, device = actions.device)

    current = start_pos if start_pos is not None else torch.zeros(n_sequences, 2, device = actions.device)

    for step in range(n_steps):
        path[:, :, step] = current + displacements[actions[:, step]]
        # Continue from the value as stored in the output buffer.
        current = path[:, :, step]

    return path
    
def construct_and_calc_l1_dist(yt, predictions, xc=None):
    '''
    Per-step Manhattan distance between the true and the predicted trajectory.

    Both action sequences are rolled out with construct_trajectory from the
    same starting point and compared position by position.

    Args:
        yt: true action indices, as accepted by construct_trajectory.
        predictions: predicted action indices, same layout as yt.
        xc: optional context inputs; when given, xc[:, :, -1] (the last
            context point) is the starting position, otherwise the origin.

    Returns:
        Tensor with the absolute coordinate differences summed over dim 1
        (the coordinate axis), i.e. one distance per sequence and time step.
    '''
    start_pos = None if xc is None else xc[:, :, -1]

    # Roll out the ground truth first, then the prediction, from the same start.
    true_path, pred_path = (
        construct_trajectory(sequence, start_pos) for sequence in (yt, predictions)
    )

    return (pred_path - true_path).abs().sum(dim = 1)

# Test
# Smoke test of the distance metric with a freshly constructed (untrained) model.
xc, yc, xt, yt, _ = get_batch(sampler)

import neuralprocesses.torch as nps
import torch.nn.functional as F

agnp = nps.construct_agnp(dim_x = 2, dim_y = 5, likelihood = "het").to(device)
# The predictive mean is used as per-action scores; dim -2 is the axis of size 5
# in yt (the batch printed earlier has yt of shape [8, 5, 5]).
dist = agnp(xc, yc, xt).mean
preds = F.softmax(dist, dim=-2)
# Collapse the one-hot targets and the action scores to action indices.
true_actions = yt.argmax(-2)
predictions = preds.argmax(-2)
print(true_actions.shape, predictions.shape)
# No xc is passed here, so both trajectories are rolled out from the origin.
distance = construct_and_calc_l1_dist(true_actions, predictions)


print(f"Predictions: {predictions}")
print(f"True actions: {true_actions}")
print(f"Manhattan distance: {distance}")

torch.Size([8, 5]) torch.Size([8, 5])
Predictions: tensor([[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]], device='cuda:0')
True actions: tensor([[0, 2, 2, 2, 2],
        [1, 1, 1, 4, 4],
        [1, 1, 1, 3, 3],
        [1, 0, 1, 2, 1],
        [3, 4, 4, 4, 4],
        [1, 1, 2, 2, 2],
        [2, 2, 2, 4, 4],
        [2, 2, 4, 4, 4]], device='cuda:0')
Manhattan distance: tensor([[2., 2., 2., 2., 2.],
        [2., 4., 6., 7., 8.],
        [2., 4., 6., 6., 6.],
        [2., 4., 6., 6., 8.],
        [2., 3., 4., 5., 6.],
        [2., 4., 4., 4., 4.],
        [0., 0., 0., 1., 2.],
        [0., 0., 1., 2., 3.]], device='cuda:0')


# Attempt to calculate the KL divergence

This does not currently seem to work (the result below is all zeros). The implementation is based on the neuralprocesses library code.

In [32]:
from neuralprocesses.coding import code,code_track, recode_stochastic
from neuralprocesses.model.util import compress_contexts
from neuralprocesses import _dispatch
from neuralprocesses.parallel import Parallel
from neuralprocesses.dist import AbstractDistribution

@_dispatch
def _kl(q: AbstractDistribution, p: AbstractDistribution):
    """KL divergence between two single distributions, delegated to `q.kl(p)`."""
    divergence = q.kl(p)
    return divergence


@_dispatch
def _kl(q: Parallel, p: Parallel):
    """Sum of the pairwise KL divergences of two parallel collections, paired by position."""
    total = 0
    for qi, pi in zip(q, p):
        total = total + _kl(qi, pi)
    return total


def calc_kl_divergence(model, xc, yc, xt, yt, dtype_lik = None):
    """
    Compute `_kl(qz, pz)` for the encoder of `model`.

    `pz` comes from running `code_track` on the context set only; `qz` comes
    from `recode_stochastic` on the context and target points concatenated
    along the last axis, reusing the history returned by `code_track`.

    NOTE(review): for the AGNP built in this notebook the recorded result is
    all zeros; presumably the model has no stochastic latent path, so there
    is nothing to compare - confirm against the neuralprocesses library.

    Args:
        model: model exposing an `encoder` attribute.
        xc, yc: context inputs and outputs.
        xt, yt: target inputs and outputs.
        dtype_lik: dtype forwarded to `recode_stochastic`; any falsy value
            (including the default None) is replaced by torch.float32.

    Returns:
        Whatever `_kl` returns for the two encodings.
    """
    dtype_lik = dtype_lik or torch.float32

    # Context and target points joined along the last axis.
    joint_x = torch.cat([xc, xt], dim = -1)
    joint_y = torch.cat([yc, yt], dim = -1)

    # First pass on the context only; the first returned value is not needed here.
    _, pz, history = code_track(model.encoder, xc, yc, xt, root=True)

    # Second pass on context + targets, replaying the tracked history.
    qz = recode_stochastic(model.encoder, pz, joint_x, joint_y, history, root=True, dtype_lik = dtype_lik)

    return _kl(qz, pz)

# Evaluate the KL helper on the batch and model created in the previous cell.
kl = calc_kl_divergence(agnp, xc, yc, xt, yt)
print(kl)

tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')


## Training loop

The training loop maximizes the log-likelihood of the categorical distribution over the 5 actions.

In [34]:
import neuralprocesses.torch as nps
import torch
import torch.nn.functional as F

# Same grid and simulation settings as in the sampler demo above.
GRID_SIZE = 10
AGENT_VIEW_SIZE = 3
TRAJ_LENGTH = 10

# Fresh model for training; dim_y = 5 matches the five one-hot encoded actions.
agnp = nps.construct_agnp(dim_x=2, dim_y=5, likelihood="het").to(device)
#gnp = nps.construct_gnp(dim_x = 2, dim_y = 5, likelihood="het").to(device)
opt = torch.optim.Adam(agnp.parameters(), lr=1e-5)
sampler = Sampler(grid_size = GRID_SIZE, agent_view_size = AGENT_VIEW_SIZE, traj_length = TRAJ_LENGTH, fixed_goal = True)

if training:
    for i in range(20000):
        # Every iteration trains on a freshly sampled batch.
        xc, yc, xt, yt, user_params = get_batch(sampler)

        #Normalization seems to be important!
        dist = agnp(xc, yc, xt, normalize = True)
          
        # The predictive mean is treated as unnormalised action scores (logits).
        logits = dist.mean  
        log_probs = F.log_softmax(logits, dim=-2)  # Apply log softmax to get log-probabilities
        
        # Cross-entropy against the one-hot targets, averaged over all predictions.
        nll = -(log_probs * yt).sum(dim=-2)  # Sum over the class dimension
        loss = nll.mean()
    
        #Calculate the KL divergence by sampling????
        # NOTE(review): `kl` is computed here but never added to `loss` or reported.
        kl = calc_kl_divergence(agnp, xc, yc, xt, yt)
        
        opt.zero_grad(set_to_none=True)
        loss.backward()
        opt.step()
        
        if i % 100 == 0:
            # Progress report every 100 iterations, measured on the current training batch.
            predicted = F.softmax(dist.mean, dim = -2).argmax(dim=-2)
            targets = yt.argmax(dim=-2)
            accuracy = (predicted == targets).float().mean()
            # Trajectories are rolled out from the last context point (xc is passed).
            traj_distances = construct_and_calc_l1_dist(targets, predicted, xc)
            mean_distance = traj_distances.sum(dim = -1).mean()
            print(f"Iteration {i}, Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f} mean distance difference: {mean_distance.item():.4f}, Final distance difference: {traj_distances[:, -1].mean().item():.4f}")


Iteration 0, Loss: 1.6102, Accuracy: 0.2857 mean distance difference: 15.8571, Final distance difference: 5.8571
Iteration 100, Loss: 1.6093, Accuracy: 0.0571 mean distance difference: 25.0000, Final distance difference: 7.7143
Iteration 200, Loss: 1.6094, Accuracy: 0.0444 mean distance difference: 20.3333, Final distance difference: 6.0000
Iteration 300, Loss: 1.6153, Accuracy: 0.2286 mean distance difference: 9.7143, Final distance difference: 3.2857
Iteration 400, Loss: 1.6022, Accuracy: 0.3714 mean distance difference: 6.7143, Final distance difference: 1.7143
Iteration 500, Loss: 1.6081, Accuracy: 0.2667 mean distance difference: 11.1111, Final distance difference: 3.4444
Iteration 600, Loss: 1.5895, Accuracy: 0.3714 mean distance difference: 8.7143, Final distance difference: 2.5714
Iteration 700, Loss: 1.5692, Accuracy: 0.4500 mean distance difference: 9.2500, Final distance difference: 2.7500
Iteration 800, Loss: 1.5353, Accuracy: 0.6500 mean distance difference: 6.3750, Final 

In [None]:
import os

if training:
  # Store the weights under <current working directory>/models/agnp.pth.
  PATH = os.path.abspath(os.getcwd())
  model_dir = os.path.join(PATH, "models")
  # torch.save does not create missing parent folders, so make sure it exists.
  os.makedirs(model_dir, exist_ok = True)
  torch.save(agnp.state_dict(), os.path.join(model_dir, "agnp.pth"))

In [None]:
if not training:
  agnp = nps.construct_agnp(dim_x = 2, dim_y = 5, likelihood = "het").to(device)
  # The save cell writes "models/agnp.pth" below the working directory, so load
  # that file; map_location lets a checkpoint saved on the GPU load on a
  # CPU-only machine as well.
  agnp.load_state_dict(torch.load('models/agnp.pth', map_location = device))

# Evaluation mode, on the CPU, for the prediction cells that follow.
agnp.eval().to("cpu")

Model(
    Chain(
        Chain(
            SqueezeParallel(),
            AssertNoParallel(),
        ),
        Copy(),
        Parallel(
            Chain(
                RepeatForAggregateInputs(
                  (coder): InputsCoder()
                ),
                DeterministicLikelihood(),
            ),
            Parallel(
                Chain(
                    RepeatForAggregateInputs(
                      (coder): Attention(
                        (encoder_x): MLP(
                          (net): Sequential(
                            (0): Linear(in_features=2, out_features=256, bias=True)
                            (1): ReLU()
                            (2): Linear(in_features=256, out_features=256, bias=True)
                            (3): ReLU()
                            (4): Linear(in_features=256, out_features=256, bias=True)
                            (5): ReLU()
                            (6): Linear(in_features=256, out_features=256, bias=True

### Make predictions on new trajectories, and compare

In [1]:
agnp.eval().to("cpu")
# get_batch takes a Sampler (it accepts no grid/trajectory keyword arguments),
# so the evaluation settings go into a dedicated sampler.
# NOTE(review): fixed_goal = True mirrors the training sampler - confirm this is intended.
eval_sampler = Sampler(grid_size = 10, agent_view_size = 3, traj_length = 7, fixed_goal = True)
# The model was just moved to the CPU, so request the batch there as well.
xc, yc, xt, yt, _ = get_batch(eval_sampler, device = "cpu")
dist = agnp(xc, yc, xt)
# Most likely action per target point (dim -2 is the action axis).
preds = F.softmax(dist.mean, dim=-2).argmax(dim=-2)

NameError: name 'agnp' is not defined

In [90]:
# Show the predicted action probabilities (softmax over dim -2) next to the one-hot targets.
F.softmax(dist.mean, dim=-2), yt

(tensor([[[0.0188],
          [0.0234],
          [0.1705],
          [0.0986],
          [0.6886]],
 
         [[0.0190],
          [0.0236],
          [0.1710],
          [0.0991],
          [0.6872]],
 
         [[0.0196],
          [0.0243],
          [0.1725],
          [0.1004],
          [0.6833]],
 
         [[0.0188],
          [0.0234],
          [0.1705],
          [0.0986],
          [0.6886]],
 
         [[0.0188],
          [0.0234],
          [0.1705],
          [0.0986],
          [0.6886]]], grad_fn=<SoftmaxBackward0>),
 tensor([[[0.],
          [0.],
          [0.],
          [0.],
          [1.]],
 
         [[0.],
          [0.],
          [0.],
          [1.],
          [0.]],
 
         [[0.],
          [0.],
          [0.],
          [1.],
          [0.]],
 
         [[0.],
          [0.],
          [0.],
          [0.],
          [1.]],
 
         [[0.],
          [0.],
          [0.],
          [0.],
          [1.]]]))

In [97]:
# NOTE(review): `mean` is not defined in any visible cell (the recorded output of this
# cell is a NameError); presumably it once held `dist.mean` - confirm before re-running.
mean[0].argmax(), yt[0]

NameError: name 'mean' is not defined

## The following is a copy from the relational_neural_process GitHub repository

Model

In [None]:
import numpy as np
import numpy.random as npr
import torch
import torch.nn as nn
import torch.optim as optim
import collections
import matplotlib.pyplot as plt
import datetime
import torch.nn.functional as F

class CNPDeterministicEncoder(nn.Module):
    """
    MLP encoder that summarises a context set as one vector per batch element.

    `sizes` lists the layer widths: sizes[0] is the width of the concatenated
    (x, y) context features and sizes[-1] the representation size. ReLU is
    applied after every layer except the last.
    """

    def __init__(self, sizes):
        super().__init__()
        self.linears = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]
        )

    def forward(self, context_x, context_y):
        """
        Encode the context set as one representation vector.

        Args:
            context_x: batch_size x set_size x feature_dim_x
            context_y: batch_size x set_size x feature_dim_y

        Returns:
            representation: batch_size x representation_size, the sum of the
            per-point encodings over the set dimension.
        """
        pairs = torch.cat((context_x, context_y), dim = -1)
        batch_size, set_size, _ = pairs.shape

        # Run all context points of all batch elements through the MLP at once.
        hidden = pairs.view(batch_size * set_size, -1)
        for layer in self.linears[:-1]:
            hidden = torch.relu(layer(hidden))
        encoded = self.linears[-1](hidden).view(batch_size, set_size, -1)

        # Summing over the set makes the result independent of the point order.
        return encoded.sum(dim=1)
            
class CNPDeterministicDecoder(nn.Module):
    """
    MLP decoder that predicts a categorical distribution for every target input.

    `sizes` lists the layer widths: sizes[0] must equal the width of the
    decoder input (representation size plus target feature size, or only the
    target feature size when no representation is used) and sizes[-1] is the
    number of classes. ReLU is applied after every layer except the last.
    """

    def __init__(self, sizes):
        super().__init__()
        self.linears = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]
        )

    def forward(self, representation, target_x):
        """
        Return the predictive categorical distribution at every target input.

        Args:
            representation: batch_size x representation_size, or None to
                decode from the target inputs alone.
            target_x: batch_size x set_size x d

        Returns:
            dist: torch Categorical distribution with batch shape
                batch_size x set_size.
            probs: batch_size x set_size x n_classes class probabilities.
            logits: batch_size x set_size x n_classes unnormalised scores.
        """
        batch_size, set_size, _ = target_x.shape

        if representation is None:
            decoder_input = target_x
        else:
            # Share the same representation with every target point of a batch element.
            tiled = representation.unsqueeze(1).expand(-1, set_size, -1)
            decoder_input = torch.cat((tiled, target_x), dim=-1)

        # Process all target points at once; reshape (unlike view) also accepts
        # a non-contiguous target_x.
        x = decoder_input.reshape(batch_size * set_size, -1)
        for linear in self.linears[:-1]:
            x = torch.relu(linear(x))
        logits = self.linears[-1](x).view(batch_size, set_size, -1)
        probs = F.softmax(logits, dim=-1)

        # Build the distribution from the logits: going through `probs` would
        # clamp very small probabilities and cap log_prob at log(eps).
        dist = torch.distributions.categorical.Categorical(logits=logits)
        return dist, probs, logits

class CNPDeterministicModel(nn.Module):
    """
    Conditional neural process built from a deterministic encoder and decoder.

    Args:
        encoder_size: layer widths passed to CNPDeterministicEncoder.
        decoder_size: layer widths passed to CNPDeterministicDecoder.
    """

    def __init__(self, encoder_size, decoder_size):
        super().__init__()
        self._encoder = CNPDeterministicEncoder(encoder_size)
        self._decoder = CNPDeterministicDecoder(decoder_size)

    def forward(self, query, target_y = None):
        """
        Predict class probabilities for the target inputs of a query.

        Args:
            query: nested pair ((context_x, context_y), target_x).
            target_y: optional targets, one-hot encoded along the last axis;
                when given, their log-probability is evaluated.

        Returns:
            (log_p, probs, logits), where log_p is None if target_y is None.
        """
        context, target_x = query
        context_x, context_y = context

        summary = self._encoder(context_x, context_y)
        dist, probs, logits = self._decoder(summary, target_x)

        if target_y is None:
            return None, probs, logits

        # Turn the one-hot targets back into class indices for log_prob.
        labels = torch.argmax(target_y, dim = -1)
        return dist.log_prob(labels), probs, logits

In [None]:
import neuralprocesses.torch as nps

# Feature sizes of the copied CNP example.
d_x = 2
d_in = 6
representation_size = 258
d_out = 4
hidden_size = 128

# Both networks use three hidden layers of width hidden_size. The decoder input
# is the representation concatenated with a target input of width d_x.
encoder_sizes = [d_in] + [hidden_size] * 3 + [representation_size]
decoder_sizes = [representation_size + d_x] + [hidden_size] * 3 + [d_out]

model = CNPDeterministicModel(encoder_size=encoder_sizes, decoder_size=decoder_sizes)
model.to(device)

CNPDeterministicModel(
  (_encoder): CNPDeterministicEncoder(
    (linears): ModuleList(
      (0): Linear(in_features=6, out_features=128, bias=True)
      (1-2): 2 x Linear(in_features=128, out_features=128, bias=True)
      (3): Linear(in_features=128, out_features=258, bias=True)
    )
  )
  (_decoder): CNPDeterministicDecoder(
    (linears): ModuleList(
      (0): Linear(in_features=260, out_features=128, bias=True)
      (1-2): 2 x Linear(in_features=128, out_features=128, bias=True)
      (3): Linear(in_features=128, out_features=4, bias=True)
    )
  )
)