In [1]:
import os
import math
import random
import pickle

import yaml
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils import data
from torch.autograd import Variable
from torch.nn.utils import weight_norm
from torch.distributions.normal import Normal

from sklearn.cluster import KMeans
from matplotlib.patches import Rectangle

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Model Definitions

In [3]:
class MLP(nn.Module):
    """Fully connected feed-forward network.

    Builds a stack of ``nn.Linear`` layers with sizes
    ``input_dim -> hidden_size[0] -> ... -> hidden_size[-1] -> output_dim``.
    The chosen activation is applied after every layer except the last one.

    Args:
        input_dim: Size of the input features.
        output_dim: Size of the output features.
        hidden_size: Iterable with the width of each hidden layer.
        activation: ``'relu'`` or ``'sigmoid'``; non-linearity between layers.
        discrim: If True, a sigmoid is applied to the output of the last layer.
        dropout: Dropout probability applied after each hidden activation;
            ``-1`` disables dropout.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    def __init__(self, input_dim, output_dim, hidden_size=(1024, 512), activation='relu', discrim=False, dropout=-1):
        super(MLP, self).__init__()
        dims = [input_dim, *hidden_size, output_dim]
        self.layers = nn.ModuleList(
            [nn.Linear(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:])]
        )

        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        else:
            # Fail at construction time instead of with an AttributeError in forward().
            raise ValueError(
                "Unsupported activation {!r}; expected 'relu' or 'sigmoid'".format(activation)
            )

        self.sigmoid = nn.Sigmoid() if discrim else None
        self.dropout = dropout

    def forward(self, x):
        """Run ``x`` through every layer and return the output of the last one."""
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i != last:
                x = self.activation(x)
                if self.dropout != -1:
                    # The layer at index 1 uses a reduced rate, capped at 0.1.
                    p = min(0.1, self.dropout / 3) if i == 1 else self.dropout
                    # Functional dropout honours self.training, so it is a no-op
                    # after model.eval(). A freshly constructed nn.Dropout module
                    # is always in training mode and would keep dropping units.
                    x = F.dropout(x, p=p, training=self.training)
            elif self.sigmoid is not None:
                x = self.sigmoid(x)
        return x

In [4]:
class PECNet(nn.Module):
    """Endpoint-conditioned trajectory predictor built from MLP blocks.

    A CVAE-style pair (``encoder_latent`` / ``decoder``) generates a 2-D
    destination from the encoded past, and ``predictor`` produces the
    remaining ``future_length - 1`` future points from the encoded past and
    the encoded destination.
    """

    def __init__(self, 
                 enc_past_size, 
                 enc_dest_size, 
                 enc_latent_size, 
                 dec_size, 
                 predictor_size, 
                 fdim, 
                 zdim, 
                 sigma,
                 past_length, 
                 future_length, 
                 verbose):
        '''
        Args:
            size parameters: Hidden layer sizes of the corresponding MLPs
            fdim: Size of the past / destination feature vectors
            zdim: Size of the latent vector
            sigma: Standard deviation used for sampling N(0, sigma)
            past_length: Length of past history (number of timesteps)
            future_length: Length of future trajectory to be predicted
            verbose: If true, print the layer sizes of every sub-network
        '''
        super(PECNet, self).__init__()

        self.zdim = zdim
        self.sigma = sigma

        # takes in the past (flattened to past_length * 2 values per sample)
        self.encoder_past = MLP(input_dim = past_length*2, output_dim = fdim, hidden_size=enc_past_size)

        # takes in a 2-D destination
        self.encoder_dest = MLP(input_dim = 2, output_dim = fdim, hidden_size=enc_dest_size)

        # outputs mu and log-variance concatenated, hence 2*zdim
        self.encoder_latent = MLP(input_dim = 2*fdim, output_dim = 2*zdim, hidden_size=enc_latent_size)

        self.decoder = MLP(input_dim = fdim + zdim, output_dim = 2, hidden_size=dec_size)

        # the final point comes from the decoder, so only future_length-1 points are predicted here
        self.predictor = MLP(input_dim = 2*fdim, output_dim = 2*(future_length-1), hidden_size=predictor_size)

        architecture = lambda net: [l.in_features for l in net.layers] + [net.layers[-1].out_features]

        if verbose:
            print("Past Encoder architecture : {}".format(architecture(self.encoder_past)))
            print("Dest Encoder architecture : {}".format(architecture(self.encoder_dest)))
            print("Latent Encoder architecture : {}".format(architecture(self.encoder_latent)))
            print("Decoder architecture : {}".format(architecture(self.decoder)))
            print("Predictor architecture : {}".format(architecture(self.predictor)))

    def forward(self, x, dest = None, device=torch.device('cpu')):
        """Generate a destination (and, in training mode, the future points).

        Args:
            x: Flattened past trajectories, one row per sample.
            dest: Ground-truth destinations; must be given in training mode
                and must be omitted in eval mode.
            device: Retained for backward compatibility. The sampled noise is
                placed on the device (and cast to the dtype) of the encoded
                input, so this argument no longer has to match the model.

        Returns:
            In eval mode, ``generated_dest``. In training mode, the tuple
            ``(generated_dest, mu, logvar, pred_future)``.
        """
        # provide destination iff training
        assert self.training ^ (dest is None), \
            "dest must be provided in training mode and omitted in eval mode"

        # encode
        ftraj = self.encoder_past(x)

        if not self.training:
            # sample the latent from the prior N(0, sigma)
            z = torch.Tensor(x.size(0), self.zdim)
            z.normal_(0, self.sigma)

        else:
            # during training, use the destination to produce generated_dest and use it again to predict final future points

            # CVAE code
            dest_features = self.encoder_dest(dest)
            features = torch.cat((ftraj, dest_features), dim = 1)
            latent =  self.encoder_latent(features)

            mu = latent[:, 0:self.zdim] # 2-d array
            logvar = latent[:, self.zdim:] # 2-d array

            # exp(0.5 * logvar) is the standard deviation
            std = logvar.mul(0.5).exp_()
            eps = torch.DoubleTensor(std.size()).normal_()
            eps = eps.to(dtype=std.dtype, device=std.device)
            # reparameterisation: z = mu + std * eps
            z = eps.mul(std).add_(mu)

        # Follow the encoder output rather than a hard-coded double / caller-supplied
        # device, so the concatenation below cannot hit a dtype or device mismatch.
        z = z.to(dtype=ftraj.dtype, device=ftraj.device)
        decoder_input = torch.cat((ftraj, z), dim = 1)
        generated_dest = self.decoder(decoder_input)

        if self.training:
            generated_dest_features = self.encoder_dest(generated_dest)

            prediction_features = torch.cat((ftraj, generated_dest_features), dim = 1)

            pred_future = self.predictor(prediction_features)
            return generated_dest, mu, logvar, pred_future

        return generated_dest

    # separated from forward to let the caller choose the best destination
    def predict(self, past, generated_dest):
        """Predict the future points (all but the destination) for a chosen destination.

        Args:
            past: Flattened past trajectories, one row per sample.
            generated_dest: One 2-D destination per sample.

        Returns:
            The output of ``predictor``, ``2*(future_length-1)`` values per sample.
        """
        ftraj = self.encoder_past(past)
        generated_dest_features = self.encoder_dest(generated_dest)

        prediction_features = torch.cat((ftraj, generated_dest_features), dim = 1)

        interpolated_future = self.predictor(prediction_features)
        return interpolated_future

## Loss

In [5]:
def calculate_loss(x, reconstructed_x, mean, log_var, criterion, future, interpolated_future):
    """Compute the three loss terms.

    Squared errors are weighted 10:1 between the first and second coordinate
    of every point.

    Args:
        x: Target destinations, last dimension of size 2.
        reconstructed_x: Generated destinations, same shape as ``x``.
        mean: Latent means.
        log_var: Latent log-variances.
        criterion: Unused; kept so existing callers keep working.
        future: Target future points, flattened so the last dimension
            alternates first/second coordinate.
        interpolated_future: Predicted future points, same shape as ``future``.

    Returns:
        Tuple ``(RCL_dest, KLD, ADL_traj)``: weighted MSE on the destination,
        KL divergence term, and weighted MSE on the future points.
    """
    # Weights live on the inputs' device (no dependency on a global `device`)
    # and their length follows `future` instead of a hard-coded 11 timesteps.
    xy_weights = [10, 1]
    w1 = torch.tensor(xy_weights, device=x.device)
    w2 = torch.tensor(xy_weights * (future.shape[-1] // 2), device=future.device)

    # reconstruction loss on the destination
    RCL_dest = torch.mean(w1 * (x - reconstructed_x) ** 2)

    # weighted l2 loss on the remaining future points
    ADL_traj = torch.mean(w2 * (future - interpolated_future) ** 2)

    # kl divergence loss
    KLD = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return RCL_dest, KLD, ADL_traj

In [6]:
def load_hyper_parameters(file_name='optimal.yaml'):
    """Read hyper-parameters from a YAML file.

    Args:
        file_name: Path of the YAML file to read.

    Returns:
        The parsed YAML document.
    """
    with open(file_name, 'r') as file:
        # safe_load only builds plain Python objects. yaml.load without an explicit
        # Loader is deprecated and raises a TypeError on recent PyYAML releases.
        hyper_params = yaml.safe_load(file)

    return hyper_params

In [7]:
# Read the hyper-parameters from the default YAML file ('optimal.yaml').
hyper_params = load_hyper_parameters()
# Force a unit data scale.
# NOTE(review): the next code cell rebinds `hyper_params` to the dict stored in the
# checkpoint, so this override never reaches the evaluation loop — confirm intent.
hyper_params["data_scale"] = 1

  This is separate from the ipykernel package so we can avoid doing imports until


## Load Pretrained Model and Test

In [8]:
# Load the saved checkpoint, remapping its tensors onto `device`.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load('./trained.pt', map_location=device)
# Use the hyper-parameters saved with the checkpoint; this replaces the dict read from YAML earlier.
hyper_params = checkpoint["hyper_params"]

In [9]:
# Build the network from the checkpoint's hyper-parameters. Arguments are passed by
# keyword so each value is visibly tied to the constructor parameter it feeds.
model = PECNet(
    enc_past_size=hyper_params["enc_past_size"],
    enc_dest_size=hyper_params["enc_dest_size"],
    enc_latent_size=hyper_params["enc_latent_size"],
    dec_size=hyper_params["dec_size"],
    predictor_size=hyper_params["predictor_hidden_size"],
    fdim=hyper_params["fdim"],
    zdim=hyper_params["zdim"],
    sigma=hyper_params["sigma"],
    past_length=hyper_params["past_length"],
    future_length=hyper_params["future_length"],
    verbose=True,
)
# Cast the parameters to float64 and move them to the selected device before
# restoring the trained weights.
model = model.double().to(device)
model.load_state_dict(checkpoint["model_state_dict"])

Past Encoder architecture : [16, 512, 256, 16]
Dest Encoder architecture : [2, 8, 16, 16]
Latent Encoder architecture : [32, 8, 50, 32]
Decoder architecture : [32, 1024, 512, 1024, 2]
Predictor architecture : [32, 1024, 512, 256, 22]


<All keys matched successfully>

In [10]:
# Number of destination samples drawn per scene in the evaluation loop.
best_of_n = 20
# Switch to eval mode; PECNet.forward then samples the latent from the prior
# and must be called without a destination.
model.eval()
# NOTE(review): allow_pickle=True unpickles objects stored in the file; only load
# scene data from a trusted source.
scenes_data = np.load('../../../code/scene_data.npy', allow_pickle=True)

In [21]:
# NOTE(review): `dest` is only assigned inside the evaluation loop in a later cell,
# so this cell fails on a fresh Restart & Run All. Move it below that loop or remove it.
dest.shape

(11, 2)

In [13]:
# Best-of-N evaluation: for every scene, sample `best_of_n` destinations, keep the one
# closest to the ground truth per agent, predict the remaining future points for it,
# and report destination and overall (ADE) errors. The raw scene and all sampled
# destinations are collected in `optimizer_data`.
optimizer_data = []
with torch.no_grad():
    for scene in scenes_data:
        # Convert to float64 explicitly so the model (cast with .double()) accepts the
        # input whatever dtype the scene was stored in.
        trajx = torch.tensor(np.array(scene), dtype=torch.double)
        # Express every trajectory relative to its own first position, then rescale.
        traj = (trajx - trajx[:, :1, :]) * hyper_params["data_scale"]
        traj = traj.to(device)

        x = traj[:, :hyper_params['past_length'], :]
        y = traj[:, hyper_params['past_length']:, :]
        y = y.cpu().numpy()

        # flatten each past trajectory into a single row
        x = x.view(-1, x.shape[1]*x.shape[2])

        # ground-truth destination is the last future point
        dest = y[:, -1, :]
        all_l2_errors_dest = []
        all_guesses = []
        for _ in range(best_of_n):
            dest_recon = model(x, device=device)
            dest_recon = dest_recon.cpu().numpy()
            all_guesses.append(dest_recon)

            l2error_sample = np.linalg.norm(dest_recon - dest, axis = 1)
            all_l2_errors_dest.append(l2error_sample)

        all_l2_errors_dest = np.array(all_l2_errors_dest)
        all_guesses = np.array(all_guesses)

        inp = np.array(scene)
        optimizer_data.append([inp, all_guesses])
        # average error
        l2error_avg_dest = np.mean(all_l2_errors_dest)

        # choosing the best guess (per agent, across the samples)
        indices = np.argmin(all_l2_errors_dest, axis = 0)

        best_guess_dest = all_guesses[indices, np.arange(x.shape[0]), :]

        # taking the minimum error out of all guesses
        l2error_dest = np.mean(np.min(all_l2_errors_dest, axis = 0))

        best_guess_dest = torch.DoubleTensor(best_guess_dest).to(device)

        interpolated_future = model.predict(x, best_guess_dest)
        interpolated_future = interpolated_future.cpu().numpy()
        best_guess_dest = best_guess_dest.cpu().numpy()

        # final overall prediction: predicted points followed by the chosen destination
        predicted_future = np.concatenate((interpolated_future, best_guess_dest), axis = 1)
        predicted_future = np.reshape(predicted_future, (-1, hyper_params['future_length'], 2)) # making sure
        # ADE error
        l2error_overall = np.mean(np.linalg.norm(y - predicted_future, axis = 2))

        # undo the data scaling so errors are reported in the original units
        l2error_overall /= hyper_params["data_scale"]
        l2error_dest /= hyper_params["data_scale"]
        l2error_avg_dest /= hyper_params["data_scale"]

        print('Test time error in destination best: {:0.3f} and mean: {:0.3f}'.format(l2error_dest, l2error_avg_dest))
        print('Test time error overall (ADE) best: {:0.3f}'.format(l2error_overall))
        print('-'*50)

Test time error in destination best: 1.415 and mean: 20.876
Test time error overall (ADE) best: 2.035
--------------------------------------------------
Test time error in destination best: 1.813 and mean: 17.633
Test time error overall (ADE) best: 2.368
--------------------------------------------------
Test time error in destination best: 5.520 and mean: 26.111
Test time error overall (ADE) best: 2.739
--------------------------------------------------
Test time error in destination best: 1.937 and mean: 21.952
Test time error overall (ADE) best: 2.285
--------------------------------------------------
Test time error in destination best: 4.876 and mean: 26.626
Test time error overall (ADE) best: 2.576
--------------------------------------------------
Test time error in destination best: 2.561 and mean: 27.291
Test time error overall (ADE) best: 2.257
--------------------------------------------------
Test time error in destination best: 1.615 and mean: 24.655
Test time error overal

In [17]:
# Each entry pairs a raw scene with its sampled destinations, and the two arrays have
# different shapes (e.g. (11, 20, 2) and (20, 11, 2)), so the list is ragged. Build the
# object array explicitly: recent NumPy versions refuse to create one implicitly.
np.save('optimizer_data.npy', np.array(optimizer_data, dtype=object))

In [14]:
# Walk over the collected scenes; after the loop the names below refer to the LAST scene.
for scene in optimizer_data:
    # scene[0]: trajectories, num_vehicles x 20 x 2
    # scene[1]: goal predictions, num_goals x num_vehicles x 2 (here num_goals is 20)
    trajectories, goal_predictions = scene[0], scene[1]
    # To get the bounding boxes for each vehicle, simply find the min_x, min_y, max_x, max_y for each vehicle in the scene

In [15]:
# Shape of the trajectories left over from the last scene visited by the loop above.
trajectories.shape

(11, 20, 2)

In [16]:
# Shape of the goal predictions left over from the last scene visited by the loop above.
goal_predictions.shape

(20, 11, 2)