In [None]:
import pandas as pd

In [99]:
# Load the tab-separated table from data.txt.
# The separator is passed by keyword: recent pandas releases only accept the
# file path positionally, so the old positional form fails there.
data = pd.read_csv('data.txt', sep='\t')

In [100]:
# work on a copy so the raw table stays untouched
normalized_data = data.copy()

# min-max scale each coordinate column: (value - min) / (max - min)
for axis in ('X', 'Y'):
    column = data[axis]
    high = column.max()
    low = column.min()
    normalized_data[axis] = (column - low) / (high - low)

# timestamp / 40, truncated to an integer
scaled_timestamp = data['timestamp'] / 40
normalized_data['timestamp'] = scaled_timestamp.map(int)

In [101]:
# write the normalized table to disk as CSV
normalized_data.to_csv(path_or_buf='data_normalized.csv')

In [102]:
# function for generating a batch of samples
'''
    Input:
        data: the CSV
        num_data: size of batch
    Output:
        IDs: list of selected IDs
        input_sequence: batch with shape (num_data,input_length, 2)
        output_sequence: batch with shape (num_data, output_length, 2)
'''
from random import shuffle
def get_batch(data,num_data = 16,input_length = 8, output_length = 4):
    """Sample a random batch of (input, target) coordinate sequences.

    A sequence is the set of rows of ``data`` sharing one ``ID``, ordered by
    ``timestamp``. Only IDs with at least ``input_length + output_length``
    rows are eligible. For each selected ID the first ``input_length`` rows
    form the input and the following ``output_length`` rows form the target.

    Args:
        data: DataFrame with the columns ``ID``, ``timestamp``, ``X``, ``Y``.
        num_data: maximum number of sequences in the batch; fewer are
            returned when fewer IDs are eligible.
        input_length: number of leading steps used as input.
        output_length: number of following steps used as target.

    Returns:
        A tuple ``(ids, input_batch, target_batch)`` of numpy arrays with
        shapes ``(B,)``, ``(B, input_length, 2)`` and
        ``(B, output_length, 2)``, where ``B <= num_data``.

    Raises:
        ValueError: if no ID has enough rows (or ``num_data`` selects none).
    """
    # total number of steps a sequence must provide
    total_length = input_length + output_length

    # number of rows recorded for every ID
    id_counts = data.groupby('ID').ID.count()

    # Keep the actual ID labels of the sufficiently long sequences.
    # Sampling positions 0..N-1 instead would pick arbitrary IDs that may be
    # too short or not present at all.
    candidate_ids = id_counts[id_counts >= total_length].index.tolist()
    if not candidate_ids:
        raise ValueError(
            "no ID has at least %d rows (input_length + output_length)"
            % total_length
        )

    # random order, then take the first `num_data` of them
    shuffle(candidate_ids)
    selected_ids = candidate_ids[:num_data]

    input_batch = []
    target_batch = []
    for sequence_id in selected_ids:
        # select X,Y from data where ID == sequence_id order by timestamp
        coordinates = np.asarray(
            data[data.ID == sequence_id].sort_values(by="timestamp")[["X", "Y"]]
        )
        # split the leading `total_length` steps into input and target parts
        input_batch.append(coordinates[:input_length])
        target_batch.append(coordinates[input_length:total_length])

    # np.stack raises ValueError on an empty selection (e.g. num_data == 0)
    return np.array(selected_ids), np.stack(input_batch), np.stack(target_batch)
    

# sanity check: show the shape of each array returned by get_batch
[part.shape for part in get_batch(normalized_data)]
    

[(16,), (16, 8, 2), (16, 4, 2)]

In [104]:
'''
    Visualize the traces in a batch
    If batch size = B, sequence length = L...
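    Input:
        batch: batch of input sequences of arbitrary length, i.e. array of shape (B,L,2)
        target_batch: batch of target sequences, also an array of shape (_,_,2); drawn with dotted lines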
    Output:
        None, a graph will be drawn instead..
'''

import matplotlib.pyplot as plt
def visualize_trace(batch,target_batch):
    """Plot every trace of an input batch and of a target batch, then show the figure.

    Both arguments must be 3-dimensional arrays whose last axis has size 2
    (the X and Y coordinates). Traces of ``batch`` are drawn with the default
    line style, traces of ``target_batch`` with a dotted one. Each trace gets
    its own random RGB color; the colors of the two batches are drawn
    independently of each other.

    Args:
        batch: array of shape (B, L, 2).
        target_batch: array of shape (_, _, 2).

    Returns:
        None; the figure is displayed through ``plt.show()``.

    Raises:
        ValueError: if either array is not 3-dimensional or its last axis
            does not have size 2.
    """
    def _validate(array_shape):
        # every batch must look like (_, _, 2)
        if len(array_shape) != 3:
            raise ValueError("batch should be in 3 dimension")
        if array_shape[-1] != 2:
            raise ValueError("Last axis should be storing X,Y coordinates")

    def _plot_traces(traces, linestyle = None):
        for trace in traces:
            # one random color per trace
            trace_color = np.random.rand(3)
            # walk over every pair of adjacent points and connect them
            for start, end in zip(trace[:-1], trace[1:]):
                # plt.plot wants all x-coordinates in one argument and all
                # y-coordinates in the other
                xs = [start[0], end[0]]
                ys = [start[1], end[1]]
                plt.plot(xs, ys, linestyle = linestyle, c = trace_color)

    for candidate in (batch, target_batch):
        _validate(candidate.shape)

    _plot_traces(batch)
    _plot_traces(target_batch, ":")
    # finally show the graph
    plt.show()
    
# try the visualization on a batch of 16 traces:
# 16 input steps followed by 8 target steps each
_, input_batch, target_batch = get_batch(
    normalized_data, num_data = 16, input_length = 16, output_length = 8
)
visualize_trace(input_batch, target_batch)

<IPython.core.display.Javascript object>

In [None]:
# now lets define a vanilla LSTM model
'''
    According to the paper, there should be an RNN that takes a sequence and gives a sequence (like seq-to-seq),
    except that its outputs are hidden states, i.e. vectors of length 128.
    To interpret such a result, a dense layer with ReLU is added to condense each output to 5 numbers,
    namely mean_x, mean_y, sxx, syy, and sxy
    of the bivariate Gaussian describing the probability of the agent's position at that given timestamp.
    
    The negative log likelihood of the real coordinate under this estimated distribution will be the loss.
'''
# first, the loss function, in Keras backend
from keras.layers import *
import keras.backend as K
'''
    The function takes a series of params of bivariate normal distributions, and a batch of observed coordinates,
    and returns the negative log likelihood of them...
    
    probability (likelihood) of the observed point (x,y) given the 5 parameters (mx,my,sx,sy,sp):
        det(2 * pi * [[sx,sp],[sp,sy]]) ^(-0.5)
            * exp(-0.5 * ((x,y) - (mx,my)).T * [[sx,sp],[sp,sy]]^(-1) * ((x,y) - (mx,my)))
    
    after taking the log and adding a minus (* -1)...
        -( (-0.5 * log(4 * pi ^ 2 * (sx * sy - sp * sp))) + (-0.5 * (...)) )
    
    If batch size = B, sequence length = D...
    Input:
        Batch bivariate parameters (estimated): K.variable with shape (B,D,5),
        Batch of observed coordinates (label): K.variable with shape (B,D,2)
    
    Output:
        a scalar (K.variable with shape ()), which is the sum of negative log likelihood
'''

def negative_log_likelihood_loss(batch_bivariate_params, batch_observed_coordinates):
    # first check the dimension...
    input_shape = K.int_shape(batch_bivariate_params)
    target_shape = K.int_shape(batch_observed_coordinates)
    
    if len(input_shape) != 3 or len(target_shape) != 3:
        raise ValueError("Dimension of both tensors should be 3")
    
    if input_shape[0] != target_shape[0]:
        raise ValueError("Batch size of both tensors should be the same")
    
    if input_shape[1] != target_shape[1]:
        raise ValueError("Sequence length of both tensors should be the same")
    
    if input_shape[2] != 5:
        raise ValueError("Number of predicted parameters should be 5. Namely, (mx,my,sx,sy,sp)")
    
    if target_shape[2] != 2:
        raise ValueError("Dimension of target coordinates should be 2. Namely, (x,y)")
    
    # then split the tensors into (mx,my,sx,sy,sp)...
    # all of them should be of shape (B,D)
    batch_mx = batch_bivariate_params[:,:,0]
    batch_my = batch_bivariate_params[:,:,1]
    batch_sx = batch_bivariate_params[:,:,2]
    batch_sy = batch_bivariate_params[:,:,3]
    batch_sp = batch_bivariate_params[:,:,4]
    
    batch_x = batch_observed_coordinates[:,:,0]
    batch_y = batch_observed_coordinates[:,:,1]
    
    # now evaluate the determinant...
    det = Multiply([batch_sx,batch_sy]) - K.square(batch_sp) # (B,D), (sx * sy - sp^2)
    log_det = -0.5 * K.log(det * 4 * np.pi ** 2) # (B,D), (-0.5 * log(4 * pi ^ 2 * sx * sy - sp * sp))
    
    # and evaluate the exponent (since it is "logged", no K.exp exists here)
    dx = batch_x - batch_mx # (B,D), (x - mx)
    dy = batch_y - batch_my # (B,D), (y - my)
    