# Ladybug : predicting 2D trajectory with GRU

This notebook is heavily inspired by the excellent [FIDLE](https://fidle.cnrs.fr/) course from CNRS (in French).

In [None]:
from collections import defaultdict , Counter

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
import pytorch_model_summary as pms 

from torch.utils.data import TensorDataset, DataLoader

# Select the training device: a CUDA GPU when available, otherwise Apple's
# MPS backend, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")


## 1. data 

### 1.1 generating ladybug trajectory data

For this simple example, we generate some artificial trajectory data:

In [None]:
import random 
from math import cos, sin

def ladybug_init(s=122):
    """Draw the random coefficients that define the ladybug trajectory.

    Sixteen standard-normal values are drawn and stored as attributes of this
    function: the first eight in ``ladybug_init.params_x``, the next eight in
    ``ladybug_init.params_y``.

    s : seed passed to ``random.seed`` when strictly positive; when ``s <= 0``
        the random generator is left in its current state.
    """
    if s > 0:
        random.seed(s)

    def draw_eight():
        # eight independent N(0, 1) samples
        return [random.gauss(0., 1.) for _ in range(8)]

    ladybug_init.params_x = draw_eight()
    ladybug_init.params_y = draw_eight()
    
def ladybug_move(t):
    """Return the ladybug position ``(x, y)`` at time ``t``.

    Each coordinate is a sum of four sinusoids. The first four values of
    ``ladybug_init.params_x`` / ``params_y`` are the amplitudes; the last four
    are added to the constants 20, 10, 5 and 5 to give the factors that
    multiply ``t``. ``ladybug_init`` must have been called beforehand.
    """
    amp1, amp2, amp3, amp4, freq1, freq2, freq3, freq4 = ladybug_init.params_x
    x = amp1 * sin(t * (freq1 + 20))
    x += amp2 * cos(t * (freq2 + 10))
    x += amp3 * sin(t * (freq3 + 5))
    x += amp4 * cos(t * (freq4 + 5))

    amp1, amp2, amp3, amp4, freq1, freq2, freq3, freq4 = ladybug_init.params_y
    y = amp1 * cos(t * (freq1 + 20))
    y += amp2 * sin(t * (freq2 + 10))
    y += amp3 * cos(t * (freq3 + 5))
    y += amp4 * sin(t * (freq4 + 5))

    return x, y

In [None]:
# ---- Dataset settings
#
max_t        = 1000      # the trajectory is sampled for t in [0, max_t)
delta_t      = 0.003     # time step between two consecutive positions
features_len = 2         # (x, y)


sequence_len = 20        # number of past positions given to the model
predict_len  = 5

scale         = .2       # Fraction of the generated positions to keep (1=all)
train_prop    = .8       # Fraction of the kept positions used for train (the rest being for the valid)

# ---- Sample the trajectory
#
ladybug_init(s=16)
x, y = 0, 0
positions = []
timestamps = np.arange(0., max_t, delta_t)
for t in timestamps:
    x, y = ladybug_move(t)
    positions.append([x, y])

# ---- Keep the first `scale` fraction, then split it chronologically
#
n = int(scale * len(positions))
dataset = np.array(positions[:n])

k = int(train_prop * len(dataset))
x_train, x_valid = dataset[:k], dataset[k:]

# ---- Normalize both splits with the statistics of the train split only
#      (a single mean / std computed over all train values)
#
mean, std = x_train.mean(), x_train.std()
x_train = (x_train - mean) / std
x_valid = (x_valid - mean) / std

print("Dataset generated.")
print("Train shape is : ", x_train.shape)
print("Valid  shape is : ", x_valid.shape)

In [None]:
## plotting the first 500 points of the (normalized) train trajectory.
first_points = x_train[:500]
plt.plot( first_points[:,0] , first_points[:,1] )

The goal will be to predict the next point in the trajectory given the previous 20 points.

In [None]:
# Random window in the validation data: k1 input points followed by k2 points to predict.
k1, k2 = sequence_len, 2
i = random.randint(0, len(x_valid) - k1 - k2)
j = i + k1

# The 'input' curve is drawn over the whole window, then the 'objective'
# curve is drawn on top of its last k2 points.
window = x_valid[i:j + k2]
target = x_valid[j:j + k2]
plt.plot(window[:, 0], window[:, 1], label='input')
plt.plot(target[:, 0], target[:, 1], label='objective')
plt.legend()

### 1.2 prepare the sequences from dataset

In [None]:
# ---- Create sequences and labels for train and valid
#
# A sample is a window of `sequence_len` consecutive positions
# x[i : i+sequence_len]; its label is the position that immediately follows
# the window, i.e. x[i+sequence_len] (using i+sequence_len+1 would skip one
# point and train the model to predict two steps ahead).

# each train sequence is defined by its start point.
# we use a permutation to visit the start points in random order
all_i = np.random.permutation( len(x_train) - sequence_len )

xs_train = [ x_train[ i : i+sequence_len ] for i in all_i ]
ys_train = [ x_train[ i+sequence_len ]     for i in all_i ]

# validation sequences are kept in chronological order
valid_starts = range( len(x_valid) - sequence_len )
xs_valid = [ x_valid[ i : i+sequence_len ] for i in valid_starts ]
ys_valid = [ x_valid[ i+sequence_len ]     for i in valid_starts ]

# ---- Convert to pytorch dataset
#
# torch.Tensor stores float32 values, so the arrays are built directly in
# float32: going through float16 only rounds the positions.
train_dataset = TensorDataset(torch.Tensor( np.array(xs_train, dtype='float32') ) ,
                              torch.Tensor( np.array(ys_train, dtype='float32') ) )
valid_dataset = TensorDataset(torch.Tensor( np.array(xs_valid, dtype='float32') ) ,
                              torch.Tensor( np.array(ys_valid, dtype='float32') ) )


batch_size = 128
train_dataloader = DataLoader(train_dataset , batch_size = batch_size )
valid_dataloader = DataLoader(valid_dataset , batch_size = batch_size )

## exercise

### exercise 1 defining model

We want a model that takes a sequence of ladybug positions (dimension: (number of timepoints,2) )
and outputs the next position (dimension: 2 ).

 1. Create a model using a GRU and a set of linear layers to predict the next ladybug position
 2. Train your model to optimize a Mean Squared Error loss (`nn.MSELoss()`)
    

In [None]:
class ladybug_GRU(torch.nn.Module):
    # Exercise skeleton: a model that takes a sequence of ladybug positions
    # and outputs the next position. Both methods are to be completed.
    
    def __init__(self , input_dim = features_len, 
                         hidden_dim = 10 ,
                         num_layers = 1 ,
                         output_dim = 2 ):
        # input_dim  : presumably the number of features per time point (defaults to features_len)
        # hidden_dim : presumably the size of the GRU hidden state
        # num_layers : presumably the number of stacked GRU layers
        # output_dim : presumably the size of the predicted position
        super().__init__()
        # TODO (exercise): create the GRU and the linear layer(s) here
        ...

    def forward(self,x):
        # TODO (exercise): write the forward pass; the test cells below call
        # the model on a batch of sequences and compare the result to the targets

In [None]:
## use the following code to test your model:
## summary of the model for a dummy all-zero input of shape (3,10,2)
dummy_input = torch.zeros(3, 10, 2).to(device)
print(pms.summary(model, dummy_input, show_input=True))

In [None]:
## use the following code to test your model:
batch = valid_dataset[:5] ## let's go with a batch of 5 samples
x, y = ( t.to(device) for t in batch )

with torch.no_grad(): ## no gradient tracking: prevents accidental training + speeds up computation
    pred = model(x)

predicted, actual = pred, y
print(f'Predicted : \n"{predicted}"\nActual: \n"{actual}"')

In [None]:
## use the following code to test your model:
## Mean Squared Error between the predictions above and their targets
loss = nn.MSELoss()
loss(input=predicted, target=y)

In [None]:
# %load solutions/GRU_ladybug.py

In [None]:
# summary of the model for a dummy all-zero input of shape (3,10,2)
summary_text = pms.summary(model, torch.zeros(3,10,2).to(device), show_input=True)
print(summary_text)

In [None]:
x, y = valid_dataset[:5] ## let's go with a batch of 5 samples
x, y = x.to(device), y.to(device)

with torch.no_grad(): ## no gradient tracking: prevents accidental training + speeds up computation
    predicted = pred = model(x)

actual = y
print(f'Predicted : \n"{predicted}"\nActual: \n"{actual}"')

In [None]:
## usual helper functions:
def train(dataloader, model, loss_fn, optimizer, echo = True , echo_batch = False):
    """Train `model` for one epoch, i.e. one full pass over `dataloader`.

    dataloader : iterable of (X, y) batches exposing `.dataset` (its length is
                 the total number of samples shown in the per-batch display)
    model      : torch module, expected to already be on `device`
    loss_fn    : callable(pred, y) returning a scalar loss tensor
    optimizer  : torch optimizer updating the parameters of `model`
    echo       : print the mean train loss at the end of the epoch
    echo_batch : print the loss after each batch

    Returns the loss averaged over the batches of the epoch (float). This is
    the same quantity that `valid` returns, so train and valid curves are
    directly comparable.

    Raises ValueError when `dataloader` yields no batch.
    """
    size = len(dataloader.dataset) # total number of samples (not of batches)
    model.train() #     Sets the module in training mode.

    total_loss = 0.0
    num_batches = 0
    seen = 0 # samples processed so far (the last batch may be smaller)

    for X, y in dataloader: # for each batch
        X, y = X.to(device), y.to(device) # send the data to the GPU or whatever device you use for training

        # Compute prediction error
        pred = model(X)              # prediction for the model -> forward pass
        loss = loss_fn(pred, y)      # loss function from these prediction

        loss.backward()              # backward propagation

        optimizer.step()             # update the parameters
        optimizer.zero_grad()        # reset the gradients for the next batch

        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1
        seen += len(X)

        if echo_batch:
            print(f"Train loss: {batch_loss:>7f}  [{seen:>5d}/{size:>5d}]")

    if num_batches == 0:
        raise ValueError("train() received an empty dataloader")

    mean_loss = total_loss / num_batches

    if echo:
        print(f"Train loss: {mean_loss:>7f}")

    ## return the average loss / batch
    return mean_loss

def valid(dataloader, model, loss_fn, echo = True):
    """Evaluate `model` on every batch of `dataloader` without training it.

    dataloader : any iterable of (X, y) batches (a DataLoader, a list, ...)
    model      : torch module, expected to already be on `device`
    loss_fn    : callable(pred, y) returning a scalar loss tensor
    echo       : print the resulting validation loss

    Returns the loss averaged over the batches (float).

    Raises ValueError when `dataloader` yields no batch.
    """
    model.eval() #     Sets the module in evaluation mode

    valid_loss = 0.0
    num_batches = 0 # counted while iterating, so no len() is required from the loader
    with torch.no_grad(): ## disables tracking of gradient: prevent accidental training + speeds up computation
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            valid_loss += loss_fn(pred, y).item()  ## accumulating the loss function over the batches
            num_batches += 1

    if num_batches == 0:
        raise ValueError("valid() received an empty dataloader")

    valid_loss /= num_batches

    if echo:
        print(f"\tValid loss: {valid_loss:>8f}")
    ## return the average loss / batch
    return  valid_loss


### exercise 2 training code

In [None]:
# Build the network (positional arguments: features_len, 50, 2) and move it to the training device
model = ladybug_GRU( features_len , 50, 2 )
model = model.to(device)

# Mean Squared Error between predicted and true positions
loss = nn.MSELoss()

## these parameters worked well for me in my tests, but you may have to adapt the learning rate:
learning_rate = 10**-3
l2_penalty    = 10**-2
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = l2_penalty)


## containers to keep the scores across all epochs
train_scores, valid_scores = [], []

In [None]:
%%time

n_epochs = 10 ## depending on your model, you may need as little as 5 epochs, or as much as 100...

for epoch in range(n_epochs):
    # one pass over the train data, then one evaluation on the valid data;
    # both scores are stored to plot the learning curves afterwards
    epoch_train_score = train(train_dataloader, model, loss, optimizer,
                              echo = True, echo_batch = False)
    train_scores.append(epoch_train_score)

    epoch_valid_score = valid(valid_dataloader, model, loss, echo = True)
    valid_scores.append(epoch_valid_score)


In [None]:
# Loss recorded at each epoch for both splits.
plt.plot(train_scores,label='train')
plt.plot(valid_scores,label='valid')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()  # without this call the labels above are never displayed

## plotting a single prediction:

In [None]:
# One validation sample: x is the input sequence, y the position to predict.
x,y = valid_dataset[15]
model.eval()
with torch.no_grad():
    # the model lives on `device`: send the input (with a batch dimension) there,
    # then bring the prediction back to the CPU so it can be plotted
    y_pred = model( x.unsqueeze(0).to(device) ).squeeze().cpu()

# input sequence, then a segment from its last point to the true / predicted next point
plt.plot( x[:,0] , x[:,1] , label = 'input' )
plt.plot( [ x[-1,0], y[0]] , [x[-1,1], y[1]] , linestyle = '-' , label = 'target' )
plt.plot( [x[-1,0],y_pred[0]] , [x[-1,1], y_pred[1]] , linestyle = '--' , label = 'prediction' )
plt.legend()


## prediction of multiple time steps:

We are going to see how the model fares when we predict multiple time steps:

In [None]:
# number of successive positions to predict in the cells below
pred_len = 4

In [None]:
# Random validation window: sequence_len observed points (x) followed by
# the pred_len points that come right after them (y).
i = np.random.randint(0, len(x_valid) - sequence_len - pred_len)
split_at = i + sequence_len
x = x_valid[i:split_at]
y = x_valid[split_at:split_at + pred_len]

In [None]:
model.eval()
with torch.no_grad():
    # add the batch dimension and move the input to the device the model is on
    xt = torch.Tensor( x ).unsqueeze(0).to(device)
    # no .detach() needed: nothing is tracked inside torch.no_grad()
    y_pred = model( xt )

y_pred

In [None]:
# append the prediction to the input sequence along the time axis (dim 1)
next_step = y_pred.unsqueeze(0)
extended = torch.cat( ( xt, next_step ) , dim = 1 )
extended.shape

In [None]:
def predict_multiple_timepoints(model, x , pred_len ):
    """Predict `pred_len` future positions by feeding predictions back to the model.

    At each step the model is called on the whole sequence seen so far
    (observed points + previous predictions) and its output is appended to
    that sequence.

    model    : torch module mapping a (1, n_points, n_features) tensor to a
               (1, n_features) prediction
    x        : array-like of shape (n_points, n_features), the observed positions
    pred_len : number of future positions to predict

    Returns a numpy array of shape (pred_len, n_features) holding only the
    predicted positions, in chronological order.
    """
    xt = torch.Tensor( x ).unsqueeze(0)
    n_observed = xt.shape[1]

    # run on the device holding the model parameters (stay on CPU if it has none)
    first_param = next( model.parameters(), None )
    if first_param is not None:
        xt = xt.to( first_param.device )

    model.eval()
    with torch.no_grad():

        for _ in range( pred_len ) :
            y_pred = model( xt )
            xt = torch.cat( ( xt, y_pred.unsqueeze(0) ) , 1 )

    # keep only the appended points: everything after the observed ones
    return xt[0, n_observed:, :].cpu().numpy()

In [None]:
# multi-step prediction for the window sampled above
predict_multiple_timepoints( model = model, x = x, pred_len = pred_len )

In [None]:
# New random validation window: its first sequence_len points are the input x,
# the remaining pred_len points are the ground truth y.
i = np.random.randint( 0, len(x_valid) - sequence_len - pred_len )
window = x_valid[ i : i+sequence_len+pred_len ]
x, y = window[:sequence_len], window[sequence_len:]

# multi-step prediction from x
y_pred = predict_multiple_timepoints(model, x, pred_len)

In [None]:
## adding the last x point for plotting purpose
## (np.concatenate is used because np.concat only exists in NumPy >= 2.0)
last_point = x[[-1],:]
xy = np.concatenate( ( last_point , y ) )
xypred = np.concatenate( ( last_point , y_pred ) )

plt.plot( x[:,0] , x[:,1] , label = 'input' )
plt.plot( xy[:,0] , xy[:,1] , linestyle = '-' , label = 'target' )
plt.plot( xypred[:,0] , xypred[:,1] , linestyle = '--' , label = 'prediction' )
plt.legend()
