In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
!git clone https://github.com/rodrigorivera/mds20_deepfolio

from mds20_deepfolio.models.NeuralHawkesProcess.DataWrapper import prepare_datasets, collate_fn
from mds20_deepfolio.models.NeuralHawkesProcess.model import NHPModel
from mds20_deepfolio.models.NeuralHawkesProcess.train import train

!unzip //content/mds20_deepfolio/datasets/data/ETH.zip \
      -d //content/mds20_deepfolio/datasets/data/

Cloning into 'mds20_deepfolio'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (77/77), done.[K
remote: Total 1065 (delta 42), reused 0 (delta 0), pack-reused 988[K
Receiving objects: 100% (1065/1065), 112.66 MiB | 23.40 MiB/s, done.
Resolving deltas: 100% (649/649), done.
Archive:  //content/mds20_deepfolio/datasets/data/ETH.zip
  inflating: //content/mds20_deepfolio/datasets/data/ETH.npy  


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
DO_TRAIN = False

if DO_TRAIN:
    train_dsets, val_dsets, test_dsets = prepare_datasets('/content/mds20_deepfolio/datasets/data/')

    train_dataset = torch.utils.data.ConcatDataset([dset for dset in train_dsets.values()]) 
    val_dataset = torch.utils.data.ConcatDataset([dset for dset in val_dsets.values()]) 
      
    train_loader = DataLoader(train_dataset, batch_size=32, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=32, collate_fn=collate_fn)

    print(len(train_dataset), len(val_dataset))

In [None]:
model = NHPModel(256, device=device).to(device)

if DO_TRAIN:

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, threshold=0.001, patience=10, verbose=True)

    statiscs = train(model, optimizer, train_loader, val_loader, device,scheduler=scheduler, 
                    n_epochs = 50, verbose_epoch=10, scale=0.001, save_path='/content/model.pth')

else:
    model.load_state_dict(torch.load('/content/mds20_deepfolio/models/NeuralHawkesProcess/weights/NHP,all,256.pth'))

Epoch: 0
Log-Likelihood:: train: -12.435624222252049 , val: -8.78149696429415
Time MSE:: train: 1475.8141149902344 , val: 1892.9773763020833
Event CE:: train: 0.648451828956604 , val: 0.624378071890937
Event pred accuracy:: train: 0.6176864692982456 , val: 0.6983620370370371
time: 238.26966428756714
------------------------------------------------------------
Epoch: 10
Log-Likelihood:: train: -8.583953379369097 , val: -8.805317628000118
Time MSE:: train: 910.7218176269531 , val: 1207.1437276204426
Event CE:: train: 0.6023679351806641 , val: 0.6134296655654907
Event pred accuracy:: train: 0.7111568859649121 , val: 0.6983620370370371
time: 2602.355491876602
------------------------------------------------------------
Epoch: 20
Log-Likelihood:: train: -8.576123357546637 , val: -8.797698699730997
Time MSE:: train: 870.18255859375 , val: 1121.2044338650173
Event CE:: train: 0.6006509304046631 , val: 0.6112796266873678
Event pred accuracy:: train: 0.7111568859649121 , val: 0.6983620370370371

# Evaluate model on test datasets

In [5]:
from sklearn.metrics import accuracy_score
model = NHPModel(256, device=device).to(device)
model.load_state_dict(torch.load('/content/mds20_deepfolio/models/NeuralHawkesProcess/weights/NHP,all,256.pth'))
train_dsets, val_dsets, test_dsets = prepare_datasets('/content/mds20_deepfolio/datasets/data/')

In [14]:
stats = {}

for name, test_dset in test_dsets.items():
    test_loader = DataLoader(test_dset, batch_size=12, collate_fn=collate_fn)
    loss_time, type_acc,loss_llh,loss_event = 0, 0, 0, 0
    for event_seq, time_seq in test_loader:

        event_seq = event_seq.to(device)
        time_seq = time_seq.to(device)
        
        intens, time, event = model.forward(event_seq, time_seq)

        loss_llh += (model.LogLikelihoodLoss(intens, time_seq) / (time_seq.shape[0] * time_seq.shape[1])).detach().cpu().numpy()
        loss_time += model.time_loss(time, time_seq).detach().cpu().numpy()
        loss_event += model.event_loss(event, event_seq).detach().cpu().numpy()
        type_acc += accuracy_score(event[:,:-1].argmax(dim=2).cpu().reshape(-1), 
                                                      event_seq[:, 1:].cpu().reshape(-1))
        
    stats[name] = (loss_llh/len(test_loader), loss_time/len(test_loader), loss_event/len(test_loader), type_acc/len(test_loader))
    


In [38]:
import pandas as pd

df = pd.DataFrame(stats, index=['LL', 'MSE', 'ce', 'acc'])
for indx in df.columns:
    df[indx]['MSE'] = df[indx]['MSE']**0.5
    df[indx]['LL'] = -df[indx]['LL']
df.T

Unnamed: 0,LL,MSE,ce,acc
EOS,-9.060723,52.455442,0.616969,0.707163
XRP,-10.347733,71.85062,0.618255,0.706636
BNB,-11.232201,78.106731,0.6198,0.703472
ETH,-7.922001,22.570457,0.619801,0.703517
LTC,-9.110211,43.744799,0.617133,0.706433


# Train and Evaluate model using only LogLike loss and prediction using probability function

In [None]:
model = NHPModel(256, device=device).to(device)

if DO_TRAIN:

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, threshold=0.001, patience=10, verbose=True)

    statiscs = train(model, optimizer, train_loader, val_loader, device,scheduler=scheduler, 
                    n_epochs = 50, verbose_epoch=10, scale=None, save_path=None)

else:
    # load model trained only with LogLike loss
    model.load_state_dict(torch.load('/content/mds20_deepfolio/models/NeuralHawkesProcess/weights/ALL,llh_only.pth'))

In [12]:
stats = {}

for name, test_dset in test_dsets.items():
    test_loader = DataLoader(test_dset, batch_size=12, collate_fn=collate_fn)
    loss_time, type_acc,loss_llh,loss_event = 0, 0, 0, 0
    for event_seq, time_seq in test_loader:

        event_seq = event_seq.to(device)
        time_seq = time_seq.to(device)
      
        intens, time, event = model.forward(event_seq, time_seq)
        loss_llh += (model.LogLikelihoodLoss(intens, time_seq) / (time_seq.shape[0] * time_seq.shape[1])).item()
        
    stats[name] = loss_llh/len(test_loader)
  

In [14]:
import pandas as pd

df = pd.DataFrame(stats, index=['LL'])
for indx in df.columns:
    df[indx]['LL'] = -df[indx]['LL']

Unnamed: 0,LL
EOS,-9.075202
XRP,-10.398117
BNB,-11.307566
ETH,-7.904173
LTC,-9.12609


In [None]:
from mds20_deepfolio.models.NeuralHawkesProcess.utils import predict_event

stats = {}
for name, test_dset in test_dsets.items():
    test_loader = DataLoader(test_dset, batch_size=12, collate_fn=collate_fn)
    acc = []
    print(name)
    for event, time in test_loader:
        for i in range(time.shape[0]):
          
            # Make prediction like it was suggested in the paper
            times_gt, times_pred, types_gt, types_pred = predict_event(model, time[i,:], event[i,:], device)
            acc.append([accuracy_score(types_gt, types_pred), mean_squared_error(times_gt, times_pred)])

    stats[name] = acc

In [61]:
ss ={}
for n, _ in test_dsets.items():
    ss[n] = [df[n].values[0], np.array(stats[n])[:,0].mean(), np.array(stats[n])[:,1].mean()**0.5]
df1 = pd.DataFrame(ss, index=['LL','acc', 'rmse'])
df1.T

Unnamed: 0,LL,acc,rmse
EOS,-9.075202,0.450751,53.112944
XRP,-10.398117,0.457173,73.989308
BNB,-11.307566,0.467208,77.652723
ETH,-7.904173,0.447487,23.808629
LTC,-9.12609,0.465304,43.965744


# Evaluate model on dataset from original paper

In [67]:
from mds20_deepfolio.models.NeuralHawkesProcess.DataWrapper import NHPDataset, collate_fn

!unzip /content/mds20_deepfolio/models/NeuralHawkesProcess/data/fin_data.zip \
      -d /content/mds20_deepfolio/models/NeuralHawkesProcess/data

Archive:  /content/mds20_deepfolio/models/NeuralHawkesProcess/data/fin_data.zip
  inflating: /content/mds20_deepfolio/models/NeuralHawkesProcess/data/test.pkl  
  inflating: /content/mds20_deepfolio/models/NeuralHawkesProcess/data/dev.pkl  
  inflating: /content/mds20_deepfolio/models/NeuralHawkesProcess/data/train.pkl  


In [68]:
train_dataset = NHPDataset('/content/mds20_deepfolio/models/NeuralHawkesProcess/data/train.pkl')
val_dataset = NHPDataset('/content/mds20_deepfolio/models/NeuralHawkesProcess/data/dev.pkl')
test_dataset = NHPDataset('/content/mds20_deepfolio/models/NeuralHawkesProcess/data/test.pkl')

train_loader = DataLoader(train_dataset, batch_size=12, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=12, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=12, collate_fn=collate_fn)

print('lenght of train_dataset:', len(train_dataset))
print('lenght of val_dataset:', len(val_dataset))
print('lenght of test_dataset:', len(test_dataset))


lenght of train_dataset: 90
lenght of val_dataset: 10
lenght of test_dataset: 100


In [69]:
model = NHPModel(256, device=device).to(device)
DO_TRAIN = False
if DO_TRAIN:

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, threshold=0.001, patience=10, verbose=True)

    statiscs = train(model, optimizer, train_loader, val_loader, device,scheduler=scheduler, 
                    n_epochs = 50, verbose_epoch=10, scale=None, save_path=None)

else:
    # load model trained only with LogLike loss
    model.load_state_dict(torch.load('/content/mds20_deepfolio/models/NeuralHawkesProcess/weights/fin_dset.pth'))

In [70]:
for event, time in test_loader:
    for i in range(time.shape[0]):
          
        # Make prediction like it was suggested in the paper
        times_gt, times_pred, types_gt, types_pred = predict_event(model, time[i,:], event[i,:], device)
        acc.append([accuracy_score(types_gt, types_pred), mean_squared_error(times_gt, times_pred)])

In [76]:
acc = np.array(acc)
print('Type prediction accuracy:', acc[:,0].mean())
print('Time prediction RMSE:', acc[:,1].mean()**0.5)

Type prediction accuracy: 0.5348678514532393
Time prediction RMSE: 29.36444389052214
