In [1]:
import sys
sys.path.insert(1, '/Users/mac/Desktop/PycharmProjects/TAADL/src')
sys.path.insert(2, '/Users/mac/Desktop/PycharmProjects/TAADL/models')

import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from config import DATA_PATH
from typing import Optional, Tuple
from scipy.stats import norm
from torch.linalg import det, inv
from statsmodels.distributions.empirical_distribution import ECDF



# Read the raw price table from DATA_PATH and use its 'Date' column as the row index.
prices = pd.read_csv(
    DATA_PATH + '/prices.csv',
    index_col='Date',
)

In [2]:

def transform(Z:torch.Tensor, context_len:Optional[int]=None) -> torch.Tensor:
    """
    Transforms each column of the data to (approximately) standard normal values.

    Transform takes 2 steps:
    1. estimate the empirical distribution of the data per column (asset), then use it
       to convert the data to uniform values u in [0,1].
        note that we use a step function based empirical CDF, which is different to
        the one specified in the original paper.
        ** u is truncated to [delta_m, 1 - delta_m] so that the standard normal
           variable cannot become -inf or inf.

    2. use the inverse CDF of the standard normal to convert u to x.

      (1)  (2)
    z -> u -> x

    INPUTS
        Z: torch.Tensor
            2-D input data; column i is the sequence of series i.
            (it is converted with `.numpy()`, so it must be a CPU tensor that does
            not require grad.)
        context_len: Optional[int]
            number of leading rows used to estimate the empirical CDF.
            All rows of Z are transformed with that CDF. Defaults to 100.

    RETURNS
        X: torch.Tensor
            transformed data with the same number of rows and columns as Z.

    RAISES
        ValueError
            if the context length is smaller than 2 (log(1) = 0 would make the
            truncation bound infinite and the output NaN).
    """
    m = 100 if context_len is None else context_len
    if m < 2:
        raise ValueError('context_len must be at least 2.')

    # lower and upper bound for emp. CDF
    delta_m = (4 * np.sqrt(np.log(m) * np.pi) * m ** 0.25) ** -1

    columns = []
    # iterate over the columns of the ARGUMENT (not of a notebook-level global)
    for i in range(Z.size(1)):
        Z_i = Z[:,i].numpy()

        # estimate empirical CDF, only the first m datapoints are used.
        emp_cdf = ECDF(Z_i[:m].reshape(-1))

        # for each datapoint, transform to u and truncate extreme values
        U_i = np.clip(emp_cdf(Z_i), delta_m, 1 - delta_m)

        # get standard normal values
        X_i = norm.ppf(q=U_i, loc=0, scale=1) # inverse CDF of standard normal
        columns.append(torch.Tensor(X_i))

    if not columns:
        # no columns to transform: return an empty tensor with the same number of rows
        return torch.empty(Z.size(0), 0)

    # one column per series, rows stay aligned with Z
    return torch.stack(columns, dim=1)

In [209]:
class MultiVariateGP(nn.Module):
    """
    Three linear heads that map a feature vector to Gaussian parameters.

    The same weights are applied to every time series (they act on the last
    dimension of the input only).

    Heads, in the order they are returned by `forward`:
        layer_m : Linear(input_size -> 1), unconstrained (mean).
        layer_d : Linear(input_size -> rank_size), unconstrained.
        layer_v : Linear(input_size -> 1) followed by Softplus, i.e. strictly positive.

    NOTE(review): GPAutoregressiveRNN.forward in this file uses the `layer_d`
    output as the low-rank factor and the `layer_v` output as the diagonal of the
    covariance, which is the opposite of what the attribute names suggest.
    """

    def __init__(self, input_size:int, rank_size:int):
        super().__init__()
        self.input_size, self.rank_size = input_size, rank_size

        # heads are created in a fixed order (m, d, v) so that random
        # initialisation is reproducible for a given seed.
        self.layer_m = nn.Linear(input_size, 1)
        self.layer_d = nn.Linear(input_size, rank_size)
        positive_head = [nn.Linear(input_size, 1), nn.Softplus(beta=1)]
        self.layer_v = nn.Sequential(*positive_head)

    def forward(self, h_t:torch.Tensor):
        """Apply the three heads to h_t and return their outputs as (m_t, d_t, v_t)."""
        heads = (self.layer_m, self.layer_d, self.layer_v)
        return tuple(head(h_t) for head in heads)


class GPAutoregressiveRNN(nn.Module):
    """
    LSTM whose outputs parameterise a multivariate Gaussian at every time step.

    For each call of `forward` a random subset of `batch_size` series is selected,
    each selected series is passed through the (shared) LSTM, the LSTM output is
    concatenated with the static feature vector of the series, and the result is
    mapped by `MultiVariateGP` to a mean and a covariance of the form D + V @ V.T.

    INPUTS (constructor)
        input_size: int
            input size of the LSTM. `forward` feeds one scalar per series and
            time step, so it expects input_size == 1.
        hidden_size, num_layers, dropout, batch_first
            passed to nn.LSTM.
            NOTE(review): `forward` indexes its input as (time, series); it has not
            been adapted to batch_first=True.
        batch_size: int
            number of series sampled per `forward` call.
        rank_size: int
            rank of the low-rank factor V.
        features: Optional[dict]
            maps the column index of a series to its 1-D feature tensor.
            All feature tensors are assumed to have the same length. If None, no
            features are concatenated.
        prediction_len: int
            default number of steps generated by `predict`.
    """
    def __init__(
        self, 
        input_size:int, 
        hidden_size:int, 
        num_layers:int, 
        batch_size:int,
        rank_size:int,
        dropout:float=0.0,
        batch_first:bool=False,
        features:Optional[dict]=None,
        prediction_len:int=1
    ):
        super(GPAutoregressiveRNN,self).__init__()
        self.input_size   = input_size
        self.hidden_size  = hidden_size
        self.batch_size   = batch_size
        self.num_layers   = num_layers
        self.batch_first  = batch_first
        # length of one feature vector (0 when no features are used)
        self.feature_size = len(next(iter(features.values()))) if features else 0
        self.features = features
        self.hidden   = None
        self.prediction_len = prediction_len
        
        # local lstm
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                             num_layers=num_layers, batch_first=batch_first, dropout=dropout)
        
        # Multivariate Gaussian Process
        self.gp = MultiVariateGP(input_size=hidden_size + self.feature_size, rank_size=rank_size)

        # projection of an LSTM output back to an LSTM input, used by `predict`.
        # It is registered here (instead of being re-created randomly on every
        # `predict` call) so it is saved with the model and can be trained.
        # Nothing in this class trains it.
        self.output_layer = nn.Linear(hidden_size, input_size)

    def init_weight(self) -> None:
        """Reset the stored LSTM state (h0, c0) to zeros for `batch_size` series."""
        h0, c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size),\
                    torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        self.hidden = (h0, c0)

    def feature_selector(self, indices:torch.Tensor, time_steps:int):
        """
        Build the feature tensor for the selected series.

        Each feature vector is repeated along the time axis, giving a tensor of
        shape (time_steps, len(indices), feature_size).
        """
        feature = [self.features[idx.item()].repeat(time_steps,1) for idx in indices]
        return torch.stack(feature, dim=1)

    def forward(self, z_t:torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        INPUTS
            z_t: torch.Tensor
                2-D tensor, timestep x num_series.

        RETURNS
            mu_t: timestep x batch x 1, mean per selected series.
            cov_t: timestep x batch x batch, covariance D + V @ V.T.
            batch_indices: column indices of the selected series.

        The LSTM state stored in `self.hidden` is read and overwritten by this call.
        """
        # we only use the subset of batch for training;
        # sample among the series that are actually present in the input.
        batch_indices = torch.randperm(z_t.size(1))[:self.batch_size]

        # select the batch then pass it through the unrolled LSTM
        z_t = z_t[:,batch_indices].unsqueeze(2)# timestep x num_batch x 1
        output, self.hidden = self.lstm(z_t, self.hidden)

        # find feature vector e_i for each asset i then concatenate it with LSTM output.
        if self.features is not None:
            e = self.feature_selector(batch_indices, z_t.size(0))
            y_t = torch.cat([output, e], dim=2)
        else:
            y_t = output

        # get GP parameters for each time t.
        # The gp module returns (mean, rank_size-wide output, positive scalar):
        # the rank_size-wide output is the low-rank factor V and the positive
        # scalar is the diagonal entry of D.
        mu_t, v_t, d_t = self.gp(y_t)
        cov_t = torch.diag_embed(d_t.squeeze(2)) + (v_t @ v_t.permute(0,2,1)) # D + V @ V.T
        return mu_t, cov_t, batch_indices

    def predict(self, z_t:torch.Tensor, hidden_state=None,
                prediction_len:Optional[int]=None) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Roll the LSTM forward, feeding each projected output back as the next input.

        INPUTS
            z_t: torch.Tensor
                input of a single time step, laid out as the LSTM expects it
                (1 x batch x input_size when batch_first is False).
            hidden_state:
                LSTM state to start from; defaults to the stored `self.hidden`.
            prediction_len: Optional[int]
                number of steps to generate; defaults to `self.prediction_len`.

        RETURNS
            (outputs, hidden_states): per-step projected outputs and LSTM outputs,
            each stacked along a new axis 2.
        """
        # the time axis depends on the LSTM layout
        time_dim = 1 if self.batch_first else 0
        assert z_t.size(time_dim) == 1, 'input is a sequence! input should include only one time step.'

        steps = self.prediction_len if prediction_len is None else prediction_len
        if steps < 1:
            raise ValueError('prediction_len must be at least 1.')

        if hidden_state is None:
            hidden_state = self.hidden

        outputs = []
        hidden_states = []
        for _ in range(steps):
            h_t, hidden_state = self.lstm(z_t, hidden_state)
            z_t = self.output_layer(h_t)
            outputs.append(z_t)
            hidden_states.append(h_t)        
        return torch.stack(outputs, dim=2), torch.stack(hidden_states, dim=2)
    

In [165]:
# Weekly preprocessing: parse the index as dates, then keep the last
# available price of every calendar week.
prices.index = pd.to_datetime(prices.index)
prices = prices.resample(rule='W').last()

# weekly log return; the first row has no previous week and is set to 0.0
lret_w = np.log(prices.div(prices.shift(periods=1))).fillna(0.0)

In [167]:
# define the feature vector, which is specific to each time series
# first 5 entries: stock, gov bond, corp bond, real estate, commodity
# last 3 entries : us, europe, other
e1, e2, e3, e4, e5, e6, e7 = (
    torch.Tensor(row) for row in [
        [1,0,0,0,0,0,1,0], # IEV, europe
        [1,0,0,0,0,0,0,1], # EEM, em. market
        [0,0,1,0,0,1,0,0], # AGG, us corp bonds
        [0,1,0,0,0,1,0,0], # IEF, us gov bonds
        [0,0,0,1,0,1,0,0], # IYR, us real estates
        [0,0,0,0,1,0,0,0], # IAU, gold
        [1,0,0,0,0,1,0,0], # ITOT, us equities
    ]
)

# column index of a series -> its feature vector
e_dict = dict(enumerate([e1, e2, e3, e4, e5, e6, e7]))

# generate a random sample (200 time steps x 7 series) and transform it
Z_test = torch.Tensor(np.random.normal(loc=0.0, scale=0.05, size=(200,7)))
X_test = transform(Z_test)

In [172]:
# Build the model for scalar inputs with the per-series feature vectors,
# then zero-initialise its stored LSTM state.
deepar = GPAutoregressiveRNN(
    input_size=1,
    hidden_size=4,
    num_layers=2,
    batch_size=3,
    rank_size=2,
    features=e_dict,
)
deepar.init_weight()

In [173]:
# run the model on the raw sample: returns the per-time-step mean and covariance
# of the Gaussian and the column indices of the randomly selected series.
# NOTE: the call also updates the LSTM state stored on `deepar`, so re-running
# this cell without re-initialising the model gives different results.
mu_t, cov_t, idx = deepar(Z_test)

In [199]:
# Gaussian of the first time step: flattened mean vector and its covariance matrix
mg = torch.distributions.multivariate_normal.MultivariateNormal(
    loc=mu_t[0].view(-1),
    covariance_matrix=cov_t[0],
)

In [207]:
# log-density of the first transformed observation of the selected series.
# The columns are selected here directly, so this cell does not depend on a
# variable that is only assigned in a cell further down the notebook.
mg.log_prob(X_test[:, idx][0])

tensor(-3.8473, grad_fn=<SubBackward0>)

In [181]:
def gaussianLL_loss(x:torch.Tensor, mu_t:torch.Tensor, cov_t:torch.Tensor):
    """
    Negative log-likelihood of x under a batch of multivariate Gaussians,
    summed over the leading (batch / time) dimension.

    For every leading index t:
        log p(x) = -0.5 * (d * log(2*pi) + log|cov| + (x - mu).T @ inv(cov) @ (x - mu))

    INPUTS
        x: torch.Tensor
            observations, shape (T, d) or (T, d, 1).
        mu_t: torch.Tensor
            means, shape (T, d, 1).
        cov_t: torch.Tensor
            covariance matrices, shape (T, d, d). They are expected to be
            positive definite; only the log of |det| is used.

    RETURNS
        scalar torch.Tensor, the summed negative log-likelihood.

    RAISES
        AssertionError
            if mu_t or cov_t is not 3-dimensional.
    """
    # both parameters must be 3-D, otherwise (x - mu_t) would broadcast silently
    assert len(cov_t.size()) == 3 and len(mu_t.size()) == 3, \
        'one of inputs dimension is not equal to 3.'
    
    d = cov_t.size(1) # dimension
    if len(x.size()) == 2:
        x = x.unsqueeze(2)

    diff = x - mu_t
    # (x - mu).T @ inverse cov matrix @ (x - mu), computed with a linear solve
    # instead of an explicit matrix inverse
    maha = (diff.permute(0,2,1) @ torch.linalg.solve(cov_t, diff)).reshape(-1)
    # the Gaussian density needs log|cov|, not |cov|
    _, logdet = torch.linalg.slogdet(cov_t)

    ll = -0.5 * (d * float(np.log(2 * np.pi)) + logdet + maha)
    return -ll.sum() # negative log-likelihood


In [206]:
# columns of the transformed sample for the series indices `idx` returned by the model call
x = X_test[:,idx]

In [208]:
# draw one random sample from the first-time-step Gaussian `mg`
mg.sample()

tensor([ 0.6921, -1.0946,  0.9929])