In [1]:
import json
import torch
import time
import numpy as np
from torch.utils.data import Dataset

In [2]:
torch.set_default_dtype(torch.float64)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
class ScenarioDataset(Dataset):
    def __init__(self, scenarios):
        super(ScenarioDataset, self).__init__()
        self.scenarios = scenarios

    def __len__(self):
        return len(self.scenarios)

    def __getitem__(self, idx):
        return self.scenarios[idx]

In [11]:
scenario_path = "D:\\Scenario-Generator\\"
output_path = "D:\\GMIB\\"

markets = ['USDiversified', 'International', 'Intermediate', 'Aggressive', 'MoneyMkt', 'MedGovt', 'LongCorp']
timesteps = 1200

def read_scenario(scn_path, scenario_num):
    scn_path = f'{scn_path}\scenario_{scenario_num}.json'
    with open(scn_path, 'r') as file:
        data = json.load(file)

    market_data = []
    for m in markets:
        market_data.append(np.array(data['equities'][m][0:timesteps]) * 100)
    
    reformatted_data = list(zip(*market_data))
    return torch.as_tensor(reformatted_data, device=device)
    
    
scenario_lower_bound = 1
scenario_upper_bound = scenario_lower_bound + 10000
data_all_scenarios = []
for i in range(scenario_lower_bound, scenario_upper_bound):
    tensor_this_scenario = read_scenario(scenario_path, i)
    data_all_scenarios.append(tensor_this_scenario)

dataset = ScenarioDataset(data_all_scenarios)


In [5]:
#
# Source: An assignment from my NLP class (CS388)
#
# Implementation of positional encoding that you can use in your network
class PositionalEncoding(torch.nn.Module):
    def __init__(self, d_model: int, num_positions: int=20, batched=False, add=False):
        """
        :param d_model: dimensionality of the embedding layer to your model; since the position encodings are being
        added to character encodings, these need to match (and will match the dimension of the subsequent Transformer
        layer inputs/outputs)
        :param num_positions: the number of positions that need to be encoded; the maximum sequence length this
        module will see
        :param batched: True if you are using batching, False otherwise
        """
        super().__init__()
        # Dict size
        self.emb = torch.nn.Embedding(num_positions, d_model)
        self.batched = batched
        self.add = add

    def forward(self, x):
        """
        :param x: If using batching, should be [batch size, seq len, embedding dim]. Otherwise, [seq len, embedding dim]
        :return: a tensor of the same size with positional embeddings added in
        """
        # Second-to-last dimension will always be sequence length
        input_size = x.shape[-2]
        indices_to_embed = torch.tensor(np.asarray(range(0, input_size))).type(torch.LongTensor).to(device)
        if self.batched:
            # Use unsqueeze to form a [1, seq len, embedding dim] tensor -- broadcasting will ensure that this
            # gets added correctly across the batch
            emb_unsq = self.emb(indices_to_embed).unsqueeze(0)
            if self.add:
                return x + emb_unsq
            else:
                return x - emb_unsq
        else:
            if self.add:
                return x + self.emb(indices_to_embed)
            else:
                return x - self.emb(indices_to_embed)

In [6]:
class Model(torch.nn.Module):
    def __init__(self, hidden_dim, input_shape, batched=False):
        super(Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.input_dim = input_shape[1]
        self.sequence_length = input_shape[0]
        self.add_positional_encoding = PositionalEncoding(self.hidden_dim, timesteps, batched=True, add=True)
        self.sub_positional_encoding = PositionalEncoding(self.hidden_dim, timesteps, batched=True, add=False)
        self.embed_market_data = torch.nn.Linear(in_features=self.input_dim, out_features=self.hidden_dim)
        self.encoder_layer = torch.nn.TransformerEncoderLayer(d_model=self.hidden_dim,
                                                              nhead=2,
                                                              dim_feedforward=512,
                                                              dropout=0.1,
                                                              activation='relu',
                                                              batch_first=True)
        self.encoder = torch.nn.TransformerEncoder(self.encoder_layer, num_layers=1, norm=None, enable_nested_tensor=True, mask_check=False)
        self.downscale = torch.nn.Linear(in_features=self.hidden_dim * timesteps, out_features=64)
        self.upscale = torch.nn.Linear(in_features=64, out_features=self.hidden_dim * timesteps)
        self.decode_layer = torch.nn.TransformerEncoderLayer(d_model=self.hidden_dim,
                                                              nhead=2,
                                                              dim_feedforward=512,
                                                              dropout=0.1,
                                                              activation='relu',
                                                              batch_first=True)

        self.decoder = torch.nn.TransformerEncoder(self.decode_layer, num_layers=1, norm=None, enable_nested_tensor=True, mask_check=False)        
        self.unembed_market_data = torch.nn.Linear(in_features=self.hidden_dim, out_features=self.input_dim)

    def forward(self, src):
        #encoding
        src = self.embed_market_data(src)
        src = self.add_positional_encoding(src)
        enc = self.encoder(src)
        enc = self.downscale(enc.flatten(start_dim=1))
        
        #decoding
        dec = self.upscale(enc).unflatten(1, (self.sequence_length, self.hidden_dim))

        dec = self.decoder(dec)
        dec = self.sub_positional_encoding(src)
        dec = self.unembed_market_data(dec)
        
        return dec


In [12]:
# Create the network
model = Model(hidden_dim=6, input_shape=(timesteps, len(markets)), batched=True)

model = model.to(device)


# Create the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Create the loss
loss = torch.nn.MSELoss()

# Create a DataLoader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)

num_epochs = 10

start_time = time.time()

for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
        optimizer.zero_grad()
        output = model(batch)

        l = loss(output, batch)
        
        l.backward()
        optimizer.step()
        
        # Compute the accuracy
        total_loss += l.cpu().detach()

    if epoch % 1 == 0:
        print(f'epoch: {epoch}, loss: {total_loss}')
    if total_loss < 1e-2:
        break
    
print('Duration: {}'.format(time.time() - start_time))


epoch: 0, loss: 1445.0559868205644
epoch: 1, loss: 34.311859591153166
epoch: 2, loss: 12.503643146053482
epoch: 3, loss: 1.7070446325953816
epoch: 4, loss: 1.1706659545435445
epoch: 5, loss: 1.1601854119336914
epoch: 6, loss: 1.1589834572214661
epoch: 7, loss: 1.1579294626084025
epoch: 8, loss: 1.1564447811742082
epoch: 9, loss: 1.1524882183068093
Duration: 908.1584424972534


In [13]:
dataset[0]

tensor([[ 1.1679e+00, -6.4703e+00, -2.7507e+00,  ..., -3.0700e-02,
          9.0000e-04,  1.1400e-02],
        [ 3.6363e+00,  3.5294e+00,  3.9141e+00,  ..., -3.4900e-02,
          6.6500e-02,  4.7000e-03],
        [ 4.1800e-02,  2.7178e+00, -1.0438e+01,  ..., -3.8900e-02,
         -2.8500e-02, -4.0960e-01],
        ...,
        [-1.6466e+00, -3.6367e+00, -4.5058e+00,  ...,  2.7060e-01,
          4.0240e-01, -3.4500e-02],
        [ 8.0416e+00,  7.9011e+00,  9.3764e+00,  ...,  1.3630e-01,
          1.9250e+00,  3.0164e+00],
        [-7.2116e+00, -8.0554e+00, -6.0846e+00,  ...,  2.9300e-02,
          1.3744e+01,  2.3511e+01]], device='cuda:0')

In [14]:
o = model(dataset[0][None,:,:])

In [15]:
print(o)

tensor([[[ 1.1678e+00, -6.4702e+00, -2.7503e+00,  ...,  1.4913e-01,
          -1.7830e-03,  1.2697e-02],
         [ 3.6365e+00,  3.5298e+00,  3.9147e+00,  ...,  1.2787e-01,
           6.4083e-02,  6.2005e-03],
         [ 4.1994e-02,  2.7181e+00, -1.0437e+01,  ...,  1.1622e-01,
          -3.0672e-02, -4.0854e-01],
         ...,
         [-1.6466e+00, -3.6367e+00, -4.5061e+00,  ...,  1.5597e-01,
           4.0389e-01, -3.5433e-02],
         [ 8.0416e+00,  7.9011e+00,  9.3764e+00,  ...,  1.3281e-01,
           1.9253e+00,  3.0162e+00],
         [-7.2115e+00, -8.0552e+00, -6.0837e+00,  ...,  1.8161e-01,
           1.3743e+01,  2.3512e+01]]], device='cuda:0',
       grad_fn=<ViewBackward0>)
