From chapter82pytorch, based on Section 8.2 in AlgoTrade

In [1]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

from Preprocessing import generate_data

In [35]:
# Unpack the three (X, y) pairs returned by the project's generate_data();
# going by the names, these are the train / validation / test splits.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = generate_data()

In [46]:
# Sequence length and per-step feature count, read from axes 1 and 2 of
# X_train (axis 0 is presumably the sample axis — TODO confirm).
len_seq = X_train.shape[1]
n_features = X_train.shape[2]

In [None]:
class Rnn(nn.Module):
    """Stacked-LSTM classifier that maps a sequence to two class logits.

    Args:
        n_units: Hidden size of every LSTM layer.
        n_features: Number of input features per time step.
        n_layers: Number of stacked LSTM layers (default 5).
    """

    def __init__(self, n_units, n_features, n_layers=5):
        super(Rnn, self).__init__()
        # nn.LSTM's keyword is ``num_layers``; the previous ``n_layers=``
        # keyword made construction fail with a TypeError.
        # Dropout is only applied between stacked layers, so it is disabled
        # for a single layer to avoid PyTorch's "non-zero dropout" warning.
        self.lstm1 = nn.LSTM(
            n_features,
            n_units,
            num_layers=n_layers,
            batch_first=True,
            dropout=0.25 if n_layers > 1 else 0.0,
        )
        self.linear1 = nn.Linear(n_units, 2)

    def forward(self, x):
        """Return logits of shape (batch, 2) for ``x`` of shape (batch, seq, n_features)."""
        # The final hidden/cell states are not needed; only the output sequence.
        x, _ = self.lstm1(x)
        # Classify from the top layer's output at the last time step.
        logits = self.linear1(x[:, -1])

        return logits

In [None]:
class Rnn2BN(nn.Module):
    """Two chained single-layer LSTMs, each followed by dropout, plus a 2-logit head.

    Args:
        n_units: Hidden size shared by both LSTMs.
        n_features: Number of input features per time step.
    """

    def __init__(self, n_units, n_features):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size=n_features, hidden_size=n_units, batch_first=True)
        self.dropout1 = nn.Dropout(p=0.25)
        self.lstm2 = nn.LSTM(input_size=n_units, hidden_size=n_units, batch_first=True)
        self.dropout2 = nn.Dropout(p=0.25)
        self.linear1 = nn.Linear(in_features=n_units, out_features=2)

    def forward(self, x):
        """Return logits of shape (batch, 2) for a batch-first input sequence."""
        seq, state = self.lstm1(x)
        seq = self.dropout1(seq)
        # The second LSTM is initialised with the first LSTM's final (h, c) state.
        seq, _ = self.lstm2(seq, state)
        # Dropout is applied to the whole sequence before the last step is taken.
        last_step = self.dropout2(seq)[:, -1]
        return self.linear1(last_step)

In [74]:
class Rnn5BN(nn.Module):
    """Five chained single-layer LSTMs with alternating LayerNorm / Dropout.

    Stages 1, 3 and 5 are followed by a LayerNorm over the trailing
    ``[len_seq, n_units]`` dimensions; stages 2 and 4 are followed by
    dropout (p=0.25). A linear head turns the last time step into 2 logits.

    Args:
        n_units: Hidden size shared by all LSTMs.
        n_features: Number of input features per time step.
        len_seq: Sequence length the LayerNorm layers are sized for.
    """

    def __init__(self, n_units, n_features, len_seq):
        super().__init__()
        self.n_units = n_units

        norm_shape = [len_seq, n_units]

        # Stage 1: LSTM -> LayerNorm
        self.lstm1 = nn.LSTM(input_size=n_features, hidden_size=n_units, batch_first=True)
        self.ln1 = nn.LayerNorm(norm_shape)

        # Stage 2: LSTM -> Dropout
        self.lstm2 = nn.LSTM(input_size=n_units, hidden_size=n_units, batch_first=True)
        self.dropout2 = nn.Dropout(p=0.25)

        # Stage 3: LSTM -> LayerNorm
        self.lstm3 = nn.LSTM(input_size=n_units, hidden_size=n_units, batch_first=True)
        self.ln3 = nn.LayerNorm(norm_shape)

        # Stage 4: LSTM -> Dropout
        self.lstm4 = nn.LSTM(input_size=n_units, hidden_size=n_units, batch_first=True)
        self.dropout4 = nn.Dropout(p=0.25)

        # Stage 5: LSTM -> LayerNorm
        self.lstm5 = nn.LSTM(input_size=n_units, hidden_size=n_units, batch_first=True)
        self.ln5 = nn.LayerNorm(norm_shape)

        self.linear1 = nn.Linear(in_features=n_units, out_features=2)

    def forward(self, x):
        """Return logits of shape (batch, 2) for a batch-first input sequence."""
        stages = (
            (self.lstm1, self.ln1),
            (self.lstm2, self.dropout2),
            (self.lstm3, self.ln3),
            (self.lstm4, self.dropout4),
            (self.lstm5, self.ln5),
        )

        # ``None`` lets the first LSTM start from its default initial state;
        # every later LSTM is seeded with the previous LSTM's final (h, c).
        state = None
        for recurrent, post in stages:
            x, state = recurrent(x, state)
            x = post(x)

        return self.linear1(x[:, -1])

In [75]:
# Alternative architectures (keyword names follow the class signatures above):
# model = Rnn(n_units=200, n_features=n_features)
# model = Rnn2BN(n_units=100, n_features=n_features)
model = Rnn5BN(n_units=200, n_features=n_features, len_seq=len_seq)

In [33]:
# Training hyperparameters.
batch_size = 10  # mini-batch size handed to the training DataLoader
epochs = 2  # NOTE(review): the train_loop(...) call in this notebook passes epochs=100 instead — confirm intent
learning_rate = 1e-2  # NOTE(review): the optimizer in this notebook is built with a literal lr=1e-4 — confirm intent

In [36]:
# Convert the NumPy arrays to tensors: inputs cast with .float(), targets with
# .long() (integer class indices, as used with the cross-entropy loss).
# NOTE(review): the validation split is not converted here.
X_train_ = torch.from_numpy(X_train).float()
y_train_ = torch.from_numpy(y_train).long()
X_test_ = torch.from_numpy(X_test).float()
y_test_ = torch.from_numpy(y_test).long()

In [73]:
# Pair inputs with targets and serve them in shuffled mini-batches of `batch_size`.
train_dataset = TensorDataset(X_train_, y_train_)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [70]:
from torch.utils.tensorboard import SummaryWriter

def train_loop(epochs, tensorboard=False):
    """Train the module-level ``model`` and print losses every tenth epoch.

    Relies on the notebook globals ``model``, ``loss_fn``, ``optimizer``,
    ``train_dataloader``, ``X_test_`` and ``y_test_``.

    Args:
        epochs: Number of passes over ``train_dataloader``.
        tensorboard: When true, log the gradient histogram and the mean
            absolute gradient of every parameter whose name contains
            ``"weight"`` to a per-epoch ``runs/AlgoTradeEpoch{e}`` writer,
            using the batch index within the epoch as the step.
    """
    # Start in training mode; each evaluation below switches back afterwards.
    model.train()

    for e in range(epochs):
        train_loss = 0.
        n_batches = 0
        writer = SummaryWriter(f"runs/AlgoTradeEpoch{e}") if tensorboard else None

        try:
            for n_batches, (X, y) in enumerate(train_dataloader, 1):
                pred = model(X)
                loss = loss_fn(pred, y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

                if writer is not None:
                    for name, param in model.named_parameters():
                        # Parameters that took no part in the loss have no gradient.
                        if 'weight' not in name or param.grad is None:
                            continue
                        # Histogram and scalar tags must differ, otherwise
                        # TensorBoard recognises neither of them.
                        writer.add_histogram(f"{name}", param.grad, n_batches)
                        writer.add_scalar(f"{name}_abs_mean", param.grad.abs().mean(), n_batches)
        finally:
            # Close once per epoch (not once per batch) so the writer is not
            # torn down and re-created for every step.
            if writer is not None:
                writer.close()

        if e % 10 == 0:
            model.eval()
            # No graph is needed for evaluation; skipping it saves memory on
            # the full test set.
            with torch.no_grad():
                test_loss = loss_fn(model(X_test_), y_test_).item()
            # Guard against an empty dataloader when averaging.
            mean_train_loss = train_loss / max(n_batches, 1)
            print(f"|TRAIN| Epoch: {e:3d}, Loss: {mean_train_loss:.6f}   |TEST| Epoch: {e:3d}, Loss: {test_loss:.6f}")
            model.train()

In [78]:
# Alternative kept for reference: loss_fn = nn.BCEWithLogitsLoss()
# The models above emit two logits per sample, paired here with integer
# class targets through cross-entropy.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [82]:
# Lower the learning rate of the first parameter group in place, without
# rebuilding the optimizer.
optimizer.param_groups[0]['lr'] = 1e-5

In [83]:
# Train for 100 epochs without TensorBoard logging; train_loop prints one
# line for every epoch where e % 10 == 0.
train_loop(epochs=100, tensorboard=False)

|TRAIN| Epoch:   0, Loss: 0.653356   |TEST| Epoch:   0, Loss: 0.711697
|TRAIN| Epoch:  10, Loss: 0.652863   |TEST| Epoch:  10, Loss: 0.712334
|TRAIN| Epoch:  20, Loss: 0.656736   |TEST| Epoch:  20, Loss: 0.712126
|TRAIN| Epoch:  30, Loss: 0.653080   |TEST| Epoch:  30, Loss: 0.714316
|TRAIN| Epoch:  40, Loss: 0.651470   |TEST| Epoch:  40, Loss: 0.713698
|TRAIN| Epoch:  50, Loss: 0.651356   |TEST| Epoch:  50, Loss: 0.714710
|TRAIN| Epoch:  60, Loss: 0.648487   |TEST| Epoch:  60, Loss: 0.715051
|TRAIN| Epoch:  70, Loss: 0.650429   |TEST| Epoch:  70, Loss: 0.714781
|TRAIN| Epoch:  80, Loss: 0.644368   |TEST| Epoch:  80, Loss: 0.714947
|TRAIN| Epoch:  90, Loss: 0.646378   |TEST| Epoch:  90, Loss: 0.717084


In [87]:
# Interactive inspection: list the attributes available on the first LSTM.
dir(model.lstm1)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__constants__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__jit_unused_properties__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_all_weights',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_call_impl',
 '_flat_weights',
 '_flat_weights_names',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_backward_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_set',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parall

In [109]:
# Element count of each tensor in the first entry of lstm1.all_weights.
a = [l.numel() for l in model.lstm1.all_weights[0]]

In [102]:
# Interactive inspection: size of the hidden-to-hidden bias of lstm1, layer 0.
model.lstm1.bias_hh_l0.size()


torch.Size([800])

In [101]:
# Interactive inspection: size of the input-to-hidden bias of lstm1, layer 0.
model.lstm1.bias_ih_l0.size() 

torch.Size([800])

In [108]:
# Total parameter count of lstm1: both weight matrices plus the two bias
# vectors (1600 = 800 for bias_ih_l0 + 800 for bias_hh_l0, per the sizes
# printed by the two bias inspection cells).
model.lstm1.weight_hh_l0.numel() + model.lstm1.weight_ih_l0.numel() + 1600

179200

In [67]:
# Scratch writer for trying out the TensorBoard API; logs go to runs/testing.
writer = SummaryWriter(f"runs/testing")

In [69]:
# Log ten dummy scalar points (value i + 10 at step i) under a single tag.
# NOTE(review): the writer is not flushed or closed in this cell.
for i in range(10):
    writer.add_scalar(f"testing_abs_mean", i+10, i)

In [5]:
# Stand-alone BatchNorm1d over 100 features, used for the experiments below.
net = nn.BatchNorm1d(100)

In [11]:
# Push a random (10, 100) batch through the batch-norm layer.
preds = net(torch.randn(10, 100))

In [13]:
# Loss used for the batch-norm backward-pass experiment.
criterion = nn.CrossEntropyLoss()

In [None]:
# Bare reference to torch.randint (interactive lookup; the cell has no effect).
torch.randint

In [21]:
# Treat the 100 normalised features as class scores and compare them with
# ten random integer targets drawn from [0, 5).
loss = criterion(preds, torch.randint(0,5, (10,)).long())

In [22]:
# Backpropagate so that the batch-norm parameters receive gradients.
loss.backward()

In [24]:
# Interactive inspection: list the attributes available on the batch-norm layer.
dir(net)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__constants__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_call_impl',
 '_check_input_dim',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_backward_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_set',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',

In [26]:
# Interactive inspection: show the batch-norm layer's bias parameter.
net.bias

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.], requires_grad=True)