From chapter82pytorch, based on Section 8.2 in AlgoTrade

In [1]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

from Preprocessing import generate_data

In [2]:
# Load the train / validation / test splits produced by the project-local
# Preprocessing.generate_data helper. Each split is an (inputs, labels) pair.
# NOTE(review): later cells index X_train.shape[1] and shape[2], so the inputs
# are presumably (samples, sequence, features) — confirm against generate_data.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = generate_data()

In [3]:
# Sizes of the second and third axes of the training inputs; n_features is
# what the model constructors receive as the LSTM input size.
len_seq = X_train.shape[1]
n_features = X_train.shape[2]

In [6]:
# Training hyperparameters.
batch_size = 10  # mini-batch size handed to the training DataLoader
learning_rate = 1e-3  # learning-rate setting

In [7]:
# Convert the train and test splits to tensors: inputs become float32 and
# labels int64 (the dtype nn.CrossEntropyLoss expects for class indices).
# torch.from_numpy requires NumPy arrays, so generate_data must return those.
# The validation split (X_val, y_val) is not converted here.
X_train_ = torch.from_numpy(X_train).float()
y_train_ = torch.from_numpy(y_train).long()
X_test_ = torch.from_numpy(X_test).float()
y_test_ = torch.from_numpy(y_test).long()

In [8]:
# Pair training inputs with labels and iterate over them in shuffled
# mini-batches of `batch_size` samples.
train_dataset = TensorDataset(X_train_, y_train_)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [92]:
from torch.utils.tensorboard import SummaryWriter
import time

def train_loop(epochs, tensorboard=False):
    """Train the notebook-level ``model`` and report test metrics every 10 epochs.

    Relies on names defined in other cells: ``model``, ``loss_fn``,
    ``optimizer``, ``train_dataloader``, ``X_test_`` and ``y_test_``.

    Args:
        epochs: Number of full passes over ``train_dataloader``.
        tensorboard: When True, log the gradient histogram and the mean
            absolute gradient of every parameter whose name contains
            ``'weight'`` after each batch. A separate run directory
            ``runs/AlgoTradeEpoch{e}`` is used for every epoch.

    Returns:
        None. Progress is printed to stdout.
    """
    s_ = time.time()
    model.train()

    for e in range(1, epochs+1):
        train_loss = 0.
        n_batches = 0
        # One writer per epoch; it stays open for the whole epoch and is
        # closed exactly once, even if a batch raises.
        writer = SummaryWriter(f"runs/AlgoTradeEpoch{e}") if tensorboard else None

        try:
            for i, (X, y) in enumerate(train_dataloader, 1):
                pred = model(X)
                loss = loss_fn(pred, y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                n_batches = i

                if writer is not None:
                    for n, p in model.named_parameters():
                        # Skip parameters that received no gradient this step.
                        if 'weight' in n and p.grad is not None:
                            # The histogram and scalar tags must differ,
                            # otherwise neither is recognized.
                            writer.add_histogram(f"{n}", p.grad, i)
                            writer.add_scalar(f"{n}_abs_mean", p.grad.abs().mean(), i)
        finally:
            if writer is not None:
                writer.close()

        if e % 10 == 0:
            model.eval()
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                pred = model(X_test_)
                loss = loss_fn(pred, y_test_)
            correct = (pred.argmax(dim=1) == y_test_).sum()
            acc = correct.item() / len(y_test_)
            # Guard against an empty dataloader (no batches seen).
            mean_train_loss = train_loss / max(n_batches, 1)

            print(f"|TRAIN| Epoch: {e:3d}, Loss: {mean_train_loss:.6f}   |TEST| Epoch: {e:3d}, Loss: {loss.item():.6f}, Acc: {acc:.2f}")
            model.train()
    print(f"Elapsed time for {time.time() - s_}")

In [86]:
# loss_fn = nn.BCEWithLogitsLoss()
# Two-logit classification head, so use cross-entropy on integer class labels.
loss_fn = nn.CrossEntropyLoss()
# Use the shared `learning_rate` setting instead of repeating the literal.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [93]:
# Train for 100 epochs without TensorBoard logging; metrics print every 10 epochs.
train_loop(epochs=100, tensorboard=False)

|TRAIN| Epoch:  10, Loss: 0.650827   |TEST| Epoch:  10, Loss: 0.744723, Acc: 0.62
|TRAIN| Epoch:  20, Loss: 0.651698   |TEST| Epoch:  20, Loss: 0.746222, Acc: 0.60
|TRAIN| Epoch:  30, Loss: 0.652120   |TEST| Epoch:  30, Loss: 0.736374, Acc: 0.62
|TRAIN| Epoch:  40, Loss: 0.650522   |TEST| Epoch:  40, Loss: 0.746095, Acc: 0.62
|TRAIN| Epoch:  50, Loss: 0.650319   |TEST| Epoch:  50, Loss: 0.752180, Acc: 0.62
|TRAIN| Epoch:  60, Loss: 0.647589   |TEST| Epoch:  60, Loss: 0.751550, Acc: 0.60
|TRAIN| Epoch:  70, Loss: 0.649297   |TEST| Epoch:  70, Loss: 0.756482, Acc: 0.62
|TRAIN| Epoch:  80, Loss: 0.646144   |TEST| Epoch:  80, Loss: 0.760549, Acc: 0.62
|TRAIN| Epoch:  90, Loss: 0.641986   |TEST| Epoch:  90, Loss: 0.776427, Acc: 0.60
|TRAIN| Epoch: 100, Loss: 0.643302   |TEST| Epoch: 100, Loss: 0.776290, Acc: 0.60
Elapsed time for 132.99255919456482


In [90]:
# Lower the learning rate in place for continued training. Every parameter
# group is updated so the change also applies if the optimizer has several.
for param_group in optimizer.param_groups:
    param_group['lr'] = 1e-5

In [None]:
model = Rnn4BN(n_units=150, n_features=n_features)
Acc : 0.55
    
model = Rnn5BN(n_units=200, n_features=n_features)
Acc : 0.47 at Epoch 100
Acc : 0.62 at Epoch 200
Acc : 0.45 at Epoch 300
    
model = Rnn5BN(n_units=150, n_features=n_features)
Acc : 0.47 at Epoch 100
Acc : 0.60 at Epoch 200
Acc : 0.45 at Epoch 300 

In [None]:
model = Rnn5BN(n_units=200, n_features=n_features)
Epoch 200 ~ Epoch 300
|TRAIN| Epoch:  10, Loss: 0.663524   |TEST| Epoch:  10, Loss: 0.712885, Acc: 0.60
|TRAIN| Epoch:  20, Loss: 0.661331   |TEST| Epoch:  20, Loss: 0.716691, Acc: 0.60
|TRAIN| Epoch:  30, Loss: 0.661507   |TEST| Epoch:  30, Loss: 0.720340, Acc: 0.60
|TRAIN| Epoch:  40, Loss: 0.658665   |TEST| Epoch:  40, Loss: 0.723448, Acc: 0.60
|TRAIN| Epoch:  50, Loss: 0.659152   |TEST| Epoch:  50, Loss: 0.727791, Acc: 0.60
|TRAIN| Epoch:  60, Loss: 0.656233   |TEST| Epoch:  60, Loss: 0.728771, Acc: 0.60
|TRAIN| Epoch:  70, Loss: 0.657999   |TEST| Epoch:  70, Loss: 0.732527, Acc: 0.60
|TRAIN| Epoch:  80, Loss: 0.651821   |TEST| Epoch:  80, Loss: 0.733417, Acc: 0.60
|TRAIN| Epoch:  90, Loss: 0.653711   |TEST| Epoch:  90, Loss: 0.737276, Acc: 0.62
|TRAIN| Epoch: 100, Loss: 0.654109   |TEST| Epoch: 100, Loss: 0.740980, Acc: 0.62

In [85]:
# Pick the architecture to train: leave exactly one assignment uncommented.
# NOTE: the Rnn class in this notebook takes (n_units, n_features, n_layers);
# the commented call below uses older keyword names and would need updating.
# model = Rnn(num_unit=200, seq_len=5)
# model = Rnn3BN(n_units=100, n_features=n_features)
# model = Rnn4BN(n_units=150, n_features=n_features)
model = Rnn5BN(n_units=200, n_features=n_features)

In [None]:
class Rnn(nn.Module):
    """Stacked LSTM followed by a linear layer producing two class logits.

    Args:
        n_units: Hidden size of every LSTM layer.
        n_features: Number of input features per time step.
        n_layers: Number of stacked LSTM layers (dropout of 0.25 is applied
            between layers).
    """

    def __init__(self, n_units, n_features, n_layers=5):
        super().__init__()
        # nn.LSTM's keyword is `num_layers`; passing `n_layers` raises TypeError.
        self.lstm1 = nn.LSTM(n_features, n_units, num_layers=n_layers, batch_first=True, dropout=0.25)
        self.linear1 = nn.Linear(n_units, 2)

    def forward(self, x):
        """Return logits of shape (batch, 2) for input of shape (batch, seq, n_features)."""
        x, _ = self.lstm1(x)
        # Classify from the top layer's output at the last time step.
        logits = self.linear1(x[:, -1])

        return logits

In [19]:
class Rnn3BN(nn.Module):
    """Three chained single-layer LSTMs with a two-logit linear head.

    The final (h, c) state of each LSTM seeds the next one. The output
    sequences of the first and third LSTM are layer-normalized, and the
    second one's output passes through dropout (p=0.25).

    Args:
        n_units: Hidden size shared by all LSTMs.
        n_features: Number of input features per time step.
    """

    def __init__(self, n_units, n_features):
        super().__init__()
        self.lstm1 = nn.LSTM(n_features, n_units, batch_first=True)
        self.ln1 = nn.LayerNorm(n_units)
        self.lstm2 = nn.LSTM(n_units, n_units, batch_first=True)
        self.dropout2 = nn.Dropout(0.25)
        self.lstm3 = nn.LSTM(n_units, n_units, batch_first=True)
        self.ln3 = nn.LayerNorm(n_units)
        self.linear1 = nn.Linear(n_units, 2)

    def forward(self, x):
        """Map a batch-first input sequence to logits of shape (batch, 2)."""
        seq, state = self.lstm1(x)
        seq = self.ln1(seq)

        seq, state = self.lstm2(seq, state)
        seq = self.dropout2(seq)

        seq, _ = self.lstm3(seq, state)
        seq = self.ln3(seq)

        # Only the last time step feeds the classifier.
        last_step = seq[:, -1, :]
        return self.linear1(last_step)

In [30]:
class Rnn4BN(nn.Module):
    """Four chained single-layer LSTMs with a two-logit linear head.

    Each LSTM is initialised with the final (h, c) state of the previous one.
    LSTM outputs alternate between layer normalization (1st, 3rd) and
    dropout with p=0.25 (2nd, 4th).

    Args:
        n_units: Hidden size shared by all LSTMs.
        n_features: Number of input features per time step.
    """

    def __init__(self, n_units, n_features):
        super().__init__()
        self.lstm1 = nn.LSTM(n_features, n_units, batch_first=True)
        self.ln1 = nn.LayerNorm(n_units)
        self.lstm2 = nn.LSTM(n_units, n_units, batch_first=True)
        self.dropout2 = nn.Dropout(0.25)
        self.lstm3 = nn.LSTM(n_units, n_units, batch_first=True)
        self.ln3 = nn.LayerNorm(n_units)
        self.lstm4 = nn.LSTM(n_units, n_units, batch_first=True)
        self.dropout4 = nn.Dropout(0.25)

        self.linear1 = nn.Linear(n_units, 2)

    def forward(self, x):
        """Map a batch-first input sequence to logits of shape (batch, 2)."""
        stages = (
            (self.lstm1, self.ln1),
            (self.lstm2, self.dropout2),
            (self.lstm3, self.ln3),
            (self.lstm4, self.dropout4),
        )

        # A state of None makes the first LSTM start from zeros, the same as
        # calling it without an initial state.
        state = None
        for recurrent, post in stages:
            x, state = recurrent(x, state)
            x = post(x)

        return self.linear1(x[:, -1])

In [69]:
class Rnn5BN(nn.Module):
    """Five chained single-layer LSTMs with a two-logit linear head.

    Each LSTM is initialised with the final (h, c) state of the previous one.
    The outputs of LSTMs 1, 3 and 5 are layer-normalized; the outputs of
    LSTMs 2 and 4 go through dropout (p=0.25). A BatchNorm1d variant of the
    normalization steps was tried earlier and is not used.

    Args:
        n_units: Hidden size shared by all LSTMs (also stored as ``n_units``).
        n_features: Number of input features per time step.
    """

    def __init__(self, n_units, n_features):
        super().__init__()
        self.n_units = n_units

        # Stage 1: LSTM -> LayerNorm
        self.lstm1 = nn.LSTM(n_features, n_units, batch_first=True)
        self.ln1 = nn.LayerNorm(n_units)

        # Stage 2: LSTM -> Dropout
        self.lstm2 = nn.LSTM(n_units, n_units, batch_first=True)
        self.dropout2 = nn.Dropout(0.25)

        # Stage 3: LSTM -> LayerNorm
        self.lstm3 = nn.LSTM(n_units, n_units, batch_first=True)
        self.ln3 = nn.LayerNorm(n_units)

        # Stage 4: LSTM -> Dropout
        self.lstm4 = nn.LSTM(n_units, n_units, batch_first=True)
        self.dropout4 = nn.Dropout(0.25)

        # Stage 5: LSTM -> LayerNorm
        self.lstm5 = nn.LSTM(n_units, n_units, batch_first=True)
        self.ln5 = nn.LayerNorm(n_units)

        self.linear1 = nn.Linear(n_units, 2)

    def forward(self, x):
        """Map a batch-first input sequence to logits of shape (batch, 2)."""
        stages = (
            (self.lstm1, self.ln1),
            (self.lstm2, self.dropout2),
            (self.lstm3, self.ln3),
            (self.lstm4, self.dropout4),
            (self.lstm5, self.ln5),
        )

        # A state of None makes the first LSTM start from zeros, the same as
        # calling it without an initial state.
        carried_state = None
        for recurrent, post in stages:
            x, carried_state = recurrent(x, carried_state)
            x = post(x)

        # Classify from the last time step only.
        return self.linear1(x[:, -1])

In [65]:
(pred.argmax(1) == y_test_).sum()

tensor(29)

In [40]:
pred = model(X_test_)

In [45]:
import torch.nn.functional as F

In [60]:
# Count the test samples whose highest-scoring class equals the NumPy label.
(pred.argmax(dim=1).numpy() == y_test).sum()

29

In [55]:
y_test

array([0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])

In [50]:
pred.shape

torch.Size([58, 2])

In [47]:
pred

tensor([[-2.1001e-01,  3.6461e-01],
        [-1.0804e+00,  1.3277e+00],
        [-8.6788e-01,  1.5455e+00],
        [-6.6292e-01,  1.0861e+00],
        [-8.1993e-01,  1.2462e+00],
        [-3.0072e-01, -8.4105e-02],
        [ 3.2806e-02, -3.5405e-01],
        [-2.7448e-01,  4.0643e-01],
        [-9.3002e-01,  1.2291e+00],
        [-6.0369e-02, -1.5818e-01],
        [ 1.9017e-01, -2.4336e-01],
        [-9.4550e-01,  1.2811e+00],
        [-6.0024e-01,  1.4085e+00],
        [-1.0060e+00,  1.7116e+00],
        [-2.4308e-02, -2.2104e-01],
        [-4.5752e-02, -3.0051e-02],
        [-2.2287e-02, -2.2116e-01],
        [-5.0894e-02, -1.0537e-01],
        [-9.9198e-02, -5.3795e-02],
        [-4.6180e-02, -1.6504e-01],
        [-1.2952e-02, -1.1342e-01],
        [-1.3399e-02, -9.6625e-02],
        [ 3.1433e-04, -8.7891e-02],
        [-2.7498e-02, -9.7061e-02],
        [-4.9809e-02, -1.7689e-01],
        [-3.0669e-02, -3.9026e-02],
        [-1.2183e-01, -4.1537e-02],
        [ 6.8738e-03, -1.276

In [46]:
F.softmax(pred, dim=1)

tensor([[0.3602, 0.6398],
        [0.0826, 0.9174],
        [0.0822, 0.9178],
        [0.1482, 0.8518],
        [0.1124, 0.8876],
        [0.4461, 0.5539],
        [0.5955, 0.4045],
        [0.3361, 0.6639],
        [0.1035, 0.8965],
        [0.5244, 0.4756],
        [0.6067, 0.3933],
        [0.0974, 0.9026],
        [0.1183, 0.8817],
        [0.0619, 0.9381],
        [0.5490, 0.4510],
        [0.4961, 0.5039],
        [0.5496, 0.4504],
        [0.5136, 0.4864],
        [0.4887, 0.5113],
        [0.5297, 0.4703],
        [0.5251, 0.4749],
        [0.5208, 0.4792],
        [0.5220, 0.4780],
        [0.5174, 0.4826],
        [0.5317, 0.4683],
        [0.5021, 0.4979],
        [0.4799, 0.5201],
        [0.5336, 0.4664],
        [0.5227, 0.4773],
        [0.5398, 0.4602],
        [0.4987, 0.5013],
        [0.5136, 0.4864],
        [0.5235, 0.4765],
        [0.5137, 0.4863],
        [0.5258, 0.4742],
        [0.5008, 0.4992],
        [0.5001, 0.4999],
        [0.5293, 0.4707],
        [0.5

In [42]:
pred.shape

torch.Size([58, 2])

In [None]:
100 epochs 1e-3, 100 epochs 1e-4, 300 epochs 1e-5
|TRAIN| Epoch:  90, Loss: 0.653850   |TEST| Epoch:  90, Loss: 0.691221

In [5]:
net = nn.BatchNorm1d(100)

In [11]:
preds = net(torch.randn(10, 100))

In [13]:
criterion = nn.CrossEntropyLoss()

In [None]:
torch.randint

In [21]:
# Scratch check: cross-entropy of the BatchNorm output against random integer
# targets drawn from [0, 5), to obtain a loss that can be backpropagated.
loss = criterion(preds, torch.randint(0,5, (10,)).long())

In [22]:
loss.backward()

In [24]:
dir(net)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__constants__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_call_impl',
 '_check_input_dim',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_backward_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_set',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',

In [26]:
net.bias

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.], requires_grad=True)