In [118]:
import torch
import os
import pandas as pd
import numpy as np
import time
import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [119]:
RANDOM_SEED = 123
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [120]:
class StockDataset(Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        self.y = df['Label'].to_numpy().astype(int)
        self.features = df.drop(["Company", "Date", "Target", "Label"], axis=1).to_numpy().astype('float32')

    def __getitem__(self, index):
        features = self.features[index]
        label = self.y[index]
        
        return features, label

    def __len__(self):
        return self.y.shape[0]

In [121]:
BATCH_SIZE = 64

In [132]:
train_dataset = StockDataset(csv_path='train_normalized.csv')


train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=4)


valid_dataset = StockDataset(csv_path='val_normalized.csv')

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=4)

test_dataset = StockDataset(csv_path='test_normalized.csv')

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=4)

In [146]:
num_features = 143
num_classes = 9

In [157]:
torch.manual_seed(0)

num_epochs = 2
for epoch in range(num_epochs):

    for batch_idx, (x, y) in enumerate(train_loader):
        
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        num_features = x.shape[1]
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        print('break minibatch for-loop')
        break

Epoch: 1 | Batch index: 0 | Batch size: 64
break minibatch for-loop
Epoch: 2 | Batch index: 0 | Batch size: 64
break minibatch for-loop


In [134]:
class SoftmaxRegression(torch.nn.Module):

    def __init__(self, num_features, num_classes):
        super(SoftmaxRegression, self).__init__()
        self.linear = torch.nn.Linear(num_features, num_classes)
        
        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()
        
    def forward(self, x):
        logits = self.linear(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [135]:
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x10700b890>

In [145]:
model = SoftmaxRegression(num_features=143, num_classes=9)
model = model.to(DEVICE)

In [147]:
learning_rate = 0.1

In [148]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [149]:
NUM_EPOCHS = 50

In [150]:
def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    
    for features, targets in data_loader:
        features = features.view(-1, num_features).to(DEVICE)
        targets = targets.to(DEVICE)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
        
    return correct_pred.float() / num_examples * 100
max_runs = 2
runs = 0
start_time = time.time()
epoch_costs = []
for epoch in range(NUM_EPOCHS):
    avg_cost = 0.
    for batch_idx, (features, targets) in enumerate(train_loader):
        runs = runs+1
        features = features.view(-1, num_features).to(DEVICE)
        targets = targets.to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        
        # note that the PyTorch implementation of
        # CrossEntropyLoss works with logits, not
        # probabilities
        cost = F.cross_entropy(probas, targets)
        optimizer.zero_grad()
        cost.backward()
        avg_cost += cost
        avg_cost
        
#         print("Logits:")
#         display(logits)
#         print("Probas:")
#         display(probas)
#         print("Targets:")
#         display(targets)
#         print("Cost:")
#         display(cost)
#         print()
#         if batch_idx > max_runs:
#             break
        
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 200:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, NUM_EPOCHS, batch_idx, 
                     len(train_dataset)//BATCH_SIZE, cost))
    
    
#     if runs > max_runs:
#         break
    
    with torch.set_grad_enabled(False):
        avg_cost = avg_cost/len(train_dataset)
        epoch_costs.append(avg_cost)
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, NUM_EPOCHS, 
              compute_accuracy(model, train_loader)))
        print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/050 | Batch 000/977 | Cost: 2.1972
Epoch: 001/050 | Batch 200/977 | Cost: 2.1945
Epoch: 001/050 | Batch 400/977 | Cost: 2.1553
Epoch: 001/050 | Batch 600/977 | Cost: 2.1926
Epoch: 001/050 | Batch 800/977 | Cost: 2.2026
Epoch: 001/050 training accuracy: 15.32%
Time elapsed: 0.02 min
Epoch: 002/050 | Batch 000/977 | Cost: 2.1696
Epoch: 002/050 | Batch 200/977 | Cost: 2.1848
Epoch: 002/050 | Batch 400/977 | Cost: 2.2144
Epoch: 002/050 | Batch 600/977 | Cost: 2.2548
Epoch: 002/050 | Batch 800/977 | Cost: 2.2000
Epoch: 002/050 training accuracy: 16.55%
Time elapsed: 0.05 min
Epoch: 003/050 | Batch 000/977 | Cost: 2.1715
Epoch: 003/050 | Batch 200/977 | Cost: 2.2162
Epoch: 003/050 | Batch 400/977 | Cost: 2.1391
Epoch: 003/050 | Batch 600/977 | Cost: 2.2243
Epoch: 003/050 | Batch 800/977 | Cost: 2.2150
Epoch: 003/050 training accuracy: 15.43%
Time elapsed: 0.07 min
Epoch: 004/050 | Batch 000/977 | Cost: 2.2312
Epoch: 004/050 | Batch 200/977 | Cost: 2.2149
Epoch: 004/050 | Batch 400

In [75]:
num = 0
for batch_idx, (features, targets) in enumerate(test_loader):
    logits, probas = model(features)
    display(logits[num])
    display(probas[num])
    display(targets[0])
    display(F.cross_entropy(logits, targets))
    break

tensor([-6.1182e+10,  6.2020e+10, -6.1386e+10], grad_fn=<SelectBackward>)

tensor([0., 1., 0.], grad_fn=<SelectBackward>)

tensor(0)

tensor(5.9103e+11, grad_fn=<NllLossBackward>)

In [70]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)

In [71]:
display(input[1])

tensor([ 0.3922,  0.9763, -0.4188, -0.5152,  1.2310], grad_fn=<SelectBackward>)

In [72]:
display(target[1])

tensor(3)

In [73]:
F.cross_entropy(input, target)

tensor(2.0278, grad_fn=<NllLossBackward>)