In [75]:
import torch
import os
import pandas as pd
import numpy as np
import time
import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [101]:
# Notebook-wide settings: the seed value handed to the RNGs further down, and
# the device tensors and the model are moved to (first CUDA GPU if one is
# available, otherwise the CPU).
RANDOM_SEED = 123
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [102]:
class StockDataset(Dataset):
    """Map-style dataset that serves (features, label) pairs read from a CSV file.

    The CSV must contain the columns ``Company``, ``Date``, ``Target`` and
    ``Label``. ``Label`` becomes the integer class target; all four of those
    columns are dropped from the frame and every remaining column is cast to
    float32 and used as a feature.

    Args:
        csv_path: Path of the CSV file to load (read eagerly in ``__init__``).
    """

    # Columns removed from the frame before it is converted to the feature matrix.
    _NON_FEATURE_COLUMNS = ["Company", "Date", "Target", "Label"]

    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        # Request int64 explicitly: the width of plain ``int`` is platform
        # dependent in NumPy (int32 on Windows with NumPy 1.x), while
        # F.cross_entropy requires int64 class-index targets.
        self.y = df['Label'].to_numpy().astype(np.int64)
        self.features = (
            df.drop(columns=self._NON_FEATURE_COLUMNS)
            .to_numpy()
            .astype('float32')
        )

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.y[index]

    def __len__(self):
        """Return the number of rows (examples) in the dataset."""
        return self.y.shape[0]

In [103]:
# Mini-batch size passed to every DataLoader in this notebook.
BATCH_SIZE = 128

In [104]:
# One dataset per split, each read from its own CSV file.
train_dataset = StockDataset(csv_path='./My Data/train.csv')
valid_dataset = StockDataset(csv_path='./My Data/val.csv')
test_dataset = StockDataset(csv_path='./My Data/test.csv')

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False, num_workers=4)

In [105]:
# Sanity check of the training DataLoader: for each of two epochs, fetch only
# the first mini-batch, print its size, move it to DEVICE, then stop iterating.
torch.manual_seed(0)

num_epochs = 2
for epoch in range(num_epochs):

    for batch_idx, (x, y) in enumerate(train_loader):
        
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        
        x = x.to(DEVICE)
        y = y.to(DEVICE)
        # Only the first batch of each epoch is inspected.
        print('break minibatch for-loop')
        break

Epoch: 1 | Batch index: 0 | Batch size: 128
break minibatch for-loop
Epoch: 2 | Batch index: 0 | Batch size: 128
break minibatch for-loop


In [114]:
class SoftmaxRegression(torch.nn.Module):
    """Softmax (multinomial logistic) regression: one linear layer plus softmax.

    Args:
        num_features: Size of each input row.
        num_classes: Number of output classes (columns of the logits).
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.linear = torch.nn.Linear(num_features, num_classes)

        # Overwrite the default Linear initialisation in place:
        # weights drawn from a normal with mean 0.0 and std 0.1, bias set to 0.
        torch.nn.init.normal_(self.linear.weight, mean=0.0, std=0.1)
        torch.nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        """Return ``(logits, probas)``; ``probas`` is softmax over dim 1 of ``logits``."""
        logits = self.linear(x)
        return logits, torch.softmax(logits, dim=1)

In [115]:
# Seed every RNG available to this notebook (Python, NumPy, PyTorch) with the
# same value so repeated runs start from the same random state.
# torch.manual_seed stays last so the cell's displayed value is unchanged.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x123baf7b0>

In [116]:
# Model dimensions are named here, next to their first use, instead of being
# repeated as bare literals. num_features must equal the width of the feature
# rows fed to the model; num_classes is the number of logits it produces.
num_features = 143
num_classes = 3

model = SoftmaxRegression(num_features=num_features, num_classes=num_classes)
model = model.to(DEVICE)

NameError: name 'conv1' is not defined

In [109]:
# Dimensions of the classification problem: inputs per example and number of classes.
num_features = 143
num_classes = 3

In [110]:
# Learning-rate (step-size) hyperparameter.
learning_rate = 0.1

In [111]:
# Adam is the optimizer actually used for training. This cell previously built
# an SGD(lr=learning_rate, momentum=0.9) optimizer first and overwrote it on
# the very next line, so that object never took a step; the dead assignment is
# dropped. Adam keeps its own literal step size to preserve the original
# behavior.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [112]:
# Number of passes over train_loader performed by the training loop.
NUM_EPOCHS = 50

In [113]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` over ``data_loader``, in percent.

    The model is evaluated in eval mode with gradient tracking disabled; its
    previous train/eval mode is restored before returning.

    Args:
        model: ``torch.nn.Module`` whose forward pass returns ``(logits, probas)``.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to. When ``None`` (default) the
            notebook-level ``DEVICE`` is used.

    Returns:
        0-dim float tensor: percentage of examples whose arg-max class
        probability matches the target.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    if device is None:
        device = DEVICE

    # Tensor accumulator so the final ``.float()`` works even after zero batches
    # and no per-batch host/device synchronisation is forced.
    correct_pred = torch.zeros((), dtype=torch.long, device=device)
    num_examples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for features, targets in data_loader:
                # Flatten each example into one row; the batch size is taken
                # from the targets so no global feature count is needed.
                features = features.reshape(targets.size(0), -1).to(device)
                targets = targets.to(device)
                _, probas = model(features)
                _, predicted_labels = torch.max(probas, 1)
                num_examples += targets.size(0)
                correct_pred += (predicted_labels == targets).sum()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if num_examples == 0:
        raise ValueError('compute_accuracy: data_loader yielded no examples')

    return correct_pred.float() / num_examples * 100

# Training loop: one optimizer step per mini-batch, then per-epoch reporting of
# the training accuracy and elapsed wall-clock time.
start_time = time.time()
epoch_costs = []  # mean per-batch training loss of each epoch, as Python floats
for epoch in range(NUM_EPOCHS):
    model.train()
    cost_sum = 0.
    num_batches = 0
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.view(-1, num_features).to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)

        # F.cross_entropy applies log-softmax itself, so it is given the raw
        # logits rather than the probabilities.
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        # Accumulate a plain float: summing the loss tensors themselves would
        # keep every batch's autograd history alive for the whole epoch.
        batch_cost = cost.item()
        cost_sum += batch_cost
        num_batches += 1

        ### LOGGING
        if not batch_idx % 200:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, NUM_EPOCHS, batch_idx, 
                     len(train_dataset)//BATCH_SIZE, batch_cost))

    with torch.set_grad_enabled(False):
        # Each batch cost is already a mean over its examples, so the epoch
        # average divides by the number of batches, not the number of samples.
        avg_cost = cost_sum / max(num_batches, 1)
        epoch_costs.append(avg_cost)
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, NUM_EPOCHS, 
              compute_accuracy(model, train_loader)))
        print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/050 | Batch 000/490 | Cost: 1.0986
Epoch: 001/050 | Batch 200/490 | Cost: 1.0927
Epoch: 001/050 | Batch 400/490 | Cost: 1.2282
Epoch: 001/050 training accuracy: 44.90%
Time elapsed: 0.02 min
Epoch: 002/050 | Batch 000/490 | Cost: 1.0846
Epoch: 002/050 | Batch 200/490 | Cost: 1.0882
Epoch: 002/050 | Batch 400/490 | Cost: 1.0794
Epoch: 002/050 training accuracy: 44.86%
Time elapsed: 0.03 min
Epoch: 003/050 | Batch 000/490 | Cost: 1.0757
Epoch: 003/050 | Batch 200/490 | Cost: 1.0843
Epoch: 003/050 | Batch 400/490 | Cost: 1.0428
Epoch: 003/050 training accuracy: 44.93%
Time elapsed: 0.05 min
Epoch: 004/050 | Batch 000/490 | Cost: 1.0971
Epoch: 004/050 | Batch 200/490 | Cost: 1.0864
Epoch: 004/050 | Batch 400/490 | Cost: 1.0676
Epoch: 004/050 training accuracy: 44.97%
Time elapsed: 0.07 min
Epoch: 005/050 | Batch 000/490 | Cost: 1.0845
Epoch: 005/050 | Batch 200/490 | Cost: 1.0772
Epoch: 005/050 | Batch 400/490 | Cost: 1.0641
Epoch: 005/050 training accuracy: 44.99%
Time elapsed: