## Implement 3 classifiers to predict the stock movement
* Logistic Regression
* Neural Network
* SVM


In [0]:
import torch.nn as nn
import torch
import torch.nn.functional as F


class LogisticRegression(nn.Module):
  def __init__(self, size):
    super(LogisticRegression, self).__init__()
    self.size = size
    self.linear = nn.Linear(size, 1)

  def forward(self, batch_x):
    y = self.linear(batch_x)
    return torch.sigmoid(y).round()

class SVM(nn.Module):
  def __init__(self, size):
    super(SVM, self).__init__()
    self.size = size
    self.linear = nn.Linear(size, 1)
  def forward(self, batch_x):
    return self.linear(batch_x).round()

class NeuralNetwork(nn.Module):
  def __init__(self, size):
    super(NeuralNetwork, self).__init__()
    self.size = size
    self.layer1 = nn.Sequential(
        nn.Linear(size, 32),
        nn.ReLU()
    )
    self.layer2 = nn.Sequential(
        nn.Linear(32, 8),
        nn.Tanh()
    )
    self.layer3 = nn.Linear(8, 1)
    
  def forward(self, batch_x):
    h1 = self.layer1(batch_x)
    h2 = self.layer2(h1)
    y = self.layer3(h2)

    return torch.sigmoid(y).round()


In [0]:
from torch.utils.data import Dataset

class StockDataset(Dataset):
  def __init__(self, x, y):
    self.x = x
    self.y = y
  def __len__(self): return len(self.x)
  def __getitem__(self, i): return self.x[i], self.y[i]

#### I'll try to predict the Close Price movement based on the previous several prices

In [231]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

NUM_FETCH = 5 # Number of days to fetch for next prediction
NUM_BATCH = 20
NUM_EPOCH = 500 # Number of epochs to train

def preprocess_x(data, num_fetch):
  ret = []
  for n, c in data.iteritems():
    for i in range(len(c)-num_fetch):
      try:
        ret[i]
      except:
        ret.append(np.array([]))
        
      ret[i] = np.concatenate((ret[i], c[i:i+num_fetch]))
  return np.array(ret)

def preprocess_data(data):
  ret = pd.DataFrame()
  for n, c in data.iteritems():
    if n == "Close Price":      
      x = np.array([int(c[i]>c[i-1]) for i in range(1, len(c))])
      ret[n] = x
  
  return ret

def collate_fn(batch):
  x_list = []
  y_list = []
  for x, y in batch:
    x_list.append([x])
    y_list.append([y])
  return [torch.tensor(x_list).float(), torch.tensor(y_list).float()]


def train(model, criterion, optimizer):
  global dataloader, test_dataloader
  for epoch in range(NUM_EPOCH):
    model.train()
    if epoch % 100 == 0: print(f'Epoch {epoch}==========================')
    for batch_x, batch_y in dataloader:
      pred_y = model(batch_x).reshape(-1, 1)
      loss = criterion(pred_y, batch_y)
      loss.backward()
      optimizer.step()
      
    with torch.no_grad():
      total_loss = 0
      accuracy = 0
      for batch_x, batch_y in dataloader:
        pred_y = model(batch_x).reshape(-1, 1)
        loss = criterion(pred_y, batch_y)

        total_loss += float(loss)/len(dataloader)
        equals = (pred_y==batch_y)
        accuracy += torch.mean(equals.type(torch.FloatTensor))
      if epoch % 100 == 0: 
        print(f'Training loss: {total_loss}, accuracy:{accuracy/len(dataloader)}')
train_data = pd.read_csv('train.csv')
preprocessed = preprocess_data(train_data)
train_data_y = np.array([preprocessed['Close Price'][i] for i in range(NUM_FETCH, len(preprocessed))])
train_data_x = preprocess_x(preprocessed, NUM_FETCH)
dataset = StockDataset(train_data_x, train_data_y)
dataloader = DataLoader(
    dataset,
    batch_size=NUM_BATCH,
    shuffle=True,
    collate_fn=collate_fn
)

print("======Logistic Regression======")
logistic_regression = LogisticRegression(NUM_FETCH)
criterion = nn.MSELoss()
optimizer = optim.SGD(logistic_regression.parameters(), lr=1e-3)
train(logistic_regression, criterion, optimizer)

print("======Neural Network======")

neural_network = NeuralNetwork(NUM_FETCH)
criterion = nn.MSELoss()
optimizer = optim.SGD(neural_network.parameters(), lr=1e-3)
train(neural_network, criterion, optimizer)

print("========SVM===============")
svm = SVM(NUM_FETCH)
criterion = nn.MSELoss()
optimizer = optim.SGD(svm.parameters(), lr=1e-4)
train(svm, criterion, optimizer)


Training loss: 0.5221238968382895, accuracy:0.47787612676620483
Training loss: 0.5221238956514712, accuracy:0.47787612676620483
Training loss: 0.5222222248537354, accuracy:0.47777771949768066
Training loss: 0.5221730602526032, accuracy:0.47782689332962036
Training loss: 0.5219764018480759, accuracy:0.47802355885505676
Training loss: 0.4533431680329083, accuracy:0.5466567277908325
Training loss: 0.4534906586714553, accuracy:0.5465093851089478
Training loss: 0.45363815273858843, accuracy:0.5463617444038391
Training loss: 0.45339233223843395, accuracy:0.5466075539588928
Training loss: 0.4535398240637993, accuracy:0.5464602112770081
Training loss: 0.5464110150273921, accuracy:0.45358893275260925
Training loss: 0.5466568357912842, accuracy:0.4533431828022003
Training loss: 0.5465585043472527, accuracy:0.45344141125679016
Training loss: 0.5466076710582836, accuracy:0.45339229702949524
Training loss: 0.5463618493713109, accuracy:0.4536382257938385


In [224]:
test_data = pd.read_csv('test.csv')
preprocessed = preprocess_data(test_data)
test_data_y = np.array([preprocessed['Close Price'][i] for i in range(NUM_FETCH, len(preprocessed))])
test_data_x = preprocess_x(preprocessed, NUM_FETCH)

correct = torch.tensor(test_data_y)>0
logistic_regression.eval()
predict = (logistic_regression(torch.tensor(test_data_x).float())).reshape(-1)
size = len(correct)
correct = (correct == predict).sum().item()
print(correct, correct/size)

correct = torch.tensor(test_data_y)>0
neural_network.eval()
predict = (neural_network(torch.tensor(test_data_x).float())).reshape(-1)
correct = (correct == predict).sum().item()
print(correct, correct/size)

correct = torch.tensor(test_data_y)>0
svm.eval()
predict = (svm(torch.tensor(test_data_x).float())).reshape(-1)
correct = (correct == predict).sum().item()
print(correct, correct/size)

print(size)

126 0.5121951219512195
126 0.5121951219512195
120 0.4878048780487805
246
