In [94]:
import torch
import os
import pandas as pd
import numpy as np
import time
import random
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.autograd import Variable
%matplotlib inline

In [95]:
import math

millnames = ['',' Thousand',' Million',' Billion',' Trillion', 'Quad', 'Pent', 'Sextillion', 'Sept', 'Oct']

def millify(n):
    n = float(n)
    millidx = max(0,min(len(millnames)-1,
                        int(math.floor(0 if n == 0 else math.log10(abs(n))/3))))

    return '{:.0f}{}'.format(n / 10**(3 * millidx), millnames[millidx])

In [96]:
# No need to change anything here!
# If there is a GPU available, it will use it,
# otherwise, it will use the CPU

RANDOM_SEED = 123
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [97]:
BATCH_SIZE = 64
EPOCH = 200

In [98]:
class StockDataset(Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        self.y = df['Target'].to_numpy().astype('float32')
        self.features = df.drop(["Company", "Date", "Target", "Label"], axis=1).to_numpy().astype('float32')

#         self.shares = df['Estimated Shares Outstanding'].to_numpy().astype('float32')
#         self.close = df['Close'].to_numpy().astype('float32')
     
#         self.features = df.drop(["Ticker Symbol", "Period Ending", "Estimated Shares Outstanding", "Close"], axis=1).to_numpy().astype('float32')

    def __getitem__(self, index):
        features = self.features[index]
        label = self.y[index]
        return features, label
#         shares = self.shares[index]
#         close = self.close[index]
        
#         return features, shares*close

    def __len__(self):
        return self.y.shape[0]
    
    def get(self, index):
        return __getitem__(self, index)

In [141]:
train_dataset = StockDataset(csv_path='train_raw.csv')


train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=8)


# valid_dataset = StockDataset(csv_path='val.csv')

# valid_loader = DataLoader(dataset=valid_dataset,
#                           batch_size=BATCH_SIZE,
#                           shuffle=False,
#                           num_workers=8)

# test_dataset = StockDataset(csv_path='test.csv')

# test_loader = DataLoader(dataset=test_dataset,
#                          batch_size=BATCH_SIZE,
#                          shuffle=False,
#                          num_workers=8)

In [142]:
torch.manual_seed(0)

num_features = 0

num_epochs = 2
for epoch in range(num_epochs):

    for batch_idx, (x, y) in enumerate(train_loader):
        
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        
        x = x.to(DEVICE)
        y = y.to(DEVICE)
#         print(net(x))
#         print(y)
        num_features = x.shape[1]
        print('break minibatch for-loop')
        break
    break

Epoch: 1 | Batch index: 0 | Batch size: 64
break minibatch for-loop


In [143]:
# another way to define a network
net = torch.nn.Sequential(
        torch.nn.Linear(num_features, 20),
        torch.nn.ReLU(),
        torch.nn.Linear(20, 1),
#         torch.nn.ReLU(),
#         torch.nn.Linear(15, 10),
#         torch.nn.ReLU(),
#         torch.nn.Linear(10, 1)
)

In [144]:
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss

In [145]:
# start training
for epoch in range(EPOCH):
    totalcost = 0
    for step, (batch_x, batch_y) in enumerate(train_loader): # for each training step
        
        b_x = Variable(batch_x)
        b_y = Variable(batch_y)

        prediction = net(b_x)     # input x and predict based on x

        loss = loss_func(prediction, b_y)     # must be (1. nn output, 2. target)
        totalcost += loss
        optimizer.zero_grad()   # clear gradients for next train
        loss.backward()         # backpropagation, compute gradients
        optimizer.step()        # apply gradients

         ### LOGGING
        if not step % 200:
            print (f'Epoch: {epoch+1:03d}/{EPOCH:03d} | '
                   f'Batch {step:03d}/{len(train_loader):03d} |' 
                   f' Cost: {millify(loss)}')
    print (f'Epoch: {epoch+1:03d}/{EPOCH:03d} | '
           f'Output {net(b_x)[0]} |' 
                   f' Cost: {millify(totalcost)}')

Epoch: 001/200 | Batch 000/978 | Cost: 1Pent
Epoch: 001/200 | Batch 200/978 | Cost: 66Quad
Epoch: 001/200 | Batch 400/978 | Cost: 76Quad
Epoch: 001/200 | Batch 600/978 | Cost: 1Pent
Epoch: 001/200 | Batch 800/978 | Cost: 317Quad
Epoch: 001/200 | Output tensor([6013484.], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 002/200 | Batch 000/978 | Cost: 423Quad
Epoch: 002/200 | Batch 200/978 | Cost: 24Quad
Epoch: 002/200 | Batch 400/978 | Cost: 70Quad
Epoch: 002/200 | Batch 600/978 | Cost: 138Quad
Epoch: 002/200 | Batch 800/978 | Cost: 545Quad
Epoch: 002/200 | Output tensor([1503984.1250], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 003/200 | Batch 000/978 | Cost: 358Quad
Epoch: 003/200 | Batch 200/978 | Cost: 21Quad
Epoch: 003/200 | Batch 400/978 | Cost: 86Quad
Epoch: 003/200 | Batch 600/978 | Cost: 62Quad
Epoch: 003/200 | Batch 800/978 | Cost: 285Quad
Epoch: 003/200 | Output tensor([46278.0938], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 004/200 | Batch 000/978 | Cost: 311Quad
Epoch: 

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11ba73c20>
Traceback (most recent call last):
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/Users/ritik/anaconda3/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


Epoch: 077/200 | Batch 200/978 | Cost: 10Quad


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11ba73c20>
Traceback (most recent call last):
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/Users/ritik/anaconda3/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


Epoch: 077/200 | Batch 400/978 | Cost: 22Quad
Epoch: 077/200 | Batch 600/978 | Cost: 74Quad
Epoch: 077/200 | Batch 800/978 | Cost: 3Quad
Epoch: 077/200 | Output tensor([-2927592.2500], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 078/200 | Batch 000/978 | Cost: 58Quad


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11ba73c20>
Traceback (most recent call last):
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/Users/ritik/anaconda3/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


Epoch: 078/200 | Batch 200/978 | Cost: 10Quad


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11ba73c20>
Traceback (most recent call last):
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/Users/ritik/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/Users/ritik/anaconda3/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process


Epoch: 078/200 | Batch 400/978 | Cost: 23Quad
Epoch: 078/200 | Batch 600/978 | Cost: 75Quad
Epoch: 078/200 | Batch 800/978 | Cost: 2Quad
Epoch: 078/200 | Output tensor([-3279842.2500], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 079/200 | Batch 000/978 | Cost: 57Quad
Epoch: 079/200 | Batch 200/978 | Cost: 10Quad
Epoch: 079/200 | Batch 400/978 | Cost: 23Quad
Epoch: 079/200 | Batch 600/978 | Cost: 79Quad
Epoch: 079/200 | Batch 800/978 | Cost: 3Quad
Epoch: 079/200 | Output tensor([-2933268.2500], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 080/200 | Batch 000/978 | Cost: 57Quad
Epoch: 080/200 | Batch 200/978 | Cost: 11Quad
Epoch: 080/200 | Batch 400/978 | Cost: 23Quad
Epoch: 080/200 | Batch 600/978 | Cost: 80Quad
Epoch: 080/200 | Batch 800/978 | Cost: 2Quad
Epoch: 080/200 | Output tensor([-3285832.2500], grad_fn=<SelectBackward>) | Cost: 2Oct
Epoch: 081/200 | Batch 000/978 | Cost: 56Quad
Epoch: 081/200 | Batch 200/978 | Cost: 11Quad
Epoch: 081/200 | Batch 400/978 | Cost: 24Quad
Epoch:

In [119]:
test_dataset = StockDataset(csv_path='test_raw.csv')

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=8)

In [124]:
lower = 0
higher = 64
num = 15
for step, (batch_x, batch_y) in enumerate(test_loader): # for each training step
    if step == num:
        b_x = Variable(batch_x)
        b_y = Variable(batch_y).view(-1,1)

        prediction = net(b_x).view(-1,1)     # input x and predict based on x
        display((100*(b_y - prediction)/b_y)[lower:higher])
        display(prediction[lower:higher])
        display(b_y[lower:higher])
        break

tensor([[-4.1401e+02],
        [-2.2735e+02],
        [-3.3610e+02],
        [-3.0691e+02],
        [-2.1088e+02],
        [-1.5842e+02],
        [-1.8250e+02],
        [-2.6033e+02],
        [-3.0958e+01],
        [-5.0933e+01],
        [-3.5385e+01],
        [-1.2699e+01],
        [-6.1908e+00],
        [ 2.0096e+00],
        [ 2.1231e+01],
        [ 1.2508e+01],
        [ 1.6884e+01],
        [ 2.5842e+01],
        [ 1.6107e+01],
        [ 2.6357e+01],
        [ 1.9306e+01],
        [-8.5535e+03],
        [-1.3778e+04],
        [-2.0523e+04],
        [-1.6040e+03],
        [-2.1586e+03],
        [-1.7117e+03],
        [-1.6027e+03],
        [-3.5716e+03],
        [-5.3891e+01],
        [-3.9045e+01],
        [-4.2825e+01],
        [-2.2496e+01],
        [-1.4233e+01],
        [-1.3365e+01],
        [-4.7105e-01],
        [-2.7618e+02],
        [-3.8264e+02],
        [-3.6164e+02],
        [-3.1975e+02],
        [-3.8723e+02],
        [-3.3800e+02],
        [-4.0786e+02],
        [-1

tensor([[220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],
        [220.6646],


tensor([[ 42.9300],
        [ 67.4100],
        [ 50.6000],
        [ 54.2300],
        [ 70.9800],
        [ 85.3900],
        [ 78.1100],
        [ 61.2400],
        [168.5000],
        [146.2000],
        [162.9900],
        [195.8000],
        [207.8000],
        [225.1900],
        [280.1400],
        [252.2100],
        [265.4900],
        [297.5600],
        [263.0300],
        [299.6400],
        [273.4600],
        [  2.5500],
        [  1.5900],
        [  1.0700],
        [ 12.9500],
        [  9.7700],
        [ 12.1800],
        [ 12.9600],
        [  6.0100],
        [143.3900],
        [158.7000],
        [154.5000],
        [180.1400],
        [193.1700],
        [194.6500],
        [219.6300],
        [ 58.6600],
        [ 45.7200],
        [ 47.8000],
        [ 52.5700],
        [ 45.2900],
        [ 50.3800],
        [ 43.4500],
        [ 13.3300],
        [  9.3500],
        [ 11.6400],
        [ 13.2200],
        [ 12.8600],
        [ 10.8000],
        [ 46.0600],
