In [1]:
import psycopg2
import datetime
import numpy as np
import pandas as pd
from itertools import islice

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn import preprocessing
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters

from finance.utilities import utils
from finance.data_science.utilities import financial_utils, time_series_utils, cluster_utils, random_forest_utils

In [2]:
register_matplotlib_converters()

%matplotlib notebook
%matplotlib inline

### Stock data

In [3]:
query = """
    with raw as (
        select 
            market_datetime
            , symbol
            , open
            , volume
            , avg(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as average_open_hundred_days
            , avg(open) over (partition by symbol order by market_datetime rows between 50 preceding and current row) as average_open_fifty_days
            , avg(open) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_open_ten_days
            , max(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as high_open_hundred_days
            , min(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as low_open_hundred_days
            , avg(volume) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as average_volume_hundred_days
            , avg(volume) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_volume_ten_days
            , avg(open - close) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_daily_finish_ten_days
            , avg(high - low) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_daily_range_ten_days
            , lead(open, 10) over (partition by symbol order by market_datetime) as open_ten_days_forward
        from td.equities)
    select * 
    from raw
    where open_ten_days_forward is not null
    limit 10000
    """

df = utils.query_db(query=query)
df.head()

Unnamed: 0,market_datetime,symbol,open,volume,average_open_hundred_days,average_open_fifty_days,average_open_ten_days,high_open_hundred_days,low_open_hundred_days,average_volume_hundred_days,average_volume_ten_days,average_daily_finish_ten_days,average_daily_range_ten_days,open_ten_days_forward
0,1999-11-18 06:00:00,A,32.546495,62546377.0,32.546495,32.546495,32.546495,32.546495,32.546495,62546377.0,62546377.0,1.072961,7.153077,32.144134
1,1999-11-19 06:00:00,A,30.713519,15234143.0,31.630007,31.630007,31.630007,32.546495,30.713519,38890260.0,38890260.0,1.452968,4.71656,32.367668
2,1999-11-22 06:00:00,A,29.551144,6577866.0,30.937053,30.937053,30.937053,32.546495,29.551144,28119462.0,28119462.0,0.327849,4.083215,32.725322
3,1999-11-23 06:00:00,A,30.400572,5975608.0,30.802933,30.802933,30.802933,32.546495,29.551144,22583498.5,22583498.5,0.692954,3.710659,32.367668
4,1999-11-24 06:00:00,A,28.701717,4843228.0,30.382689,30.382689,30.382689,32.546495,28.701717,19035444.4,19035444.4,0.420243,3.245709,32.367668


### Check for gpus

In [4]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(1))

True
GeForce GTX 1080 Ti


In [5]:
df = cluster_utils.encode_one_hot(df=df, column='symbol')

In [6]:
df.head()

Unnamed: 0,market_datetime,symbol,open,volume,average_open_hundred_days,average_open_fifty_days,average_open_ten_days,high_open_hundred_days,low_open_hundred_days,average_volume_hundred_days,average_volume_ten_days,average_daily_finish_ten_days,average_daily_range_ten_days,open_ten_days_forward,A,AA
0,1999-11-18 06:00:00,A,32.546495,62546377.0,32.546495,32.546495,32.546495,32.546495,32.546495,62546377.0,62546377.0,1.072961,7.153077,32.144134,1,0
1,1999-11-19 06:00:00,A,30.713519,15234143.0,31.630007,31.630007,31.630007,32.546495,30.713519,38890260.0,38890260.0,1.452968,4.71656,32.367668,1,0
2,1999-11-22 06:00:00,A,29.551144,6577866.0,30.937053,30.937053,30.937053,32.546495,29.551144,28119462.0,28119462.0,0.327849,4.083215,32.725322,1,0
3,1999-11-23 06:00:00,A,30.400572,5975608.0,30.802933,30.802933,30.802933,32.546495,29.551144,22583498.5,22583498.5,0.692954,3.710659,32.367668,1,0
4,1999-11-24 06:00:00,A,28.701717,4843228.0,30.382689,30.382689,30.382689,32.546495,28.701717,19035444.4,19035444.4,0.420243,3.245709,32.367668,1,0


In [7]:
import pandas as pd
import torch
from torch import nn, functional, optim, autograd

from finance.utilities import utils
from finance.data_science.utilities import cluster_utils


class TorchNN(nn.Module):
    def __init__(self,
                 device='cuda:0',
                 train_x=pd.DataFrame,
                 train_y=pd.DataFrame,
                 hidden_size=8,
                 output_size=1,
                 learning_rate=0,
                 momentum=.9):
        
        self.device = device
        
        self.train_x = torch.tensor(train_x.values).to(self.device).float().view(1, -1).detach().requires_grad_(True)
        self.train_y = torch.tensor(train_y.values).to(self.device).float()

        self.size = train_x.shape[0] * train_x.shape[1]
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.learning_rate = learning_rate
        self.momentum = momentum

    @property
    def layers(self):
        layers = {
            'linear_one': nn.Linear(in_features=self.size, out_features=self.hidden_size).float().to(self.device),
            'linear_two': nn.Linear(in_features=self.hidden_size, out_features=self.output_size).float().to(self.device)
        }
        return layers

    def forward(self, data):
        for layer in self.layers:
            data = self.layers.get(layer)(data)
        return data

    def optimizer(self):
        return optim.SGD([self.train_x], lr=self.learning_rate, momentum=self.momentum)

    @property
    def criterion(self):
        return nn.MSELoss().to(self.device)

    @property
    def n_epochs(self):
        return 2

    def train_network(self):
        running_loss = 0
        optimizer = self.optimizer()
        criterion = self.criterion
        
        losses = []
        for epoch in range(self.n_epochs):
            for data, target in zip(self.train_x, self.train_y):
                                
                optimizer.zero_grad()

                outputs = self.forward(data).to(self.device)
                loss = criterion(outputs, target).to(self.device)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                losses.append(loss.item())
        
        return losses

In [8]:
net = TorchNN(train_x=df[['open', 'volume']], train_y=df['open_ten_days_forward'])

In [11]:
net.train_network()

[668017491968.0, 101324382208.0]

In [12]:
net.forward()

TypeError: forward() missing 1 required positional argument: 'data'