In [1]:
import psycopg2
import datetime
import numpy as np
import pandas as pd
from itertools import islice

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn import preprocessing
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters

from finance.utilities import utils
from finance.data_science.utilities import financial_utils, time_series_utils, cluster_utils, random_forest_utils

In [2]:
register_matplotlib_converters()

%matplotlib notebook
%matplotlib inline

### Stock data

In [3]:
query = """
    with raw as (
        select 
            market_datetime
            , symbol
            , open
            , volume
            , avg(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as average_open_hundred_days
            , avg(open) over (partition by symbol order by market_datetime rows between 50 preceding and current row) as average_open_fifty_days
            , avg(open) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_open_ten_days
            , max(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as high_open_hundred_days
            , min(open) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as low_open_hundred_days
            , avg(volume) over (partition by symbol order by market_datetime rows between 100 preceding and current row) as average_volume_hundred_days
            , avg(volume) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_volume_ten_days
            , avg(open - close) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_daily_finish_ten_days
            , avg(high - low) over (partition by symbol order by market_datetime rows between 10 preceding and current row) as average_daily_range_ten_days
            , lead(open, 10) over (partition by symbol order by market_datetime) as open_ten_days_forward
        from td.equities)
    select * 
    from raw
    where open_ten_days_forward is not null
    limit 10000
    """

df = utils.query_db(query=query)
df.head()

Unnamed: 0,market_datetime,symbol,open,volume,average_open_hundred_days,average_open_fifty_days,average_open_ten_days,high_open_hundred_days,low_open_hundred_days,average_volume_hundred_days,average_volume_ten_days,average_daily_finish_ten_days,average_daily_range_ten_days,open_ten_days_forward
0,1999-11-18 06:00:00,A,32.546495,62546377.0,32.546495,32.546495,32.546495,32.546495,32.546495,62546377.0,62546377.0,1.072961,7.153077,32.144134
1,1999-11-19 06:00:00,A,30.713519,15234143.0,31.630007,31.630007,31.630007,32.546495,30.713519,38890260.0,38890260.0,1.452968,4.71656,32.367668
2,1999-11-22 06:00:00,A,29.551144,6577866.0,30.937053,30.937053,30.937053,32.546495,29.551144,28119462.0,28119462.0,0.327849,4.083215,32.725322
3,1999-11-23 06:00:00,A,30.400572,5975608.0,30.802933,30.802933,30.802933,32.546495,29.551144,22583498.5,22583498.5,0.692954,3.710659,32.367668
4,1999-11-24 06:00:00,A,28.701717,4843228.0,30.382689,30.382689,30.382689,32.546495,28.701717,19035444.4,19035444.4,0.420243,3.245709,32.367668


### Check for gpus

In [4]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(1))

True
GeForce GTX 1080 Ti


In [5]:
df = cluster_utils.encode_one_hot(df=df, column='symbol')

for col in ['open', 'high', 'low', 'close', 'volume']:
    df = cluster_utils.normalize(df=df, column=col, subset='symbol')

In [7]:
train = df.copy()
train = train.drop(columns=['symbol'])
train['market_datetime'] = train['market_datetime'].astype(int)

In [15]:
train_target = train['open_ten_days_forward']

In [16]:
split_size = 80000
train_x = train.iloc[1:split_size].values
train_y = train_target.iloc[1:split_size].values

test_x = train.iloc[split_size+1:-1].values
test_y = train_target.iloc[split_size+1:-1].values

In [36]:
print(torch.cuda.memory_allocated())

print(torch.cuda.memory_cached())

2035200
2359296


In [18]:
tensor_x = torch.tensor(train_x).cuda(0)
tensor_y = torch.tensor(train_y).cuda(0)

In [30]:
input_size = 15
output_size = 1
learning_rate = 0.01 
epochs = 100

model = torch.nn.Linear(input_size, output_size).cuda(0)

criterion = torch.nn.MSELoss().cuda(0)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [34]:
with torch.cuda.device(1):
    for t in range(2):
        prediction = model(tensor_x.float()).cuda(0)

        loss = criterion(prediction, tensor_y.float()).cuda(0)

        optimizer.zero_grad() # .cuda(0)
        loss.backward() #.cuda(0)
        optimizer.step() #.cuda(0)
        
        loss.item()

In [27]:
loss.item()

inf

In [38]:
torch.cuda.empty_cache()

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import cluster as skcluster
import torch
from torch import nn, functional, optim, autograd

from finance.utilities import utils, cluster_utils


class TorchNN:
    def __init__(self, 
                 train_x=pd.DataFrame, 
                 train_y=pd.DataFrame,
                 test_x=pd.DataFrame, 
                 test_y=pd.DataFrame, 
                 learning_rate=0,
                 momentum=.9, 
                 hidden_size=None, 
                 out_features=1):
        self.train_x = torch.tensor(train_x.values).float().view(1, -1)
        self.train_y = torch.tensor(train_y.values).float()
        # self.test_x = torch.tensor(test_x)
        # self.test_y = torch.tensor(test_y)
        self.size = train_x.shape[0] * train_x.shape[1]
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.linear_one = nn.Linear(in_features=self.size, out_features=hidden_size[0]).float()
        self.linear_two = nn.Linear(in_features=hidden_size[0], out_features=out_features).float()

    def forward(self):
        # output = self.linear_one(self.train_x.view(-1, 1))
        output = self.linear_one(self.train_x.view(self.train_x.size(0), -1))
        # output = self.linear_one(self.train_x)
        output = functional.F.relu(output)
        output = self.linear_two(output)
        return output

    def optimizer(self, model):
        optimizer = optim.SGD(nn.Module.parameters(model), lr=self.learning_rate, momentum=self.momentum)
        return optimizer

    def loss_criterion(self):
        return nn.NLLLoss()

    def train_network(self, model):
        for idx, row in self.train_x.iterrows():
            data = autograd.Variable(row)
            target = autograd.Variable(self.train_y[idx])

            self.optimizer.zero_grad()
            net_out = model(data)
            loss = self.loss_criterion(net_out, target)
            loss.backward()

            self.optimizer.step()


if __name__ == '__main__':
    query = """
        select
            e.symbol
            , e.market_datetime
            , e.open
            , e.high
            , e.low
            , e.close
            , e.volume
        from td.equities as e
        where 
        --left(e.symbol, 1) = 'A'
        e.symbol = 'AA'
        order by e.market_datetime
        limit 100
        """
    df = utils.query_db(query=query)
    for col in ['open']:
        df = cluster_utils.normalize(df=df, column=col, subset='symbol')
    df['market_datetime'] = df['market_datetime'].astype(int)

    train = df.copy()
    train = train.drop(columns=['symbol'])
    train['market_datetime'] = train['market_datetime'].astype(int)

    train_target = train['open'].shift(-1)

    split_size = 80000
    train_x = train.iloc[1:split_size]
    train_y = train_target.iloc[1:split_size]

    test_x = train.iloc[split_size + 1:-1]
    test_y = train_target.iloc[split_size + 1:-1]

    model = TorchNN(train_x=train_x, train_y=train_y, hidden_size=(1,)).forward()
    print(model)


tensor([[0.1446]], grad_fn=<AddmmBackward>)


In [2]:
import torch


class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we instantiate two nn.Linear modules and assign them as
        member variables.
        """
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

99 3.5007288455963135
199 0.13228681683540344
299 0.008658592589199543
399 0.0007689027697779238
499 7.713446393609047e-05
