#### Imports

In [1]:
import multiprocessing
from joblib import Parallel, delayed
import math
import pandas as pd
import numpy as np
from PIL import Image as pil_image
import math
from os import listdir
from os.path import isfile, join
from tqdm import tqdm_notebook as tqdm
import requests
from datetime import datetime
from pathlib import Path
from IPython.display import clear_output

from strategy.TINNStrategy import prepare_data
from chartobjects.Charting import Charting

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.functional import relu
from torch.backends import cudnn
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader
from torch import optim
import torchvision
from torchvision import models

import warnings
warnings.filterwarnings("ignore")

In [None]:
import os

path = 'data/Stocks'

files = []
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
    for file in f:
        if '.txt' in file:
            files.append(os.path.join(r, file))

### Import dataset and preprocess data

In [2]:
dataset_df = pd.read_csv('data/ETHBTC.csv')[:5000]
dataset_df.reset_index(drop=True, inplace=True)
dataset_df.head()

Unnamed: 0,time,open,high,low,close,volume
0,1533772800000,0.05656,0.056674,0.05656,0.056621,56.604
1,1533772860000,0.056621,0.056624,0.056558,0.056607,202.197
2,1533772920000,0.056601,0.056668,0.056599,0.056649,29.063
3,1533772980000,0.056638,0.056673,0.056628,0.056652,32.376
4,1533773040000,0.056639,0.056691,0.056638,0.056675,256.55


#### refactoring steps to get dataset_df into shape [time, open, high, low, close, volume]

In [3]:
dataset_df = dataset_df.drop('OpenInt', axis=1)
dataset_df.columns = ['date', 'open', 'high', 'low', 'close', 'volume']

timestamp_col = [datetime.timestamp(datetime.strptime(dte, '%Y-%m-%d')) for dte in dataset_df.date]
dataset_df.date = timestamp_col
dataset_df.columns = ['time', 'open', 'high', 'low', 'close', 'volume']
dataset_df.to_csv('data/Stocks/aapl.csv', index=False)

KeyError: "['OpenInt'] not found in axis"

In [4]:
dataset_df.head()

Unnamed: 0,time,open,high,low,close,volume
0,1533772800000,0.05656,0.056674,0.05656,0.056621,56.604
1,1533772860000,0.056621,0.056624,0.056558,0.056607,202.197
2,1533772920000,0.056601,0.056668,0.056599,0.056649,29.063
3,1533772980000,0.056638,0.056673,0.056628,0.056652,32.376
4,1533773040000,0.056639,0.056691,0.056638,0.056675,256.55


#### Add technical indicators as feature columns

In [5]:
dataset_df = prepare_data(dataset_df)

### Helper functions

In [6]:
def print_progress(at, total):
    """Clears cell output and prints percentage progress"""
    clear_output()
    print("progress: {}%".format(round((at/total)*100,2)))

In [7]:
def split_dataset(dataset_df, a=0, b=30, step_size=5):
    """Split dataset_df into a list of DataFrames. Sliding window of b-a. step_size = 5
    returns [ [DataFrame], [DataFrame], ..., [DataFrame] ] where length is dependent on a, b, and step_size. 
    """
    
    end_b = dataset_df.shape[0] 
    
    dataset_splits = []
    
    # Take a 30 period window of dataset_df, with a step size of 5
    while b < end_b:
        if b % 10000 == 0: print_progress(b, end_b)
        
        window = dataset_df.iloc[a:b, :]
        dataset_splits.append(window)
        
        a += step_size
        b += step_size
    # dataset_splits = dataset_splits[:len(dataset_splits)-5] # remove last 5 element since we predict price t+5
    return dataset_splits

In [8]:
def split_columns(split_dataset, cols_list, a=0, b=30, step_size=5):
    """Split columns dataset_df into a list of DataFrames. Sliding window of b-a. step_size = 5
     returns [ [log_col1, log_col2, ...], [log_col1, log_col2, ...], ..., [log_col1, log_col2, ...] ] 
     where length is dependent on a, b, and step_size. 
    """
    
    end_b = dataset_df.shape[0] 
    
    columns_splits = []
    
    # Take a 30 period window of dataset_df, with a step size of 5
    while b < end_b-5:
        cols = {}
        
        # init cols dict to empty lists
        for col_name in cols_list:
            cols[col_name] = []
        
        for col_name in cols_list:
            val = dataset_df.get_value(b, col_name)
            cols[col_name].append(val)
        
        columns_splits.append(cols)
                
        if b % 10000 == 0: print_progress(b, end_b) 
            
        a += 5
        b += 5
        
    return columns_splits

In [9]:
def price_labels(dataset_windows, period_size):
    """returns a list with len = len(dataset_windows) - 1 containing the price return of time + 5 and now"""
    dct = {'curr_price': [], 'future_price': [], 'return': []}
    for i, df in enumerate(dataset_windows[:-1]): # skip the last one
        curr_price = df['close'][period_size-1]
        dct['curr_price'].append(curr_price)
        
        future_price = dataset_windows[i+1]['close'][4] # 4 periods into the future
        dct['future_price'].append(future_price) 
        dct['return'].append((future_price) - (curr_price)/curr_price) # take labels as percentage return
        
    return dct     

In [10]:
def save_model(model, path):
    """Save a trained PyTorch model to path"""
    torch.save(model.state_dict(), path)

In [11]:
def load_model(model, path):
    """Load weights into model from path"""
    try:
        model.load_state_dict(torch.load(path))
    except:
        print('file not found')

### Create custom PyTorch Array Dataset for stock time series

In [12]:
class ArrayTimeSeriesDataset(Dataset):
    """Dataset for historical timeseries data. 
    self.feature_dfs = [np.Array, np.Array, np.Array, ..., np.Array]
    self.labels = [np.Array, np.Array, np.Array, ..., np.Array]
    """
    def __init__(self, time_series, labels):
        self.time_series, self.labels = time_series, labels
        self.c = 1 # one label
    
    def __len__(self):
        return len(self.time_series)
    
    def __getitem__(self, i):
        time_series_arr =  np.array(self.time_series[i])
        label = np.array(self.labels[i])
        return time_series_arr.flatten(), label.flatten() # convert array into vector

### Create PyTorch Dataset for fusion chart images 

In [13]:
class ChartImageDataset(Dataset):
    """Stock chart image Dataset"""
    
    def __init__(self, image_paths: list, labels: list):
        """ 
        image_paths: list containing path to image. Order is maintained
        labels: list containing label for each image
        """
        self.image_paths = image_paths
        self.labels = labels
    
    def __len__(self):
        return len(self.labels) 
    
    def __getitem__(self, index):
        """Return Tensor representation of image at images_paths[index]"""
        img = pil_image.open(self.image_paths[index])
        img.load()
        
        img_tensor = torchvision.transforms.ToTensor()(img)
        
        # remove alpha dimension from png
        img_tensor = img_tensor[:3,:,:]
        return img_tensor, np.array(self.labels[index])

### GRU recurrent neural network architecture

##### Note about dropout layers:
You do not need to remove the Dropout layers in testing but you need to call model.eval() before testing. Calling this will change the behavior of layers such as Dropout, BatchNorm, etc. so that Dropout layers, for example, will not affect the result.

### ResNet for stock fusion chart images

Instead of training out our convolutional neural network, we instead take a pretrained resnet18 model and apply transfer learning. This method is faster and is more accurate with a limited dataset.

In [15]:
class ST_CNN(nn.Module):
    def __init__(self):
        """Initialize the model by setting up the layers"""
        super(ST_CNN, self).__init__()
        
        # initial layer is resnet
        self.resnet = models.resnet34(pretrained=True, progress=False)
        
        # final fully connected layers
        self.dense1 = nn.Linear(1000, 500)
        self.dense2 = nn.Linear(500, 100)
        self.dense3 = nn.Linear(100, 12)
        
        # output layer
        self.dense4 = nn.Linear(12, 1)
    
    def forward(self, x):
        """Perform a forward pass of our model on some input and hidden state"""
        
        x = self.resnet(x)
        
         # apply three fully-connected Linear layers with ReLU activation function
        x = self.dense1(x)
        x = relu(x)
        
        x = self.dense2(x)
        x = relu(x)
        
        x = self.dense3(x)
        x = relu(x)
        
        # output is a size 1 Tensor
        x = self.dense4(x)
        
        return x

### GRU-CNN model architecture comprised of both resnet18 and grunet

##### create forward() hooks for grunet and resnet at link layer to recieve their weight tensors

In [16]:
class GRU_CNN(nn.Module):
    def __init__(self, num_features, batch_size, hidden_size):
        """Initialize the model by setting up the layers"""
        super(GRU_CNN, self).__init__()
        
        # initialize gru and cnn - the full models
        
        # gru model params
        self.num_features = num_features
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.n_layers = 1
        
        # resnet model
        self.cnn = models.resnet34(pretrained=True, progress=False)
              
        # RNN-GRU model
        self.rnn = nn.GRU(input_size=self.num_features,
                          hidden_size=self.hidden_size)
        
        # init GRU hidden layer
        self.hidden = self.init_hidden(batch_size=self.batch_size, hidden_size=hidden_size)
        self.gru_output = nn.Linear(self.hidden_size, 1000)
        
        # final fully connected layers
        self.dense1 = nn.Linear(1000, 500)
        self.dense2 = nn.Linear(500, 100)
        self.dense3 = nn.Linear(100, 12)
        
        # output layer
        self.dense4 = nn.Linear(12, 1)
    
    def load_cnn_weights(self, cnn):
        cnn_params = cnn.named_parameters()
        gru_cnn_params = dict(self.cnn.named_parameters())
        
        for name, cnn_param in cnn_params:
            if name in gru_cnn_params:
                gru_cnn_params[name].data.copy_(cnn_param.data)
    
    def load_gru_weights(self, gru):
        gru_params = gru.named_parameters()
        gru_cnn_params = dict(self.rnn.named_parameters())
        
        for name, gru_param in gru_params:
            if name in gru_cnn_params:
                gru_cnn_params[name].data.copy_(gru_param.data)
    
    def forward(self, gru_input, cnn_input):
        """Perform a forward pass of our model on some input and hidden state"""
  
        # gru
        gru_out, self.hidden = self.rnn(gru_input, self.hidden)
        
        # detatch the hidden layer to prevent further backpropagating. i.e. fix the vanishing gradient problem
        self.hidden = self.hidden.detach().cuda()
        
        # pass through linear layer
        gru_out = torch.squeeze(self.gru_output(gru_out))
                
        # cnn
        cnn_out = self.cnn(cnn_input)
        
        # add the outputs of grunet and cnn
        x = gru_out.add(cnn_out)
        
        # feed through final layers

        # apply three fully-connected Linear layers with ReLU activation function
        x = self.dense1(x)
        x = relu(x)
        
        x = self.dense2(x)
        x = relu(x)
        
        x = self.dense3(x)
        x = relu(x)
        
        # output is a size 1 Tensor
        x = self.dense4(x)
        
        return x
    
    def init_hidden(self, batch_size, hidden_size):
        """Initializes hidden state"""
        
        # Creates initial hidden state for GRU of zeroes
        hidden = torch.ones(1, self.batch_size, hidden_size).cuda()
        return hidden

#### Select what technical indicators to draw alongside the price data

In [17]:
chart_ti = ['sma10', 'bb10_low', 'bb10_mid', 'bb10_up', 'bb20_low', 'bb20_mid', 'bb20_up'] 

### Training, validation, and test sets

##### add price and volume returns as columns 

In [18]:
# append price return and volume return as columns
price_return = np.log(dataset_df.close) - np.log(dataset_df.close.shift(1))
volume_return = np.log(dataset_df.close) - np.log(dataset_df.close.shift(1))   

dataset_df['price_return'] = price_return
dataset_df['volume_return'] = volume_return

###### Create a list of training inputs

In [19]:
# Split dataset_df into slices split by window size of 30 and step_size 5
dataset_windows = split_dataset(dataset_df, a=0, b=30, step_size=5)

for df in dataset_windows: df.reset_index(drop=True, inplace=True) # reindex 0-29
    
dataset_windows[1].head()

Unnamed: 0,time,open,high,low,close,volume,sma20,sma50,macd,obv,bb20_low,bb20_mid,bb20_up,price_return,volume_return
0,1533774540000,0.056729,0.05675,0.056722,0.056746,191.736,0.056705,0.056705,0.039828,103.084,0.056564,0.056705,0.056845,3.5e-05,3.5e-05
1,1533774600000,0.05675,0.056774,0.056722,0.056767,55.468,0.056713,0.056713,0.010549,158.552,0.056582,0.056713,0.056843,0.00037,0.00037
2,1533774660000,0.056767,0.056778,0.056722,0.056722,46.758,0.056724,0.056724,-0.016565,111.794,0.056614,0.056724,0.056833,-0.000793,-0.000793
3,1533774720000,0.056722,0.056735,0.056678,0.056678,85.016,0.05673,0.05673,-0.041657,26.778,0.056635,0.05673,0.056824,-0.000776,-0.000776
4,1533774780000,0.056689,0.05671,0.056667,0.056669,64.724,0.056732,0.056732,-0.064864,-37.946,0.056648,0.056732,0.056817,-0.000159,-0.000159


### Generating fusion candlestick, volume, and technical indicator chart images for training CNN

In [20]:
# Paths to training images and testing images

train_img_path = Path('data/training-images/')
test_img_path = Path('data/testing-images/')

image_path_list = [train_img_path / 'image-{}.png'.format(i) for i in range(len(dataset_windows))]

##### Note - after creating images they will print inside the cell. Restart notebook to clear memory, and rerun, ignoring this cell.

In [21]:
def generate_chart_image(i):
    df = dataset_windows[i] # grab the i'th window of the df
    chart = Charting(df=df, col_label='time', row_label='close', tech_inds=chart_ti)
    chart.chart_to_image(train_img_path / 'image-{}.png'.format(i)) # the / is a Path join method 

# save every DataFrame of price + vol + tech. id. data into a chart image
p = multiprocessing.Pool(processes = 7)

p.map_async(generate_chart_image, [i for i in range(len(dataset_windows))])

p.close()
p.join()

### Normalizing dataset

In [22]:
price_labels_dct = price_labels(dataset_windows, period_size=30)

In [23]:
price_returns = price_labels_dct['return']

In [24]:
# ensure len(dataset_windows) == len(labels_for_windows)
dataset_windows = dataset_windows[:len(price_returns)]
print(len(dataset_windows))

989


##### we normalize our data by taking price returns instead of raw numbers to generalize better, and we log the rest of the DataFrame

In [25]:
dataset_windows[0]

Unnamed: 0,time,open,high,low,close,volume,sma20,sma50,macd,obv,bb20_low,bb20_mid,bb20_up,price_return,volume_return
0,1533774240000,0.056735,0.056736,0.056624,0.056728,134.008,0.056682,0.056682,0.224341,-70.763,0.05652,0.056682,0.056844,,
1,1533774300000,0.056708,0.056725,0.056676,0.056693,100.215,0.056685,0.056685,0.181786,-170.978,0.056522,0.056685,0.056848,-0.000617,-0.000617
2,1533774360000,0.056693,0.056731,0.056677,0.056716,35.151,0.056684,0.056684,0.142234,-135.827,0.056521,0.056684,0.056846,0.000406,0.000406
3,1533774420000,0.056731,0.056747,0.056712,0.056721,24.48,0.056689,0.056689,0.105505,-111.347,0.056532,0.056689,0.056847,8.8e-05,8.8e-05
4,1533774480000,0.056732,0.056746,0.056723,0.056744,22.695,0.056696,0.056696,0.071426,-88.652,0.056544,0.056696,0.056848,0.000405,0.000405
5,1533774540000,0.056729,0.05675,0.056722,0.056746,191.736,0.056705,0.056705,0.039828,103.084,0.056564,0.056705,0.056845,3.5e-05,3.5e-05
6,1533774600000,0.05675,0.056774,0.056722,0.056767,55.468,0.056713,0.056713,0.010549,158.552,0.056582,0.056713,0.056843,0.00037,0.00037
7,1533774660000,0.056767,0.056778,0.056722,0.056722,46.758,0.056724,0.056724,-0.016565,111.794,0.056614,0.056724,0.056833,-0.000793,-0.000793
8,1533774720000,0.056722,0.056735,0.056678,0.056678,85.016,0.05673,0.05673,-0.041657,26.778,0.056635,0.05673,0.056824,-0.000776,-0.000776
9,1533774780000,0.056689,0.05671,0.056667,0.056669,64.724,0.056732,0.056732,-0.064864,-37.946,0.056648,0.056732,0.056817,-0.000159,-0.000159


#### Drop OCHL data and normalize values by taking log(abs(value)+1)

In [26]:
def normalize_data(i):
    df = dataset_windows[i]
    if 'time' in df: df = df.drop('time', axis=1)
    if 'open' in df: df = df.drop('open', axis=1)
    if 'high' in df: df = df.drop('high', axis=1)
    if 'low' in df: df = df.drop('low', axis=1)
    if 'close' in df: df = df.drop('close', axis=1)
    
    df.iloc[:, :df.shape[1]-1] = np.log(df.iloc[:, :df.shape[1]-1] + 1) #Remove OHCL columns. Log +1 every value 
    df = df.fillna(0) # replace all NaN with 0. 
   
    dataset_windows[i] = df
    
for i in range(len(dataset_windows)): normalize_data(i)

In [27]:
dataset_windows[1]

Unnamed: 0,volume,sma20,sma50,macd,obv,bb20_low,bb20_mid,bb20_up,price_return,volume_return
0,5.261321,0.055155,0.055155,0.039055,4.645198,0.055023,0.055155,0.055288,3.5e-05,3.5e-05
1,4.033674,0.055163,0.055163,0.010494,5.07237,0.055039,0.055163,0.055287,0.00037,0.00037
2,3.866147,0.055173,0.055173,-0.016704,4.725563,0.05507,0.055173,0.055277,-0.000793,-0.000793
3,4.454533,0.055179,0.055179,-0.04255,3.324244,0.055089,0.055179,0.055268,-0.000776,-0.000776
4,4.185464,0.055181,0.055181,-0.067063,0.0,0.055101,0.055181,0.055261,-0.000159,-0.000159
5,3.551684,0.055184,0.055184,-0.09027,0.0,0.055119,0.055184,0.05525,0.000476,0.000476
6,4.015518,0.055184,0.055184,-0.112205,3.93921,0.055119,0.055184,0.05525,0.000441,0.000441
7,3.442243,0.055182,0.055182,-0.132906,4.402258,0.055118,0.055182,0.055247,0.000176,0.000176
8,2.676353,0.055183,0.055183,-0.152414,4.221021,0.055118,0.055183,0.055247,-0.000476,-0.000476
9,3.302555,0.055178,0.055178,-0.170767,3.735787,0.055118,0.055178,0.055239,-0.000989,-0.000988


#### Remove all NaN values - in our case only the first window has a NaN so we remove first window and label

In [28]:
done = False

In [29]:
if not done:
    dataset_windows = dataset_windows[1:]
    price_returns = price_returns[1:] 
    curr_prices = price_labels_dct['curr_price'][1:]
    future_prices = price_labels_dct['future_price'][1:]
    done = True

## Training

#### Initial training parameters

In [30]:
# This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware
cudnn.benchmark = True

# Parameters
params = {'batch_size': 64,
          'shuffle': False,
          'num_workers': 5}

num_epochs = 10

#### Slice datasets to fit with batch_size

In [31]:
while len(dataset_windows) % params['batch_size'] != 0:
    dataset_windows = dataset_windows[:-1]
    
while len(curr_prices) % params['batch_size'] != 0:
    curr_prices = curr_prices[:-1]

while len(future_prices) % params['batch_size'] != 0:
    future_prices = future_prices[:-1]

while len(price_returns) % params['batch_size'] != 0:
    price_returns = price_returns[:-1]
  
assert len(dataset_windows) == len(price_returns) == len(curr_prices) == len(future_prices)
dataset_windows[0]

Unnamed: 0,volume,sma20,sma50,macd,obv,bb20_low,bb20_mid,bb20_up,price_return,volume_return
0,5.261321,0.055155,0.055155,0.039055,4.645198,0.055023,0.055155,0.055288,3.5e-05,3.5e-05
1,4.033674,0.055163,0.055163,0.010494,5.07237,0.055039,0.055163,0.055287,0.00037,0.00037
2,3.866147,0.055173,0.055173,-0.016704,4.725563,0.05507,0.055173,0.055277,-0.000793,-0.000793
3,4.454533,0.055179,0.055179,-0.04255,3.324244,0.055089,0.055179,0.055268,-0.000776,-0.000776
4,4.185464,0.055181,0.055181,-0.067063,0.0,0.055101,0.055181,0.055261,-0.000159,-0.000159
5,3.551684,0.055184,0.055184,-0.09027,0.0,0.055119,0.055184,0.05525,0.000476,0.000476
6,4.015518,0.055184,0.055184,-0.112205,3.93921,0.055119,0.055184,0.05525,0.000441,0.000441
7,3.442243,0.055182,0.055182,-0.132906,4.402258,0.055118,0.055182,0.055247,0.000176,0.000176
8,2.676353,0.055183,0.055183,-0.152414,4.221021,0.055118,0.055183,0.055247,-0.000476,-0.000476
9,3.302555,0.055178,0.055178,-0.170767,3.735787,0.055118,0.055178,0.055239,-0.000989,-0.000988


#### Standard training loop with Stochastic Gradient Descent as the backprop algorithm, and RMSE metric for the loss function

In [40]:
def train(model, num_epochs, batch_size, train_gen, valid_gen, test_gen, gru=False):
    """Standard training function used by all three models"""
    
    # For optimizing our model, we choose SGD 
    optimizer = optim.Adam(model.parameters(), lr=1e-1)
    
    # training loop
    
    # toop through the dataset num_epoch times
    for epoch in range(num_epochs):
               
        # train loop
        train_loss = []
        valid_loss = []
        
        # take the batch and labels for batch 
        for batch, labels in train_gen:
            
            if gru:
                # add extra dimension to every vector in batch
                batch.unsqueeze_(-1)
                batch = batch.expand(batch.shape[0], batch.shape[1], 1)
                
                # reformat dimensions
                batch = batch.transpose(2,0)
                batch = batch.transpose(1, 2)
                
            batch, labels = batch.cuda(), labels.cuda()
            batch, labels = batch.float(), labels.float()
            
            # clear gradients
            model.zero_grad()
            output = model(batch)
            
            if gru:
                output = output[0] # turn (1, batch_size, 1) to (batch_size, 1)
            
            # declare the loss function and calculate output loss
            
            # we use the RMSE error function to train our model
            criterion = nn.MSELoss()
            
            loss = torch.sqrt(criterion(output, labels))
            
            # backpropogate loss through model
            loss.backward()

            # perform model training based on propogated loss
            optimizer.step()
            
            train_loss.append(loss)
        
        # validation loop
        
        profit = 0
        with torch.set_grad_enabled(False):
            for batch, labels in valid_gen:
                if gru:
                    # add extra dimension to every vector in batch
                    batch.unsqueeze_(-1)
                    batch = batch.expand(batch.shape[0], batch.shape[1], 1)

                    # reformat dimensions
                    batch = batch.transpose(2,0)
                    batch = batch.transpose(1, 2)
                    
                batch, labels = batch.cuda(), labels.cuda()
                batch, labels = batch.float(), labels.float()
                
                # transform the model from training configuration to testing configuration. ex. dropout layers are removed
                model.eval()

                output = model(batch)
                
                if gru:
                    output = output[0] # turn (1, batch_size, 1) to (batch_size, 1)
                
                val_loss = torch.sqrt(criterion(output, labels))
                
                model.train()
                
                valid_loss.append(val_loss)
                
            
            # Profitability testing
            profit = 0.0
            
            for batch, labels in test_gen:
                if gru:
                    # add extra dimension to every vector in batch
                    batch.unsqueeze_(-1)
                    batch = batch.expand(batch.shape[0], batch.shape[1], 1)

                    # reformat dimensions
                    batch = batch.transpose(2,0)
                    batch = batch.transpose(1, 2)
                
                batch, labels = batch.cuda(), labels.cuda()
                batch, labels = batch.float(), labels.float()
                
                # transform the model from training configuration to testing configuration. ex. dropout layers are removed
                model.eval()
                
                output = model(batch)
                if gru:
                    output = output[0] # turn (1, batch_size, 1) to (batch_size, 1)
                
                # if output is > 0 ==> model predict positive growth for the next five cycles. Purchase now and sell in 5 periods.
                for i, pred in enumerate(output):
                    #print(pred)
                    if pred[0] > 0: # price will increase
                        profit += labels[i]
                       
                model.train()
                
                
        print("Epoch: {}/{}...".format(epoch+1, num_epochs),
              "Training Loss: {}".format(round(float(sum(train_loss)/len(train_loss)), 4)),
              "Validation Loss: {}".format(round(float(sum(valid_loss)/len(valid_loss)), 4)),
              "Profitability: {}".format(round(float(profit), 3)))     

#### Training loop for GRU-CNN with two inputs - chart images and time series array

In [41]:
def train_dual(model, num_epochs, batch_size, train_gen1, train_gen2, valid_gen1, valid_gen2, test_gen, gru=False):
    """Standard training function used by all three models"""
    # For optimizing our model, we choose SGD 
    optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    
    # training loop
    
    # toop through the dataset num_epoch times
    for epoch in range(num_epochs):
        
        # train loop
        
        train_loss = []
        valid_loss = []
        
        # loop through each batch
        for i  in range(batch_size):
            gru_batch, gru_labels = next(iter(train_gen1))
            gru_batch, gru_labels = gru_batch.cuda(), gru_labels.cuda()
            gru_batch, gru_labels = gru_batch.float(), gru_labels.float()
            
            # add extra dimension to every vector in batch
            gru_batch.unsqueeze_(-1)
            gru_batch = gru_batch.expand(gru_batch.shape[0], gru_batch.shape[1], 1)

            # reformat dimensions
            gru_batch = gru_batch.transpose(2,0)
            gru_batch = gru_batch.transpose(1, 2)
            cnn_batch, cnn_labels = next(iter(train_gen2))
            cnn_batch, cnn_labels = cnn_batch.cuda(), cnn_labels.cuda()
            cnn_batch, cnn_labels = cnn_batch.float(), cnn_labels.float()
            
            # clear gradients
            model.zero_grad()
            output = model(gru_batch, cnn_batch)
            output = output[0]
            # declare the loss function and calculate output loss
            
            # we use the RMSE error function to train our model
            criterion = nn.MSELoss()
            
            loss = torch.sqrt(criterion(output, gru_labels))
            
            # backpropogate loss through model
            loss.backward()
            # perform model training based on propogated loss
            optimizer.step()
            
            train_loss.append(loss)
        
        
        # validation loop
        with torch.set_grad_enabled(False):
            for i in range(batch_size):
                gru_batch, gru_labels = next(iter(valid_gen1))
                gru_batch, gru_labels = gru_batch.cuda(), gru_labels.cuda()
                gru_batch, gru_labels = gru_batch.float(), gru_labels.float()
                
                # add extra dimension to every vector in batch
                gru_batch.unsqueeze_(-1)
                gru_batch = gru_batch.expand(gru_batch.shape[0], gru_batch.shape[1], 1)

                # reformat dimensions
                gru_batch = gru_batch.transpose(2,0)
                gru_batch = gru_batch.transpose(1, 2)

                cnn_batch, cnn_labels = next(iter(valid_gen2))
                cnn_batch, cnn_labels = cnn_batch.cuda(), cnn_labels.cuda()
                cnn_batch, cnn_labels = cnn_batch.float(), cnn_labels.float()
                
                # transform the model from training configuration to testing configuration. ex. dropout layers are removed
                model.eval()

                output = model(gru_batch, cnn_batch)
                output = output[0]
                
                val_loss = torch.sqrt(criterion(output, gru_labels))
                
                model.train()
                
                valid_loss.append(val_loss)
                
             # Profitability testing
            profit = 0.0
            
            for batch, labels in test_gen:
                gru_batch, gru_labels = next(iter(valid_gen1))
                gru_batch, gru_labels = gru_batch.cuda(), gru_labels.cuda()
                gru_batch, gru_labels = gru_batch.float(), gru_labels.float()
                
                # add extra dimension to every vector in batch
                gru_batch.unsqueeze_(-1)
                gru_batch = gru_batch.expand(gru_batch.shape[0], gru_batch.shape[1], 1)

                # reformat dimensions
                gru_batch = gru_batch.transpose(2,0)
                gru_batch = gru_batch.transpose(1, 2)

                cnn_batch, cnn_labels = next(iter(valid_gen2))
                cnn_batch, cnn_labels = cnn_batch.cuda(), cnn_labels.cuda()
                cnn_batch, cnn_labels = cnn_batch.float(), cnn_labels.float()
                
                # transform the model from training configuration to testing configuration. ex. dropout layers are removed
                model.eval()

                output = model(gru_batch, cnn_batch)
                output = output[0]
                
                # if output is > 0 ==> model predict positive growth for the next five cycles. Purchase now and sell in 5 periods.
                
                for i, pred in enumerate(output):
                    if pred > 0: # price will increase
                        profit += labels[i]
                
                
                model.train()
                
        print("Epoch: {}/{}...".format(epoch+1, num_epochs),
              "Training Loss: {}".format(round(float(sum(train_loss)/len(train_loss)), 4)),
              "Validation Loss: {}".format(round(float(sum(valid_loss)/len(valid_loss)), 4)),
              "Profitability: {}".format(round(float(profit), 3)))            

### Training CNN

#### preparing input data for train() by creating DataLoaders for train and valid sets

In [42]:
# specify the split between train_df and valid_df from the process of splitting dataset_windows and labels_for_windows
split = 0.7

s = int(len(dataset_windows) * 0.7)
while s % params['batch_size'] != 0:
    s += 1

In [43]:
# create two ChartImageDatasets, split by split, for the purpose of creating a DataLoader for the specific model

train_ds_cnn = ChartImageDataset(image_path_list[:s], price_returns[:s])
valid_ds_cnn = ChartImageDataset(image_path_list[s:], price_returns[s:])

# add potential profit as label
test_ds_cnn = ChartImageDataset(image_path_list[s:], [future_prices[i] - curr_prices[i] for i in range(s, len(future_prices))])

#### creating and training the model

In [44]:
train_gen_cnn = DataLoader(train_ds_cnn, **params)
valid_gen_cnn = DataLoader(valid_ds_cnn, **params)
train_gen_cnn = DataLoader(valid_ds_cnn, **params)

In [45]:
cnn = ST_CNN().cuda()

#### Load CNN weights

In [46]:
cnn_path = Path('strategy/cnn/cnn_weights')
load_model(cnn, cnn_path)

In [47]:
train(cnn, num_epochs, batch_size=params['batch_size'], train_gen=train_gen_cnn, valid_gen=valid_gen_cnn, test_gen=train_gen_cnn)

Epoch: 1/10... Training Loss: 3103.28 Validation Loss: inf Profitability: 0.0
Epoch: 2/10... Training Loss: 11.8363 Validation Loss: 1.2226 Profitability: -242.969
Epoch: 3/10... Training Loss: 1.1538 Validation Loss: 1.0113 Profitability: -242.969
Epoch: 4/10... Training Loss: 0.9022 Validation Loss: 0.7081 Profitability: 0.0
Epoch: 5/10... Training Loss: 0.5802 Validation Loss: 0.3606 Profitability: 0.0
Epoch: 6/10... Training Loss: 0.2227 Validation Loss: 0.0109 Profitability: 0.0
Epoch: 7/10... Training Loss: 0.102 Validation Loss: 0.1978 Profitability: 0.0
Epoch: 8/10... Training Loss: 0.2003 Validation Loss: 0.1696 Profitability: 0.0
Epoch: 9/10... Training Loss: 0.1167 Validation Loss: 0.0065 Profitability: 0.0
Epoch: 10/10... Training Loss: 0.0643 Validation Loss: 0.1257 Profitability: 0.0


#### Save CNN weights

In [None]:
save_model(cnn, cnn_path)

### Training GRU

In [None]:
# specify the split between train_df and valid_df from the process of splitting dataset_windows and labels_for_windows
split = 0.7

In [None]:
len(dataset_windows)

#### Normalizing the inputs by logging each value

In [None]:
# create two ArrayTimeSeriesDatasets, split by split, for the purpose of creating a DataLoader for the specific model

s = int(len(dataset_windows) * 0.7)
while s % params['batch_size'] != 0:
    s += 1

train_ds_gru = ArrayTimeSeriesDataset(dataset_windows[:s], price_returns[:s])
valid_ds_gru = ArrayTimeSeriesDataset(dataset_windows[s:], price_returns[s:])
test_ds_gru = ArrayTimeSeriesDataset(dataset_windows[s:], [future_prices[i] - curr_prices[i] for i in range(s, len(future_prices))])

In [None]:
hidden_size = 2500

train_gen_gru = DataLoader(train_ds_gru, **params)
valid_gen_gru = DataLoader(valid_ds_gru, **params)
test_gen_gru = DataLoader(valid_ds_gru, **params)

#### Load GRU weights

In [None]:
gru = GRUnet(num_features=300, batch_size=params['batch_size'], hidden_size=hidden_size).float().cuda()

In [None]:
gru_path = Path('strategy/gru/gru_weights')
load_model(gru, gru_path)

In [None]:
train(gru, num_epochs*10, batch_size=params['batch_size'], train_gen=train_gen_gru, valid_gen=valid_gen_gru, test_gen=test_gen_gru, gru=True)

In [None]:
save_model(gru, gru_path)

### Training GRU-CNN

In [None]:
split = 0.7

In [None]:
s = int(len(dataset_windows) * 0.7)
while s % params['batch_size'] != 0:
    s += 1

In [None]:
gru_cnn = GRU_CNN(num_features=390, batch_size=params['batch_size'], hidden_size=800).float().cuda()

#### Load GRU-CNN Weights

In [None]:
gru_cnn_path = Path('strategy/cnn-gru/cnn_gru_weights')
load_model(gru_cnn, gru_cnn_path)

#### Initialize weights of GRU-CNN with pretrained GRU and CNN models

In [None]:
gru_cnn.load_cnn_weights(cnn)
gru_cnn.load_gru_weights(gru)

In [None]:
train_dual(gru_cnn, num_epochs, batch_size=params['batch_size'], train_gen1=train_gen_gru, train_gen2=train_gen_cnn,
           valid_gen1=valid_gen_gru, valid_gen2=valid_gen_cnn, test_gen=test_gen_gru, gru=True)

#### Save GRU-CNN weights

In [None]:
save_model(gru_cnn, gru_cnn_path)