In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [3]:
# path templates for the persisted data sets and model weights; "{}" is replaced by the file's base name
STORAGE_PATH_DATA = "../../persisted_data/feather/{}.feather"
STORAGE_PATH_MODELS = "../../persisted_data/models/{}.pth"


def load_data_set(name):
    """Read the feather file `name` from the persisted data directory.

    Args:
        name: base name of the file, without directory or ".feather" extension.

    Returns:
        Whatever `pd.read_feather` yields for the resolved path (a DataFrame).
    """
    feather_path = STORAGE_PATH_DATA.format(name)
    return pd.read_feather(feather_path)

In [85]:
# Indicator (feature) column sets used by the experiments below.
# To discover every indicator column dynamically, a small data set is loaded and its
# columns are filtered: everything whose name contains "future" or "current" is dropped.
chart_data = load_data_set("IBM_normalized")
all_indicators = chart_data.columns[
    ~chart_data.columns.str.contains("future|current", regex=True)
]

# hand-picked default set (24 columns, matching the network's default input size)
standard_indicators = [
    # moving averages
    "sma10", "sma20", "sma50", "sma100",
    "lwma10", "lwma20", "lwma50", "lwma100", "lwma200",
    "ema10", "ema20", "ema50", "ema100",
    # rate of change
    "rate_of_change20", "rate_of_change50",
    # relative positions
    "horizontal_position20", "horizontal_position50", "horizontal_position100",
    "regression_position20", "regression_position50", "regression_position100",
    "bollinger_position20_2", "bollinger_position50_2", "bollinger_position100_2",
]

# 30 columns; presumably the result of an F-test based feature selection (going by the name)
ftest_selected_indicators = [
    "macd12_26", "macd_signal12_26", "cci50",
    "horizontal_lower20", "horizontal_lower50", "ma_cross50_200",
    "horizontal_lower200", "regression_threshold20", "regression_threshold100",
    "chande100", "horizontal_lower100", "ma_cross20_50",
    "cci_threshold50", "lwma10", "regression_position20",
    "regression_upper20", "regression_position100", "volatility10",
    "ema10", "aaron_oscillator40", "bollinger_lower20_2",
    "rsi20", "aaron_oscillator25", "horizontal_upper20",
    "volatility20", "aaron_up40", "aaron_oscillator15",
    "aaron_down40", "sma10", "aaron_down25",
]

In [5]:
# default mappers for the labels
def future_price_mapper(data_set):
    """Build the binary price label from the "future_price" column.

    Returns 1 where "future_price" is greater than 1 and 0 where it is
    less than or equal to 1.
    """
    price_change = data_set["future_price"] - 1
    direction = np.sign(price_change)
    # clip the -1 of falling prices up to 0 so that only two classes remain
    return np.maximum(direction, 0)

def future_volatility_mapper(data_set, interval=10):
    """Build the binary volatility label from the "future_volatility<interval>" column.

    Returns 1 where the selected column is greater than 1 and 0 where it is
    less than or equal to 1.
    """
    column_name = "future_volatility{}".format(interval)
    volatility_change = data_set[column_name] - 1
    direction = np.sign(volatility_change)
    # clip the -1 of decreasing volatility up to 0 so that only two classes remain
    return np.maximum(direction, 0)

# currying function used to fix the interval and pass the resulting function to the data loader
def build_future_volatility_mapper(interval):
    """Return a one-argument label mapper with the volatility interval fixed to `interval`."""
    def mapper(data_set):
        return future_volatility_mapper(data_set, interval)

    return mapper

In [6]:
# a simple price data loader, the label is created using a custom map function
class PriceHistoryDataset(Dataset):
    """Map-style dataset pairing indicator rows with a label derived by a mapper function.

    Args:
        file_name: base name of the feather file, resolved by `load_data_set`.
        selected_indicators: column labels used as input features, in this order.
        label_mapper: callable that receives the loaded data and returns one
            label value per row.

    Attributes:
        indicators: float32 tensor built from the selected columns.
        label: float32 tensor built from the mapper result.
    """

    def __init__(self, file_name, selected_indicators=standard_indicators, label_mapper=future_price_mapper):
        # Read the feather data set file
        data = load_data_set(file_name)
        self.indicators = torch.tensor(data[selected_indicators].values.astype(np.float32))
        # Convert the mapper result to a plain ndarray before handing it to torch.
        # Passing a pandas Series directly makes torch treat it as a generic sequence and
        # index it label-wise (series[0], series[1], ...), which is slow for large sets
        # and raises KeyError when the Series does not carry a default 0..n-1 index.
        label_values = np.asarray(label_mapper(data), dtype=np.float32)
        self.label = torch.tensor(label_values)

    def __getitem__(self, i):
        """Return the (indicators, label) pair stored at position `i`."""
        return self.indicators[i], self.label[i]

    def __len__(self):
        """Return the number of labelled samples."""
        return self.label.shape[0]

In [7]:
# small neural network for predicting the future price or volatility
class PriceHistoryNetwork(nn.Module):
    """Fully connected binary classifier: input_neurons -> 12 -> 8 -> 1.

    Every linear layer is followed by a sigmoid, so the single output value
    lies between 0 and 1.
    """

    def __init__(self, input_neurons=24):
        super().__init__()
        # layers are created and registered in data-flow order
        self.fc1, self.sigm1 = nn.Linear(input_neurons, 12), nn.Sigmoid()
        self.fc2, self.sigm2 = nn.Linear(12, 8), nn.Sigmoid()
        self.fc3, self.sigm3 = nn.Linear(8, 1), nn.Sigmoid()

    def forward(self, x):
        """Push `x` through the three linear + sigmoid stages and return the result."""
        hidden1 = self.sigm1(self.fc1(x))
        hidden2 = self.sigm2(self.fc2(hidden1))
        return self.sigm3(self.fc3(hidden2))

In [80]:
# training function
def train(net, train_loader, loss_function, epochs=10, optimizer=None):
    """Train `net` on the batches of `train_loader` and report the training accuracy.

    Args:
        net: module whose output, once flattened, holds one value per sample.
        train_loader: iterable yielding (indicators, future) batches.
        loss_function: callable taking (outputs, targets) and returning a loss
            that supports `backward()`.
        epochs: number of passes over `train_loader`.
        optimizer: optimizer bound to the parameters of `net`; if None, SGD with
            lr=0.01 and momentum=0.9 is used.

    Returns:
        The same `net` object, trained in place.

    The accuracy printed per epoch is accumulated while the weights are still
    being updated, so it is a running training accuracy and not a validation
    score. If no sample was processed (epochs == 0 or an empty loader) the
    accuracy is reported as nan instead of raising ZeroDivisionError.
    """
    if optimizer is None:
        # use the default optimizer
        optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    def _accuracy(correct, total):
        # avoid 0 / 0 when nothing has been predicted
        return correct / total if total else float("nan")

    correct_total = 0
    predictions_total = 0
    for epoch in range(epochs):
        # the counters are reset so that each epoch reports its own accuracy
        correct_total = 0
        predictions_total = 0
        net.train()
        for indicators, future in train_loader:
            outs = torch.flatten(net(indicators))
            loss = loss_function(outs, future)

            # clear grads
            optimizer.zero_grad()

            # backward
            loss.backward()

            # update parameters
            optimizer.step()

            # evaluate the accuracy; outputs above 0.5 count as class 1
            preds = torch.where(outs > 0.5, 1, 0)
            # .item() keeps the counters plain Python ints instead of tensors
            correct_total += torch.sum(preds == future).item()
            predictions_total += len(future)

        print("Epoch {}. Running accuracy: {}".format(epoch, _accuracy(correct_total, predictions_total)))

    print("Training finished. Total accuracy: {}".format(_accuracy(correct_total, predictions_total)))

    return net

In [81]:
# build the training set (default indicators, future-price label) and a shuffling batch loader
training_dataset = PriceHistoryDataset(file_name="all_stocks_10spy_20shift_normalized")
dataloader_training = DataLoader(
    dataset=training_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
# create the network (default input size) and the loss function; training happens in the next cell
net = PriceHistoryNetwork()
loss_function = nn.BCELoss()

In [82]:
# run the training loop with its default optimizer and epoch count
train(net=net, train_loader=dataloader_training, loss_function=loss_function)

Epoch 0. Running accuracy: 0.5065320730209351
Epoch 1. Running accuracy: 0.5092029571533203
Epoch 2. Running accuracy: 0.5145485401153564
Epoch 3. Running accuracy: 0.5189905762672424
Epoch 4. Running accuracy: 0.5206906795501709
Epoch 5. Running accuracy: 0.5207306146621704
Epoch 6. Running accuracy: 0.5218386650085449
Epoch 7. Running accuracy: 0.5218299031257629
Epoch 8. Running accuracy: 0.5222466588020325
Epoch 9. Running accuracy: 0.5229623317718506
Training finished. Total accuracy: 0.5229623317718506


PriceHistoryNetwork(
  (fc1): Linear(in_features=24, out_features=12, bias=True)
  (sigm1): Sigmoid()
  (fc2): Linear(in_features=12, out_features=8, bias=True)
  (sigm2): Sigmoid()
  (fc3): Linear(in_features=8, out_features=1, bias=True)
  (sigm3): Sigmoid()
)

In [83]:
# persist the weights (state dict) of the current model to disk
torch.save(
    obj=net.state_dict(),
    f=STORAGE_PATH_MODELS.format("price_predictor_net_standard"),
)

In [78]:
# build the training set (all indicator columns, future-price label) and a shuffling batch loader
training_dataset = PriceHistoryDataset(
    file_name="all_stocks_10spy_20shift_normalized",
    selected_indicators=all_indicators,
)
dataloader_training = DataLoader(
    dataset=training_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
# create the network (one input per indicator) and the loss function; training happens in the next cell
net = PriceHistoryNetwork(input_neurons=len(all_indicators))
loss_function = nn.BCELoss()

In [79]:
# train the network with Adam instead of the default SGD optimizer
tuned_optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)
train(
    net=net,
    train_loader=dataloader_training,
    loss_function=loss_function,
    optimizer=tuned_optimizer,
)

Epoch 0. Running accuracy: 0.5079906582832336
Epoch 1. Running accuracy: 0.5091814994812012
Epoch 2. Running accuracy: 0.5120821595191956
Epoch 3. Running accuracy: 0.5145368576049805
Epoch 4. Running accuracy: 0.5187948942184448
Epoch 5. Running accuracy: 0.5226137042045593
Epoch 6. Running accuracy: 0.5235260725021362
Epoch 7. Running accuracy: 0.52419114112854
Epoch 8. Running accuracy: 0.526024580001831
Epoch 9. Running accuracy: 0.5261462926864624
Training finished. Total accuracy: 0.5261462926864624


PriceHistoryNetwork(
  (fc1): Linear(in_features=116, out_features=12, bias=True)
  (sigm1): Sigmoid()
  (fc2): Linear(in_features=12, out_features=8, bias=True)
  (sigm2): Sigmoid()
  (fc3): Linear(in_features=8, out_features=1, bias=True)
  (sigm3): Sigmoid()
)

In [70]:
# persist the weights (state dict) of the current model to disk
torch.save(
    obj=net.state_dict(),
    f=STORAGE_PATH_MODELS.format("price_predictor_net_all"),
)

In [86]:
# build the training set (ftest_selected_indicators columns, future-price label) and a shuffling batch loader
training_dataset = PriceHistoryDataset(
    file_name="all_stocks_10spy_20shift_normalized",
    selected_indicators=ftest_selected_indicators,
)
dataloader_training = DataLoader(
    dataset=training_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
# create the network (one input per indicator) and the loss function; training happens in the next cell
net = PriceHistoryNetwork(input_neurons=len(ftest_selected_indicators))
loss_function = nn.BCELoss()

In [87]:
# train the network with Adam instead of the default SGD optimizer
tuned_optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)
train(
    net=net,
    train_loader=dataloader_training,
    loss_function=loss_function,
    optimizer=tuned_optimizer,
)

Epoch 0. Running accuracy: 0.5171599984169006
Epoch 1. Running accuracy: 0.5197831988334656
Epoch 2. Running accuracy: 0.5211346745491028
Epoch 3. Running accuracy: 0.5226137042045593
Epoch 4. Running accuracy: 0.5240256190299988
Epoch 5. Running accuracy: 0.5252369046211243
Epoch 6. Running accuracy: 0.5260128974914551
Epoch 7. Running accuracy: 0.5277772545814514
Epoch 8. Running accuracy: 0.5282251834869385
Epoch 9. Running accuracy: 0.529039204120636
Training finished. Total accuracy: 0.529039204120636


PriceHistoryNetwork(
  (fc1): Linear(in_features=30, out_features=12, bias=True)
  (sigm1): Sigmoid()
  (fc2): Linear(in_features=12, out_features=8, bias=True)
  (sigm2): Sigmoid()
  (fc3): Linear(in_features=8, out_features=1, bias=True)
  (sigm3): Sigmoid()
)

In [88]:
# persist the weights (state dict) of the current model to disk
torch.save(
    obj=net.state_dict(),
    f=STORAGE_PATH_MODELS.format("price_predictor_net_fselected"),
)

In [74]:
# build the training set (all indicator columns, label from "future_volatility10") and a shuffling batch loader
training_dataset = PriceHistoryDataset(
    file_name="all_stocks_10spy_20shift_normalized",
    selected_indicators=all_indicators,
    label_mapper=build_future_volatility_mapper(10),
)
dataloader_training = DataLoader(
    dataset=training_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
# create the network (one input per indicator) and the loss function; training happens in the next cell
net = PriceHistoryNetwork(input_neurons=len(all_indicators))
loss_function = nn.BCELoss()

In [75]:
# train the volatility network with Adam instead of the default SGD optimizer
tuned_optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)
train(
    net=net,
    train_loader=dataloader_training,
    loss_function=loss_function,
    optimizer=tuned_optimizer,
)

Epoch 0. Running accuracy: 0.5741108655929565
Epoch 1. Running accuracy: 0.659165620803833
Epoch 2. Running accuracy: 0.6639182567596436
Epoch 3. Running accuracy: 0.6666465401649475
Epoch 4. Running accuracy: 0.6694284081459045
Epoch 5. Running accuracy: 0.6725267171859741
Epoch 6. Running accuracy: 0.674107015132904
Epoch 7. Running accuracy: 0.6749551296234131
Epoch 8. Running accuracy: 0.676155686378479
Epoch 9. Running accuracy: 0.6771401166915894
Training finished. Total accuracy: 0.6771401166915894


PriceHistoryNetwork(
  (fc1): Linear(in_features=116, out_features=12, bias=True)
  (sigm1): Sigmoid()
  (fc2): Linear(in_features=12, out_features=8, bias=True)
  (sigm2): Sigmoid()
  (fc3): Linear(in_features=8, out_features=1, bias=True)
  (sigm3): Sigmoid()
)

In [77]:
# persist the weights (state dict) of the current model to disk
torch.save(
    obj=net.state_dict(),
    f=STORAGE_PATH_MODELS.format("volatility10_predictor_net_all"),
)