In [1]:
from helpers.charting_tools import Charting
from helpers.data_processing import add_ti
from BookWorm import BookWorm, BinanceWrapper
from PIL import Image

import torch

In [2]:
worm = BookWorm()

In [12]:
candles = worm.historical_candles(start_time='January 1 2017', end_time='January 1 2019', api_wrapper=BinanceWrapper('5lJ0uGit9PuUxHka3hBWhPmsi7dWyxEwvEntUZFKmm0xfNz3VjHWi5WSr5W1VBJV',
                                                      'BFWVs8ko7Cd4sjdQ9amGJTnToGWy9TbQWIjeorSCj23FGiwFaknzkgLPcrgWrxsw'), 
                  symbol='ETHBTC', interval='1m')

In [3]:
candles = worm.historical_candles(start_time='February 2 2018', end_time='August 1 2018', api_wrapper=BinanceWrapper('5lJ0uGit9PuUxHka3hBWhPmsi7dWyxEwvEntUZFKmm0xfNz3VjHWi5WSr5W1VBJV',
                                                      'BFWVs8ko7Cd4sjdQ9amGJTnToGWy9TbQWIjeorSCj23FGiwFaknzkgLPcrgWrxsw'), 
                  symbol='ETHBTC', interval='1m')

In [13]:
candles.to_csv('ethbtc_2yr.csv', index=False)

In [None]:
import pandas as pd
candles = pd.read_csv('ethbtc_2yr.csv')

In [14]:
candles = add_ti(candles)

In [15]:
def split_candles(df, num_rows=30, step=10):
    """Split a DataFrame of candlestick data into a list of smaller DataFrames each with num_rows rows"""
    
    slices = []
    
    for row_i in range(0, df.shape[0] - num_rows, step):
        small_df = df.iloc[row_i:row_i+num_rows, :]
        slices.append(small_df)
        
    return slices

In [16]:
def price_returns(df, num_rows=30, num_into_fut=5, step=10):
    labels = []
    
    for row_i in range(0, df.shape[0] - num_rows - num_into_fut, step):
        # skip all iterations while row_i < num_rows since nothing yet to create a label for
        if row_i <= num_rows: continue
        
        vf, vi = df['close'][row_i+num_into_fut], df['close'][row_i]
        price_return = (vf - vi) / vi
        labels.append(price_return)
    return labels

In [17]:
def create_charts(candles_sliced, save_path):
    """Create a chart image for each in sliced_candles and return a list of paths to those images"""
    from tqdm import tqdm_notebook as tqdm
    import warnings
    warnings.filterwarnings("ignore")
    
    i = 0
    paths_to_images = []
    for small_df in tqdm(candles_sliced):
        chart = Charting(small_df, 'time', 'close')
        
        path = save_path + 'chart_{}.png'.format(i)
        chart.chart_to_image(path)
        paths_to_images.append(path)
        i += 1
    return paths_to_images        

In [None]:
paths_to_images = create_charts(candles_sliced, "images/")

In [None]:
paths_to_images = [ 'images/chart_{}.png'.format(i) for i in range(len(candles_sliced)) ]

In [None]:
Image.open(paths_to_images[103])

In [18]:
price_returnz = price_returns(candles)

In [19]:
def normalize_series(ser):
    return (ser-ser.min())/(ser.max()-ser.min())

In [20]:
try: candles = candles.drop('time', axis=1).reset_index(drop=True)
except: pass
candles.shape

(767705, 11)

In [21]:
candles = candles.apply(normalize_series, axis=0)
candles.shape

(767705, 11)

In [22]:
# split candles into 30 period and a label
candles_sliced = split_candles(candles)
# we need to remove candle slices without a label from candles_sliced
candles_sliced = candles_sliced[len(candles_sliced)-len(price_returnz):]

assert len(candles_sliced) == len(price_returnz)
len(candles_sliced)

76763

In [23]:
candles.head()

Unnamed: 0,open,high,low,close,volume,sma20,macd,obv,bb20_low,bb20_mid,bb20_up
0,0.625265,0.624317,0.626416,0.625295,0.000153,0.607249,1.0,0.483798,0.556625,0.607249,0.651261
1,0.617806,0.616862,0.618944,0.617836,4e-06,0.610275,0.939988,0.483798,0.561115,0.610275,0.652808
2,0.617955,0.618671,0.619094,0.619646,7.6e-05,0.612922,0.884244,0.483801,0.566401,0.612922,0.652828
3,0.619616,0.618671,0.620757,0.619646,1e-05,0.615661,0.832495,0.483801,0.572414,0.615661,0.652317
4,0.619616,0.618671,0.620757,0.619646,0.0,0.6184,0.784494,0.483801,0.579323,0.6184,0.650924


In [24]:
import torch
torch.backends.cudnn.benchmark = True

# Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 5}

In [25]:
def _train(train_gen, model, optim, error_func):
    losses = []
    
    for batch, labels in train_gen:    
        batch, labels = batch.cuda().float(), labels.cuda().float()
        # set model to train mode
        model.train()
        
        # clear gradients
        model.zero_grad()
        
        output = model(batch)
        loss = error_func(output, labels)
        loss.backward()
        optim.step()
        
        
        losses.append(loss)
        
    return round(float(sum(losses) / len(losses)), 6)

In [26]:
def _valid(valid_gen, model, optim, error_func):
    with torch.set_grad_enabled(False):
        losses = []

        for batch, labels in valid_gen:
            batch, labels = batch.cuda().float(), labels.cuda().float()
            
            # set to eval mode
            model.eval()
            
            # clear gradients
            model.zero_grad()

            output = model(batch)
            loss = error_func(output, labels)

            losses.append(loss)
        
    return round(float(sum(losses) / len(losses)), 6)

In [27]:
def _test(test_gen, model, optim, error_func):
    with torch.set_grad_enabled(False):
        losses = []

        for batch, labels in valid_gen:
            batch, labels = batch.cuda().float(), labels.cuda().float()
            
            # set to eval mode
            model.eval()
            
            # clear gradients
            model.zero_grad()

            output = model(batch)
            loss = error_func(output, labels)

            losses.append(loss)
        
    return round(float(sum(losses) / len(losses)), 6)

In [28]:
def train(model, model_name, optim, num_epochs, train_gen, valid_gen, test_gen=None):
    """Train a PyTorch model with optim as optimizer strategy"""
    
    for epoch_i in range(num_epochs):
        
        
        def RMSE(x, y):
            
            # have to squish x into a rank 1 tensor with batch_size length with the outputs we want
            if model_name == 'resnet':
                 # torch.Size([64, 1])
                x = x.squeeze(1)
            elif model_name == 'gru':
                # torch.Size([64, 30, 1])
                x = x[:, 29, :] # take only the last prediction from the 30 time periods in our matrix
                x = x.squeeze(1)
    
            mse = torch.nn.MSELoss()
            return torch.sqrt(mse(x, y))
        
        
        # forward and backward passes of all batches inside train_gen
        train_loss = _train(train_gen, model, optim, RMSE)
        valid_loss = _valid(valid_gen, model, optim, RMSE)
        
        # run on test set if provided
        if test_gen: test_output = _test(test_gen, model, optim)
        else: test_output = "no test selected"
        print("train loss: {}, valid loss: {}, test output: {}".format(train_loss, valid_loss, test_output))

In [None]:
from models.CNN.CNN import CNN
cnn = CNN().cuda().float()

In [None]:
from helpers.datasets import DFTimeSeriesDataset, ChartImageDataset
from torch.utils.data import *
# create dataloaders
# specify the split between train_df and valid_df from the process of splitting dataset_windows 
split = 0.7

s = int(len(candles_sliced) * 0.7)
while s % params['batch_size'] != 0:
    s += 1

# create two ChartImageDatasets, split by split, for the purpose of creating a DataLoader for the specific model
train_ds_cnn = ChartImageDataset(paths_to_images[:s], price_returnz[:s])
valid_ds_cnn = ChartImageDataset(paths_to_images[s:], price_returnz[s:])
train_gen_cnn = DataLoader(train_ds_cnn, **params)
valid_gen_cnn = DataLoader(valid_ds_cnn, **params)

In [None]:
train(cnn, 'resnet', torch.optim.Adam(cnn.parameters(), 1e-3), 8, train_gen_cnn, valid_gen_cnn)

In [None]:
save_model(cnn, 'cnn_weights')

In [34]:
from models.GRU.GRU import GRUnet
gru = GRUnet(num_features=11, num_rows=30, batch_size=64, hidden_size=500, num_layers=5).cuda().float()

In [35]:
from helpers.datasets import DFTimeSeriesDataset, ChartImageDataset
from torch.utils.data import *
# create dataloaders
# specify the split between train_df and valid_df from the process of splitting dataset_windows 
split = 0.7

s = int(len(candles_sliced) * 0.7)
while s % params['batch_size'] != 0:
    s += 1
print(s)

# create two ChartImageDatasets, split by split, for the purpose of creating a DataLoader for the specific model
train_ds_gru = DFTimeSeriesDataset(candles_sliced[:s], price_returnz[:s])
valid_ds_gru = DFTimeSeriesDataset(candles_sliced[s:], price_returnz[s:])
train_gen_gru = DataLoader(train_ds_gru, **params, drop_last=True)
valid_gen_gru = DataLoader(valid_ds_gru, **params, drop_last=True)

53760


In [36]:
train(gru, 'gru', torch.optim.Adam(gru.parameters(), 1e-2), 15, train_gen_gru, valid_gen_gru)

train loss: 0.014936, valid loss: 0.001772, test output: no test selected
train loss: 0.003143, valid loss: 0.001772, test output: no test selected
train loss: 0.003155, valid loss: 0.001874, test output: no test selected
train loss: 0.003137, valid loss: 0.001763, test output: no test selected
train loss: 0.003133, valid loss: 0.002392, test output: no test selected
train loss: 0.003079, valid loss: 0.001894, test output: no test selected
train loss: 0.003134, valid loss: 0.001755, test output: no test selected
train loss: 0.003136, valid loss: 0.002049, test output: no test selected


KeyboardInterrupt: 

In [None]:
from helpers.saving_models import save_model, load_model

save_model(gru, 'gru_weights')

In [5]:
from BackTest import BackTest

In [6]:
bw = BinanceWrapper('5lJ0uGit9PuUxHka3hBWhPmsi7dWyxEwvEntUZFKmm0xfNz3VjHWi5WSr5W1VBJV','BFWVs8ko7Cd4sjdQ9amGJTnToGWy9TbQWIjeorSCj23FGiwFaknzkgLPcrgWrxsw')

In [7]:
from models.GRU.GRU import GRUnet
gru = GRUnet(num_features=11, num_rows=30, batch_size=1, hidden_size=50, num_layers=5, eval_mode=False).float().cuda()

In [None]:
cnn = CNN().cuda().float()

In [8]:
from helpers.saving_models import load_model
load_model(gru, 'gru_weights')

In [9]:
from strategies.example_strategies import RNNStrat
strat = RNNStrat(gru)

In [10]:
b = BackTest(bw, strat)

In [None]:
b.profit_test(candles, model_type="gru")