In [1]:
import os
os.chdir('../../../../')
os.getcwd()

'/Users/rosscopeland/Desktop/personal/code/vivaldi/back_testing'

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema
from v2.model import Trading
from v2.strategy.indicators.smma import SMMA
from v2.strategy.indicators.stochastic_oscillator import StochasticOscillator
from v2.strategy.indicators.bollinger_bands import BollingerBands
from v2.strategy.indicators.rsi import RSI
from v2.strategy.indicators.macd import MACD
from v2.strategy.indicators.param import Param
from sklearn.model_selection import train_test_split
import pickle
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

In [44]:
def load_config():
    my_config = {}
    with open('config.config') as config:
        for line in config:
            args = line.split('=')
            my_config[args[0]] = args[1].rstrip().split(',')
    return my_config

model = Trading(load_config())

In [53]:
datasets = model.dfs
appended_dataset = pd.DataFrame()
for d in datasets:
    training_set = d[0]
    training_set['trough'] = training_set.iloc[argrelextrema(training_set.close.values, np.less_equal, order=480)[0]]['close']
    training_set['peak'] = training_set.iloc[argrelextrema(training_set.close.values, np.greater_equal, order=480)[0]]['close']
    ema_fast = Param(5, 10000, 0, 'ema_fast', 60)
    ema_slow= Param(6, 10001, 0, 'ema_slow', 120)
    signal = Param(5, 10001, 0, 'signal', 90)
    macd_ = MACD(_params=[ema_fast, ema_slow, signal], _name='macd')
    macd_.genData(training_set, gen_new_values=False)
    boll_period = Param(5, 10000, 0, 'period', 90)
    boll_bands = BollingerBands(_params=[boll_period], _name='bollinger_bands')
    boll_bands.genData(training_set, gen_new_values=False)
    stoch_highlow = Param(5, 10000, 0, 'highlow_range', 90.0)
    stoch_k = Param(5, 10000, 0, 'k_period', 270.0)
    stoch_oscillator = StochasticOscillator(_params=[stoch_highlow, stoch_k], _name='stochastic_oscillator')
    stoch_oscillator.genData(training_set, gen_new_values=False)
    rsi_period = Param(5, 10000, 0, 'period', 90.0)
    rsi_ = RSI(_params=[rsi_period], _name='rsi')
    rsi_.genData(training_set, gen_new_values=False)
    smma_period = Param(5, 10000, 0, 'period', 90.0)
    smma_ = SMMA(_params=[smma_period], _name='smma')
    smma_.genData(training_set, gen_new_values=False)
    training_set[['trough', 'peak']] = training_set[['trough', 'peak']].fillna(0)
    training_set['slope'] = (training_set['close'].rolling(window=30).max() - training_set['close'].rolling(window=30).min()) / training_set['close'].rolling(window=30).max()
    training_set = training_set.dropna()
    appended_dataset = appended_dataset.append(training_set)

In [54]:
appended_dataset.head()

Unnamed: 0,time,open,high,low,close,volume,trades,trough,peak,ema_slow,...,stosc_k,stosc_d,rsi_diff,rsi_u,rsi_d,rsi_smma_u,rsi_smma_d,rsi,smma,slope
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.1,1,0.0,0.0,203.646855,...,4.275846,23.660815,-0.00418,0.0,0.00418,0.328056,0.243793,57.367632,201.724654,0.013416
359,1383505860,207.43988,207.43988,206.89,207.01377,1.263692,5,0.0,0.0,203.702506,...,13.776266,23.698312,-0.65821,0.0,0.65821,0.324344,0.248482,56.621691,201.784494,0.013416
360,1383507660,207.59295,207.75,207.59295,207.75,0.1,2,0.0,0.0,203.769407,...,3.149727,23.696407,0.73623,0.73623,0.0,0.329004,0.245671,57.25043,201.851973,0.013767
361,1383507720,207.75,207.75,207.75,207.75,0.1,1,0.0,0.0,203.835202,...,3.149727,23.694475,0.0,0.0,0.0,0.325282,0.242892,57.25043,201.918675,0.012649
362,1383510600,207.75,207.75,207.75,207.75,0.314611,1,0.0,0.0,203.899909,...,3.149727,23.69621,0.0,0.0,0.0,0.321603,0.240145,57.25043,201.984609,0.009771


In [55]:
train_df = appended_dataset[["time", "open", "high", "low", "close", "volume", "ema_slow", "ema_fast", "macd", "stosc_k", "rsi", "smma", "slope"]]
final_df = pd.DataFrame()
PREDICT_NUM = 100
predics = [5, 10, 25, 50, 100]
for i in predics:
    temp_df = train_df
    temp_df["predict_number"] = i
    temp_df["predict_forecast"] = temp_df["close"].shift(-int(i))
    temp_df["classify_inc"] = (temp_df["predict_forecast"] - temp_df["close"] > 0)
    final_df = pd.concat([final_df, temp_df])
    
# final_df["classify_inc"].replace(True, 1)
# final_df["classify_inc"].replace(False, 0)
final_df["classify_inc"] = final_df["classify_inc"].astype(int)


    
final_df.head()

Unnamed: 0,time,open,high,low,close,volume,ema_slow,ema_fast,macd,stosc_k,rsi,smma,slope,predict_number,predict_forecast,classify_inc
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.1,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,5,208.21573,1
359,1383505860,207.43988,207.43988,206.89,207.01377,1.263692,203.702506,205.618262,1.915756,13.776266,56.621691,201.784494,0.013416,5,207.80137,1
360,1383507660,207.59295,207.75,207.59295,207.75,0.1,203.769407,205.688155,1.918748,3.149727,57.25043,201.851973,0.013767,5,207.17796,0
361,1383507720,207.75,207.75,207.75,207.75,0.1,203.835202,205.755757,1.920555,3.149727,57.25043,201.918675,0.012649,5,208.3,1
362,1383510600,207.75,207.75,207.75,207.75,0.314611,203.899909,205.821142,1.921233,3.149727,57.25043,201.984609,0.009771,5,208.47,1


In [56]:
final_df.dropna(inplace=True)

In [57]:
final_df.sort_values(by=['time'], inplace=True)
final_df

Unnamed: 0,time,open,high,low,close,volume,ema_slow,ema_fast,macd,stosc_k,rsi,smma,slope,predict_number,predict_forecast,classify_inc
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,5,208.21573,1
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,100,211.09000,1
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,10,208.52599,1
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,50,212.20000,1
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,25,208.90000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1737848,1590986940,9552.90000,9552.90000,9551.50000,9551.50000,0.331746,9546.490100,9556.706129,10.216029,59.027027,52.082370,9536.448233,0.005767,5,9549.80000,0
1737849,1590987000,9552.00000,9557.10000,9550.70000,9550.70000,9.495489,9546.559685,9556.509206,9.949522,59.891892,51.967338,9536.606586,0.005767,5,9546.20000,0
1737850,1590987060,9550.90000,9550.90000,9550.70000,9550.70000,0.110000,9546.628120,9556.318741,9.690621,59.891892,51.967338,9536.763179,0.005767,5,9545.20000,0
1737851,1590987120,9550.80000,9550.80000,9550.70000,9550.80000,2.486151,9546.697076,9556.137798,9.440722,59.783784,51.980894,9536.919144,0.005767,5,9543.90000,0


In [63]:
X = final_df.drop(["predict_forecast", "classify_inc"], axis=1)
y = final_df[["predict_forecast", "classify_inc"]]

X_train_, X_test_, y_train_, y_test_ = train_test_split(X, y, test_size=0.2, shuffle=False)

linear_y_train = y_train_.drop("classify_inc", axis=1).values
linear_y_test = y_test_.drop("classify_inc", axis=1).values

classify_y_train = y_train_.drop("predict_forecast", axis=1).values
classify_y_test = y_test_.drop("predict_forecast", axis=1).values

X_train = X_train_.values
X_test = X_test_.values

graph_X = []
graph_X_pred = []
graph_y = []
for i in range(len(X_test)):
    if X_test[i][-1] == PREDICT_NUM:
        graph_X.append(X_test[i][0])
        graph_X_pred.append(X_test[i])
        graph_y.append(linear_y_test[i])

graph_X = graph_X[-1440:] 
graph_X_pred = graph_X_pred[-1440:]
graph_y = graph_y[-1440:]
future_graph_X = [x + PREDICT_NUM for x in graph_X]

In [51]:
from sklearn.linear_model import Ridge

In [52]:
#ridge
ridge = Ridge(alpha=0.01)
ridge.fit(X_train, linear_y_train)
score = ridge.score(X_test, linear_y_test)

pickle.dump(ridge, open('v2/research/scripts/models/global_ridge.sav', 'wb+'))
with open("v2/research/scripts/training_data.csv", "a") as f:
    f.write("ridge_linear_model,{}\n".format(score))
predictions = ridge.predict(graph_X_pred)
plt.plot(future_graph_X, graph_y, label = "original prices", color="blue")
plt.plot(graph_X, predictions, label = "predicted prices", color="red")
plt.savefig('v2/research/scripts/graphs/ridge.png')
plt.clf()

<Figure size 432x288 with 0 Axes>

In [59]:
 from sklearn.ensemble import RandomForestClassifier
 from xgboost import XGBClassifier

In [61]:
#RF 
#do not use. You suck
rf = RandomForestClassifier(n_jobs=-1)
rf.fit(X_train, classify_y_train)
score = rf.score(X_test, classify_y_test)

with open("v2/research/scripts/training_data.csv", "a") as f:
    f.write("random_forest_model,{}\n".format(score))
predictions = rf.predict(graph_X_pred)
plt.plot(future_graph_X, graph_y, label = "original prices", color="blue")
plt.plot(graph_X, predictions, label = "predicted prices", color="red")
plt.savefig('v2/research/scripts/graphs/rf.png')
plt.clf()

<Figure size 432x288 with 0 Axes>

In [67]:
X_train_["predict_price"] = ridge.predict(X_train_.values)
X_test_["predict_price"] = ridge.predict(X_test_.values)

In [68]:
X_train = X_train_.values
X_test = X_test_.values

In [69]:
X_train_

Unnamed: 0,time,open,high,low,close,volume,ema_slow,ema_fast,macd,stosc_k,rsi,smma,slope,predict_number,predict_price
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,5,208.783730
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,100,209.458138
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,10,208.819225
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,50,209.103186
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,25,208.925710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390381,1568166420,10105.00000,10106.80000,10095.00000,10103.80000,39.397563,10122.850923,10125.210586,2.359663,94.322581,48.036311,10117.307640,0.002419,10,10103.015981
1390382,1568166480,10096.30000,10103.90000,10095.00000,10095.00000,0.222281,10122.390577,10124.220075,1.829498,100.000000,46.837907,10117.059778,0.003081,100,10096.095113
1390382,1568166480,10096.30000,10103.90000,10095.00000,10095.00000,0.222281,10122.390577,10124.220075,1.829498,100.000000,46.837907,10117.059778,0.003081,50,10095.740161
1390382,1568166480,10096.30000,10103.90000,10095.00000,10095.00000,0.222281,10122.390577,10124.220075,1.829498,100.000000,46.837907,10117.059778,0.003081,25,10095.562685


In [70]:
#xgboost
xgb = XGBClassifier(n_jobs=-1)
xgb.fit(X_train, classify_y_train)
score = xgb.score(X_test, classify_y_test)

with open("v2/research/scripts/training_data.csv", "a") as f:
    f.write("xgboost_model_with_prediction,{}\n".format(score))
predictions = rf.predict(graph_X_pred)
plt.plot(future_graph_X, graph_y, label = "original prices", color="blue")
plt.plot(graph_X, predictions, label = "predicted prices", color="red")
plt.savefig('v2/research/scripts/graphs/xgboost.png')
plt.clf()



<Figure size 432x288 with 0 Axes>

In [73]:
five_min = final_df[final_df["predict_number"] == 5]
ten_min = final_df[final_df["predict_number"] == 10]
twenty_5_min = final_df[final_df["predict_number"] == 25]
fifty_min = final_df[final_df["predict_number"] == 50]
hundred_min = final_df[final_df["predict_number"] == 100]

In [92]:
import time
five_X = []
five_y = []

for x in X_test_.itertuples():
    if x.predict_number == 5:
        five_X.append(x.tolist())
    print(x)




five_X = five_min.drop(["predict_forecast", "classify_inc"], axis=1)
five_y = five_min[["classify_inc"]]

ten_X = ten_min.drop(["predict_forecast", "classify_inc"], axis=1)
ten_y = ten_min[["classify_inc"]]

twenty_X = twenty_5_min.drop(["predict_forecast", "classify_inc"], axis=1)
twenty_y = twenty_5_min[["classify_inc"]]

fifty_X = fifty_min.drop(["predict_forecast", "classify_inc"], axis=1)
fifty_y = fifty_min[["classify_inc"]]

hundred_X = hundred_min.drop(["predict_forecast", "classify_inc"], axis=1)
hundred_y = hundred_min[["classify_inc"]]

five_X["predict_price"] = ridge.predict(five_X)
ten_X["predict_price"] = ridge.predict(ten_X)
twenty_X["predict_price"] = ridge.predict(twenty_X)
fifty_X["predict_price"] = ridge.predict(fifty_X)
hundred_X["predict_price"] = ridge.predict(hundred_X)

Pandas(Index=1390382, time=1568166480, open=10096.3, high=10103.9, low=10095.0, close=10095.0, volume=0.222281, ema_slow=10122.3905766827, ema_fast=10124.220074991274, macd=1.8294983085743297, stosc_k=100.0, rsi=46.83790650394537, smma=10117.05977776218, slope=0.003081116312140855, predict_number=10, predict_price=10095.456199617767)
Pandas(Index=1390383, time=1568166540, open=10095.0, high=10101.2, low=10095.0, close=10101.2, volume=0.9570000000000001, ema_slow=10122.040319216869, ema_fast=10123.465318434182, macd=1.424999217313598, stosc_k=95.99999999999953, rsi=47.76632932221357, smma=10116.883558009267, slope=0.0030318891533425564, predict_number=100, predict_price=10099.903577346535)
Pandas(Index=1390383, time=1568166540, open=10095.0, high=10101.2, low=10095.0, close=10101.2, volume=0.9570000000000001, ema_slow=10122.040319216869, ema_fast=10123.465318434182, macd=1.424999217313598, stosc_k=95.99999999999953, rsi=47.76632932221357, smma=10116.883558009267, slope=0.003031889153342

AttributeError: 'Pandas' object has no attribute 'tolist'

In [84]:
print(xgb.score(five_X.values, five_y.values))
print(xgb.score(ten_X.values, ten_y.values))
print(xgb.score(twenty_X.values, twenty_y.values))
print(xgb.score(fifty_X.values, fifty_y.values))
print(xgb.score(hundred_X.values, hundred_y.values))


0.5502127250507852
0.5615358844678894
0.5785347562369078
0.5896599880035045
0.5954422367084906


In [85]:
final_df["predict_number"].unique()

array([  5, 100,  10,  50,  25])

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, RNN, BatchNormalization

In [None]:
model = Sequential()
model.add(LSTM(512, activation='relu', return_sequences=True))
BatchNormalization()
model.add(Dropout(0.2))

model.add(Dense(256, activation='relu'))
BatchNormalization()
model.add(Dropout(0.2))

model.add(LSTM(128, activation='relu'))
BatchNormalization()
model.add(Dropout(0.2))

model.add(Dense(1, activation='linear'))

opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)

model.compile(loss='mean_absolute_error', warm_start=False, optimizer=opt, metrics=['accuracy'])

In [None]:
model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))