<h3> More Price Prediction </h3>
<p> Here we use many different types of models to try and predict the price of a coin </p>

In [2]:
import os
os.chdir('../../')
os.getcwd()

'/Users/rosscopeland/Desktop/personal/code/vivaldi/back_testing'

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema
from v2.model import Trading
from v2.strategy.indicators.smma import SMMA
from v2.strategy.indicators.stochastic_oscillator import StochasticOscillator
from v2.strategy.indicators.bollinger_bands import BollingerBands
from v2.strategy.indicators.rsi import RSI
from v2.strategy.indicators.macd import MACD
from v2.strategy.indicators.param import Param
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBRegressor
import pickle
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

In [4]:
#here we'll be trying to do a regression for predicting the price. Since we are simulating production we can't normalize. Also we will be trying multiple learners inside of learners. For now we will start with lin reg inside of rf then into boost

In [5]:
def load_config():
    my_config = {}
    with open('config.config') as config:
        for line in config:
            args = line.split('=')
            my_config[args[0]] = args[1].rstrip().split(',')
    return my_config

model = Trading(load_config())

In [6]:
datasets = model.dfs
appended_dataset = pd.DataFrame()
for d in datasets:
    training_set = d[0]
    training_set['trough'] = training_set.iloc[argrelextrema(training_set.close.values, np.less_equal, order=480)[0]]['close']
    training_set['peak'] = training_set.iloc[argrelextrema(training_set.close.values, np.greater_equal, order=480)[0]]['close']
    ema_fast = Param(5, 10000, 0, 'ema_fast', 60)
    ema_slow= Param(6, 10001, 0, 'ema_slow', 120)
    signal = Param(5, 10001, 0, 'signal', 90)
    macd_ = MACD(_params=[ema_fast, ema_slow, signal], _name='macd')
    macd_.genData(training_set, gen_new_values=False)
    boll_period = Param(5, 10000, 0, 'period', 90)
    boll_bands = BollingerBands(_params=[boll_period], _name='bollinger_bands')
    boll_bands.genData(training_set, gen_new_values=False)
    stoch_highlow = Param(5, 10000, 0, 'highlow_range', 90.0)
    stoch_k = Param(5, 10000, 0, 'k_period', 270.0)
    stoch_oscillator = StochasticOscillator(_params=[stoch_highlow, stoch_k], _name='stochastic_oscillator')
    stoch_oscillator.genData(training_set, gen_new_values=False)
    rsi_period = Param(5, 10000, 0, 'period', 90.0)
    rsi_ = RSI(_params=[rsi_period], _name='rsi')
    rsi_.genData(training_set, gen_new_values=False)
    smma_period = Param(5, 10000, 0, 'period', 90.0)
    smma_ = SMMA(_params=[smma_period], _name='smma')
    smma_.genData(training_set, gen_new_values=False)
    training_set[['trough', 'peak']] = training_set[['trough', 'peak']].fillna(0)
    training_set['slope'] = (training_set['close'].rolling(window=30).max() - training_set['close'].rolling(window=30).min()) / training_set['close'].rolling(window=30).max()
    training_set = training_set.dropna()
    appended_dataset = appended_dataset.append(training_set)

In [7]:
appended_dataset.head()

Unnamed: 0,time,open,high,low,close,volume,trades,trough,peak,ema_slow,...,stosc_k,stosc_d,rsi_diff,rsi_u,rsi_d,rsi_smma_u,rsi_smma_d,rsi,smma,slope
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.1,1,0.0,0.0,203.646855,...,4.275846,23.660815,-0.00418,0.0,0.00418,0.328056,0.243793,57.367632,201.724654,0.013416
359,1383505860,207.43988,207.43988,206.89,207.01377,1.263692,5,0.0,0.0,203.702506,...,13.776266,23.698312,-0.65821,0.0,0.65821,0.324344,0.248482,56.621691,201.784494,0.013416
360,1383507660,207.59295,207.75,207.59295,207.75,0.1,2,0.0,0.0,203.769407,...,3.149727,23.696407,0.73623,0.73623,0.0,0.329004,0.245671,57.25043,201.851973,0.013767
361,1383507720,207.75,207.75,207.75,207.75,0.1,1,0.0,0.0,203.835202,...,3.149727,23.694475,0.0,0.0,0.0,0.325282,0.242892,57.25043,201.918675,0.012649
362,1383510600,207.75,207.75,207.75,207.75,0.314611,1,0.0,0.0,203.899909,...,3.149727,23.69621,0.0,0.0,0.0,0.321603,0.240145,57.25043,201.984609,0.009771


In [8]:
train_df = appended_dataset[["time", "open", "high", "low", "close", "volume", "ema_slow", "ema_fast", "macd", "stosc_k", "rsi", "smma", "slope"]]
final_df = pd.DataFrame()
PREDICT_NUM = 5
predics = [5, 10, 25, 50, 100]
for i in predics:
    temp_df = train_df
    temp_df["predict_number"] = i
    temp_df["predict_forecast"] = temp_df["close"].shift(-int(i))
    final_df = pd.concat([final_df, temp_df])
    
final_df.head()

Unnamed: 0,time,open,high,low,close,volume,ema_slow,ema_fast,macd,stosc_k,rsi,smma,slope,predict_number,predict_forecast
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.1,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,1,207.01377
359,1383505860,207.43988,207.43988,206.89,207.01377,1.263692,203.702506,205.618262,1.915756,13.776266,56.621691,201.784494,0.013416,1,207.75
360,1383507660,207.59295,207.75,207.59295,207.75,0.1,203.769407,205.688155,1.918748,3.149727,57.25043,201.851973,0.013767,1,207.75
361,1383507720,207.75,207.75,207.75,207.75,0.1,203.835202,205.755757,1.920555,3.149727,57.25043,201.918675,0.012649,1,207.75
362,1383510600,207.75,207.75,207.75,207.75,0.314611,203.899909,205.821142,1.921233,3.149727,57.25043,201.984609,0.009771,1,208.21573


In [9]:
final_df.dropna(inplace=True)

In [10]:
final_df.sort_values(by=['time'], inplace=True)
final_df

Unnamed: 0,time,open,high,low,close,volume,ema_slow,ema_fast,macd,stosc_k,rsi,smma,slope,predict_number,predict_forecast
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,1,207.01377
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,4,207.75000
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,5,208.21573
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,2,207.75000
358,1383503820,207.67198,207.67198,207.67198,207.67198,0.100000,203.646855,205.570957,1.924102,4.275846,57.367632,201.724654,0.013416,6,207.80137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1531726,1577857740,7194.10000,7194.10000,7194.10000,7194.10000,0.020000,7196.904431,7195.969987,-0.934445,84.758364,49.649119,7195.014807,0.001347,3,7194.10000
1531726,1577857740,7194.10000,7194.10000,7194.10000,7194.10000,0.020000,7196.904431,7195.969987,-0.934445,84.758364,49.649119,7195.014807,0.001347,1,7194.10000
1531727,1577857920,7194.10000,7194.10000,7194.10000,7194.10000,0.003340,7196.858077,7195.908676,-0.949402,84.758364,49.649119,7195.004643,0.001347,2,7194.10000
1531727,1577857920,7194.10000,7194.10000,7194.10000,7194.10000,0.003340,7196.858077,7195.908676,-0.949402,84.758364,49.649119,7195.004643,0.001347,1,7194.10000


In [None]:
X = final_df.drop("predict_forecast", axis=1).values
y = final_df["predict_forecast"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

graph_X = []
graph_X_pred = []
graph_y = []
for i in range(len(X_test)):
    if X_test[i][-1] == PREDICT_NUM:
        graph_X.append(X_test[i][0])
        graph_X_pred.append(X_test[i])
        graph_y.append(y_test[i])


<h3> Linear Models </h3>
Here we now have the data for the data in the config. (Later we will try and make it so that it is all coins, but for now it is just XBT or whatever is in config). For this we will be using lots of different regression methods. We'll test them all then try and use model predictions for features going from worse to best. The different linear models that will be used are:

• Linear Regression

• Ridge

• Lasso

• MultiTaskLasso

• ElasticNet

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [12]:
#lin reg
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
print(lin_reg.score(X_test, y_test))

0.9996599789203734


In [13]:
#ridge
alphas = [0.01, 0.05, 0.075]
max_score = 0
alpha_val = 0
global_ridge = 0
for a in tqdm(alphas):
    ridge = Ridge(alpha=a)
    ridge.fit(X_train, y_train)
    score = ridge.score(X_test, y_test)
    if score > max_score:
        alpha_val = a
        max_score = score
        global_ridge = ridge

print("Max score is: {} with alpha of: {}".format(max_score, alpha_val))

100%|██████████| 3/3 [00:06<00:00,  2.09s/it]

Max score is: 0.9996599789203819 with alpha of: 0.01





In [14]:
#lasso
alphas = [0.01, 0.1, 0.5]
max_score = 0
alpha_val = 0
global_lasso = 0
for a in tqdm(alphas):
    lasso = Lasso(alpha=a)
    lasso.fit(X_train, y_train)
    score = lasso.score(X_test, y_test)
    if score > max_score:
        alpha_val = a
        max_score = score
        global_lasso = lasso

print("Max score is: {} with alpha of: {}".format(max_score, alpha_val))

100%|██████████| 3/3 [00:49<00:00, 16.51s/it]

Max score is: 0.9996097418013838 with alpha of: 0.01





In [15]:
#multi task lasso
alphas = [0.01, 0.1, 0.5, 1, 2]
l1_ratio = [0.01, 0.1, 0.3, 0.5]
max_score = 0
alpha_val = 0
l1_value = 0
global_e_net = 0
for a in tqdm(alphas):
    for l in l1_ratio:
        e_net = ElasticNet(alpha=a, l1_ratio=l)
        e_net.fit(X_train, y_train)
        score = e_net.score(X_test, y_test)
        if score > max_score:
            alpha_val = a
            l1_value = l
            max_score = score
            global_e_net = e_net

print("Max score is: {} with alpha of: {} and l1 val of: {}".format(max_score, alpha_val, l1_value))

100%|██████████| 5/5 [04:42<00:00, 56.59s/it]

Max score is: 0.9996097426264032 with alpha of: 0.01 and l1 val of: 0.01





<h3>Support Vector Machines</h3>
<br>
Now we'll be testing different SVM models to see how well they do with regression. The different models we'll test are:

• SVR

• NuSVR

• LinearSVR

Unlike others I am splitting it up into individual parameters instead of an exhaustive search just based off how long it takes to train each model

In [12]:
from sklearn.svm import SVR
from sklearn.svm import NuSVR
from sklearn.svm import LinearSVR

In [13]:
#SVR
kernels = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
degrees = [1, 5, 7]
C_vals = [0.1, 0.5, 0.7, 1, 2]
max_score = 0
ker_val = "rbf"
deg_val = 3
C_val = 1
global_svr = 0

for k in tqdm(kernels):
    svr_model = SVR(kernel=k)
    svr_model.fit(X_train, y_train)
    score = svr_model.score(X_test, y_test)
    if score > max_score:
        max_score = score
        ker_val = k
        global_svr = svr_model

for d in tqdm(degrees):
    svr_model = SVR(kernel=ker_val, degree=d)
    svr_model.fit(X_train, y_train)
    score = svr_model.score(X_test, y_test)
    if score > max_score:
        ker_val = k
        max_score = score
        deg_val = d
        global_svr = svr_model

for c in tqdm(C_vals):
    svr_model = SVR(kernel=ker_val, degree=deg_val, C=c)
    svr_model.fit(X_train, y_train)
    score = svr_model.score(X_test, y_test)
    if score > max_score:
        ker_val = k
        max_score = score
        deg_val = d
        C_val = c
        global_svr = svr_model

                
print("Max score is: {} with kernel of: {}, degree val of: {}, and c_val of: {}".format(max_score, ker_val, deg_val, C_val))

0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
#NuSVR
kernels = ['linear', 'poly', 'sigmoid', 'precomputed']
degrees = [1, 5, 7]
C_vals = [0.1, 0.5, 0.7, 2]
nus = [0.1, 0.3, 0.5, 0.7, 1]

max_score = 0
ker_val = 'rbf'
deg_val = 3
C_val = 1
nu_val = 0.5
global_n_svr = 0

for n in tqdm(nus):
    n_svr_model = NuSVR(nu=n)
    n_svr_model.fit(X_train, y_train)
    score = n_svr_model.score(X_test, y_test)
    if score > max_score:
        max_score = score
        nu_val = n
        global_n_svr = n_svr_model

for k in tqdm(kernels):
    n_svr_model = NuSVR(nu=nu_val, kernel=k)
    n_svr_model.fit(X_train, y_train)
    score = n_svr_model.score(X_test, y_test)
    if score > max_score:
        max_score = score
        ker_val = k
        global_n_svr = n_svr_model

for c in tqdm(C_vals):
    n_svr_model = NuSVR(C=c, nu=nu_val, kernel=ker_val)
    n_svr_model.fit(X_train, y_train)
    score = n_svr_model.score(X_test, y_test)
    if score > max_score:
        max_score = score
        C_val = c
        global_n_svr = n_svr_model

for d in tqdm(degrees):
    n_svr_model = NuSVR(C=C_val, nu=nu_val, kernel=ker_val, degree=d)
    n_svr_model.fit(X_train, y_train)
    score = n_svr_model.score(X_test, y_test)
    if score > max_score:
        max_score = score
        deg_val = d
        global_n_svr = n_svr_model
                    
print("Max score is: {} with kernel of: {}, degree val of: {}, and c_val of: {}".format(max_score, ker_val, deg_val, C_val))

In [None]:
#Linear SVR
losses = ['epsilon_insensitive', 'squared_epsilon_insensitive']
C_values = [0.1, 0.5, 0.7, 1, 2]

max_score = 0
loss_val = ""
deg_val = 0
C_val = 0
global_lin_svr = 0

for l in tqdm(losses):
    lin_svr = LinearSVR(loss=l)
    lin_svr.fit(X_train, y_train)
    score = lin_svr.score(X_test, y_test)
    if score > max_score:
        max_score = score
        loss_val = l
        global_lin_svr = lin_svr

for c in tqdm(C_values):
    lin_svr = LinearSVR(loss=loss_val, C=c)
    lin_svr.fit(X_train, y_train)
    score = lin_svr.score(X_test, y_test)
    if score > max_score:
        max_score = score
        C_val = c
        global_lin_svr = lin_svr
                    
print("Max score is: {} with loss of: {} and c_val of: {}".format(max_score, loss_val, C_val))

<h3>Stocastic Gradient Descent</h3>
<br>
After using SVM, we will now look to SGD and their one regression model

• SGDRegressor

In [13]:
from sklearn.linear_model import SGDRegressor

In [15]:
#SGD
losses = ['squared_loss', 'huber', 'epsilon_insensitive','squared_epsilon_insensitive']
alphas = [0.0001, 0.01, 0.1, 0.5, 0.9]
l1_ratios = [0.01, 0.15, 0.3, 0.5, 0.8]

max_score = 0
alph = 0.0001
l1_rat = 0.15
loss_val = ""
global_sgd = 0

for l in tqdm(losses):
    for a in alphas:
        for r in l1_ratios:
            sgd_model = SGDRegressor(loss=l, alpha=a, l1_ratio=r)
            sgd_model.fit(X_train, y_train)
            score = sgd.score(X_test, y_test)
            if score > max_score:
                max_score = score
                loss_val = l
                alph = a
                l1_rat = r
                global_sgd = sgd_model

print("Max score is: {} with loss of: {}, alpha of: {}, and l1 ratio of: {}".format(max_score, loss_val, alph, l1_rat))
                

0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor

In [14]:
neighbors = [1, 5, 10, 20]
leaf_sizes = [2, 5, 15, 30, 60]

max_score = 0
leaf_val = 0
neighbor_val = 0
global_k_neighbor = 0

for l in tqdm(leaf_sizes):
    for n in neighbors:
        k_neigh_model = KNeighborsRegressor(n_neighbors=n, leaf_size=l, n_jobs=-1)
        k_neigh_model.fit(X_train, y_train)
        score = k_neigh_model(X_test, y_test)
        if score > max_score:
            max_score = score
            neighbor_val = n
            leaf_val = l
            global_k_neighbor = k_neigh_model

print("Max score is: {} with {} nieghbors, and a leaf size of: {}".format(max_score, neighbor_val, leaf_val))

0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
radius= [.1, .5, 1, 2, 10]
leaf_sizes = [2, 5, 15, 30, 60]

max_score = 0
leaf_val = 0
rad_val = 0
global_rad = 0

for l in tqdm(leaf_sizes):
    for r in radius:
        radius_model = RadiusNeighborsRegressor(radius=r, leaf_size=l, n_jobs=-1)
        radius_model.fit(X_train, y_train)
        score = radius_model(X_test, y_test)
        if score > max_score:
            max_score = score
            rad_val = r
            leaf_val = l
            global_rad = radius_model

print("Max score is: {} with {} nieghbors, and a leaf size of: {}".format(max_score, rad_val, leaf_val))

In [None]:
#ensemble regressors

In [None]:
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.ensemble import ExtraTreesRegressor
 from sklearn.ensemble import AdaBoostRegressor
 from sklearn.ensemble import GradientBoostingRegressor
 from xgboost import XGBRegressor

NameError: name 'appended_dataset' is not defined

In [None]:
from sklearn.neural_network import MLPRegressor
#RNN
#LSTM