<a href="https://colab.research.google.com/github/michaelmarchesi/Bitcoin_forecasting/blob/main/RandomForest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json
import requests
 
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM,BatchNormalization
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
 
%matplotlib inline

In [2]:
endpoint = 'https://min-api.cryptocompare.com/data/histoday'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=2000')
df = pd.DataFrame(json.loads(res.content)['Data'])
df = df.set_index('time')

df.index = pd.to_datetime(df.index, unit='s').astype('datetime64[ns, Europe/Paris]').tz_convert('America/Edmonton')

In [3]:
def calcMACD(data):
  period12 = data.ewm(span=12).mean()
  period26 = data.ewm(span=26).mean()
  macd = []  # List to hold the MACD line values
  counter=0  # Loop to substantiate the MACD line
  while counter < (len(period12)):
    macd.append(period12.iloc[counter,0] - period26.iloc[counter,0])  # Subtract the 26 day EW moving average from the 12 day.
    counter += 1
  return macd

In [4]:
df['macd'] = calcMACD(df)
y = df['close']
features = ['macd','volumeto', 'high', 'low']
X = df[features]
X.describe()


Unnamed: 0,macd,volumeto,high,low
count,2001.0,2001.0,2001.0,2001.0
mean,140.992945,459813300.0,6717.347726,6303.111659
std,619.281528,680493600.0,7161.761892,6601.745692
min,-1409.758845,7377922.0,229.62,224.07
25%,-47.515214,56085870.0,920.49,880.79
50%,9.837955,257704500.0,6401.5,6084.0
75%,203.24237,538772500.0,9400.99,9026.42
max,4963.45232,8100819000.0,52622.68,50874.81


In [5]:
from sklearn.model_selection import train_test_split
train_X, val_X, train_y, val_y = train_test_split(X, y, shuffle=False, train_size=.9)

from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(random_state=0,verbose=1, max_depth=50)

print(train_X)

                                 macd      volumeto      high       low
time                                                                   
2015-08-28 16:00:00-06:00    0.000000  1.007220e+07    233.09    226.35
2015-08-29 16:00:00-06:00   -0.017276  9.118089e+06    232.32    225.44
2015-08-30 16:00:00-06:00   -0.038448  1.252197e+07    231.79    224.20
2015-08-31 16:00:00-06:00   -0.059411  1.520624e+07    231.45    225.76
2015-09-01 16:00:00-06:00   -0.103772  1.344378e+07    230.59    225.57
...                               ...           ...       ...       ...
2020-07-27 16:00:00-06:00  339.972539  9.386678e+08  11253.39  10590.98
2020-07-28 16:00:00-06:00  416.744951  6.892065e+08  11348.53  10854.04
2020-07-29 16:00:00-06:00  458.921228  5.035635e+08  11182.76  10875.14
2020-07-30 16:00:00-06:00  508.381603  6.081512e+08  11454.11  10981.87
2020-07-31 16:00:00-06:00  574.896009  7.412529e+08  11874.77  11240.98

[1800 rows x 4 columns]


In [6]:
rf_model.fit(train_X, train_y)
print(val_X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


                                  macd      volumeto      high       low
time                                                                    
2020-08-01 16:00:00-06:00   638.973609  7.216395e+08  12106.89  10694.22
2020-08-02 16:00:00-06:00   631.590833  4.739950e+08  11476.29  10945.66
2020-08-03 16:00:00-06:00   613.732523  3.933018e+08  11415.16  11021.07
2020-08-04 16:00:00-06:00   622.777864  5.553956e+08  11791.62  11095.47
2020-08-05 16:00:00-06:00   632.195749  5.870338e+08  11909.81  11577.80
...                                ...           ...       ...       ...
2021-02-13 16:00:00-07:00  3836.056969  1.841963e+09  49642.39  47096.92
2021-02-14 16:00:00-07:00  3868.349710  2.502098e+09  49031.74  45910.00
2021-02-15 16:00:00-07:00  3971.601064  3.237085e+09  50561.53  47068.63
2021-02-16 16:00:00-07:00  4171.657810  3.410519e+09  52622.68  48967.72
2021-02-17 16:00:00-07:00  4275.107194  2.206082e+09  52550.60  50874.81

[201 rows x 4 columns]


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.6s finished


In [7]:
rf_pred = rf_model.predict(val_X)
print(rf_pred)

[11057.4544 11255.9192 11122.2233 11463.0295 11905.3508 11764.9632
 11818.3847 11817.1732 11994.8721 11576.9482 11439.3455 11563.3267
 11827.258  11940.7734 11893.1035 12243.5141 12265.1967 11952.8824
 11887.698  11820.5613 11573.1664 11748.5152 11812.0917 11428.0987
 11514.3938 11478.3097 11524.7824 11564.5821 11573.4632 11754.9683
 11973.5645 11536.8474 10862.3811 10323.8341 10287.4011 10166.9538
 10201.0422 10291.343  10145.8825 10365.485  10282.1219 10410.6653
 10384.6801 10522.4266 10827.3569 10930.6875 10820.0044 10956.2795
 11118.5006 10939.8363 10654.7053 10523.964  10382.7746 10486.1089
 10670.0979 10669.2107 10661.5593 10782.4759 10662.0662 10677.4618
 10771.2637 10553.162  10563.6194 10610.635  10671.647  10663.9109
 10617.8304 10825.4274 11098.7435 11310.8954 11170.4419 11410.3784
 11555.9806 11553.3387 11511.5864 11458.0955 11180.728  11559.4644
 11623.1896 12020.5554 12482.2756 12670.0044 12676.0289 12670.7625
 12683.5042 12682.1394 13599.9975 13654.8956 12955.6048 12991.

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [8]:
df['close'].tail()

from sklearn.metrics import mean_absolute_error
rf_val_mae = mean_absolute_error(val_y,rf_pred)
rf_val_mae

5330.754682587069

In [9]:
split_percent = 0.9
split = int(split_percent*len(df.index))
date_train = df.index[:split]
date_test = df.index[split:]
trace1 = {
    'x': date_train,
#     'open': prediction_open,
    'y': df['close'],
#     'high': prediction_high,
#     'low': prediction_low,
    'type': 'scatter',
    'mode': 'lines',
    'line': {
        'width': 2,
        'color': 'blue'
    },
    'name': 'real close',
    'showlegend': True
}

trace2 = {
    'x': date_test,
#     'open': prediction_open,
    'y': rf_pred,
#     'high': prediction_high,
#     'low': prediction_low,
    'type': 'scatter',
    'mode': 'lines',
    'line': {
        'width': 2,
        'color': 'red'
    },
    'name': 'prediction close',
    'showlegend': True
}

trace3 = {
    'x': date_test,
#     'open': prediction_open,
    'y': val_y,
#     'high': prediction_high,
#     'low': prediction_low,
    'type': 'scatter',
    'mode': 'lines',
    'line': {
        'width': 2,
        'color': 'blue'
    },
    'name': 'actual test close',
    'showlegend': True
}

fig = go.Figure(data=[trace2,trace3])
fig.show()

In [10]:

endpoint = 'https://min-api.cryptocompare.com/data/histominute'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=30')
df_update = pd.DataFrame(json.loads(res.content)['Data'])
df_update = df_update.set_index('time')

df_update.index = pd.to_datetime(df_update.index, unit='s').astype('datetime64[ns, Europe/Paris]').tz_convert('America/Edmonton')
df_update['macd'] = calcMACD(df_update)
df_update.describe()


Unnamed: 0,high,low,open,volumefrom,volumeto,close,macd
count,31.0,31.0,31.0,31.0,31.0,31.0,31.0
mean,51688.255806,51637.630323,51666.377097,35.86129,1852626.0,51660.397097,-6.622267
std,59.396743,61.28665,67.438381,69.368796,3583323.0,60.457436,6.26902
min,51582.99,51541.55,51557.1,0.0,0.0,51557.1,-18.184305
25%,51644.145,51595.875,51624.18,16.325,842951.3,51624.18,-11.996287
50%,51679.73,51628.77,51658.16,20.6,1063274.0,51644.98,-6.097867
75%,51723.465,51673.585,51712.74,29.215,1507041.0,51710.19,-1.39706
max,51827.79,51792.42,51827.79,402.53,20792640.0,51805.99,4.130029


In [11]:
import pickle

In [12]:
model_file = 'model1.pkl'
pickle.dump(rf_model,open(model_file,'wb'))
loaded_model = pickle.load(open(model_file,'rb'))



new_predictions = loaded_model.predict(X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [13]:

endpoint = 'https://min-api.cryptocompare.com/data/histominute'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=100')
df_update = pd.DataFrame(json.loads(res.content)['Data'])
df_update = df_update.set_index('time')

df_update.index = pd.to_datetime(df_update.index, unit='s').astype('datetime64[ns, Europe/Paris]').tz_convert('America/Edmonton')
df_update['macd'] = calcMACD(df_update)
df_update.head()


y = df_update['close']
features_update = ['macd','volumeto', 'high', 'low']
X = df_update[features]
df = df.append(df_update).drop_duplicates()
df.tail(25)

# X.describe()

Unnamed: 0_level_0,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol,macd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-02-18 15:14:00-07:00,51676.84,51625.25,51638.71,41.35,2135159.7,51674.45,direct,,-38.625216
2021-02-18 15:15:00-07:00,51679.73,51610.69,51674.45,21.29,1099203.37,51612.08,direct,,-42.490331
2021-02-18 15:16:00-07:00,51640.73,51593.26,51612.08,28.69,1480805.65,51604.83,direct,,-48.138579
2021-02-18 15:17:00-07:00,51641.42,51598.49,51604.83,20.6,1063273.6,51620.73,direct,,-51.960032
2021-02-18 15:18:00-07:00,51622.03,51547.13,51620.73,31.43,1620679.49,51557.1,direct,,-55.905626
2021-02-18 15:19:00-07:00,51582.99,51541.55,51557.1,29.74,1533277.26,51574.17,direct,,-61.468606
2021-02-18 15:20:00-07:00,51729.74,51569.24,51574.17,28.4,1466864.92,51729.74,direct,,-53.438042
2021-02-18 15:21:00-07:00,51737.43,51654.03,51729.74,33.9,1752762.57,51723.72,direct,,-45.924727
2021-02-18 15:22:00-07:00,51731.1,51678.64,51723.72,11.51,595165.74,51680.48,direct,,-40.019138
2021-02-18 15:23:00-07:00,51718.67,51677.66,51680.48,16.58,857455.54,51716.4,direct,,-35.926514


In [14]:
df.tail()

Unnamed: 0_level_0,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol,macd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-02-18 15:34:00-07:00,51668.01,51614.99,51629.89,402.53,20792641.35,51654.89,invert_divide,ETH,-34.807098
2021-02-18 15:35:00-07:00,51716.72,51656.67,51658.16,20.66,1067901.29,51710.43,direct,,-27.691848
2021-02-18 15:36:00-07:00,51718.58,51628.77,51710.43,15.82,817224.47,51641.93,direct,,-21.653347
2021-02-18 15:37:00-07:00,51666.59,51626.42,51641.93,19.4,1001868.98,51644.98,direct,,-20.821174
2021-02-18 15:38:00-07:00,51644.98,51644.95,51644.98,0.0,0.0,51644.95,direct,,-21.655098


In [15]:
new_predictions = loaded_model.predict(X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [20]:
trace4 = {
    'x': X.index,
#     'open': prediction_open,
    'y': new_predictions,
#     'high': prediction_high,
#     'low': prediction_low,
    'type': 'scatter',
    'mode': 'lines',
    'line': {
        'width': 2,
        'color': 'red'
    },
    'name': 'predicted close',
    'showlegend': True
}
trace3 = {
    'x': X.index,
#     'open': prediction_open,
    'y': df_update['close'],
#     'high': prediction_high,
#     'low': prediction_low,
    'type': 'scatter',
    'mode': 'lines',
    'line': {
        'width': 4,
        'color': 'green'
    },
    'name': 'actual test close',
    'showlegend': True
}
fig = go.Figure(data=[trace3,trace4])
fig.show()