<a href="https://colab.research.google.com/github/michaelmarchesi/Bitcoin_forecasting/blob/main/BlackForestPredictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import requests
 
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM,BatchNormalization
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
 
%matplotlib inline

In [None]:
# Download the most recent one-minute BTC/USD bars (limit=2000) from CryptoCompare.
endpoint = 'https://min-api.cryptocompare.com/data/histominute'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=2000', timeout=30)
res.raise_for_status()  # surface HTTP errors here instead of a KeyError on 'Data' below
df = pd.DataFrame(res.json()['Data']).set_index('time')

# 'time' is parsed as epoch seconds, which are UTC-based: make the index
# UTC-aware first and only then convert to local time. Casting the naive index
# straight to a 'Europe/Paris' dtype localizes the values as Paris wall-clock
# time on older pandas (shifting every bar) and raises on pandas >= 2.0.
df.index = pd.to_datetime(df.index, unit='s', utc=True).tz_convert('America/Edmonton')

In [None]:
def calcMACD(data, column=None, fast=12, slow=26):
  """Return the MACD line (fast EWM mean minus slow EWM mean) as a list.

  Args:
    data: pandas DataFrame or Series (or any sequence accepted by pd.Series)
      holding the price values.
    column: label of the column to use when `data` is a DataFrame. The default
      (None) uses the first numeric column, which is what the original
      whole-frame implementation effectively read via `.iloc[i, 0]`.
    fast: span of the fast exponentially weighted mean (default 12).
    slow: span of the slow exponentially weighted mean (default 26).

  Returns:
    A list of floats, one per row of `data`; empty when `data` has no rows.

  Raises:
    ValueError: if `data` is a DataFrame without any numeric column and no
      `column` was given.
  """
  if isinstance(data, pd.DataFrame):
    if column is None:
      # Restrict to numeric columns up front: running ewm() over object columns
      # (e.g. 'conversionType') fails on recent pandas versions.
      numeric = data.select_dtypes(include='number')
      if numeric.shape[1] == 0:
        raise ValueError('calcMACD needs at least one numeric column')
      prices = numeric.iloc[:, 0]
    else:
      prices = data[column]
  else:
    prices = pd.Series(data)

  # Vectorized difference replaces the row-by-row while loop.
  macd_line = prices.ewm(span=fast).mean() - prices.ewm(span=slow).mean()
  return macd_line.tolist()

In [None]:
# Target: the close of each one-minute bar.
y = df.loc[:, 'close']

# Predictors: the MACD line plus volume/high/low taken from the same rows.
# NOTE(review): high and low belong to the same bar as the close being
# predicted - confirm this is intended before treating the model as a forecast.
df['macd'] = calcMACD(df)
features = ['macd', 'volumeto', 'high', 'low']
X = df[features]

# Summary statistics of the feature matrix.
X.describe()


Unnamed: 0,macd,volumeto,high,low
count,2001.0,2001.0,2001.0,2001.0
mean,9.715114,4047027.0,51151.653538,51097.393418
std,50.98668,15094350.0,1076.630642,1080.574545
min,-121.260554,0.0,48616.21,48561.76
25%,-27.852416,961803.3,50538.0,50423.03
50%,8.051738,1580225.0,51374.4,51316.21
75%,41.397127,2578473.0,52146.13,52103.19
max,184.289582,234288200.0,52622.68,52565.85


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Chronological hold-out: shuffle=False keeps the first 90% of rows for
# training and the remaining rows for validation.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    train_size=0.9,
    shuffle=False,
)

# Random forest with a fixed seed so repeated fits give the same trees.
rf_model = RandomForestRegressor(max_depth=50, random_state=0, verbose=1)

print(train_X)

                                macd    volumeto      high       low
time                                                                
2021-02-16 14:32:00-07:00   0.000000   764834.27  48616.21  48561.76
2021-02-16 14:33:00-07:00   0.680929   535914.86  48646.56  48612.18
2021-02-16 14:34:00-07:00   3.728306  3471388.98  48738.81  48645.12
2021-02-16 14:35:00-07:00   4.982505  3386058.68  48737.69  48696.27
2021-02-16 14:36:00-07:00   7.474326  4115748.42  48785.82  48731.32
...                              ...         ...       ...       ...
2021-02-17 20:27:00-07:00 -13.694103   946948.61  51969.90  51919.82
2021-02-17 20:28:00-07:00  -7.858432   920652.30  51995.07  51944.21
2021-02-17 20:29:00-07:00  -3.519050  1170305.73  51991.03  51938.56
2021-02-17 20:30:00-07:00  -2.072650  1464887.78  51966.04  51947.74
2021-02-17 20:31:00-07:00   1.821169   795480.88  52000.35  51955.52

[1800 rows x 4 columns]


In [None]:
# Fit the forest on the training window, then print the hold-out features.
rf_model.fit(X=train_X, y=train_y)
print(val_X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


                                macd    volumeto      high       low
time                                                                
2021-02-17 20:32:00-07:00   6.838250   930762.73  52025.26  52000.35
2021-02-17 20:33:00-07:00  10.532337   438738.74  52023.27  52013.04
2021-02-17 20:34:00-07:00  17.642948  2432928.59  52077.63  52018.19
2021-02-17 20:35:00-07:00  22.356356  2541729.34  52069.40  52025.67
2021-02-17 20:36:00-07:00  23.295164   377708.24  52038.07  52027.50
...                              ...         ...       ...       ...
2021-02-17 23:48:00-07:00 -32.557372  3311970.29  51634.41  51587.25
2021-02-17 23:49:00-07:00 -24.028841  1605995.35  51637.98  51585.58
2021-02-17 23:50:00-07:00 -16.009749  1160093.84  51651.31  51604.27
2021-02-17 23:51:00-07:00  -8.501916  1761350.42  51664.38  51637.89
2021-02-17 23:52:00-07:00  -2.780486        0.00  51661.15  51660.42

[201 rows x 4 columns]


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.6s finished


In [None]:
# Predict a close for every row of the hold-out window and print the raw array.
rf_pred = rf_model.predict(X=val_X)
print(rf_pred)

[52019.1627 52023.9254 52059.222  52062.445  52038.7685 52066.5206
 52072.8676 52084.9675 52089.6427 52090.958  52104.0962 52097.2183
 52101.5838 52107.6154 52098.5151 52093.1105 52097.46   52091.8241
 52104.0202 52138.7332 52161.4012 52168.371  52158.6079 52133.5599
 52107.7654 52108.7615 52157.4477 52194.0987 52172.7282 52163.0649
 52172.2479 52176.1104 52168.5584 52165.4155 52166.1291 52167.2383
 52165.2638 52165.5347 52153.1086 52136.3398 52123.4062 52135.8973
 52142.3017 52136.6869 52109.934  52110.8857 52129.8967 52134.8583
 52125.177  52140.1859 52135.1993 52156.6733 52200.5783 52244.4382
 52268.1543 52286.088  52324.1394 52311.8824 52283.6628 52274.922
 52313.7189 52318.0578 52267.8768 52270.2054 52232.0107 52209.7048
 52222.4129 52234.7347 52237.8001 52230.5334 52230.5687 52233.7732
 52237.8996 52220.4174 52204.7106 52197.0978 52195.4378 52172.4482
 52172.6696 52156.7085 52119.2447 52135.0407 52129.1071 52148.2461
 52153.8432 52166.7669 52190.5444 52211.2979 52201.9831 52188.9

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [None]:
# Mean absolute error of the forest's predictions on the hold-out window.
# (mean_absolute_error is already imported in the notebook's import cell; the
# former mid-cell `df['close'].tail()` was never displayed and has been dropped.)
rf_val_mae = mean_absolute_error(val_y, rf_pred)
rf_val_mae

12.211609950246089

In [None]:
def _line_trace(x, y, name, color, width=2):
    """Build a Plotly scatter-trace dict that draws `y` against `x` as a line.

    Args:
        x: values for the horizontal axis.
        y: values for the vertical axis; should have the same length as `x`.
        name: legend label of the trace.
        color: line colour.
        width: line width (default 2).

    Returns:
        A dict suitable for the `data` list of `go.Figure`.
    """
    return {
        'x': x,
        'y': y,
        'type': 'scatter',
        'mode': 'lines',
        'line': {
            'width': width,
            'color': color
        },
        'name': name,
        'showlegend': True
    }


# Recreate the 90/10 chronological split positions used for the model.
split_percent = 0.9
split = int(split_percent*len(df.index))
date_train = df.index[:split]
date_test = df.index[split:]

# Training-window closes. y is sliced to the same rows as date_train; pairing
# date_train with the full close column would give x and y different lengths.
trace1 = _line_trace(date_train, df['close'].iloc[:split], 'real close', 'blue')
# Model output vs. ground truth on the hold-out window.
trace2 = _line_trace(date_test, rf_pred, 'prediction close', 'red')
trace3 = _line_trace(date_test, val_y, 'actual test close', 'blue')

# Only the hold-out window is plotted; trace1 is built but not drawn.
fig = go.Figure(data=[trace2,trace3])
fig.update_layout(
    title='Random forest: predicted vs. actual close (hold-out window)',
    xaxis_title='time',
    yaxis_title='close',
)
fig.show()

In [None]:

# Refresh: pull the latest one-minute BTC/USD bars (limit=30).
endpoint = 'https://min-api.cryptocompare.com/data/histominute'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=30', timeout=30)
res.raise_for_status()  # surface HTTP errors here instead of a KeyError on 'Data' below
df_update = pd.DataFrame(res.json()['Data']).set_index('time')

# Epoch seconds are UTC-based: make the index UTC-aware, then convert to local
# time. Casting the naive index to a 'Europe/Paris' dtype localizes it as Paris
# wall-clock time on older pandas and raises on pandas >= 2.0.
df_update.index = pd.to_datetime(df_update.index, unit='s', utc=True).tz_convert('America/Edmonton')
df_update['macd'] = calcMACD(df_update)
df_update.describe()


Unnamed: 0,high,low,open,volumefrom,volumeto,close,macd
count,31.0,31.0,31.0,31.0,31.0,31.0,31.0
mean,51812.419677,51764.688065,51793.348065,31.083581,1610651.0,51791.588065,13.121192
std,83.646892,92.82088,86.963254,55.387506,2874648.0,89.201759,14.808488
min,51694.27,51567.27,51665.94,0.0,0.0,51665.94,-3.843613
25%,51728.055,51688.035,51717.8,10.28,531671.2,51709.72,-0.3548
50%,51796.84,51746.75,51781.34,16.09,834860.8,51781.34,7.882647
75%,51895.54,51858.07,51884.985,26.805,1389171.0,51885.39,28.909463
max,51927.05,51899.32,51921.58,314.47,16324130.0,51921.58,35.540375


In [None]:
import pickle

In [None]:
model_file = 'model1.pkl'

# Persist the fitted forest. The context manager closes (and flushes) the file
# handle, which the bare open(...) call never did.
with open(model_file, 'wb') as fh:
    pickle.dump(rf_model, fh)

# NOTE: pickle.load can execute arbitrary code - only load files written by
# this notebook or another trusted source.
with open(model_file, 'rb') as fh:
    loaded_model = pickle.load(fh)

# Check that the reloaded model can predict on the current feature matrix.
new_predictions = loaded_model.predict(X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [None]:

# Refresh: pull the latest one-minute BTC/USD bars (limit=100).
endpoint = 'https://min-api.cryptocompare.com/data/histominute'
res = requests.get(endpoint + '?fsym=BTC&tsym=USD&limit=100', timeout=30)
res.raise_for_status()  # surface HTTP errors here instead of a KeyError on 'Data' below
df_update = pd.DataFrame(res.json()['Data']).set_index('time')

# Epoch seconds are UTC-based: make the index UTC-aware, then convert to local
# time. Casting the naive index to a 'Europe/Paris' dtype localizes it as Paris
# wall-clock time on older pandas and raises on pandas >= 2.0.
df_update.index = pd.to_datetime(df_update.index, unit='s', utc=True).tz_convert('America/Edmonton')
df_update['macd'] = calcMACD(df_update)

# Target and feature matrix for the refreshed window.
y = df_update['close']
features_update = ['macd','volumeto', 'high', 'low']
X = df_update[features_update]

# Merge the refresh into the history. DataFrame.append no longer exists in
# pandas 2.0, so use pd.concat. De-duplicate on the timestamp index rather than
# on row values: overlapping bars carry a different 'macd' (computed on
# df_update alone), so a value-based drop_duplicates() kept both copies.
# keep='last' prefers the most recently fetched version of a bar.
df = pd.concat([df, df_update])
df = df[~df.index.duplicated(keep='last')]
df.tail(25)

Unnamed: 0_level_0,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol,macd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-02-18 00:36:00-07:00,51787.34,51665.94,51665.94,15.53,803884.44,51781.34,direct,,-34.185961
2021-02-18 00:37:00-07:00,51781.34,51623.65,51781.34,52.29,2703688.81,51666.66,direct,,-28.180097
2021-02-18 00:38:00-07:00,51699.54,51663.48,51666.66,10.13,523552.09,51696.02,direct,,-29.665012
2021-02-18 00:39:00-07:00,51713.67,51691.62,51696.02,7.862,406393.01,51713.67,direct,,-29.365246
2021-02-18 00:40:00-07:00,51724.31,51711.92,51713.67,7.822,404522.12,51721.93,direct,,-27.948418
2021-02-18 00:41:00-07:00,51749.85,51716.8,51721.93,10.44,539790.29,51746.75,direct,,-24.485865
2021-02-18 00:42:00-07:00,51792.51,51746.75,51746.75,13.53,700794.79,51792.23,direct,,-18.096223
2021-02-18 00:43:00-07:00,51796.84,51737.48,51792.23,27.13,1404068.54,51772.0,direct,,-12.539003
2021-02-18 00:44:00-07:00,51816.15,51771.45,51772.0,24.67,1277779.44,51815.92,direct,,-6.50387
2021-02-18 00:45:00-07:00,51863.28,51812.91,51815.92,72.77,3772164.85,51841.21,direct,,2.05356


In [None]:
# Show the five most recent rows of the merged history.
df.tail(n=5)

Unnamed: 0_level_0,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol,macd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-02-18 00:56:00-07:00,51892.0,51819.38,51878.42,14.78,766597.53,51819.38,direct,,33.251664
2021-02-18 00:57:00-07:00,51832.1,51785.98,51819.38,13.01,673803.65,51829.36,direct,,26.723165
2021-02-18 00:58:00-07:00,51838.65,51757.45,51829.36,8.815,456601.58,51758.37,direct,,21.825984
2021-02-18 00:59:00-07:00,51759.9,51720.53,51758.37,22.49,1163077.49,51726.88,direct,,11.461037
2021-02-18 01:00:00-07:00,51726.88,51705.93,51726.88,0.0,0.0,51705.93,direct,,0.576731


In [None]:
# Score the refreshed feature matrix with the model reloaded from disk.
new_predictions = loaded_model.predict(X=X)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [None]:
# Model output for the refreshed window (thin red line).
trace4 = dict(
    x=X.index,
    y=new_predictions,
    type='scatter',
    mode='lines',
    line=dict(width=2, color='red'),
    name='predicted close',
    showlegend=True,
)

# Observed closes for the same rows (thick green line).
trace3 = dict(
    x=X.index,
    y=df_update['close'],
    type='scatter',
    mode='lines',
    line=dict(width=4, color='green'),
    name='actual test close',
    showlegend=True,
)

# Actual closes are listed first, predictions second.
fig = go.Figure(data=[trace3, trace4])
fig.show()