In [7]:
import numpy as np
import pandas as pd
from pathlib import Path
import talib as ta
from sklearn.model_selection import train_test_split
from pycaret.regression import *

In [25]:
#read the btc csv
df = pd.read_csv(
    Path("./Resources/BTC_USD.csv"), 
    index_col='Date', 
    infer_datetime_format=True, 
    parse_dates=True
)

In [26]:
#add our target, next days close
future_close = 1
df['Future Price'] = df['Close'].shift(-future_close)
df.drop(['High', 'Low', 'Open', 'Adj Close', 'value_classification'], axis=1, inplace=True)

In [27]:
#test df
df.tail()

Unnamed: 0_level_0,Close,Volume,50 SMA,200 SMA,10 EMA,20 EMA,RSI,value,Future Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-11-10,17586.771484,83202283721,19584.08373,23547.594272,19118.173558,19554.70252,38.1313,22,17034.292969
2022-11-11,17034.292969,55871616488,19536.498574,23430.474194,18739.286178,19314.663515,35.836612,25,16799.185547
2022-11-12,16799.185547,29717699419,19486.529512,23323.882817,18386.540609,19075.094185,34.874802,21,16353.365234
2022-11-13,16353.365234,27209183682,19434.856582,23209.444038,18016.872359,18815.881904,33.062715,22,16412.138672
2022-11-14,16412.138672,48737193984,19387.057402,23092.635591,17725.102598,18586.953977,33.552888,24,


In [28]:
#copy df
future_df = df.copy()
#create data set
X = np.array(future_df[df.columns])
#remove future_close row
X = X[:len(df)-future_close]
#create target
y = np.array(df['Future Price'])
#remove future_close row
y = y[:-future_close]
#split data train test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0, shuffle=False)


In [29]:
#create train dataframe
train_df = pd.DataFrame(X_train, columns = df.columns)
#show train
train_df.head()

Unnamed: 0,Close,Volume,50 SMA,200 SMA,10 EMA,20 EMA,RSI,value,Future Price
0,9170.540039,9959400000.0,14036.866816,7856.521897,10769.754729,11614.719069,32.50563,30.0,8830.75
1,8830.75,12726900000.0,13882.201816,7889.533597,10417.208414,11349.579157,31.21476,15.0,9174.910156
2,9174.910156,7263790000.0,13711.562012,7923.813749,10191.336004,11142.467824,34.070623,40.0,8277.009766
3,8277.009766,7073550000.0,13487.154199,7953.831648,9843.276688,10869.567056,30.511391,24.0,6955.27002
4,6955.27002,9285290000.0,13243.443584,7974.519998,9318.184566,10496.776862,26.176396,11.0,7754.0


In [30]:
#create test dataframe
test_df = pd.DataFrame(X_test, columns = future_df.columns)
#show data
test_df.head()

Unnamed: 0,Close,Volume,50 SMA,200 SMA,10 EMA,20 EMA,RSI,value,Future Price
0,39214.21875,26545600000.0,40435.970859,49283.40791,39268.05763,40017.743043,45.736592,27.0,39105.148438
1,39105.148438,17467550000.0,40386.915781,49251.008496,39238.437777,39930.829271,45.356934,26.0,37709.785156
2,37709.785156,23450130000.0,40306.432656,49211.589238,38960.500937,39719.30126,40.702034,26.0,43193.234375
3,43193.234375,35690010000.0,40332.065313,49205.413965,39730.088835,40050.152033,58.658017,20.0,44354.636719
4,44354.636719,32479050000.0,40382.732812,49188.220547,40570.915723,40460.102956,61.328984,51.0,43924.117188


In [32]:
#setup the model
regression_setup = setup(data = train_df, target = 'Future Price', session_id = 1, use_gpu=True)

Unnamed: 0,Description,Value
0,session_id,1
1,Target,Future Price
2,Original Data,"(1482, 9)"
3,Missing Values,False
4,Numeric Features,8
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(1037, 6)"


In [37]:
#test models by r2
best_model = compare_models(sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
omp,Orthogonal Matching Pursuit,535.0233,1005932.0569,985.4328,0.9969,0.0367,0.0259,0.004
llar,Lasso Least Angle Regression,537.5674,1012518.2906,988.6249,0.9969,0.037,0.0263,0.005
en,Elastic Net,538.4747,1018479.4688,991.8528,0.9969,0.0371,0.0263,0.011
lasso,Lasso Regression,538.4779,1018489.825,991.8584,0.9969,0.0371,0.0263,0.007
ridge,Ridge Regression,538.4802,1018488.3188,991.8578,0.9969,0.0371,0.0263,0.004
br,Bayesian Ridge,538.572,1017592.3177,991.3291,0.9969,0.037,0.0262,0.004
lr,Linear Regression,538.5915,1017597.4625,991.3254,0.9969,0.037,0.0262,0.004
lar,Least Angle Regression,542.82,1023019.1514,993.9754,0.9968,0.0375,0.0267,0.005
gbr,Gradient Boosting Regressor,580.5754,1147394.7332,1057.5336,0.9965,0.0396,0.0289,0.122
rf,Random Forest Regressor,592.4293,1159715.7717,1062.3766,0.9964,0.0393,0.0285,0.752


In [38]:
#create model
model = create_model(best_model)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,515.1002,1118586.6802,1057.6326,0.9966,0.0321,0.0227
1,574.7613,1157976.1132,1076.093,0.9968,0.0358,0.0256
2,437.625,611983.7027,782.2939,0.9979,0.0311,0.0228
3,701.3232,1671485.1919,1292.8593,0.9948,0.0498,0.0355
4,439.6796,675202.0651,821.7068,0.9982,0.0314,0.0209
5,541.7686,1059445.4029,1029.2936,0.9954,0.0381,0.0275
6,512.6648,674006.4717,820.979,0.9979,0.0382,0.0277
7,436.1134,632641.4055,795.3876,0.9984,0.0345,0.023
8,464.2871,777411.3655,881.7093,0.9974,0.033,0.0234
9,726.9095,1680582.1705,1296.3727,0.9955,0.0433,0.0301


In [39]:
evaluate_model(model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [40]:
#test the predictions
btc_predictions = predict_model(model, data=test_df)
#print
btc_predictions

Unnamed: 0,Close,Volume,50 SMA,200 SMA,10 EMA,20 EMA,RSI,value,Future Price,Label
0,39214.218750,2.654560e+10,40435.970859,49283.407910,39268.057630,40017.743043,45.736592,27.0,39105.148438,39268.795892
1,39105.148438,1.746755e+10,40386.915781,49251.008496,39238.437777,39930.829271,45.356934,26.0,37709.785156,39159.636204
2,37709.785156,2.345013e+10,40306.432656,49211.589238,38960.500937,39719.301260,40.702034,26.0,43193.234375,37763.129521
3,43193.234375,3.569001e+10,40332.065313,49205.413965,39730.088835,40050.152033,58.658017,20.0,44354.636719,43251.072039
4,44354.636719,3.247905e+10,40382.732812,49188.220547,40570.915723,40460.102956,61.328984,51.0,43924.117188,44413.426070
...,...,...,...,...,...,...,...,...,...,...
257,15880.780273,1.029052e+11,19603.296309,23657.006880,19458.485130,19761.853155,24.217638,29.0,17586.771484,15916.237310
258,17586.771484,8.320228e+10,19584.083730,23547.594272,19118.173558,19554.702520,38.131300,22.0,17034.292969,17623.626460
259,17034.292969,5.587162e+10,19536.498574,23430.474194,18739.286178,19314.663515,35.836612,25.0,16799.185547,17070.695228
260,16799.185547,2.971770e+10,19486.529512,23323.882817,18386.540609,19075.094185,34.874802,21.0,16353.365234,16835.395152
