In [1]:
import numpy as np
import pandas as pd
import talib as ta
import pandas_datareader as webreader
from sklearn.model_selection import train_test_split
from pycaret.regression import *



In [2]:
# the pandas webreader provides remote data access to apis
import pandas_datareader as webreader

date_today = "2022-11-10"
date_start = "2010-01-01"

# set the symbol to bitcoin-usd quotes
# for more symbols check yahoo.finance.com
symbol = "BTC-USD"

In [3]:
# now we will send the request to the yahoo finance api endpoint
df = webreader.DataReader(symbol, start=date_start, end=date_today, data_source="yahoo")

In [4]:
#ta lib technical indicators
df['50 SMA'] = ta.SMA(df['Close'], timeperiod=50)
df['200 SMA'] = ta.SMA(df['Close'], timeperiod=200)
df['10 EMA'] = ta.EMA(df['Close'], timeperiod=10)
df['20 EMA'] = ta.EMA(df['Close'], timeperiod=20)
df['RSI'] = ta.RSI(df['Close'], timeperiod=14)

In [5]:
df.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,50 SMA,200 SMA,10 EMA,20 EMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-11-07,21053.246094,20489.972656,20924.621094,20602.816406,53510852236,20602.816406,19683.553633,23881.031875,20634.032692,20341.873001,55.038707
2022-11-08,20664.607422,17603.544922,20600.671875,18541.271484,118992465607,18541.271484,19663.496484,23775.036631,20253.530654,20170.387142,35.913352
2022-11-09,18590.458984,15682.692383,18543.761719,15880.780273,102905151606,15880.780273,19603.296309,23657.00688,19458.48513,19761.853155,24.217638
2022-11-10,18054.3125,15834.018555,15883.158203,17586.771484,83202283721,17586.771484,19584.08373,23547.594272,19118.173558,19554.70252,38.1313
2022-11-11,17650.943359,16543.482422,17583.251953,17034.292969,55871616488,17034.292969,19536.498574,23430.474194,18739.286178,19314.663515,35.836612


In [6]:
#predecit the next day
future_prediction = 1
df['Future Price'] = df['Close'].shift(-future_prediction)

In [7]:
#test close and future price
df = df[['Close', 'RSI', 'Future Price']]
df.tail()

Unnamed: 0_level_0,Close,RSI,Future Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-11-07,20602.816406,55.038707,18541.271484
2022-11-08,18541.271484,35.913352,15880.780273
2022-11-09,15880.780273,24.217638,17586.771484
2022-11-10,17586.771484,38.1313,17034.292969
2022-11-11,17034.292969,35.836612,


In [8]:
#copy df
future_df = df.copy()
#create data set
X = np.array(future_df[df.columns])
#remove the future days
X = X[:len(df)-future_prediction]
#create target
y = np.array(df['Future Price'])
#remove future days
y = y[:-future_prediction]
#split data train test, note training variables
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0, shuffle=False)

In [9]:
#turn train into dataframe
train_df = pd.DataFrame(X_train, columns = df.columns)
#show data
train_df.head()

Unnamed: 0,Close,RSI,Future Price
0,457.334015,,424.440002
1,424.440002,,394.79599
2,394.79599,,408.903992
3,408.903992,,398.821014
4,398.821014,,402.152008


In [10]:
#test to df
test_df = pd.DataFrame(X_test, columns = future_df.columns)
#show data
test_df.head()

Unnamed: 0,Close,RSI,Future Price
0,48905.492188,67.287827,49321.652344
1,49321.652344,68.138209,49546.148438
2,49546.148438,68.61223,47706.117188
3,47706.117188,60.647974,48960.789062
4,48960.789062,63.738806,46942.21875


In [11]:
#setup the model
regression_setup = setup(data = train_df, target = 'Future Price', session_id = 1, use_gpu=True)

Unnamed: 0,Description,Value
0,session_id,1
1,Target,Future Price
2,Original Data,"(2530, 3)"
3,Missing Values,True
4,Numeric Features,2
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(1770, 2)"


In [12]:
#test models sort by r2
best_model = compare_models(sort = 'r2')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,262.2158,429633.1953,634.3136,0.9973,0.0698,0.0467,0.004
br,Bayesian Ridge,258.6531,429805.6488,634.4131,0.9973,0.055,0.0387,0.007
omp,Orthogonal Matching Pursuit,255.4732,431064.8934,635.3814,0.9973,0.0427,0.0311,0.005
huber,Huber Regressor,253.8204,429438.0775,634.2178,0.9973,0.038,0.026,0.016
lar,Least Angle Regression,262.2156,429632.6741,634.3133,0.9973,0.0698,0.0467,0.005
en,Elastic Net,262.1747,429632.6359,634.3126,0.9973,0.0697,0.0466,0.008
ridge,Ridge Regression,262.2158,429632.9203,634.3134,0.9973,0.0698,0.0467,0.005
llar,Lasso Least Angle Regression,264.4853,432698.8741,636.7982,0.9973,0.0705,0.0538,0.005
lasso,Lasso Regression,262.1947,429633.0562,634.3133,0.9973,0.0698,0.0466,0.008
rf,Random Forest Regressor,299.3128,574378.9111,740.1501,0.9964,0.0426,0.03,0.831


In [13]:
#create model
model = create_model(best_model)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,338.2947,748955.0,865.4219,0.9959,0.0583,0.0399
1,295.6256,523328.625,723.4146,0.9971,0.0729,0.0508
2,226.3854,254819.7031,504.7967,0.9987,0.0733,0.0489
3,203.9884,214871.4844,463.5423,0.9981,0.0701,0.0479
4,334.7495,849109.0,921.4711,0.9944,0.0726,0.048
5,247.4333,296572.9375,544.5851,0.998,0.0563,0.0402
6,286.0637,618033.875,786.1513,0.9961,0.07,0.0471
7,214.577,220462.2344,469.5341,0.9983,0.0912,0.0564
8,249.5583,332073.9688,576.2586,0.9981,0.068,0.047
9,225.4818,238105.125,487.9602,0.998,0.0658,0.0405


In [14]:
#evaluate the model
evaluate_model(model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [15]:
#evaluate the predictions
unseen_predictions = predict_model(model, data=test_df)
#print
unseen_predictions

Unnamed: 0,Close,RSI,Future Price,Label
0,48905.492188,67.287827,49321.652344,48976.128906
1,49321.652344,68.138209,49546.148438,49394.902344
2,49546.148438,68.612230,47706.117188,49620.851562
3,47706.117188,60.647974,48960.789062,47757.195312
4,48960.789062,63.738806,46942.218750,49021.257812
...,...,...,...,...
442,20926.486328,59.671529,20602.816406,20963.843750
443,20602.816406,55.038707,18541.271484,20626.734375
444,18541.271484,35.913352,15880.780273,18509.417969
445,15880.780273,24.217638,17586.771484,15814.250000
