In [1]:
import pandas as pd
import numpy as np
import math

import plotly.express as px


from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly


from pycaret.regression import *


In [2]:
df = pd.read_csv(f"data/{'btc'}_daily.csv", parse_dates = ['time'])
pycaret_data = df.copy(deep = True)
prophet_data = df.copy(deep = True)


# <center style="background-color:#2E86AB;color: white; vertical-align: middle; padding:10px 5px;\">Prophet</center>

In [3]:
prophet_data.rename(columns={"open":"y","time":"ds"},inplace=True)

In [4]:
m = Prophet()
m.fit(prophet_data)
future = m.make_future_dataframe(periods=0)
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()


19:12:27 - cmdstanpy - INFO - Chain [1] start processing
19:12:28 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
3443,2022-10-09,36013.867922,27255.914775,44158.925303
3444,2022-10-10,36208.679773,27566.102294,44821.176655
3445,2022-10-11,36396.323426,28374.992794,45409.137732
3446,2022-10-12,36576.833,27724.494296,44809.453673
3447,2022-10-13,36811.605948,28146.235462,45542.600607


In [5]:
plot_plotly(m, forecast)

# <center style="background-color:#2E86AB;color: white; vertical-align: middle; padding:10px 5px;\">Pycaret</center>

In [6]:
pycaret_data['Day'] = [i.day for i in pycaret_data['time']]
pycaret_data['Month'] = [i.month for i in pycaret_data['time']]
pycaret_data['Year'] = [i.year for i in pycaret_data['time']]
pycaret_data['Series'] = np.arange(1,len(pycaret_data)+1)
pycaret_data.rename(columns={'open': 'Label'},inplace=True)
pycaret_data = pycaret_data[['Series', 'Year', 'Month','Day', 'Label']]
pycaret_data.head()

Unnamed: 0,Series,Year,Month,Day,Label
0,1,2013,4,29,134.444
1,2,2013,4,30,144.0
2,3,2013,5,1,139.0
3,4,2013,5,2,116.38
4,5,2013,5,3,106.25


In [7]:
train = pycaret_data[pycaret_data['Year'] < 2022]
test = pycaret_data[pycaret_data['Year'] >= 2022]
train.shape, test.shape

((3163, 5), (285, 5))

In [56]:
s = setup(
    data = train,
    test_data = test,
    target = 'Label',
    fold_strategy = 'timeseries',
    numeric_features = ['Year','Month','Day','Series'],
    fold = 30,
    transform_target = True,
    session_id = 123,
    silent=True
    )


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Label
2,Original Data,"(3163, 5)"
3,Missing Values,False
4,Numeric Features,4
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(3163, 4)"


AttributeError: 'Simple_Imputer' object has no attribute 'fill_value_categorical'

In [58]:
best = compare_models(sort = 'MAE')


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
dt,Decision Tree Regressor,2022.5888,22555525.4205,2439.3851,-2.1071,0.2659,0.1963,0.006
knn,K Neighbors Regressor,2071.0232,23610899.3307,2494.1133,-2.4592,0.282,0.2093,0.007
rf,Random Forest Regressor,2122.6761,24319347.0433,2560.0861,-2.5061,0.2889,0.2109,0.0873
xgboost,Extreme Gradient Boosting,2128.6783,24308508.8825,2549.8914,-2.2507,0.2752,0.2051,0.0687
gbr,Gradient Boosting Regressor,2213.087,25871435.7629,2610.8786,-3.2459,0.2968,0.223,0.0253
lightgbm,Light Gradient Boosting Machine,2342.9206,26339491.5766,2757.208,-5.3495,0.3431,0.2589,0.012
et,Extra Trees Regressor,2356.3762,30985894.1452,2830.5937,-10.6046,0.3795,0.2653,0.062
catboost,CatBoost Regressor,3090.045,51327057.2734,3668.7342,-15.6343,0.4392,0.3001,0.681
ada,AdaBoost Regressor,3236.0991,46276041.7168,3588.5585,-19.3054,0.4367,0.348,0.0193
br,Bayesian Ridge,3901.13,55985731.5619,4282.7135,-80.7102,0.6557,0.6412,0.0057


In [48]:
prediction_holdout = predict_model(best);


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Extreme Gradient Boosting,15708.4941,339198304.0,18417.3379,-2.593,0.5597,0.6653


In [53]:
xgb = create_model('xgboost')
xgb_tuned = tune_model(xgb) 

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,136.1365,27467.4668,165.7331,-1.6932,0.3264,0.3234
1,113.8918,14301.6338,119.5894,-8.9933,0.3955,0.4659
2,171.3775,40053.7773,200.1344,-2.6918,0.5665,0.3794
3,188.5155,70645.2344,265.7917,-0.6675,0.3174,0.1952
4,4734.7109,44858936.0,6697.6812,-1.0024,1.5746,0.6733
5,3937.1172,17602084.0,4195.4839,-7.63,0.4631,0.558
6,2701.4158,8894571.0,2982.3767,-0.0741,0.4743,0.5078
7,1802.2715,4831402.0,2198.0449,-2.078,0.2579,0.243
8,12474.7539,379490144.0,19480.5059,-0.7486,0.9299,0.3986
9,8912.3945,116514104.0,10794.1699,-0.2339,0.2369,0.2124


In [59]:
dt = create_model('dt')
dt_tuned = tune_model(dt) 

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,69.7421,14572.4774,120.7165,-0.5012,0.5702,0.2941
1,321.0456,122799.3536,350.4274,-5.2243,0.5424,0.3909
2,83.969,8909.2159,94.3886,-0.2423,0.1819,0.1725
3,89.3896,12221.6534,110.5516,-1.8791,0.1977,0.1778
4,47.4452,2910.3321,53.9475,-2.5804,0.1455,0.1379
5,70.9397,5665.3697,75.2687,-7.951,0.2736,0.2973
6,15.6437,538.1087,23.1972,-0.3403,0.0906,0.0594
7,33.5938,1708.2248,41.3307,-0.2454,0.149,0.1307
8,34.3314,1702.3413,41.2594,-0.0106,0.1055,0.0879
9,45.6218,2399.374,48.9834,-7.6369,0.1183,0.1042


In [60]:
predictions = predict_model(dt_tuned, data=pycaret_data)
predictions['Date'] = df['time']
predictions['Org Label'] = df['open']

In [61]:
fig = px.line(predictions, x='Date', y=["Label",'Org Label'], template = 'plotly_dark')
fig.show()