In [352]:
import pandas as pd
import yfinance
from pycaret import regression
import numpy as np

In [353]:
data = yfinance.download("MSFT",period="2y")
data = data["Adj Close"]
data = data.reset_index()
data["Date"] = pd.to_datetime(data["Date"])
data["Day"] = [i.day for i in data["Date"]]
data["Month"] = [i.month for i in data["Date"]]
data["Year"] = [i.year for i in data["Date"]]
data["Series"] = np.arange(1,len(data)+1)
data.drop("Date",axis=1,inplace=True,errors="ignore")
data = data[["Series","Year","Month","Day","Adj Close"]]
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Series,Year,Month,Day,Adj Close
0,1,2021,1,5,214.082687
1,2,2021,1,6,208.531647
2,3,2021,1,7,214.465866
3,4,2021,1,8,215.772568
4,5,2021,1,11,213.679871
...,...,...,...,...,...
499,500,2022,12,28,234.529999
500,501,2022,12,29,241.009995
501,502,2022,12,30,239.820007
502,503,2023,1,3,239.580002


In [354]:
data.to_csv("ibm.csv")

In [355]:
train = data[data["Year"]<2023]
test = data[data["Year"]>=2022]
print(train.shape)
print(test.shape)

(502, 5)
(253, 5)


In [356]:
s = regression.setup(data = train,
          test_data = test,
          target = 'Adj Close',
          fold_strategy = 'timeseries',
          numeric_features = ['Year','Month','Day','Series'],
          transform_target = True,
          fold=9,
          use_gpu = True,
          session_id = 123,
          silent=True)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Adj Close
2,Original Data,"(502, 5)"
3,Missing Values,False
4,Numeric Features,4
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(502, 4)"


In [357]:
best = regression.compare_models(sort='MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,18.8238,503.7831,20.9503,-3.2941,0.0761,0.0683,0.21
knn,K Neighbors Regressor,19.269,558.6289,21.614,-3.5603,0.0777,0.0698,0.0222
ada,AdaBoost Regressor,19.4504,537.5213,21.5812,-3.6572,0.078,0.0703,0.0389
gbr,Gradient Boosting Regressor,19.6649,554.3085,21.7227,-3.7065,0.0786,0.0713,0.0267
lightgbm,Light Gradient Boosting Machine,19.9144,571.4886,21.8471,-4.614,0.0785,0.0709,0.18
et,Extra Trees Regressor,20.0323,620.5525,22.5376,-3.8249,0.0817,0.0732,0.2089
dt,Decision Tree Regressor,20.2855,588.5375,22.7181,-3.8905,0.0825,0.0738,0.0056
dummy,Dummy Regressor,31.4096,1388.7552,33.4445,-17.7827,0.1234,0.1114,0.0044
llar,Lasso Least Angle Regression,31.4096,1388.7551,33.4445,-17.7827,0.1234,0.1114,0.0056
omp,Orthogonal Matching Pursuit,34.8717,1945.3925,37.4745,-12.5634,0.129,0.1315,0.0056


In [358]:
future_dates = pd.date_range(start = '2023-01-05', end = '2025-01-01', freq = 'B')
future_df = pd.DataFrame()
future_df["Day"] = [i.day for i in future_dates]
future_df['Month'] = [i.month for i in future_dates]
future_df['Year'] = [i.year for i in future_dates] 
future_df['Series'] = np.arange(15358,(15358+len(future_dates)))
future_df

Unnamed: 0,Day,Month,Year,Series
0,5,1,2023,15358
1,6,1,2023,15359
2,9,1,2023,15360
3,10,1,2023,15361
4,11,1,2023,15362
...,...,...,...,...
515,26,12,2024,15873
516,27,12,2024,15874
517,30,12,2024,15875
518,31,12,2024,15876


In [359]:
future_df.dtypes

Day       int64
Month     int64
Year      int64
Series    int32
dtype: object

In [360]:
predictions_future = regression.predict_model(best,data=future_df)
predictions_future

Unnamed: 0,Day,Month,Year,Series,Label
0,5,1,2023,15358,244.902329
1,6,1,2023,15359,244.217545
2,9,1,2023,15360,244.017208
3,10,1,2023,15361,244.088839
4,11,1,2023,15362,244.387957
...,...,...,...,...,...
515,26,12,2024,15873,238.380730
516,27,12,2024,15874,238.155230
517,30,12,2024,15875,240.783033
518,31,12,2024,15876,240.696647


In [361]:
import plotly.express as px
concat_df = pd.concat([data,predictions_future], axis=0)
concat_df['Date'] = pd.to_datetime(dict(year=concat_df["Year"], month=concat_df["Month"], day=concat_df["Day"]))
concat_df.set_index(concat_df["Date"],inplace=True)
concat_df.drop("Date",axis=1,inplace=True,errors="ignore")
concat_df

Unnamed: 0_level_0,Series,Year,Month,Day,Adj Close,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-05,1,2021,1,5,214.082687,
2021-01-06,2,2021,1,6,208.531647,
2021-01-07,3,2021,1,7,214.465866,
2021-01-08,4,2021,1,8,215.772568,
2021-01-11,5,2021,1,11,213.679871,
...,...,...,...,...,...,...
2024-12-26,15873,2024,12,26,,238.380730
2024-12-27,15874,2024,12,27,,238.155230
2024-12-30,15875,2024,12,30,,240.783033
2024-12-31,15876,2024,12,31,,240.696647


In [362]:
fig = px.line(concat_df, x=concat_df.index, y=["Adj Close", "Label"], template = 'plotly_dark')
fig.show()

In [363]:
current_predictions = regression.predict_model(best,data=data)
compare_df = pd.concat([data,current_predictions["Label"]],axis=1)
compare_df['Date'] = pd.to_datetime(dict(year=compare_df["Year"], month=compare_df["Month"], day=compare_df["Day"]))
compare_df.set_index(compare_df["Date"],inplace=True)
compare_df.drop("Date",axis=1,inplace=True,errors="ignore")
compare_df

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,1.1929,2.7947,1.6717,0.9972,0.0064,0.0045


Unnamed: 0_level_0,Series,Year,Month,Day,Adj Close,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-05,1,2021,1,5,214.082687,212.555079
2021-01-06,2,2021,1,6,208.531647,210.277018
2021-01-07,3,2021,1,7,214.465866,212.778874
2021-01-08,4,2021,1,8,215.772568,214.615201
2021-01-11,5,2021,1,11,213.679871,213.549467
...,...,...,...,...,...,...
2022-12-28,500,2022,12,28,234.529999,235.904638
2022-12-29,501,2022,12,29,241.009995,238.989536
2022-12-30,502,2022,12,30,239.820007,240.783033
2023-01-03,503,2023,1,3,239.580002,244.115225


In [364]:
fig = px.line(compare_df, x=compare_df.index, y=["Adj Close", "Label"], template = 'plotly_dark')
fig.show()

In [367]:
regression.save_model(best,"saved_model")

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=False, features_todrop=[],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=['Year', 'Month',
                                                           'Day', 'Series'],
                                       target='Adj Close', time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_...
                                                                                  ccp_alpha=0.0,
                                                                                  criterion='mse',
                                                                                  max_de