<a href="https://colab.research.google.com/github/saotomryo/Use_pycaret_timeseries/blob/main/forecast_pycaret_timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pycaret 3.0の時系列分析を試してみます。

## Pycaret 3.0のインストール（正式版がリリースされた後は "--pre" はいらなくなると思います）


In [None]:
# Install PyCaret with pip; `--pre` lets pip pick a pre-release build.
!pip install --pre pycaret

In [None]:
import os
import datetime as dt
from datetime import datetime, timedelta
import pandas_datareader.data as web
import pandas as pd
from pycaret.time_series import * 
import numpy as np



## 今回は「Nomura NF Nikkei 225 ETF」を使って、日経平均の予測をしてみようと思います。

In [None]:
# Ticker code of the ETF, plus the ".JP"-suffixed symbol handed to the reader.
ticker_symbol = "1321"
ticker_symbol_dr = f"{ticker_symbol}.JP"

# Requested date range: 2017-01-01 up to today.
start = '2017-01-01'
end = dt.date.today()

# Download the price history from the 'stooq' data source.
df = web.DataReader(
    ticker_symbol_dr,
    data_source='stooq',
    start=start,
    end=end,
)
# Insert the ticker code as the first column (position 0).
df.insert(loc=0, column="code", value=ticker_symbol, allow_duplicates=False)

In [None]:
# Show the downloaded price table.
df

Unnamed: 0_level_0,code,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-07-14,1321,27010,27365,26955,27290,67749
2022-07-13,1321,27050,27190,27035,27120,73945
2022-07-12,1321,27310,27345,26925,26985,118640
2022-07-11,1321,27570,27715,27355,27440,356219
2022-07-08,1321,27270,27555,27220,27255,277260
...,...,...,...,...,...,...
2017-01-11,1321,19860,19890,19830,19860,351023
2017-01-10,1321,19910,19980,19750,19820,388580
2017-01-06,1321,19850,19980,19830,19950,474555
2017-01-05,1321,20100,20120,19960,20020,679991


## pycaretで時系列分析をする際に日付情報の欠損値が許容されないため、祝日、休日を含めた日付データを作成する。


In [None]:
# Build a gap-free daily calendar spanning the whole requested range
# (`start` .. `end`). A fixed day count would silently cut off the most
# recent prices once the range grows past that count.
date = pd.date_range(start, end, freq="D")
date_df = pd.DataFrame({"Date": date})

# Left-join the prices onto the calendar: days without a quote stay in the
# frame as NaN rows instead of being absent.
df = pd.merge(date_df, df, on="Date", how="left")

# Keep only the date and the closing price. The merge result already has a
# fresh 0..n-1 index, so no reset_index() is needed (it would only add a
# stray "index" column and fail when this cell is run a second time).
# `.copy()` makes `data` an independent frame, so columns can be added to it
# later without a SettingWithCopyWarning.
data = df[['Date', 'Close']].copy()

In [None]:
# Show the daily calendar joined with closing prices (NaN where no quote exists).
data

Unnamed: 0,Date,Close
0,2017-01-01,
1,2017-01-02,
2,2017-01-03,
3,2017-01-04,20080.0
4,2017-01-05,20020.0
...,...,...
1995,2022-06-19,
1996,2022-06-20,26840.0
1997,2022-06-21,27355.0
1998,2022-06-22,27275.0


In [None]:
# Add a `date` column holding each timestamp as a daily pandas Period.
# (Per the original author's note, PyCaret's time-series module only accepts
# Period-based dates.) The conversion is done in one vectorized call instead
# of a Python loop, and `assign` returns a new frame, so nothing is written
# into a slice of another DataFrame.
data = data.assign(date=data["Date"].dt.to_period("D"))

In [None]:
# Index the frame by the Period column, drop the timestamp column, and fill
# the missing closing prices by interpolation (pandas' default method is
# linear). limit_direction='both' lets the fill reach gaps at the very
# start and end of the series as well.
data = (
    data
    .set_index('date')
    .drop(columns='Date')
    .assign(Close=lambda frame: frame['Close'].interpolate(axis=0, limit_direction='both'))
)

In [None]:
# Train/test split: hold out the final 60 days of the series for testing.
# The cut-off is measured back from the last date actually present in `data`
# rather than from today's date, so the hold-out keeps its intended length
# even when the series ends before today.
test_days = dt.timedelta(days=60)
split_period = data.index.max() - test_days.days  # Period - int steps back that many days
end_val = split_period.strftime('%Y-%m-%d')  # kept as a 'YYYY-MM-DD' string, as before
df_train = data[data.index <= split_period]
df_test = data[data.index > split_period]

In [None]:
# Show the training portion of the series.
df_train

Unnamed: 0_level_0,Close
date,Unnamed: 1_level_1
2017-01-01,20080.000000
2017-01-02,20080.000000
2017-01-03,20080.000000
2017-01-04,20080.000000
2017-01-05,20020.000000
...,...
2022-05-15,27643.333333
2022-05-16,27700.000000
2022-05-17,27795.000000
2022-05-18,28015.000000


## 1週間（7日）周期として、30日分を予測するモデルを作成する。

In [None]:
# Configure the PyCaret time-series experiment on the training data:
# forecast horizon of 30 steps, seasonal period of 7, one CV fold,
# and session_id fixed at 123.
s = setup(
    df_train,
    fh=30,
    session_id=123,
    seasonal_period=7,
    fold=1,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Close
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(1965, 1)"
5,Transformed data shape,"(1965, 1)"
6,Transformed train set shape,"(1935, 1)"
7,Transformed test set shape,"(30, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [None]:
# Run compare_models() for the 30-step setup and keep the model it returns as `best`.
best = compare_models()

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.0218,0.8173,447.6431,500.3906,0.0157,0.0157,-0.0597,4.76
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.1343,1.0293,496.9386,630.1986,0.0173,0.0175,-0.6809,1.28
par_cds_dt,Passive Aggressive w/ Cond. Deseasonalize & Detrending,1.1479,1.0233,502.8997,626.5036,0.0178,0.0176,-0.6612,1.26
polytrend,Polynomial Trend Forecaster,1.1515,1.0143,504.4689,621.0318,0.0179,0.0177,-0.6323,0.34
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.1708,1.0852,512.9205,664.442,0.0178,0.018,-0.8685,5.16
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1902,1.1162,521.4182,683.4279,0.0181,0.0183,-0.9768,1.46
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.192,1.1002,522.2131,673.6409,0.0181,0.0184,-0.9206,1.74
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.2286,1.1242,538.2606,688.3327,0.0187,0.0189,-1.0053,1.14
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.2312,1.121,539.3911,686.3769,0.0187,0.019,-0.9939,1.26
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.2312,1.121,539.3792,686.3533,0.0187,0.019,-0.9938,1.21


Processing:   0%|          | 0/117 [00:00<?, ?it/s]

## 予測結果をグラフで確認

In [None]:
# Draw the 'forecast' plot for the selected model.
plot_model(best, plot = 'forecast')

In [None]:
# Ask the selected model for horizon steps 31 through 40 (ten steps).
pred = best.predict(list(range(31, 41)))

In [None]:
# Show the predicted closing prices.
pred

Unnamed: 0,Close
2022-05-20,28329.264343
2022-05-21,28326.558303
2022-05-22,28296.997454
2022-05-23,28211.464191
2022-05-24,28268.33312
2022-05-25,28307.629372
2022-05-26,28328.229927
2022-05-27,28340.559505
2022-05-28,28313.412204
2022-05-29,28275.54995


In [None]:
# First ten rows of the held-out data, for comparison with the predictions.
df_test.head(10)

Unnamed: 0_level_0,Close
date,Unnamed: 1_level_1
2022-05-20,27865.0
2022-05-21,27951.666667
2022-05-22,28038.333333
2022-05-23,28125.0
2022-05-24,27880.0
2022-05-25,27810.0
2022-05-26,27745.0
2022-05-27,27905.0
2022-05-28,28115.0
2022-05-29,28325.0


## 7日分を予測するモデルを作成する。

In [None]:

# Re-configure the PyCaret time-series experiment on the training data,
# this time with a forecast horizon of 7 steps (seasonal period 7,
# one CV fold, session_id fixed at 123).
s = setup(
    df_train,
    fh=7,
    session_id=123,
    seasonal_period=7,
    fold=1,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Close
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(1965, 1)"
5,Transformed data shape,"(1965, 1)"
6,Transformed train set shape,"(1958, 1)"
7,Transformed test set shape,"(7, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [None]:
# Run compare_models() for the 7-step setup and keep the model it returns as `best`.
best = compare_models()

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.1229,0.9028,495.7764,557.7631,0.0181,0.0179,-0.8898,1.21
snaive,Seasonal Naive Forecaster,1.1794,1.0446,520.7143,645.3319,0.0191,0.0189,-1.5298,0.49
croston,Croston,1.2188,1.0361,538.141,640.1219,0.0197,0.0195,-1.4891,2.47
arima,ARIMA,1.2455,1.0904,549.9329,673.6302,0.0202,0.0199,-1.7565,1.15
exp_smooth,Exponential Smoothing,1.2713,1.0933,561.3236,675.448,0.0206,0.0203,-1.7714,1.44
par_cds_dt,Passive Aggressive w/ Cond. Deseasonalize & Detrending,1.3437,1.1367,593.248,702.2279,0.0218,0.0214,-1.9955,1.28
naive,Naive Forecaster,1.3614,1.15,601.0714,710.4642,0.0221,0.0217,-2.0662,3.17
ets,ETS,1.3695,1.1568,604.6424,714.6848,0.0222,0.0218,-2.1027,0.64
theta,Theta Forecaster,1.3999,1.1828,618.0699,730.7296,0.0227,0.0223,-2.2436,0.64
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.42,1.1627,626.9392,718.3347,0.023,0.0226,-2.1345,1.51


Processing:   0%|          | 0/117 [00:00<?, ?it/s]

In [None]:
# Draw the 'forecast' plot for the model selected in the 7-step setup.
plot_model(best, plot = 'forecast')