参考：　データ分析で株価予測をしてみた　https://qiita.com/kazama0119/items/c838114f8687518ba58e


入力CSV（input_data/xxxxxxxx.csv）の形式：
dt,endvalue
2016/1/4,1000
2016/1/5,1010
 :
 :
 :


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import itertools
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore') # 計算警告を非表示

## データ読み込み

In [None]:
def read_data(path='input_data/xxxxxxxx.csv'):
    """Load the price CSV into a float DataFrame indexed by date.

    The file must contain a ``dt`` column whose values are dates written as
    ``YYYY/M/D`` (for example ``2016/1/4``). Every remaining column is cast
    to float.

    Args:
        path: Location of the CSV file. Defaults to the path this notebook
            originally hard-coded, so ``read_data()`` keeps working.

    Returns:
        pandas.DataFrame whose index is a ``DatetimeIndex`` named ``dt``.

    Raises:
        ValueError: If a date does not match ``%Y/%m/%d`` or a value cannot
            be converted to float.
    """
    df = pd.read_csv(path, index_col='dt')
    # Parse the index explicitly instead of going through
    # ``date_parser=pd.datetime.strptime``: ``pd.datetime`` was removed in
    # pandas 2.0 and ``date_parser`` is deprecated there.
    df.index = pd.to_datetime(df.index, format='%Y/%m/%d')
    # Cast only the value columns; the date index must not be forced to float.
    return df.astype('float')

# Load the dataset once; the following cells all operate on this DataFrame.
df = read_data()

## 可視化

In [None]:
# Plot the 'endvalue' column of the data as loaded.
plt.plot(df['endvalue'])
plt.show()

## 前処理

In [None]:
# Re-index onto a gap-free daily calendar running from the first to the last
# row of the data. Days that were absent become NaN rows, which are then
# filled by DataFrame.interpolate() using its default settings.
new_idx = pd.date_range(start=df.index[0], end=df.index[-1], freq='D')
df = df.reindex(index=new_idx, fill_value=np.nan).interpolate()

In [None]:
# Show the first 10 rows as the cell output to inspect the filled-in data.
df.head(10)

In [None]:
# Plot 'endvalue' again, now on the daily, interpolated index.
plt.plot(df['endvalue'])
plt.show()

## パラメータ設定

In [None]:
# First-order difference of the series (each row minus the previous row).
# Rows containing NaN -- at least the first one, which has no predecessor --
# are dropped before plotting.
df_diff = df.diff().dropna()
df_diff.plot()

In [None]:
# Automatic ARMA order estimation on the differenced series, ranked by AIC.
# NOTE(review): trend='nc' is the legacy spelling for "no constant"; newer
# statsmodels releases appear to expect trend='n' -- confirm against the
# installed version.
res_selection = sm.tsa.arma_order_select_ic(df_diff, ic='aic', trend='nc')
# Bare expression: displays the selection result as the cell output.
res_selection

In [None]:
# AR and MA orders come from the AIC-optimal (p, q) pair found above.
p, q = res_selection['aic_min_order']
# The differencing order is fixed at 1 rather than estimated.
d = 1

## モデルの構築

In [None]:
# Fit a SARIMA model using the estimated (p, d, q) and a fixed seasonal
# component of (0, 1, 1) with period 12; the stationarity and invertibility
# constraints are switched off.
SARIMA_yen_data = sm.tsa.statespace.SARIMAX(
    df,
    order=(p, d, q),
    seasonal_order=(0, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
).fit()
# Predict over the fixed window 2016-02-01 .. 2017-02-01.
pred = SARIMA_yen_data.predict(start="2016-02-01", end="2017-02-01", freq="D")

## 結果と予測の比較

In [None]:
# Overlay the model prediction (red) on the 'endvalue' series for comparison.
plt.plot(df['endvalue'])
plt.plot(pred, c="r")
plt.show()

## パラメータ設定 ~ 結果と予測の比較  2

In [None]:
def selectparameter(df, s):
    """Grid-search SARIMA orders and return the combination with the lowest BIC.

    Every non-seasonal order (p, d, q) and seasonal order (P, D, Q, s) with
    each term in {0, 1} is fitted with ``SARIMAX``; combinations that fail to
    fit, or whose BIC is not a finite number, are skipped.

    Args:
        df: Time series passed to ``SARIMAX`` as the endogenous data.
        s: Seasonal period used for every seasonal order candidate.

    Returns:
        A list ``[order, seasonal_order, bic]`` describing the best model,
        where ``order`` is ``(p, d, q)`` and ``seasonal_order`` is
        ``(P, D, Q, s)``. Ties are resolved in favour of the candidate
        tried first.

    Raises:
        ValueError: If no candidate could be fitted with a finite BIC.
    """
    terms = range(0, 2)
    pdq = list(itertools.product(terms, repeat=3))
    seasonal_pdq = [(P, D, Q, s) for P, D, Q in pdq]

    parameters = []
    for param, param_seasonal in itertools.product(pdq, seasonal_pdq):
        try:
            results = sm.tsa.statespace.SARIMAX(
                df,
                order=param,
                seasonal_order=param_seasonal,
            ).fit()
            bic = results.bic
        except Exception:
            # Best effort: some order combinations cannot be estimated, so
            # skip them. ``Exception`` (not a bare ``except``) keeps
            # KeyboardInterrupt/SystemExit able to stop the long search.
            continue
        # A NaN BIC would otherwise win the comparison and be "selected".
        if np.isfinite(bic):
            parameters.append([param, param_seasonal, bic])

    if not parameters:
        raise ValueError(
            'no SARIMAX candidate could be fitted with a finite BIC'
        )
    return min(parameters, key=lambda candidate: candidate[2])


In [None]:
# Forecast setup: search for the BIC-optimal orders with a seasonal period
# of 12 and show the winning [order, seasonal_order, bic] entry.
params = selectparameter(df, 12)
print(params)
# The first two entries are the non-seasonal and seasonal orders.
order, seasonal_order = params[:2]


In [None]:
# Refit the SARIMA model with the grid-searched orders; the stationarity and
# invertibility constraints are switched off.
SARIMA_yen_data = sm.tsa.statespace.SARIMAX(
    df,
    order=order,
    seasonal_order=seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
).fit()
# Predict over the fixed window 2016-02-01 .. 2017-02-01.
pred = SARIMA_yen_data.predict(start="2016-02-01", end="2017-02-01", freq="D")

In [None]:
# Overlay the prediction of the refitted model (red) on the 'endvalue' series.
plt.plot(df['endvalue'])
plt.plot(pred, c="r")
plt.show()