In [None]:
pip install pmdarima

In [None]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from pmdarima.arima import auto_arima
from pmdarima.arima import ndiffs

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
def adf_test(train):
    """Return the p-value of the Augmented Dickey-Fuller test on ``train``.

    Parameters
    ----------
    train : array-like
        Series handed unchanged to ``adfuller`` (called with its defaults).

    Returns
    -------
    The second element of the ``adfuller`` result (the test's p-value).
    """
    # Only the p-value is used; the remaining five fields are unpacked and discarded.
    _statistic, pvalue, _used_lags, _n_obs, _critical, _icbest = adfuller(train)
    return pvalue

In [None]:
def diff(train, p_v1):
    """Estimate a differencing order for ``train`` and keep it only if it helps.

    The order is the larger of the values suggested by ``ndiffs`` with the KPSS
    and ADF tests (alpha 0.05, at most 6). The series is differenced that many
    times, its ADF p-value is printed, and its ACF / PACF are plotted.

    Parameters
    ----------
    train : pandas Series or DataFrame
        Training series; must support ``.diff()`` and ``.dropna()``.
    p_v1 : float
        ADF p-value of the undifferenced series, used as the baseline.

    Returns
    -------
    int
        The estimated order ``d`` when the differenced series has a lower ADF
        p-value than ``p_v1``; otherwise 0.
    """
    kpss_diffs = ndiffs(train, alpha=0.05, test='kpss', max_d=6)
    adf_diffs = ndiffs(train, alpha=0.05, test='adf', max_d=6)
    d = max(adf_diffs, kpss_diffs)
    print('.....ndiffs :', d)

    # Difference d times. ``train.diff(d)`` is NOT d-th order differencing: it
    # subtracts the value d rows back (one lag-d difference), and for d == 0 it
    # returns an all-zero series. Repeating the first difference d times gives the
    # intended result and leaves the series untouched when d == 0.
    diff_train = train
    for _ in range(d):
        diff_train = diff_train.diff()
    diff_train = diff_train.dropna(axis=0)
    p_v2 = adf_test(diff_train)
    print('.....diff p_v :', p_v2)

    plot_acf(diff_train)
    plot_pacf(diff_train, method='ywm')

    # Keep the order only when differencing lowered the ADF p-value.
    if p_v1 > p_v2:
        return d
    return 0

In [None]:
## Find the best model

def autoarima(train, diff, m52):
    """Run ``auto_arima`` once per candidate seasonal period and keep the lowest-AIC fit.

    Parameters
    ----------
    train : array-like
        Training series, passed to ``auto_arima`` as ``y``.
    diff : int
        Differencing order computed by the caller. Currently unused:
        ``auto_arima`` is left to choose ``d`` itself. The parameter is kept so
        existing calls keep working.
    m52 : str
        Pass ``'o'`` to also try a seasonal period of 52; any other value skips it.

    Returns
    -------
    The fitted model with the smallest AIC among the candidates, or ``None`` if no
    candidate produced an AIC that compares below +infinity.
    """
    m_list = [1, 7, 12]  # candidate seasonal periods
    if m52 == 'o':
        # 52 is opt-in: the original author noted that running up to 52 made the
        # web session shut down.
        m_list.append(52)

    best_model = None
    # Start from +infinity rather than a fixed cap (previously 99999), so a fit is
    # never discarded merely because its AIC is numerically large.
    best_aic = float('inf')

    for m_test in m_list:
        arima = auto_arima(
            y=train,
            m=m_test,
            max_p=5, max_q=5,
            seasonal=True,
            trace=True,
            error_action="ignore",
        )
        candidate_aic = arima.aic()
        if candidate_aic < best_aic:
            best_aic = candidate_aic
            best_model = arima

    return best_model

In [None]:
# pd.read_csv() requires a file path; calling it with no arguments raises TypeError.
# The path is named here so it is the single place to edit.
DATA_PATH = "data.csv"  # TODO: placeholder -- set to the real CSV location
data = pd.read_csv(DATA_PATH)

In [None]:
# 70/30 split by row position: leading rows for fitting, trailing rows held out.
split = int(0.7 * len(data))
train, test = data[:split], data[split:]

In [None]:
# ADF p-value of the undifferenced training data, used later as the baseline.
p_v1 = adf_test(train)
print('.....original pvalue :', p_v1)

In [None]:
# ACF and PACF of the undifferenced training data.
plot_acf(train)
# The trailing semicolon stops the returned Figure from being echoed as the cell
# result, which would otherwise render the PACF plot a second time.
plot_pacf(train, method='ywm');

In [None]:
# Store the differencing order returned by diff().
ndiff = diff(train, p_v1)

In [None]:
print('< AutoArima >')
# Store the selected model; m52='x' means the seasonal period 52 is not tried.
model = autoarima(train, ndiff, 'x')

In [None]:
# Header line followed by the selected model's text representation.
print('< Auto Arima Model >')
print(model)

In [None]:
# Build a SARIMAX model on the training data, reusing the (p, d, q) and seasonal
# orders chosen by the auto_arima search.
arima = SARIMAX(
    train,
    order=model.order,
    seasonal_order=model.seasonal_order,
)

In [None]:
# Fit the model, then print a header followed by the fit summary.
arima_fit = arima.fit()
print('< ARIMA fit summary >', arima_fit.summary(), sep='\n')

In [None]:
print('< Diagnostic plots for standardized residuals >')
# Call plot_diagnostics directly: wrapping it in print() only emitted the Figure's
# text repr. The trailing semicolon keeps the Figure from being echoed as the cell
# result and drawn twice.
arima_fit.plot_diagnostics(figsize=(16, 9));

In [None]:
# Predict over the full index of `data` (training rows plus the held-out rows) and
# store the result next to the observed values.
# NOTE(review): `typ` is passed through as an extra keyword; confirm that the
# SARIMAX results object actually uses it.
data['y_predict'] = arima_fit.predict(
    start=data.index[0],
    end=data.index[-1],
    dynamic=False,
    typ='levels',
)

# Plot the 'y' column against the stored predictions.
data[['y', 'y_predict']].plot(figsize=(16, 9))