In [1]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm_notebook
from itertools import product
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [15]:
df = pd.read_csv('data\BTC_final.csv')
df=df[['Adj Close']]
df.head()

Unnamed: 0,Adj Close
0,11786.299805
1,11296.400391
2,10106.299805
3,10221.099609
4,9170.540039


In [16]:
ad_fuller_result = adfuller(df['Adj Close'])

print(f'ADF Statistic: {ad_fuller_result[0]}')
print(f'p-value: {ad_fuller_result[1]}')

ADF Statistic: -1.3639653753586114
p-value: 0.599424531921265


In [17]:
eps_diff = np.diff(df['Adj Close'], n=1)

ad_fuller_result = adfuller(eps_diff)

print(f'ADF Statistic: {ad_fuller_result[0]}')
print(f'p-value: {ad_fuller_result[1]}')

ADF Statistic: -8.311394347344411
p-value: 3.78050429631799e-13


In [18]:
def optimize_ARIMA(endog: Union[pd.Series, list], order_list: list, d: int) -> pd.DataFrame:
    
    results = []
    
    for order in tqdm_notebook(order_list):
        try: 
            model = SARIMAX(endog, order=(order[0], d, order[1]), simple_differencing=False).fit(disp=False)
        except:
            continue
            
        aic = model.aic
        results.append([order, aic])
        
    result_df = pd.DataFrame(results)
    result_df.columns = ['(p,q)', 'AIC']
    
    #Sort in ascending order, lower AIC is better
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    
    return result_df

In [19]:
ps = range(0, 5, 1)
qs = range(0, 5, 1)
d = 1

order_list = list(product(ps, qs))

In [20]:
train = df['Adj Close'][:-7]

result_df = optimize_ARIMA(train, order_list, d)
result_df

  0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,"(p,q)",AIC
0,"(2, 4)",29835.642895
1,"(3, 4)",29837.521666
2,"(4, 4)",29842.997881
3,"(2, 2)",29846.003468
4,"(2, 3)",29847.934666
5,"(3, 3)",29850.008065
6,"(4, 3)",29851.882875
7,"(0, 0)",29852.818772
8,"(1, 0)",29852.874037
9,"(0, 1)",29852.89223


In [21]:
model = SARIMAX(train, order=(2,1,4), simple_differencing=False)
model_fit = model.fit(disp=False)

print(model_fit.summary())

                               SARIMAX Results                                
Dep. Variable:              Adj Close   No. Observations:                 1791
Model:               SARIMAX(2, 1, 4)   Log Likelihood              -14910.821
Date:                Thu, 08 Jun 2023   AIC                          29835.643
Time:                        14:53:08   BIC                          29874.073
Sample:                             0   HQIC                         29849.833
                               - 1791                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.5636      0.004   -130.034      0.000      -0.572      -0.555
ar.L2         -0.9907      0.004   -244.746      0.000      -0.999      -0.983
ma.L1          0.5356      0.016     33.593      0.0

In [32]:
test = df.iloc[int(df.shape[0]*0.9):]
test

Unnamed: 0,Adj Close
1618,20231.261719
1619,20190.115234
1620,20548.246094
1621,21637.587891
1622,21731.117188
...,...
1793,16919.804688
1794,16717.173828
1795,16552.572266
1796,16642.341797


In [33]:
ARIMA_pred = model_fit.get_prediction(int(df.shape[0]*0.9), int(df.shape[0])+1).predicted_mean

test['ARIMA_pred'] = ARIMA_pred
test

Unnamed: 0,Adj Close,ARIMA_pred
1618,20231.261719,19326.542524
1619,20190.115234,20135.377953
1620,20548.246094,20214.650649
1621,21637.587891,20617.469384
1622,21731.117188,21564.073250
...,...,...
1793,16919.804688,16780.917918
1794,16717.173828,16828.931268
1795,16552.572266,16817.694830
1796,16642.341797,16776.459314


In [34]:
print(f"MSE：{mean_squared_error(test['Adj Close'], test['ARIMA_pred'])}")
print(f"MAE：{ mean_absolute_error(test['Adj Close'], test['ARIMA_pred'])}")


MSE：348579.2216315061
MAE：385.45034473266327
