In [28]:
# ARIMA example (fitted with statsmodels' SARIMAX, no seasonal component)
from sklearn.metrics import mean_squared_error,mean_absolute_error,mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm_notebook
from itertools import product
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [29]:
def optimize_ARIMA(endog: Union[pd.Series, list], order_list: list, d: int) -> pd.DataFrame:
    """Rank candidate ARIMA(p, d, q) orders by AIC.

    Every ``(p, q)`` pair in ``order_list`` is fitted with ``SARIMAX`` using the
    fixed differencing order ``d``. Candidates whose fit raises are skipped.

    Parameters
    ----------
    endog : pd.Series or list
        Series the candidate models are fitted on.
    order_list : list
        Iterable of ``(p, q)`` pairs to try.
    d : int
        Differencing order shared by all candidates.

    Returns
    -------
    pd.DataFrame
        Columns ``'(p,q)'`` and ``'AIC'``, sorted by ascending AIC with a fresh
        0..n-1 index. The frame is empty (but still has both columns) when no
        candidate could be fitted or ``order_list`` is empty.
    """
    results = []

    for order in tqdm_notebook(order_list):
        try:
            model = SARIMAX(endog, order=(order[0], d, order[1]), simple_differencing=False).fit(disp=False)
        except Exception:
            # Best-effort search: a candidate that cannot be fitted is skipped.
            # Catching Exception rather than using a bare except keeps
            # KeyboardInterrupt/SystemExit able to stop a long search.
            continue

        results.append([order, model.aic])

    # Passing the column names to the constructor keeps the schema valid even
    # when `results` is empty; assigning `.columns` afterwards would raise.
    result_df = pd.DataFrame(results, columns=['(p,q)', 'AIC'])

    # Sort in ascending order, lower AIC is better
    return result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)

In [30]:
def ARIMA_model(file,splits,I):
    """Select, fit and score an ARIMA(p, I, q) model for one price series.

    Reads the ``Close`` column of ``../Data/{file}.csv``, splits it without
    shuffling, picks ``(p, q)`` by lowest AIC via ``optimize_ARIMA``, fits that
    order on the log-transformed and scaled training data, and scores both the
    in-sample predictions and the forecast over the test span on the original
    price scale.

    Parameters
    ----------
    file : str
        Base name (without extension) of the CSV file under ``../Data``.
    splits : float or int
        Forwarded to ``train_test_split`` as ``train_size``.
    I : int
        Differencing order ``d`` of the model.

    Returns
    -------
    tuple
        ``(p, q, train_mae, train_rmse, test_mae, test_rmse)``.

    Raises
    ------
    ValueError
        If any ``Close`` value is zero or negative (logarithm undefined).
    RuntimeError
        If the order search did not produce a single fitted candidate.
    """
    # data
    path = f'../Data/{file}.csv'
    prices = pd.read_csv(path)['Close']

    # The log transform below is undefined for zero or negative values; fail
    # before the expensive order search instead of fitting on NaN/-inf.
    if (prices <= 0).any():
        raise ValueError(f"{path}: 'Close' must be strictly positive to take its logarithm")

    print(f'{file} {splits}')

    # Split dataset in order (no shuffling)
    train_prices, test_prices = train_test_split(prices, train_size=splits, shuffle=False)

    # find AR(p), MA(q): every combination of p and q in 0..12
    # NOTE(review): the search runs on raw prices while the final model below is
    # fitted on the log-scaled series -- confirm this is intended.
    d = I
    order_list = list(product(range(0, 13, 1), range(0, 13, 1)))

    result_df = optimize_ARIMA(train_prices, order_list, d)
    if result_df.empty:
        raise RuntimeError(f'{file}: no ARIMA(p,{d},q) candidate could be fitted')
    # Row 0 holds the lowest-AIC candidate; .iloc does not depend on index labels.
    p, q = result_df['(p,q)'].iloc[0]

    print(f'ARIMA({p},{d},{q})')

    # Take the logarithm of the training prices
    log_train = np.log(train_prices)

    # Centre on the mean and scale by three standard deviations (so this is
    # not a plain z-score); the same constants undo the scaling below.
    mean = np.mean(log_train)
    std = np.std(log_train)*3
    standardized_train = (log_train - mean) / std

    model = SARIMAX(standardized_train, order=(p, d, q), seasonal_order=(0, 0, 0,0))
    model_fit = model.fit(disp=True)

    # make prediction: in-sample over the training span, then a forecast that
    # covers exactly the length of the test span
    n_train = len(log_train)
    n_test = test_prices.shape[0]
    test_predict = model_fit.predict(start=n_train, end=(n_train+n_test-1))
    train_predict = model_fit.predict(start=0, end=(n_train-1))

    # Undo the scaling and the logarithm to get back to price units
    original_predict_train = np.exp(train_predict*std+mean)
    original_predict_test = np.exp(test_predict*std+mean)

    # Score against the untouched prices (no exp(log(x)) round trip), passing
    # arguments in scikit-learn's documented (y_true, y_pred) order.
    train_mae=mean_absolute_error(train_prices,original_predict_train)
    train_rmse=mean_squared_error(train_prices,original_predict_train)**0.5
    test_mae=mean_absolute_error(test_prices,original_predict_test)
    test_rmse=mean_squared_error(test_prices,original_predict_test)**0.5

    print(f'{file} -- ARIMA({p},{d},{q}) is done')
    return p,q,train_mae,train_rmse,test_mae,test_rmse

In [31]:
# Datasets to evaluate: CSV base names that are looked up under ../Data.
# Alternative dataset lists (disabled):
# file_list=['BTCUSD-all','BTCUSD-N2Y','BTCUSD-N4Y',
#            'ETHUSD-all','ETHUSD-N2Y','ETHUSD-N4Y',
#            'USDTUSD-all','USDTUSD-N2Y','USDTUSD-N4Y',
#            'BNBUSD-all','BNBUSD-N2Y','BNBUSD-N4Y'
#            ]

# file_list=['BTCUSD-all']
file_list = [f'{symbol}USD-1m1h' for symbol in ('BTC', 'ETH', 'USDT', 'BNB')]

# Split fractions and differencing orders to sweep.
spilt_list = [0.7]
I_list = list(range(3))

# Per-run labels collected by the experiment loop.
files_list, spilts_list, Classifiers_list = [], [], []

# Per-run error metrics collected by the experiment loop.
train_mae, train_rmse = [], []
test_mae, test_rmse = [], []


In [32]:
# Empty the accumulators first so that re-running this cell on its own does
# not append a second copy of every row to the summary.
for _accumulator in (files_list, spilts_list, Classifiers_list,
                     train_mae, train_rmse, test_mae, test_rmse):
    _accumulator.clear()

# Fit one model per (dataset, split, differencing order) combination and
# record its label and error metrics.
for file in file_list:
    for spilts in spilt_list:
        for I in I_list:
            # result is (p, q, train_mae, train_rmse, test_mae, test_rmse)
            result=ARIMA_model(file,spilts,I)
            files_list.append(file)
            spilts_list.append(spilts)
            Classifiers_list.append(f'ARIMA({result[0]},{I},{result[1]})')

            train_mae.append(result[2])
            train_rmse.append(result[3])
            test_mae.append(result[4])
            test_rmse.append(result[5])
                


BTCUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(6,0,3)
BTCUSD-1m1h -- ARIMA(6,0,3) is done
BTCUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(2,1,6)
BTCUSD-1m1h -- ARIMA(2,1,6) is done
BTCUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(2,2,7)
BTCUSD-1m1h -- ARIMA(2,2,7) is done
ETHUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(12,0,12)
ETHUSD-1m1h -- ARIMA(12,0,12) is done
ETHUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(4,1,4)
ETHUSD-1m1h -- ARIMA(4,1,4) is done
ETHUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(5,2,6)
ETHUSD-1m1h -- ARIMA(5,2,6) is done
USDTUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(2,0,4)
USDTUSD-1m1h -- ARIMA(2,0,4) is done
USDTUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(1,1,1)
USDTUSD-1m1h -- ARIMA(1,1,1) is done
USDTUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(3,2,11)
USDTUSD-1m1h -- ARIMA(3,2,11) is done
BNBUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(8,0,6)
BNBUSD-1m1h -- ARIMA(8,0,6) is done
BNBUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(4,1,9)
BNBUSD-1m1h -- ARIMA(4,1,9) is done
BNBUSD-1m1h 0.7


  0%|          | 0/169 [00:00<?, ?it/s]

ARIMA(4,2,10)
BNBUSD-1m1h -- ARIMA(4,2,10) is done


In [33]:
# Column label -> values accumulated by the experiment loop, one per fitted model.
# Note: 'Test Split' carries the value that was passed to train_test_split as train_size.
summary_columns = ['Data', 'Classifier', 'Test Split',
                   'train_mae', 'train_rmse', 'test_mae', 'test_rmse']
summary_values = [files_list, Classifiers_list, spilts_list,
                  train_mae, train_rmse, test_mae, test_rmse]
Summary = dict(zip(summary_columns, summary_values))

df_Summary = pd.DataFrame(Summary)
df_Summary

Unnamed: 0,Data,Classifier,Test Split,train_mae,train_rmse,test_mae,test_rmse
0,BTCUSD-1m1h,"ARIMA(6,0,3)",0.7,108.09944,174.190517,1131.066807,1217.853435
1,BTCUSD-1m1h,"ARIMA(2,1,6)",0.7,109.189421,173.14587,1045.60937,1126.992361
2,BTCUSD-1m1h,"ARIMA(2,2,7)",0.7,111.05824,176.620329,933.972617,1012.461506
3,ETHUSD-1m1h,"ARIMA(12,0,12)",0.7,7.617605,12.049271,107.624522,111.915636
4,ETHUSD-1m1h,"ARIMA(4,1,4)",0.7,7.603434,12.194404,109.203743,113.257896
5,ETHUSD-1m1h,"ARIMA(5,2,6)",0.7,7.818152,12.49851,91.062606,97.035341
6,USDTUSD-1m1h,"ARIMA(2,0,4)",0.7,9.8e-05,0.000129,0.000202,0.00024
7,USDTUSD-1m1h,"ARIMA(1,1,1)",0.7,9.9e-05,0.000131,0.000158,0.000187
8,USDTUSD-1m1h,"ARIMA(3,2,11)",0.7,0.0001,0.000131,0.000145,0.000176
9,BNBUSD-1m1h,"ARIMA(8,0,6)",0.7,1.123064,1.975178,8.717533,10.286385


In [34]:
# Write the summary table to an Excel workbook (without the row index), then display it.
summary_xlsx = "Summary-ARIMA(1m1h).xlsx"
df_Summary.to_excel(summary_xlsx, index=False)
df_Summary

Unnamed: 0,Data,Classifier,Test Split,train_mae,train_rmse,test_mae,test_rmse
0,BTCUSD-1m1h,"ARIMA(6,0,3)",0.7,108.09944,174.190517,1131.066807,1217.853435
1,BTCUSD-1m1h,"ARIMA(2,1,6)",0.7,109.189421,173.14587,1045.60937,1126.992361
2,BTCUSD-1m1h,"ARIMA(2,2,7)",0.7,111.05824,176.620329,933.972617,1012.461506
3,ETHUSD-1m1h,"ARIMA(12,0,12)",0.7,7.617605,12.049271,107.624522,111.915636
4,ETHUSD-1m1h,"ARIMA(4,1,4)",0.7,7.603434,12.194404,109.203743,113.257896
5,ETHUSD-1m1h,"ARIMA(5,2,6)",0.7,7.818152,12.49851,91.062606,97.035341
6,USDTUSD-1m1h,"ARIMA(2,0,4)",0.7,9.8e-05,0.000129,0.000202,0.00024
7,USDTUSD-1m1h,"ARIMA(1,1,1)",0.7,9.9e-05,0.000131,0.000158,0.000187
8,USDTUSD-1m1h,"ARIMA(3,2,11)",0.7,0.0001,0.000131,0.000145,0.000176
9,BNBUSD-1m1h,"ARIMA(8,0,6)",0.7,1.123064,1.975178,8.717533,10.286385
