# ARIMA exhaustive fitting

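This notebook determines the ARIMA order of the EUR/USD log returns using the `auto_arima` function of the pmdarima package. For each time frame, the model is fitted on several randomly chosen windows of the series, and the selected orders are tallied together with the number of fits whose residuals pass the Ljung-Box test.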

In [None]:
import os
import sys

import pandas as pd
import numpy as np
import math

import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
import statsmodels.stats as sms
from statsmodels.tsa.arima.model import ARIMA

import pmdarima as pm

import random as rnd

import matplotlib.pyplot as plt
import matplotlib as mpl

In [None]:
# Load the EUR/USD quotes and keep only the 'Middle' price column.
data = pd.read_csv('EURUSD_2019_01_03-without ID.csv', header=0, index_col=None, parse_dates=['Datetime'])
data = data['Middle']
data_lenght = len(data.index)
# Log returns; the first element has no predecessor, so its NaN is replaced by 0.
lrets = np.log(data / data.shift(1)).fillna(0)

# One row per (time frame, ARIMA order) pair:
#   'Number of orders'     - how many times auto_arima selected that order
#   'Number of valid fits' - how many of those fits passed the Ljung-Box test
results = pd.DataFrame(columns=['Time frame', 'Order', 'Number of orders', 'Number of valid fits'])
df_counter = 0

# Time-frame label -> number of observations in one sampled window.
# NOTE(review): the unit of the labels (30, 60, 720) is not visible here - confirm.
times = {30: 3635, 60: 7270, 720: 87240}

for t in times:
    # random.randint is inclusive at both ends, so the last admissible start
    # position is data_lenght - times[t]. Fail with a readable message instead
    # of randint's "empty range" error when the series is too short.
    if data_lenght - times[t] < 1:
        raise ValueError(
            f"series of length {data_lenght} is too short for a window of "
            f"{times[t]} observations (time frame {t})"
        )

    for i in range(0, 10):
        rnd_index = rnd.randint(1, data_lenght - times[t])  # 1, because lrets[0] is always 0
        # .iloc makes the positional slicing explicit.
        lrets_part = lrets.iloc[rnd_index:rnd_index + times[t]]

        mdl = pm.auto_arima(
            y=lrets_part,
            start_p=2,
            d=None,
            start_q=2,
            max_p=5,
            max_d=2,
            max_q=5,
            max_order=12,
            stepwise=True,
            n_jobs=1,
            seasonal=False,
            stationary=False,
            # The options below most likely do not need to be changed; they are
            # listed only to show which settings are available.
            information_criterion='aic',
            alpha=0.05,
            test='kpss',
            seasonal_test='ocsb',
            method='lbfgs',
            random=False,
            n_fits=10,
            return_valid_fits=False,
            with_intercept='auto',
        )

        order = mdl.get_params().get('order')
        # resid is a method on the pmdarima model, so it has to be called;
        # passing the bound method itself hands no data to the test.
        ljung = sms.diagnostic.acorr_ljungbox(mdl.resid(), lags=[25], boxpierce=False)
        # A fit counts as valid when the Ljung-Box test does not reject the
        # hypothesis of uncorrelated residuals at the 5% level. The same rule
        # is used for new and existing rows; a NaN p-value is not valid.
        is_valid = bool(ljung['lb_pvalue'].iloc[0] >= 0.05)

        # writing the results
        # `Series == tuple` is treated by pandas as an element-wise comparison
        # against a 3-element array, so each stored order is compared as a
        # whole tuple instead.
        same_order = np.array([stored == order for stored in results['Order']], dtype=bool)
        same_frame = (results['Time frame'] == t).to_numpy(dtype=bool)
        index = results.index[same_order & same_frame]
        if len(index) == 0:
            results.loc[df_counter] = [t, order, 1, int(is_valid)]
            df_counter += 1
        else:
            # Single .loc[row, column] access: chained indexing
            # (results[col].loc[rows] = ...) may write to a temporary copy.
            results.loc[index, 'Number of orders'] += 1
            if is_valid:
                results.loc[index, 'Number of valid fits'] += 1

results.to_csv('arima_exhFit_table.csv')