In [1]:
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the training table and key it by its 'date' column in one expression.
df = pd.read_csv('./../../data/train/arima_model.csv').set_index('date')

In [3]:
# Closing-price columns that get their own univariate model; every other
# column of the loaded table is dropped here.
targets = [
    'bit_close',
    'eth_close',
    'ada_close',
    'bnb_close',
]
df = df.loc[:, targets]

In [4]:
# Turn the index labels into real timestamps so later cells can derive
# daily periods from them.
df = df.set_index(pd.to_datetime(df.index))

In [5]:
# Fit one univariate ARIMA(p, 1, q) per target column. For every column the
# series is split chronologically, (p, q) is chosen by lowest AIC on the
# training part, and the winning model forecasts the held-out tail.
#
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` is the legacy class
# (deprecated in statsmodels 0.12, removed in 0.13); the call pattern below
# (`model.predict(params=...)`) is specific to it -- confirm the pinned
# version before upgrading.
# NOTE(review): the legacy `predict` appears to default to typ='linear', i.e.
# forecasts of the differenced series when d=1 rather than price levels --
# confirm that this is what consumers of the predictions expect.

TEST_FRACTION = 0.011    # share of the most recent rows held out for testing
DIFF_ORDER = 1           # d: fixed order of differencing
P_CANDIDATES = range(5)  # AR orders tried: 0..4
Q_CANDIDATES = range(5)  # MA orders tried: 0..4

predictions = pd.DataFrame()
skipped_orders = {}  # column -> [((p, d, q), exception), ...] for failed fits

for column in df.columns:
    data = df[column]
    # shuffle=False keeps chronological order: the test set is the tail.
    train, test = train_test_split(data, test_size=TEST_FRACTION, shuffle=False)
    # Give the index an explicit daily frequency, see
    # https://stackoverflow.com/questions/58510659/error-valuewarning-a-date-index-has-been-provided-but-it-has-no-associated-fr/58511282
    train.index = pd.DatetimeIndex(train.index).to_period('D')
    test.index = pd.DatetimeIndex(test.index).to_period('D')

    best_aic = float("inf")
    best_p = 0
    best_q = 0
    model = None   # unfitted ARIMA instance of the best order so far
    arima = None   # fitted results belonging to `model`
    skipped_orders[column] = []

    # Grid search over (p, q); the lowest AIC wins.
    for p in P_CANDIDATES:
        for q in Q_CANDIDATES:
            order = (p, DIFF_ORDER, q)
            try:
                candidate = ARIMA(endog=train, order=order)
                candidate_fit = candidate.fit()
            except Exception as err:
                # Best effort: some orders cannot be estimated. Skip them, but
                # keep the reason for inspection and let KeyboardInterrupt /
                # SystemExit propagate instead of swallowing them.
                skipped_orders[column].append((order, err))
                continue

            if candidate_fit.aic < best_aic:
                best_aic = candidate_fit.aic
                best_p = p
                best_q = q
                # Keep the winner so it does not have to be fitted again.
                model = candidate
                arima = candidate_fit

    if arima is None:
        # No candidate was selected (every fit failed or gave a NaN AIC).
        # Fall back to the (0, d, 0) model, as the original code did; an
        # error raised by this fit is intentionally not caught.
        model = ARIMA(endog=train, order=(best_p, DIFF_ORDER, best_q))
        arima = model.fit()

    print(column,'AIC:',arima.aic)

    # Forecast across the hold-out window.
    preds = model.predict(params=arima.params,
                          start=test.index[0],
                          end=test.index[-1])
    # Keep one forecast per hold-out row (this was a hard-coded 11).
    predictions[column] = preds[:len(test)]

bit_close AIC: -3034.7640516431316
eth_close AIC: -2534.248723654989
ada_close AIC: -1865.2402535231881
bnb_close AIC: -2167.937472905454


In [6]:
# Label the forecast rows with the hold-out periods. `test` is whatever the
# last iteration of the fitting loop left behind, so that cell must run first.
predictions.index = test.index

In [7]:
# Tag every forecast column with the name of the model that produced it.
predictions.columns = [
    name + '_univariate_arima' for name in predictions.columns
]

In [8]:
# Write the renamed forecasts (index included) to the predictions folder.
predictions.to_csv(path_or_buf='../../data/predictions/univariate_arima.csv')

In [9]:
# Total AIC over the four coins. The terms are the per-column AIC values
# printed by the model-fitting cell, copied in by hand.
(
    -3034.7640516431316    # bit_close
    - 2534.248723654989    # eth_close
    - 1865.2402535231881   # ada_close
    - 2167.937472905454    # bnb_close
)

-9602.190501726764

In [10]:
# Mean AIC per coin: the hand-copied total from the previous cell, split
# evenly across the four models.
0.25 * -9602.190501726764

-2400.547625431691