In [None]:
import pandas as pd
import numpy as np
import warnings
import itertools
import matplotlib
import matplotlib.pyplot as plt
from pylab import rcParams
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load the Gemini 1INCH/USD candle export, indexed by the parsed `date` column,
# and keep only the columns used in the rest of the notebook.
df = pd.read_csv("Gemini_1INCHUSD_1h.csv", index_col = ['date'], parse_dates = True)
df = df[['unix','symbol','open','high','low','close','Volume 1INCH' ,'Volume USD']]

# Every later step (diff, pct_change, resample, positional train/test arrays)
# assumes rows run oldest -> newest. Sorting makes that explicit; it is a no-op
# when the file is already ascending.
# NOTE(review): exchange exports are often newest-first — confirm for this file.
df = df.sort_index()
df.head(n=10)

In [None]:
# Plot USD volume at three granularities, one stacked panel each:
# raw observations, daily mean and monthly mean.
y = df['Volume USD']
fig, ax = plt.subplots(nrows = 3, ncols = 1, figsize = (14, 5))

# (series, marker, legend label) for each panel, top to bottom.
panels = [
    (y, 'o', "Volume USD Original Data"),
    (y.resample('D').mean(), '.', "Volume USD Resampled DAILY"),
    (y.resample('M').mean(), 'o', "Volume USD Resampled MONTHLY"),
]
for i, (series, marker, label) in enumerate(panels):
    ax[i].plot(series, marker = marker, linestyle = '-', label = label)
    ax[i].legend()

'''
Seasonal-Decomposition of USD Volume
-> Additive Decomposition Model for USD Volume
'''
# Additive decomposition of USD volume; the result's components are plotted below.
# `period` is the current name of this argument: the older `freq` keyword is
# deprecated in statsmodels and rejected by newer releases.
# NOTE(review): a cycle length of 12 observations is carried over from the
# original call — confirm it is the intended seasonality for this series.
decomposition = seasonal_decompose(y, model = 'additive', period = 12)
fig = decomposition.plot()

In [None]:
'''
Basic Autocorrelation Coefficients/Values/Functions:
[pct_change, autocorr, corr, shift, diff, np.log, mean difference, rolling correlation, pacf, acf, white noise, lag plots]
'''
from statsmodels.tsa.stattools import pacf, acf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

y = df['Volume USD']

# Relative change between consecutive rows.
df['Percent Change - Volume USD'] = y.pct_change()
# The next two values are scalars; assigning them broadcasts one constant down
# the whole column (kept as-is because the last panel below plots it).
df['Volume USD and Volume 1INCH Corr'] = y.corr(df['Volume 1INCH'])
df['Autocorr Volume USD'] = y.autocorr()
df['Volume USD First Difference'] = y.diff(1)
# Vectorised log instead of a per-element lambda.
# NOTE(review): any zero volume becomes -inf here and would propagate into the
# ACF/PACF below — confirm the series is strictly positive.
df['Log Volume USD'] = np.log(y)
df['Mean Difference - Volume USD'] = y - y.mean()
# Rolling lag-1 autocorrelation over 12 observations. Calling `.corr()` with no
# argument correlates the window with itself, which is trivially 1.0, so the
# series is correlated against its own one-step shift instead.
df['Rolling Correlation - Volume USD'] = y.rolling(window = 12).corr(y.shift(1))

# Rolling z-score: distance from the 12-observation rolling mean in rolling-std units.
rolling_window = y.rolling(window = 12)
y_detrend = (y - rolling_window.mean()) / rolling_window.std()

# First difference of log volume; the leading NaN produced by diff is dropped.
log_volume_diff = df['Log Volume USD'].diff(1).iloc[1:]
lag_correlation = acf(log_volume_diff, fft = False)
partial_lag_correlation = pacf(log_volume_diff)

plot_pacf(df['Log Volume USD'])
plot_acf(df['Log Volume USD'])

fig, ax = plt.subplots(4, 1, figsize = (14, 5))
ax[0].plot(df['Log Volume USD'], marker = '.', linestyle = '-', label = "Log Volume USD")
ax[1].plot(df['Mean Difference - Volume USD'], marker = 'o', linestyle = '-', label = "Mean Difference - Volume USD")
ax[2].plot(df['Rolling Correlation - Volume USD'], marker = '.', linestyle = '-', label = "Rolling Correlation - Volume USD")
ax[3].plot(df['Autocorr Volume USD'], label = "Autocorr Volume USD")
# Labels are only rendered once a legend is requested on each axis.
for panel in ax:
    panel.legend()

In [None]:
'''
AutoRegression -> ARMA (Autoregressive Moving Average Model)
An autoregressive (AR) model predicts the present value of a time series from previous values of the same series.
It is a regression of the series on itself, shifted by one or more time steps (lags), so the present value is a weighted sum of its past values plus noise.
For an AR(2) model, y_t = a1 * y_(t-1) + a2 * y_(t-2) + e_t: y_(t-1) and y_(t-2) are lags of the time series y, e_t is the error term (noise), and a1 and a2 are the coefficients of the model.
'''

import warnings
# Silence the FutureWarning raised for the legacy arima_model.ARMA / ARIMA
# classes; the second argument is matched against the start of the message.
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARMA',
                        FutureWarning)
warnings.filterwarnings('ignore', 'statsmodels.tsa.arima_model.ARIMA',
                        FutureWarning)

import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARMA
from statsmodels.graphics.tsaplots import plot_acf
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from pandas import concat

# First timestamp that belongs to the hold-out (test) set.
SPLIT_DATE = '2022-03-12'

# The arrays below drop the date index, so row order is the only remaining
# notion of time. Sort first so both halves run oldest -> newest; this is a
# no-op when the frame is already ascending.
volume_usd = df['Volume USD'].sort_index()
train = np.array(volume_usd[volume_usd.index < SPLIT_DATE])
test = np.array(volume_usd[volume_usd.index >= SPLIT_DATE])

In [None]:
from statsmodels.tsa.ar_model import AR

# Fit an autoregressive model to the training array with the library's default
# settings, then report the lag order and the estimated parameters.
ar_model = AR(train)
ar_fit = ar_model.fit()
for template, value in (("Lag: {}", ar_fit.k_ar), ("Coefficients: {}", ar_fit.params)):
    print(template.format(value))

In [None]:
# Predict one value per test observation, starting at the first step after the
# end of the training array.
forecast_start = len(train)
forecast_end = forecast_start + len(test) - 1
predictions = ar_fit.predict(start = forecast_start, end = forecast_end, dynamic = False)

# NOTE(review): index 1 is the second test observation, not the first.
print('Volume USD Value 1:', test[1], 'Predicted value: ', predictions[1])

In [None]:
'''
SARIMA Model -> Optimization, Finding best P, Q, D, and grid searching
'''
# Search space: each of p, d and q takes the values 0 and 1.
q = d = p = range(2)
# All (p, d, q) combinations for the non-seasonal order.
pdq = [combo for combo in itertools.product(p, d, q)]
# The same combinations with a seasonal period of 12 appended.
seasonal_pdq = [(ar, diff, ma, 12) for ar, diff, ma in itertools.product(p, d, q)]

'''
Determining the best parameters, p, q, d values, and seasonal order
'''
# Exhaustive search: fit one SARIMAX model per (order, seasonal order) pair on
# the training array and remember the pair with the lowest AIC.
best_aic = np.inf
best_pdq = None
best_seasonal_pdq = None
model_temporary = None

for param, seasonal_parameters in itertools.product(pdq, seasonal_pdq):
    model_temporary = sm.tsa.SARIMAX(
        train,
        order = param,
        seasonal_order = seasonal_parameters,
        enforce_invertibility = False,
        enforce_stationarity = False,
    )
    results = model_temporary.fit(disp = False)

    # Strict "<" keeps the first combination found when AICs tie.
    if results.aic < best_aic:
        best_aic, best_pdq, best_seasonal_pdq = results.aic, param, seasonal_parameters
print("ARIMA Params {} * {} - AIC:{}".format(best_pdq, best_seasonal_pdq, best_aic))

def find_best_params(best_aic = None, best_pdq = None, best_seasonal_pdq = None, temp_model = None):
    """Grid-search SARIMAX orders on ``train`` and return the lowest-AIC combination.

    Reads the notebook-level names ``train`` (data to fit), ``pdq`` (candidate
    non-seasonal orders) and ``p``, ``d``, ``q`` (ranges used to build the
    seasonal candidates with a period of 12).

    Parameters
    ----------
    best_aic, best_pdq, best_seasonal_pdq, temp_model :
        Accepted only for backward compatibility. Every one of them is reset
        before the search starts, so the values passed in have no effect.

    Returns
    -------
    tuple
        ``(best_seasonal_pdq, best_aic, best_pdq)`` for the best model found;
        ``(None, inf, None)`` if no candidate has an AIC below infinity.
    """
    # Start from a clean slate regardless of what the caller passed in.
    best_seasonal_pdq = None
    best_aic = np.inf
    best_pdq = None

    seasonal_pdq = [(x[0], x[1], x[2], 12) for x in itertools.product(p, d, q)]

    for param in pdq:
        for seasonal_params in seasonal_pdq:
            temp_model = sm.tsa.SARIMAX(train, order = param, seasonal_order = seasonal_params,
                                        enforce_invertibility = False, enforce_stationarity = False)
            temp_model_results = temp_model.fit(disp = False)

            # Compare every fitted candidate (this check must stay inside the
            # inner loop) and record the winning seasonal tuple, not the
            # whole candidate list.
            if temp_model_results.aic < best_aic:
                best_aic = temp_model_results.aic
                best_pdq = param
                best_seasonal_pdq = seasonal_params
    return best_seasonal_pdq, best_aic, best_pdq

# Fit a SARIMAX model with a fixed order on the training array.
# NOTE(review): the order is hard-coded rather than taken from the grid search
# above — confirm it matches the search result.
sarima_options = dict(
    order = (1, 1, 1),
    seasonal_order = (0, 1, 1, 12),
    enforce_invertibility = False,
    enforce_stationarity = False,
)
sarima_model = sm.tsa.SARIMAX(train, **sarima_options)
sarima_fit = sarima_model.fit()

# One prediction per test observation, starting right after the training data.
horizon_start = len(train)
horizon_end = horizon_start + len(test) - 1
predictions = sarima_fit.predict(start = horizon_start, end = horizon_end, dynamic = False)

# Overlay the test values (default colour) and the predictions (red).
plt.figure(figsize = (12, 6))
for series, line_style in ((test, {}), (predictions, {'color': 'red'})):
    plt.plot(series, **line_style)
plt.show()

In [None]:
'''
ARIMA Model -> Forecasted Values
-> Optimize for best parameters
'''

import warnings

# Silence the FutureWarning for the legacy ARMA / ARIMA classes. Registering an
# identical filter again does not change the filter list, so one call per class
# is enough.
for legacy_class in ('statsmodels.tsa.arima_model.ARMA',
                     'statsmodels.tsa.arima_model.ARIMA'):
    warnings.filterwarnings('ignore', legacy_class, FutureWarning)

# Running best for the non-seasonal search.
# NOTE(review): `best_pdq` is also written by the SARIMA search cell, so this
# overwrites that result.
best_aic_arima = np.inf
best_pdq = None
temp_arima_model = None

# Candidate orders: every (p, d, q) with each term in {0, 1}.
q = d = p = range(2)
pdq = [combo for combo in itertools.product(p, d, q)]

for params in pdq:
    temp_arima_model = sm.tsa.ARIMA(train, order = params)
    temp_arima_model_res = temp_arima_model.fit()
    # Strict "<" keeps the first order found when AICs tie.
    if temp_arima_model_res.aic < best_aic_arima:
        best_aic_arima, best_pdq = temp_arima_model_res.aic, params
print("Best ARIMA {} model - AIC: {}".format(best_pdq, best_aic_arima))

In [None]:
# Fit ARIMA(0, 1, 1) on the USD volume column, skipping its first row.
# NOTE(review): the reason for dropping the first row is not visible here — confirm.
arima_model = sm.tsa.ARIMA(df['Volume USD'].iloc[1:], order = (0, 1, 1))
arima_model_res = arima_model.fit()

# NOTE(review): `fittedvalues` are in-sample fits, not out-of-sample forecasts,
# and with the legacy ARIMA class and d=1 they may be on the differenced scale
# — confirm against the installed statsmodels version before comparing them
# with the level series below.
df['Forecasted Volume USD'] = arima_model_res.fittedvalues

# Overlay the fitted series and the observed series; each legend label is the
# column name.
fig, ax = plt.subplots(figsize = (15, 5))
for column in ('Forecasted Volume USD', 'Volume USD'):
    ax.plot(df[column], marker = '.', linestyle = '-', label = column)
ax.legend()

arima_model_res.summary()