In [None]:
import os
import sys

# Change the working directory to the parent of the notebook's folder.
# We need to be in the main directory that contains tests, models, etc folders,
# because later cells import project modules and open files by relative path.
#
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell without restarting the kernel does not climb one more
# level on every execution.
if 'notebook_start_dir' not in globals():
    notebook_start_dir = os.getcwd()

current_dir = os.getcwd()
print(f"Current Working Directory: {current_dir}")

# Always resolve the target relative to the remembered starting directory.
parent_dir = os.path.abspath(os.path.join(notebook_start_dir, os.pardir))
os.chdir(parent_dir)

current_dir = os.getcwd()
print(f"Current Working Directory: {current_dir}")

In [None]:
from ARIMA_model import log_return_df, threshold_array, window_array, arima_profit
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import pickle

In [None]:
# Load the saved results and pull out the training dates together with the
# tuned ARIMA hyperparameters used by the cells below.
# NOTE(review): pickle.load can execute arbitrary code - only open a results
# file that was produced by this project.
with open('dqn_trading_results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

train_dates = results['train_dates']
threshold = results['ARIMA_best_threshold']
window_size = results['ARIMA_best_window_size']

# Number of training observations; used below to slice the log-return series.
Ntrain = len(train_dates)

In [None]:
print(log_return_df.head())

# Chart the log daily returns over the training period (first Ntrain rows).
train_slice = slice(None, Ntrain)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    log_return_df['Date'][train_slice],
    log_return_df['Log_Daily_Return'][train_slice],
    label='Log Daily Return',
)
ax.set_title('Log of Daily Returns of Training Data')
ax.set_xlabel('Date')
ax.set_ylabel('Log Daily Return')
plt.xticks(rotation=45)  # slanted tick labels on the x axis
ax.legend()
plt.tight_layout()
plt.show()

## Test for stationarity of the log-return series

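A time series is stationary if its statistical properties (mean, variance, autocorrelation) do not change over time; in particular, it contains no trends or seasonal swings. The Augmented Dickey-Fuller (ADF) test can be used to test for stationarity: its null hypothesis is that the series has a unit root (i.e. is non-stationary), so a small p-value is evidence that the series is stationary.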

In [None]:
# Run the Augmented Dickey-Fuller test on the training-period log returns,
# with the lag length selected by AIC.
dftest = adfuller(log_return_df['Log_Daily_Return'][:Ntrain], autolag='AIC')

# The first four entries of the result are the headline numbers; the fifth is
# a dict of critical values, which is flattened into the same report.
headline_labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
report = dict(zip(headline_labels, dftest[:4]))
report.update(
    ('Critical Value ({0})'.format(level), critical_value)
    for level, critical_value in dftest[4].items()
)
dfoutput = pd.Series(report)

dfoutput

## ACF and PACF Charts

Autocorrelation and partial autocorrelation charts help us choose the hyperparameters (the AR order p and the MA order q) for the ARIMA model.

The ACF (autocorrelation function) measures how strongly each "y" value is correlated with the "y" value n steps (lags) before it.

The PACF (partial autocorrelation function) gives us (a sample estimate of) the correlation between two "y" values separated by n lags, excluding the impact of all the "y" values in between them.

In [None]:
# Draw the ACF and PACF of the training-period log returns side by side,
# each for lags 0 through 10.
train_log_returns = log_return_df['Log_Daily_Return'][:Ntrain]

fig, axes = plt.subplots(1, 2, figsize=(18, 5))

panels = (
    (plot_acf, 'Autocorrelation (ACF)'),
    (plot_pacf, 'Partial Autocorrelation (PACF)'),
)
for ax, (plot_fn, title) in zip(axes, panels):
    plot_fn(train_log_returns, lags=10, ax=ax)
    # Replace the default title set by the statsmodels plotting helper.
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Fitting ARIMA on the whole training dataset

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model of order (p, d, q) = (1, 0, 1) to the training-period
# log returns. The log-return series is passed in directly, with d = 0, so the
# model applies no further differencing.
train_log_returns = log_return_df['Log_Daily_Return'][:Ntrain]
arima_order = (1, 0, 1)
ar1 = ARIMA(train_log_returns, order=arima_order).fit()
ar1.summary()

In [None]:
# Sweep one hyperparameter at a time over the training range [0, Ntrain),
# holding the other at the best value loaded from the results file.
# NOTE(review): the trailing (1, 1) arguments are presumably the ARIMA p and q
# orders - confirm against arima_profit's signature.
profits = [
    arima_profit(0, Ntrain, window_size, th, 1, 1)
    for th in threshold_array
]
profits_window = [
    arima_profit(0, Ntrain, win, threshold, 1, 1)
    for win in window_array
]

fig, axes = plt.subplots(1, 2, figsize=(18, 6))
threshold_ax, window_ax = axes

# Left panel: one profit curve per threshold, at the best window size.
for th, curve in zip(threshold_array, profits):
    threshold_ax.plot(train_dates, curve, label=f'Threshold={th}')
threshold_ax.legend()
threshold_ax.set(
    xlabel='Date',
    ylabel='Profit',
    title=f'Profit vs Threshold (Window Size = {window_size})',
)
threshold_ax.grid(True)

# Right panel: one profit curve per window size, at the best threshold.
for win, curve in zip(window_array, profits_window):
    window_ax.plot(train_dates, curve, label=f'Window={win}')
window_ax.legend()
window_ax.set(
    xlabel='Date',
    ylabel='Profit',
    title=f'Profit vs Window Size (Threshold = {threshold})',
)
window_ax.grid(True)

plt.tight_layout()
plt.show()
