## Importing all Important Packages:

In [1]:
## Import Important Packages & Tools

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARMA
register_matplotlib_converters()
from time import time

In [None]:
## Capital Markets Data
import yfinance as yf
import pandas as pd
import numpy as np

## Nvidia Daily Price Data (2020-01-01 to 2025-01-01, about 5 years)


In [None]:
# Ticker and date window used for the Nvidia price download.
symbol = 'NVDA'
# Both bounds are fixed strings; the end bound is hard-coded, not derived from today's date.
start_date, end_date = '2020-01-01', '2025-01-01'

In [None]:
# Fetch the price history for `symbol` over [start_date, end_date) through yfinance.
# NOTE(review): this needs network access; consider caching the result to disk.
nvda_data = yf.download(symbol, start=start_date, end=end_date)

In [None]:
# Quick structural summary of the downloaded object (presumably a DataFrame).
nvda_data.info()

In [None]:
# Draw the three decomposition components on stacked axes, one component per row.
# NOTE(review): `decomposed_data` is not defined in any cell visible above this one,
# so this cell fails on a fresh kernel; the cell that builds it needs to be restored.
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1, figsize=(20, 16))
for component, axis in zip(('trend', 'seasonal', 'random_noise'), (ax1, ax2, ax3)):
    decomposed_data[component].plot(ax=axis)

In [None]:
#read data
def parser(s):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime``.

    Raises ``ValueError`` (from ``strptime``) when ``s`` does not match that layout.
    """
    iso_day_layout = '%Y-%m-%d'
    return datetime.strptime(s, iso_day_layout)
    
# Load the CSV with its first column as the index, then parse that index strictly
# as YYYY-MM-DD dates. This replaces read_csv's `date_parser` argument, which is
# deprecated since pandas 2.0; an unparseable date still raises instead of
# silently leaving a string index.
catfish_sales = pd.read_csv('catfish.csv', index_col=0)
catfish_sales.index = pd.to_datetime(catfish_sales.index, format='%Y-%m-%d')

In [8]:
# Detect the sampling frequency from the date index and attach it to the data.
inferred_freq = pd.infer_freq(catfish_sales.index)
catfish_sales = catfish_sales.asfreq(inferred_freq)

In [9]:
# Restrict the analysis to the 2000-01-01 .. 2004-01-01 window (label slicing on
# the date index includes both end points).
start_date = datetime(2000,1,1)
end_date = datetime(2004,1,1)
# Take an explicit copy: a later cell adds a column to this frame, and writing to
# a bare slice of `catfish_sales` triggers pandas' SettingWithCopy behaviour.
lim_catfish_sales = catfish_sales.loc[start_date:end_date].copy()

In [None]:
# Preview the first ten rows of the selected window.
lim_catfish_sales.head(n=10)

In [None]:
# Dimensions of the selected window.
lim_catfish_sales.shape

In [None]:
# Line plot of the windowed sales series.
sales_fig, sales_ax = plt.subplots(figsize=(10, 4))
sales_ax.plot(lim_catfish_sales)
sales_ax.set_title('Catfish Sales in 1000s of Pounds', fontsize=20)
sales_ax.set_ylabel('Sales', fontsize=16)

In [13]:
# Period-over-period change; the first row is dropped because diff() has no
# predecessor to subtract there.
first_diff = lim_catfish_sales.diff().iloc[1:]

In [None]:
# Line plot of the first-differenced series.
diff_fig, diff_ax = plt.subplots(figsize=(10, 4))
diff_ax.plot(first_diff)
diff_ax.set_title('First Difference of Catfish Sales', fontsize=20)
diff_ax.set_ylabel('Sales', fontsize=16)

# ACF

In [15]:
# Sample autocorrelation values of the differenced series (library-default lag count).
acf_vals = acf(first_diff)

In [None]:
# Show the computed autocorrelation values.
acf_vals

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# plot_acf estimates the autocorrelation of whatever series it receives, so it
# must be given the differenced series itself. Passing the already-computed
# `acf_vals` would plot the autocorrelation of the autocorrelation values.
acf_original = plot_acf(first_diff)

## Based on the ACF, we should start with an MA(1) process

# PACF

In [19]:
# Sample partial autocorrelation values of the differenced series (library-default lag count).
pacf_vals = pacf(first_diff)


In [None]:
# plot_pacf estimates the partial autocorrelation of the series it receives, so
# pass the differenced series rather than the precomputed `pacf_vals` (which
# would plot the PACF of the PACF values).
pacf_original = plot_pacf(first_diff)

Based on the PACF, we should start with an AR(4) process

## ***Get training and testing sets***

In [22]:
# Chronological split: everything up to and including train_end is used for
# fitting; the test window starts one day later and runs through test_end.
train_end, test_end = datetime(2003, 7, 1), datetime(2004, 1, 1)
test_start = train_end + timedelta(days=1)

train_data = first_diff.loc[:train_end]
test_data = first_diff.loc[test_start:test_end]

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the training series; element [1] of the
# returned tuple is the p-value that gets printed.
adf_test = adfuller(train_data)
print(f'p-value: {adf_test[1]}')

In [None]:
# Difference the training series once more and drop the NaN that diff() leaves
# in the first row. train_data comes from first_diff, so this is effectively the
# second difference of the original sales.
df_train_diff = train_data.diff().dropna()
df_train_diff.plot()

In [None]:
# ACF and PACF plots of the re-differenced training series.
acf_diff = plot_acf(df_train_diff)
pacf_diff = plot_pacf(df_train_diff)

In [None]:
# Repeat the ADF test on the re-differenced series; this overwrites the earlier `adf_test`.
adf_test = adfuller(df_train_diff)
print(f'p-value: {adf_test[1]}')

# Fit the ARMA Model

In [28]:
from statsmodels.tsa.arima.model import ARIMA

# define model
# order=(4,1,1) is (p, d, q): 4 autoregressive lags, 1 round of differencing, 1 moving-average lag.
# NOTE(review): train_data is sliced from first_diff, i.e. it is already
# differenced once, so d=1 differences it a second time. The surrounding text
# describes an ARMA(4,1) model, which would be order=(4,0,1) - confirm which is intended.
model = ARIMA(train_data, order=(4,1,1))

In [None]:
#fit the model
# Wall-clock timestamps taken before and after fit() give the elapsed fitting time in seconds.
start = time()
model_fit = model.fit()
end = time()
print('Model Fitting Time:', end - start)

In [None]:
#summary of the model
# Printed (rather than displayed) so the summary renders as plain text.
print(model_fit.summary())

## Auto-fitting ARIMA Model using pmdarima

In [None]:
!pip install pmdarima

In [None]:
import pmdarima as pm
# Let pmdarima select the ARIMA order for train_data, with the stepwise and
# seasonal options both switched off. The fitted model is bound to the name
# `auto_arima` and displayed as the cell output.
auto_arima = pm.auto_arima(train_data, stepwise=False, seasonal=False)
auto_arima

In [None]:
# Summary table of the automatically selected model.
auto_arima.summary()

## MODEL EVALUATION & PREDICTION

### So the ARMA(4,1) model is:
$\hat{y}_t = -0.87y_{t-1} - 0.42y_{t-2} - 0.56y_{t-3} - 0.61y_{t-4} + 0.52\varepsilon_{t-1}$

In [31]:
# The prediction window spans the first through the last timestamp of the test set.
test_index = test_data.index
pred_start_date, pred_end_date = test_index[0], test_index[-1]

In [32]:
#get the predictions and residuals
predictions = model_fit.predict(start=pred_start_date, end=pred_end_date)
# Subtract row-wise on the date index. test_data is a one-column DataFrame
# (read_csv output sliced and differenced), and a plain `test_data - predictions`
# would align the prediction Series' dates against the DataFrame's *column
# labels*, producing an all-NaN frame. `.sub(..., axis=0)` aligns on the rows
# instead and also works unchanged if test_data is a Series.
residuals = test_data.sub(predictions, axis=0)

In [None]:
# Residuals over the test window, with a faint dashed zero line for reference.
resid_fig, resid_ax = plt.subplots(figsize=(10, 4))
resid_ax.plot(residuals)
resid_ax.set_title('Residuals from AR Model', fontsize=20)
resid_ax.set_ylabel('Error', fontsize=16)
resid_ax.axhline(0, color='r', linestyle='--', alpha=0.2)

In [None]:
# Overlay the held-out differenced data and the model's predictions for it.
pred_fig, pred_ax = plt.subplots(figsize=(10, 4))

pred_ax.plot(test_data)
pred_ax.plot(predictions)
pred_ax.legend(('Data', 'Predictions'), fontsize=16)

pred_ax.set_title('First Difference of Catfish Sales', fontsize=20)
pred_ax.set_ylabel('Sales', fontsize=16)

In [None]:
# Compute the auto-ARIMA forecast here: in notebook order this cell runs before
# the cell that originally defined `forecast_test_auto`, so on a fresh kernel
# the bare `.head()` raised NameError.
forecast_test_auto = auto_arima.predict(n_periods=len(test_data))
forecast_test_auto.head()

In [None]:
# Forecast one value per test period with the auto-selected model.
forecast_test_auto = auto_arima.predict(n_periods=len(test_data))
# Attach the forecast by date instead of by position. The previous padded list
# had len(train_data) + len(test_data) entries, which is one fewer than the rows
# of lim_catfish_sales (first_diff dropped the first row), so the assignment
# raised a length-mismatch ValueError. Index alignment leaves NaN on every
# non-test row, and .assign() builds a new frame rather than mutating a slice.
# NOTE(review): the forecast is on the first-difference scale (the model was fit
# on train_data), while the other column holds sales levels - confirm that
# plotting them together is intended.
forecast_auto = pd.Series(
    np.asarray(forecast_test_auto), index=test_data.index, name='forecast_auto'
)
lim_catfish_sales = lim_catfish_sales.assign(forecast_auto=forecast_auto)

In [None]:

# Plot every column of the windowed frame, including `forecast_auto` once the
# previous cell has added it.
lim_catfish_sales.plot()