SARIMA (Seasonal AutoRegressive Integrated Moving Average) is an extension of the ARIMA model that incorporates seasonality. It is widely used for time series forecasting when data exhibits seasonal patterns.

SARIMA Notation
A SARIMA model is represented as:

$$\mathrm{SARIMA}(p, d, q) \times (P, D, Q, s)$$
where:

- $p$ = order of the non-seasonal autoregressive (AR) term
- $d$ = degree of non-seasonal differencing (to make the series stationary)
- $q$ = order of the non-seasonal moving average (MA) term
- $P$ = seasonal autoregressive order
- $D$ = seasonal differencing order
- $Q$ = seasonal moving average order
- $s$ = seasonality period, i.e. the number of time steps in one seasonal cycle (e.g., 12 for monthly data, 4 for quarterly data)

When to Use SARIMA?
When your data has both trend and seasonality.
When traditional ARIMA models do not capture seasonal fluctuations.

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import seaborn as sns
import datetime as dt
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose


In [1]:
from datetime import date, timedelta

# Length of the history window, in calendar days counted back from today.
LOOKBACK_DAYS = 739

# Take one snapshot of "today" and derive both ends of the window from it,
# so the two dates cannot disagree if the cell runs across midnight.
today = date.today()

# Both bounds are kept as ISO "YYYY-MM-DD" strings; d1/d2 are retained as
# aliases of end_date/start_date.
d1 = today.strftime("%Y-%m-%d")
end_date = d1
d2 = (today - timedelta(days=LOOKBACK_DAYS)).strftime("%Y-%m-%d")
start_date = d2
print("data will be between these dates: ", start_date, "and", end_date)



In [4]:
# Symbol to request from yfinance.
ticker = "GOOGL"

# Download the data for the [start_date, end_date] window computed earlier,
# with the progress bar turned off.
df = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    progress=False,
)

# Preview the first rows; shown because it is the cell's last expression.
df.head()





In [None]:
# Inspect the column labels of df.
# This is a bare expression: the notebook displays its value because it is
# the last statement of the cell; nothing is printed explicitly.
df.columns




In [None]:
# Copy the current index into a new column named 'Date', then replace the
# index with a default integer index. drop=True discards the old index rather
# than turning it into an extra column; inplace=True mutates df directly.

df['Date'] = df.index
df.reset_index(drop=True, inplace=True)



In [None]:
# Make 'Date' the first column of df.
#
# If a 'Date' column already exists (a preceding cell creates one and then
# resets the index), move that column to position 0. Only when there is no
# such column are the dates taken from the index. Inserting df.index
# unconditionally would, after an index reset, store the row numbers 0..n-1
# under a second 'Date' label and shadow the real dates.
dates = df.pop('Date') if 'Date' in df.columns else df.index
df.insert(0, 'Date', dates)

In [None]:
# Print a concise summary of df (index, columns, non-null counts, dtypes).
df.info()



In [None]:
# Line chart: 'Close' on the y-axis against 'Date' on the x-axis.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Date'], df['Close'], linestyle='-', color='b')
ax.set_xlabel('Date')
ax.set_ylabel('Close')
ax.set_title('Stock Close Price Over Time')

# Tilt the x tick labels so neighbouring dates do not overlap.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Drop repeated column labels. duplicated() flags every occurrence of a label
# after the first, so the first column of each name is the one that is kept.
# The explicit copy() makes df an independent frame; without it the column
# assignment below can raise SettingWithCopyWarning, because the .loc
# selection may be tracked as a slice of the previous frame.
df = df.loc[:, ~df.columns.duplicated()].copy()

# Make sure 'Date' holds datetimes (a no-op when it already does).
df['Date'] = pd.to_datetime(df['Date'])

# Line chart of 'Close' against 'Date'.
plt.figure(figsize=(10, 5))
plt.plot(df['Date'], df['Close'], linestyle='-', color='b')
plt.xlabel('Date')
plt.ylabel('Close')
plt.title('Stock Close Price Over Time')
plt.xticks(rotation=45)  # tilt labels so neighbouring dates stay readable
plt.tight_layout()
plt.show()



In [None]:
# Stationarity check: Augmented Dickey-Fuller test on the series stored in
# the second column of df.
# NOTE(review): the positional lookup df.columns[1] is expected to be the
# 'Close, GOOGL' column -- confirm the column order before relying on it.
close_series = df[df.columns[1]]
adf_result = adfuller(close_series)

# Positions used below: [0] test statistic, [1] p-value, [4] critical values.
adf_stat, adf_pvalue = adf_result[0], adf_result[1]
print(f"ADF Statistic: {adf_stat:.6f}")
print(f"p-value: {adf_pvalue:.6f}")
print("Critical Values:")
for level, threshold in adf_result[4].items():
    print(f"   {level}: {threshold:.3f}")



In [None]:
# Additive seasonal decomposition of the 'Close' data with a cycle length of
# 30 observations, followed by the standard decomposition plot.
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

decomposition = seasonal_decompose(
    df['Close'],
    model='additive',
    period=30,
)

# Draw the decomposition panels, then enlarge the figure before tightening
# the layout.
fig = decomposition.plot()
fig.set_size_inches(10, 8)
fig.tight_layout()
plt.show()



In [None]:
from statsmodels.tsa.stattools import adfuller

# Fallback for a fresh session: if no global df exists yet, download the
# GOOGL data for the last 739 days.
if 'df' not in globals():
    import yfinance as yf
    import datetime as dt
    from datetime import date, timedelta
    today = date.today()
    end_date = d1 = today.strftime("%Y-%m-%d")
    d2 = date.today() - timedelta(days=739)
    start_date = d2.strftime("%Y-%m-%d")
    ticker = "GOOGL"
    df = yf.download(ticker, start_date, end_date, progress=False)

# Likewise, compute the decomposition only when it is not already defined.
if 'decomposition' not in globals():
    from statsmodels.tsa.seasonal import seasonal_decompose
    decomposition = seasonal_decompose(df['Close'], period=30, model='additive')

# Drop the missing values from the residual component before testing it.
residuals = decomposition.resid.dropna()

# Augmented Dickey-Fuller test on the residuals.
# Positions used below: [0] test statistic, [1] p-value, [4] critical values.
adf_result_res = adfuller(residuals)
res_stat, res_pvalue = adf_result_res[0], adf_result_res[1]
print(f"ADF Statistic: {res_stat:.6f}")
print(f"p-value: {res_pvalue:.6f}")
print("Critical Values:")
for level, threshold in adf_result_res[4].items():
    print(f"   {level}: {threshold:.3f}")



In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import matplotlib.pyplot as plt

# Two side-by-side panels: autocorrelation on the left, partial
# autocorrelation on the right, both over the first 40 lags of the residuals.
fig, axes = plt.subplots(1, 2, figsize=(16, 4))
acf_ax, pacf_ax = axes

plot_acf(residuals, lags=40, ax=acf_ax)
acf_ax.set_title("ACF of Residuals")

plot_pacf(residuals, lags=40, ax=pacf_ax, method='ywm')
pacf_ax.set_title("PACF of Residuals")

plt.tight_layout()
plt.show()

# Reading the result: look for the lag at which the PACF coefficients drop
# off; that lag is the suggested value for p, the AR order.



In [None]:
import pandas as pd
import pmdarima as pm
import matplotlib.pyplot as plt

# Index df by real timestamps.
# An earlier cell moves the dates into a 'Date' column and resets the index to
# plain integers; running pd.to_datetime on those integers would read them as
# nanoseconds since 1970 and put the whole chart at the epoch. Prefer the
# 'Date' column when it is a single datetime column, and fall back to
# converting the index otherwise.
if not isinstance(df.index, pd.DatetimeIndex):
    date_column = df['Date'] if 'Date' in df.columns else None
    if isinstance(date_column, pd.Series) and pd.api.types.is_datetime64_any_dtype(date_column):
        # rename(None) keeps the index unnamed so it does not share a label
        # with the 'Date' column.
        df.index = pd.DatetimeIndex(date_column).rename(None)
    else:
        df.index = pd.to_datetime(df.index)

# Series to model: taken positionally from the second column.
# NOTE(review): this is expected to be the Close price column -- confirm the
# column order before relying on it.
close_series = df[df.columns[1]]

# Fit a non-seasonal ARIMA(2,1,2). pm.ARIMA is non-seasonal by default
# (seasonal_order=(0, 0, 0, 0)); `seasonal` is not one of its parameters and
# would be forwarded to the underlying SARIMAX model as an unknown keyword.
model = pm.ARIMA(order=(2, 1, 2))
model.fit(close_series)
print(model.summary())

# Forecast the next 30 time steps, with confidence bounds.
n_periods = 30
forecast, conf_int = model.predict(n_periods=n_periods, return_conf_int=True)

# Dates for the forecast axis, starting after the last observed date.
# Business-day frequency is used because each forecast step continues a
# series of trading-day observations; calendar days would place steps on
# weekends.
forecast_index = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=n_periods,
    freq='B',
)

# Plot the history, the forecast, and the band between the interval bounds.
plt.figure(figsize=(10, 5))
plt.plot(df.index, close_series, label='Historical')
plt.plot(forecast_index, forecast, color='red', label='Forecast')
plt.fill_between(forecast_index, conf_int[:, 0], conf_int[:, 1], color='pink', alpha=0.3, label='Confidence Interval')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('ARIMA Forecast (order=(2,1,2))')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()







