In [6]:
# 1. Dependencies
import pandas as pd
import numpy as np
import math
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 2. Load Dataset
# NOTE: this is a machine-specific absolute path; adjust it when running elsewhere.
file_path = 'C:\\Users\\loydt\\Downloads\\Projects\\Superstore Sales Dataset.xlsx'
data = pd.read_excel(file_path)

# Convert Sales column to numeric; cells that cannot be parsed become NaN
data['Sales'] = pd.to_numeric(data['Sales'], errors='coerce')
series = data['Sales'].dropna()  # Ensure no missing values for analysis

# Log-transform the series. log() is undefined for values <= 0, so those rows
# are removed *before* the transform. Mapping them to None instead would
# re-introduce NaNs after the dropna() above, and the downstream tests and
# model fits are then handed missing data.
positive_mask = series > 0
dropped_count = int((~positive_mask).sum())
if dropped_count:
    print(f"Dropped {dropped_count} non-positive Sales value(s) before log transform.")
series = np.log(series[positive_mask])

# 3. Stationarity tests (ADF + KPSS) and ACF/PACF diagnostics
def test_stationarity_and_autocorrelation(series):
    """Run ADF and KPSS on ``series``, plot its ACF/PACF, and report stationarity.

    Prints both test statistics with their p-values, shows a two-panel
    bar chart of the first 20 autocorrelation and partial-autocorrelation
    lags (dark Plotly template), and prints the verdict.

    Args:
        series: The observations to test; passed unchanged to ``adfuller``,
            ``kpss``, ``acf`` and ``pacf``.

    Returns:
        bool: ``True`` only when the ADF p-value is below 0.05 *and* the
        KPSS p-value is above 0.05; ``False`` otherwise.
    """
    # Augmented Dickey-Fuller: only the statistic and p-value are used.
    adf_stat, adf_pvalue = adfuller(series)[:2]
    print(f"ADF Statistic: {adf_stat}")
    print(f"p-value: {adf_pvalue}")

    # KPSS with a constant-only regression.
    kpss_stat, kpss_pvalue = kpss(series, regression='c')[:2]
    print(f"KPSS Statistic: {kpss_stat}")
    print(f"p-value: {kpss_pvalue}")

    # One subplot column per correlogram: ACF on the left, PACF on the right.
    fig = make_subplots(rows=1, cols=2, subplot_titles=("ACF", "PACF"))
    correlograms = (sm.tsa.stattools.acf, sm.tsa.stattools.pacf)
    for column, correlation_fn in enumerate(correlograms, start=1):
        values = correlation_fn(series, nlags=20)
        lags = list(range(len(values)))
        fig.add_trace(go.Bar(x=lags, y=values), row=1, col=column)

    fig.update_layout(title_text="ACF and PACF Plots", template="plotly_dark")
    fig.show()

    # Both tests have to agree before the series is called stationary.
    is_stationary = bool(adf_pvalue < 0.05 and kpss_pvalue > 0.05)
    verdict = "Series is stationary." if is_stationary else "Series is non-stationary."
    print(verdict)
    return is_stationary

# 4. Process the time series data
def _fit_ar_then_ma(series):
    """Fit AR(1) on ``series``; fall back to ARMA(1, 1) if the residual check fails.

    Returns:
        The fitted results object of the first model whose residuals pass
        ``test_stationarity_and_autocorrelation``, or ``None`` when neither
        the AR(1) nor the ARMA(1, 1) residuals pass.
    """
    model_ar = sm.tsa.ARIMA(series, order=(1, 0, 0)).fit()
    print(model_ar.summary())

    # NOTE(review): the residual check below reuses the ADF/KPSS stationarity
    # test as a proxy for "no autocorrelation left". A Ljung-Box test
    # (statsmodels acorr_ljungbox) is the usual tool for that question;
    # consider switching. Behaviour is kept as-is here.
    if test_stationarity_and_autocorrelation(model_ar.resid):
        print("No autocorrelation in AR model residuals.")
        # The AR(1) model is adequate, so hand it back to the caller
        # (previously this path fell through and returned None).
        return model_ar

    # AR(1) residuals failed the check: add an MA(1) term.
    model_ma = sm.tsa.ARIMA(series, order=(1, 0, 1)).fit()
    print(model_ma.summary())

    if test_stationarity_and_autocorrelation(model_ma.resid):
        print("Model fit successful with MA component.")
        return model_ma

    print("Autocorrelation still present after MA.")
    return None


def arima_model(series):
    """Select and fit a simple AR(1) / ARMA(1, 1) model for ``series``.

    Workflow:
      1. Test ``series`` for stationarity. If the test fails, difference it
         once and test again; give up if it is still non-stationary.
      2. Fit AR(1) on the (possibly differenced) series. If its residuals
         pass the check, return that model.
      3. Otherwise fit ARMA(1, 1) and return it if its residuals pass.

    Note that when differencing is applied, the model is fitted on the
    differenced values with ``d=0``, so the fitted model describes the
    differences, not the original levels.

    Args:
        series: pandas Series of observations (must support ``.diff()``).

    Returns:
        The fitted statsmodels results object for the accepted model, or
        ``None`` if the series is still non-stationary after one difference
        or if the ARMA(1, 1) residuals also fail the check.
    """
    working_series = series
    if not test_stationarity_and_autocorrelation(working_series):
        # If non-stationary, apply differencing (Integrated Component)
        working_series = series.diff().dropna()
        print("Applied differencing.")

        # A single difference is the only remedy attempted.
        if not test_stationarity_and_autocorrelation(working_series):
            print("Differenced series is still non-stationary.")
            return None

    return _fit_ar_then_ma(working_series)

# Execute the ARIMA model process on the log-transformed Sales series.
# The result is kept for later inspection; it may be None, because
# arima_model does not return a fitted model on every path.
resulting_model = arima_model(series)  # Using the processed series


ADF Statistic: -68.4498114214115
p-value: 0.0
KPSS Statistic: 0.07649061258135344
p-value: 0.1



The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.




Series is stationary.
                               SARIMAX Results                                
Dep. Variable:                  Sales   No. Observations:                 9800
Model:                 ARIMA(1, 0, 0)   Log Likelihood              -18788.007
Date:                Wed, 30 Oct 2024   AIC                          37582.014
Time:                        20:47:10   BIC                          37603.584
Sample:                             0   HQIC                         37589.323
                               - 9800                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.1111      0.017    240.792      0.000       4.078       4.145
ar.L1          0.0142      0.010      1.415      0.157      -0.005       0.034
sigma2         2.7085      0.0


The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.




Series is stationary.
No autocorrelation in AR model residuals.
