In [1]:
# Install seaborn into the running kernel's environment (it is imported below for plotting).
# NOTE(review): statsmodels is also imported in this notebook but is not installed here —
# confirm it is already available in the environment, and consider pinning versions.
%pip install seaborn

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX

Matplotlib is building the font cache; this may take a moment.


In [3]:
# Read the cleaned transaction records from disk.
# The path is relative, so it resolves against the notebook's working directory.
transactions_csv_path = "cleaned_transactions_data.csv"
df = pd.read_csv(transactions_csv_path)

In [4]:
# Parse Transaction_Date into datetime64 values so the `.dt` accessor
# (used for the monthly grouping later) is available on the column.
parsed_transaction_dates = pd.to_datetime(df["Transaction_Date"])
df["Transaction_Date"] = parsed_transaction_dates

In [8]:
# Aggregate transactions into calendar months so trends can be forecast:
# one row per month, with Total_Cost summed over that month's transactions.
month_of_sale = df["Transaction_Date"].dt.to_period("M")
df_monthly_sales = (
    df.groupby(month_of_sale)[["Total_Cost"]]
    .sum()
    .reset_index()
)

In [9]:
# Convert the monthly Period column (not the index) to timestamps for easier
# plotting; by default each period maps to its start, i.e. the first day of the month.
# The dtype guard keeps this cell safe to re-run: once the column is already
# datetime64, `.dt.to_timestamp()` is no longer available and would raise
# AttributeError.
if isinstance(df_monthly_sales["Transaction_Date"].dtype, pd.PeriodDtype):
    df_monthly_sales["Transaction_Date"] = df_monthly_sales["Transaction_Date"].dt.to_timestamp()

In [10]:
# Smooth the monthly totals with a trailing 3-month mean to damp month-to-month noise.
# min_periods=1 means the first two months are averaged over the months available
# so far instead of being left as NaN.
smoothing_window_months = 3
trailing_sales_window = df_monthly_sales["Total_Cost"].rolling(
    window=smoothing_window_months,
    min_periods=1,
)
df_monthly_sales["Smoothed_Sales"] = trailing_sales_window.mean()

In [14]:
# Specify an ARIMA(2,1,2) model on the smoothed monthly sales. SARIMAX is used
# without a seasonal component, and the stationarity / invertibility constraints
# on the parameters are switched off.
arima_order = (2, 1, 2)  # (p, d, q)
arima_model_smooth = SARIMAX(
    endog=df_monthly_sales["Smoothed_Sales"],
    order=arima_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)

# Estimate the model parameters.
arima_results_smooth = arima_model_smooth.fit()

In [16]:
# Predict sales for the next 6 months.
forecast_steps = 6

# Build the forecast dates with month-START stamps ("MS") so they follow the same
# convention as the historical Transaction_Date values (Period -> to_timestamp()
# yields the first day of each month). A month-END ("ME") range would place the
# first forecast almost two months after the last observation on the plot, and
# the "ME" alias is only recognised by pandas >= 2.2.
last_observed_month = df_monthly_sales["Transaction_Date"].iloc[-1]
forecast_index = pd.date_range(
    start=last_observed_month + pd.offsets.MonthBegin(1),  # first month after the data
    periods=forecast_steps,
    freq="MS",
)

# Out-of-sample forecast results for the same number of steps.
forecast_arima_smooth = arima_results_smooth.get_forecast(steps=forecast_steps)

In [17]:
# Get the predicted sales values: pull the point forecasts (predicted mean) out of
# the forecast results object, one value per forecast step.
# NOTE(review): the model was fit on a column of a reset-index frame, so this series
# presumably carries an integer index rather than dates — confirm before aligning it
# with other date-indexed data.
forecast_mean_arima_smooth = forecast_arima_smooth.predicted_mean

In [None]:
# Apply the seaborn grid style before creating the figure so the axes pick it up.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(12, 6))

# Historical (smoothed) sales: solid black line.
sns.lineplot(
    x=df_monthly_sales["Transaction_Date"],
    y=df_monthly_sales["Smoothed_Sales"],
    marker="o",
    linestyle="-",
    color="black",
    label="Smoothed Sales",
    ax=ax,
)

# Forecasted sales: dashed blue line drawn on the same axes.
sns.lineplot(
    x=forecast_index,
    y=forecast_mean_arima_smooth,
    marker="o",
    linestyle="--",
    color="blue",
    label="Smoothed ARIMA Forecast",
    ax=ax,
)

# Explicit labels override the ones seaborn infers from the series names.
ax.set_xlabel("Date")
ax.set_ylabel("Total Sales ($)")
ax.set_title("6-Month Sales Forecast using Smoothed ARIMA")
ax.legend()
plt.show()