In [33]:
# Step 1 – Load Data

# Import libraries
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Load cleaned dataset
data_path = "../data/clean/cleaned_data.csv"
df = pd.read_csv(data_path, parse_dates=["Date"])


In [25]:
# Step 2 – Aggregate Weekly Sales Time Series
weekly_ts = (
    df.groupby("Date")["Weekly_Sales"]
      .sum()
      .sort_index()
)

weekly_ts.head()

Date
2010-01-10    42239875.87
2010-02-04    50423831.26
2010-02-07    48917484.50
2010-03-09    47194257.61
2010-03-12    49909027.88
Name: Weekly_Sales, dtype: float64

In [32]:
# Step 3 – Ensure proper weekly frequency
weekly_ts = weekly_ts.asfreq("W")

# Forward-fill missing values
weekly_ts = weekly_ts.ffill()
weekly_ts.head()

Date
2010-01-10    42239875.87
2010-01-17    42239875.87
2010-01-24    42239875.87
2010-01-31    42239875.87
2010-02-07    48917484.50
Freq: W-SUN, Name: Weekly_Sales, dtype: float64

In [34]:
# Step 4 – Forecast Model (Exponential Smoothing)
model = ExponentialSmoothing(
    weekly_ts,
    trend="add",
    seasonal="add",
    seasonal_periods=52,
    use_boxcox=True,              
    initialization_method="estimated" 
)

fit = model.fit()

In [28]:
# Step 5 – Forecast next steps
forecast_horizon = 12  # number of weeks ahead
forecast = fit.forecast(forecast_horizon)

forecast

2012-12-16    4.790502e+07
2012-12-23    4.793723e+07
2012-12-30    4.792500e+07
2013-01-06    4.743805e+07
2013-01-13    4.792794e+07
2013-01-20    4.809474e+07
2013-01-27    4.790537e+07
2013-02-03    4.855795e+07
2013-02-10    4.833410e+07
2013-02-17    4.822796e+07
2013-02-24    4.807824e+07
2013-03-03    4.709633e+07
Freq: W-SUN, dtype: float64

In [29]:
# Step 6 – Confidence Interval (95%)
residuals = fit.resid
sigma = residuals.std()

lower = forecast - 1.96 * sigma
upper = forecast + 1.96 * sigma

In [30]:
# Step 7 – Save results for Power BI
forecast_df = pd.DataFrame({
    "Date": forecast.index,
    "Forecast_Weekly_Sales": forecast.values,
    "Forecast_Lower": lower.values,
    "Forecast_Upper": upper.values
})

forecast_df.head()

Unnamed: 0,Date,Forecast_Weekly_Sales,Forecast_Lower,Forecast_Upper
0,2012-12-16,47905020.0,45420800.0,50389250.0
1,2012-12-23,47937230.0,45453010.0,50421460.0
2,2012-12-30,47925000.0,45440770.0,50409230.0
3,2013-01-06,47438050.0,44953820.0,49922280.0
4,2013-01-13,47927940.0,45443710.0,50412170.0


In [31]:
output_path = "../data/clean/forecast_results.csv"
forecast_df.to_csv(output_path, index=False)

output_path

'../data/clean/forecast_results.csv'