In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA

# Blanket-silence warnings for the rest of the run (this also hides any
# warnings raised later by the modelling libraries).
warnings.filterwarnings("ignore")

# Synthetic weekly sales: 156 weekly timestamps starting 2020-01-05, with
# normally distributed noise around 2000 (std 300). Seeded for repeatability.
np.random.seed(42)
weeks = pd.date_range(start='2020-01-05', periods=156, freq='W')
sales = np.random.normal(loc=2000, scale=300, size=len(weeks))

# Holiday weeks receive a flat +1000 uplift. Dates that do not fall on the
# weekly grid simply match nothing and are ignored.
holiday_weeks = ['2020-11-08', '2020-12-20', '2021-11-07', '2021-12-19', '2022-10-23', '2022-12-18']
is_holiday_week = weeks.isin(pd.to_datetime(holiday_weeks))
sales[is_holiday_week] += 1000

# Assemble the frame, indexed by week
df = pd.DataFrame({'Week': weeks, 'Sales': sales}).set_index('Week')

# Line chart of the simulated weekly sales, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x=df.index, y='Sales', ax=ax)
ax.set_title('Weekly Sales Over Time')
ax.set_xlabel('Week')
ax.set_ylabel('Sales')
ax.grid(True)
plt.show()

# ADF Test
def adf_test(series, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the verdict.

    Missing values are dropped before testing, so a differenced series with
    a leading NaN can be passed in directly.

    Args:
        series: pandas Series of observations to test.
        alpha: Significance level. The series is reported as stationary when
            the test's p-value is strictly below this threshold. Defaults to
            0.05, the threshold that was previously hard-coded.

    Returns:
        None. The test statistic, p-value and verdict are printed.
    """
    # adfuller returns a tuple; the first two entries are the test statistic
    # and the p-value.
    adf_stat, p_value = adfuller(series.dropna())[:2]
    print("ADF Statistic:", adf_stat)
    print("p-value:", p_value)
    if p_value < alpha:
        print("=> Series is Stationary")
    else:
        print("=> Series is NOT Stationary")

# Differencing: derive the first-differenced series up front (its first
# value is NaN by construction).
df['Sales_diff'] = df['Sales'].diff()

# Check stationarity of the raw series first, then of its first difference
for heading, column in (
    ("Original Series ADF Test:", 'Sales'),
    ("\nAfter 1st Differencing ADF Test:", 'Sales_diff'),
):
    print(heading)
    adf_test(df[column])

# Keep the complete Sales history for modelling. The only NaN in the frame is
# the first row of the helper column Sales_diff, and ARIMA with d=1 differences
# the series itself, so fitting on the NaN-trimmed frame would needlessly
# discard the first real sales observation.
sales_history = df['Sales'].copy()

# Drop the NaN row so df itself ends up NaN-free, exactly as before
df.dropna(inplace=True)

# Fit ARIMA model (manual order selection: ARIMA(1,1,1))
model = ARIMA(sales_history, order=(1, 1, 1))
model_fit = model.fit()

# Forecast 12 weeks, dated on the weekly grid right after the last observation
forecast_steps = 12
future_weeks = pd.date_range(start=df.index[-1] + pd.Timedelta(weeks=1), periods=forecast_steps, freq='W')
forecast = model_fit.forecast(steps=forecast_steps)

# Prepare forecast df. The forecast values are taken positionally so the frame
# does not depend on whatever index the model attached to its output.
forecast_df = pd.DataFrame({'Week': future_weeks, 'Forecasted_Sales': np.asarray(forecast)})
forecast_df.set_index('Week', inplace=True)

# Draw the historical series and the forecast on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Sales'], label='Historical Sales')
ax.plot(forecast_df['Forecasted_Sales'], label='Forecasted Sales', color='red')
ax.set_title('Sales Forecast for Next 12 Weeks')
ax.set_xlabel('Week')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# Display forecasted sales
print(forecast_df)
