# Problem Statement
The objective is to forecast daily sales for Rossmann stores using historical sales data.
We compare traditional time-series models (ARIMA / Prophet) with a machine learning model (XGBoost).


## Setup & Dataset Upload

In [None]:
# Upload train.csv when running in Google Colab.
# Outside Colab the import fails; in that case skip the upload and expect
# train.csv to already be present in the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Bind the return value instead of leaving the call as the cell's last
    # expression: a bare `files.upload()` would echo whatever it returns
    # (presumably the uploaded file contents - confirm) into the cell output.
    uploaded = files.upload()

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_absolute_error, mean_squared_error

## Load & Prepare Data

In [None]:
# Read the raw training data.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns (and the
# accompanying DtypeWarning) when a column mixes numeric and string values.
df = pd.read_csv('train.csv', low_memory=False)
df.head()

### Convert Date & sort

In [None]:
# Parse the Date column into datetimes, then order the rows chronologically
# so that positional slicing later corresponds to time order.
df = (
    df
    .assign(Date=pd.to_datetime(df['Date']))
    .sort_values(by='Date')
)

### Filter one store (forecasting is easier and clearer on a single store)

In [None]:
# Keep only store 1 and the two columns the models need.
# A single .loc selection followed by .copy() yields an independent frame, so
# adding feature columns to store_df later does not raise
# SettingWithCopyWarning or depend on view-vs-copy semantics.
store_df = df.loc[df['Store'] == 1, ['Date', 'Sales']].copy()

## Train–Test Split

In [None]:
# Hold out the last 90 rows as the test set; everything before them is
# training data. store_df is expected to be in chronological order here.
train, test = store_df.head(-90), store_df.tail(90)

# 🔵 MODEL 1: ARIMA

### ARIMA Model

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Index the training series by date before fitting. train['Sales'] on its own
# carries the original CSV row labels (a non-sequential integer index after
# sorting by Date), which statsmodels cannot use as a time index; the forecast
# would then come back with an index unrelated to the test dates.
arima_series = train.set_index('Date')['Sales']

# ARIMA(p=5, d=1, q=0): five autoregressive lags on the first-differenced series.
arima_model = ARIMA(arima_series, order=(5,1,0))
arima_fit = arima_model.fit()

# Forecast one step per held-out row.
arima_pred = arima_fit.forecast(steps=len(test))

### ARIMA Metrics

In [None]:
# Score the ARIMA forecast against the held-out sales: MAE and RMSE.
mae_arima = mean_absolute_error(y_true=test['Sales'], y_pred=arima_pred)
rmse_arima = np.sqrt(
    mean_squared_error(y_true=test['Sales'], y_pred=arima_pred)
)

# 🟢 MODEL 2: Prophet

### Install & Prepare Prophet

In [None]:
!pip install prophet

In [None]:
from prophet import Prophet

#### Prepare data

In [None]:
# Prophet expects the date column to be named 'ds' and the target 'y'.
prophet_train = train.rename(columns=dict(Date='ds', Sales='y'))

#### Train model

In [None]:
# Create a Prophet model with its default settings and fit it on the
# training frame (columns 'ds' and 'y').
prophet = Prophet()
prophet.fit(df=prophet_train)

#### Forecast

In [None]:
# Build the prediction frame from the training dates followed by the actual
# hold-out dates, rather than "the next 90 calendar days". The trailing
# len(test) rows of the forecast then line up with the test rows even if the
# store has missing days, and the history rows are still included.
test_dates = test[['Date']].rename(columns={'Date': 'ds'})
future = pd.concat([prophet_train[['ds']], test_dates], ignore_index=True)
forecast = prophet.predict(future)

#### Extract predictions

In [None]:
# The last 90 forecast rows are the hold-out period; keep only the point
# prediction column.
prophet_pred = forecast['yhat'].tail(90)

### Prophet Metrics

In [None]:
# Score the Prophet forecast against the held-out sales: MAE and RMSE.
mae_prophet = mean_absolute_error(y_true=test['Sales'], y_pred=prophet_pred)
rmse_prophet = np.sqrt(
    mean_squared_error(y_true=test['Sales'], y_pred=prophet_pred)
)

## 🔴 MODEL 3: XGBoost (ML Approach)

### Feature Engineering

In [None]:
# Derive calendar features from the date for the tree model.
# assign() returns a new frame instead of writing columns into store_df in
# place, so this cell is idempotent and does not trigger
# SettingWithCopyWarning when store_df was derived from a slice of df.
store_df = store_df.assign(
    day=store_df['Date'].dt.day,
    month=store_df['Date'].dt.month,
    year=store_df['Date'].dt.year,
    weekday=store_df['Date'].dt.weekday,  # Monday=0 ... Sunday=6
)

#### Split again:

In [None]:
# Features are every column except the target and the raw date.
y = store_df['Sales']
X = store_df.drop(columns=['Sales', 'Date'])

# Same chronological hold-out as before: the last 90 rows are the test set.
X_train = X.iloc[:-90]
X_test = X.iloc[-90:]
y_train = y.iloc[:-90]
y_test = y.iloc[-90:]

### Train XGBoost

In [None]:
!pip install xgboost

In [None]:
from xgboost import XGBRegressor

# Gradient-boosted regressor: 200 trees, learning rate 0.05, fixed seed for
# reproducibility. Fit on the training rows only.
xgb_model = XGBRegressor(n_estimators=200, learning_rate=0.05, random_state=42)
xgb_model.fit(X=X_train, y=y_train)

# Predict sales for the held-out rows.
xgb_pred = xgb_model.predict(X_test)

### XGBoost Metrics

In [None]:
# Score the XGBoost predictions against the held-out sales: MAE and RMSE.
mae_xgb = mean_absolute_error(y_true=y_test, y_pred=xgb_pred)
rmse_xgb = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=xgb_pred)
)

## Model Comparison

In [None]:
# Collect the hold-out errors of all three models into one comparison table,
# one row per model.
results = pd.DataFrame(
    [
        {'Model': 'ARIMA', 'MAE': mae_arima, 'RMSE': rmse_arima},
        {'Model': 'Prophet', 'MAE': mae_prophet, 'RMSE': rmse_prophet},
        {'Model': 'XGBoost', 'MAE': mae_xgb, 'RMSE': rmse_xgb},
    ]
)

results

# Final Insights

## Model Comparison & Insights

- ARIMA captures short-term trends but struggles with complex patterns.
- Prophet handles seasonality better than ARIMA.
- XGBoost achieved the lowest error by leveraging engineered time-based features.
- Machine learning models can outperform classical time-series models when sufficiently informative features are available.