# Converting Script-Based Time Series Project to a Jupyter Workflow

This notebook demonstrates how to transform the existing `time_series_forecasting` Python script project into an interactive, reproducible notebook workflow while preserving production-quality structure.

## 1. Assess Current Project Structure (Transitioning to Notebook-Only)
We are removing standalone scripts (`src/`) and consolidating logic here. After consolidation, you can delete the `src/` directory and keep only:
```
configs/
notebooks/
README.md
requirements.txt
```
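All data handling, modeling, and evaluation will run from this notebook. On first run it also creates a `data/` directory (with `raw/` and `processed/` subfolders) at `../data`, relative to the notebook's working directory.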

## 2. Environment Setup (Notebook-Only)
Install dependencies once from a shell, then work entirely in this notebook. For reproducibility, pin exact versions in `requirements.txt`.
```
python -m pip install -r requirements.txt
```
(No separate package or module install step is necessary now.)

## 3. Notebook Skeleton
We'll implement:
1. Data generation/loading
2. Model helpers (Moving Average, ARIMA, Prophet if available)
3. Rolling-origin evaluation
4. Metrics (sMAPE, MAE)
5. Visualization & results table
6. (Optional) Hyperparameter tweak cell


In [None]:
# Global imports & config (Notebook-Only Implementation)
import numpy as np, pandas as pd, warnings, math, json
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Any
warnings.filterwarnings('ignore')

# Fixed seed so a fresh kernel run draws the same random numbers.
SEED = 42
# Shared NumPy Generator; generate_series() draws its noise from it, so the
# values it yields depend on how many draws have already been made.
rng = np.random.default_rng(SEED)

print('Notebook-only time series pipeline initialized.')

In [None]:
# 4. Data Generation / Loading Cell
# On-disk layout, resolved relative to the notebook's working directory:
#   ../data/raw        raw copy of the generated series
#   ../data/processed  the series file this notebook reads back
DATA_DIR = Path('../data')
RAW, PROC = DATA_DIR.joinpath('raw'), DATA_DIR.joinpath('processed')
# Create both folders up front; existing folders are left untouched.
PROC.mkdir(parents=True, exist_ok=True)
RAW.mkdir(parents=True, exist_ok=True)

def generate_series(periods=120, generator=None):
    """Build a synthetic monthly series: linear trend + yearly cycle + noise.

    The value at step ``t`` is ``100 + 2*t`` (trend) plus
    ``10 + 15*sin(2*pi*t/12)`` (12-step seasonal cycle) plus Gaussian noise
    with standard deviation 8.

    Args:
        periods: Number of month-end observations, starting at 2010-01-31.
        generator: Optional ``numpy.random.Generator`` used for the noise.
            When omitted, the notebook-level ``rng`` is used, so the output
            then depends on how many draws that shared generator has
            already served.

    Returns:
        DataFrame with columns ``ds`` (month-end timestamps) and ``y``.
    """
    noise_source = rng if generator is None else generator
    # Use an offset object rather than the 'M' alias: the alias is deprecated
    # in pandas 2.2+, while its replacement 'ME' is unknown to older releases.
    idx = pd.date_range('2010-01-01', periods=periods, freq=pd.offsets.MonthEnd())
    t = np.arange(periods)
    seasonal = 10 + 15 * np.sin(2 * np.pi * t / 12)
    trend = 100 + 2 * t
    noise = noise_source.normal(0, 8, size=periods)
    return pd.DataFrame({'ds': idx, 'y': trend + seasonal + noise})

# Reuse the cached processed series when it exists; otherwise synthesize a
# new one and write both a raw copy and the processed copy.
series_path = PROC / 'series.csv'
if not series_path.exists():
    df = generate_series()
    df.to_csv(RAW / 'synthetic_series.csv', index=False)
    df.to_csv(series_path, index=False)
else:
    df = pd.read_csv(series_path)
    # CSV round-trips timestamps as text, so restore the datetime dtype.
    df['ds'] = pd.to_datetime(df['ds'])

print('Dataset shape:', df.shape)
df.head()

In [None]:
# 5. Model Helper Implementations (Moving Average, ARIMA, Prophet optional)
from statsmodels.tsa.arima.model import ARIMA
try:
    from prophet import Prophet
except Exception:
    Prophet = None

class MovingAverageModel:
    """Naive forecaster: each step is the mean of the last ``window`` values.

    Forecasts are recursive: every prediction is appended to a working copy
    of the history and therefore feeds the following steps.
    """

    def __init__(self, window=3):
        """Store the averaging window.

        Raises:
            ValueError: if ``window`` is smaller than 1.
        """
        # window=0 would silently average the *whole* history (hist[-0:] is
        # the full list) and a negative window would slice from the front.
        if window < 1:
            raise ValueError(f'window must be a positive integer, got {window!r}')
        self.window = window
        self.history = []

    def fit(self, y):
        """Remember the training values (any iterable of numbers); returns self."""
        self.history = list(y)
        return self

    def predict(self, horizon):
        """Return ``horizon`` recursive forecasts as a 1-D NumPy array.

        With no history (``fit`` not called, or called with no values) every
        forecast is NaN.
        """
        if len(self.history) == 0:
            return np.full(max(horizon, 0), np.nan)
        # Only the trailing window can influence any forecast, so work on
        # that tail instead of copying the full history.
        recent = list(self.history[-self.window:])
        preds = []
        for _ in range(horizon):
            step = np.mean(recent[-self.window:])
            preds.append(step)
            recent.append(step)
        return np.array(preds)

class ARIMAModel:
    """Thin wrapper giving statsmodels ``ARIMA`` a fit/predict interface."""

    def __init__(self, order=(1,1,1)):
        """Store the ARIMA ``order`` tuple passed to statsmodels at fit time."""
        self.order = order
        # Fitted statsmodels results object; populated by fit().
        self._model = None

    def fit(self, y):
        """Fit ``ARIMA(y, order=self.order)`` and return self."""
        self._model = ARIMA(y, order=self.order).fit()
        return self

    def predict(self, horizon):
        """Return the next ``horizon`` out-of-sample forecasts as a NumPy array.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self._model is None:
            raise RuntimeError('ARIMAModel.predict() called before fit()')
        # forecast() gives a pandas Series for pandas input but a plain
        # ndarray for list/ndarray input; `.values` exists only on the
        # former, np.asarray handles both.
        return np.asarray(self._model.forecast(steps=horizon))

class ProphetWrapper:
    """Optional Prophet forecaster with the notebook's fit/predict interface.

    When the ``prophet`` import failed (``Prophet is None``) the wrapper is
    disabled: ``fit`` does nothing and ``predict`` returns NaNs, so the rest
    of the pipeline still runs.
    """

    def __init__(self, yearly_seasonality=True):
        """Record the seasonality setting and build an initial model if possible."""
        self.enabled = Prophet is not None
        self.yearly_seasonality = yearly_seasonality
        if self.enabled:
            self.m = self._new_model()

    def _new_model(self):
        """Create an unfitted Prophet with weekly/daily seasonality switched off."""
        return Prophet(
            yearly_seasonality=self.yearly_seasonality,
            weekly_seasonality=False,
            daily_seasonality=False,
        )

    def fit(self, df):
        """Fit on a DataFrame holding ``ds`` and ``y`` columns; returns self."""
        if not self.enabled:
            return self
        # A Prophet object can only be fitted once, so refitting the same
        # instance (as rolling evaluation does on every fold) fails.
        # Build a fresh model for each fit instead.
        self.m = self._new_model()
        self.m.fit(df[['ds', 'y']])
        return self

    def predict(self, horizon):
        """Return ``yhat`` for the ``horizon`` month-end steps after the training data.

        Returns an all-NaN array of length ``horizon`` when Prophet is
        unavailable.
        """
        if not self.enabled:
            return np.full(horizon, np.nan)
        # Offset object instead of the 'M' alias, which is deprecated in
        # pandas 2.2+; the frequency is handed on to pandas' date_range.
        future = self.m.make_future_dataframe(periods=horizon, freq=pd.offsets.MonthEnd())
        fcst = self.m.predict(future)
        # The future frame also contains the history; keep only the new steps.
        return fcst.tail(horizon)['yhat'].values

# Report whether the optional prophet import succeeded; when it did not,
# ProphetWrapper.predict returns NaNs instead of forecasts.
print('Prophet available:', Prophet is not None)

In [None]:
# 6. Metrics (sMAPE, MAE)
import numpy as np

def smape(y_true, y_pred):
    """Symmetric MAPE on a 0-100 scale.

    Computes ``100 * mean(|y_true - y_pred| / (|y_true| + |y_pred|))``. The
    denominator is the plain sum (not the half-sum), so the result is bounded
    by 100 rather than 200. Points where both values are zero contribute 0.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of forecasts, broadcastable against ``y_true``.

    Returns:
        The metric as a Python float (NaN if any input is NaN).
    """
    # Work in float: with integer inputs the denominator would be an integer
    # array, the 1e-8 guard would truncate to 0 and 0/0 would produce NaN.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.abs(y_true) + np.abs(y_pred)
    # A zero denominator implies a zero numerator, so any positive stand-in
    # makes that term exactly 0.
    denom = np.where(denom == 0, 1e-8, denom)
    return float(100 * np.mean(np.abs(y_true - y_pred) / denom))

def mae(y_true, y_pred):
    """Return the mean absolute error between two array-likes as a float."""
    observed = np.array(y_true)
    forecast = np.array(y_pred)
    abs_errors = np.abs(observed - forecast)
    return float(abs_errors.mean())

# Smoke check: identical inputs should give sMAPE 0, and
# mae([1,2],[1,3]) should be 0.5.
print('Metric test:', smape([1,2,3],[1,2,3]), mae([1,2],[1,3]))

In [None]:
# 7. Rolling-Origin Evaluation

def rolling_origin_eval(df, models, horizon=12, initial_train=60):
    """Score each model with an expanding-window, rolling-origin backtest.

    The first fold trains on the first ``initial_train`` rows and forecasts
    the next ``horizon`` rows; every later fold moves the origin forward by
    ``horizon``, so test windows never overlap. Trailing rows that cannot
    fill a complete window are not scored. Forecasts from all folds are
    pooled before the metrics are computed.

    Args:
        df: DataFrame with ``ds`` and ``y`` columns. Rows are used in their
            existing order (no sorting is done here).
        models: Mapping of label -> object exposing ``fit(data)`` and
            ``predict(horizon)``. ``ProphetWrapper`` instances (and, for
            backward compatibility, anything registered under the key
            ``'prophet'``) are fitted on the ``ds``/``y`` frame; every other
            model is fitted on the ``y`` Series alone.
        horizon: Forecast length per fold, also the step between origins.
        initial_train: Number of rows in the first training window.

    Returns:
        List of ``{'model', 'smape', 'mae'}`` dicts, one per model, in the
        iteration order of ``models``.
    """
    y = df['y'].reset_index(drop=True)
    origins = range(initial_train, len(y) - horizon + 1, horizon)
    out = []
    for name, model_obj in models.items():
        # Dispatch on the model's type, not only its label: a ProphetWrapper
        # stored under another key would otherwise receive a bare Series and
        # fail when it selects the 'ds'/'y' columns.
        needs_frame = name == 'prophet' or isinstance(model_obj, ProphetWrapper)
        preds_all, trues_all = [], []
        for start in origins:
            if needs_frame:
                model_obj.fit(df.iloc[:start][['ds', 'y']])
            else:
                model_obj.fit(y[:start])
            preds_all.extend(model_obj.predict(horizon))
            trues_all.extend(y[start:start + horizon].values)
        out.append({
            'model': name,
            'smape': smape(trues_all, preds_all),
            'mae': mae(trues_all, preds_all),
        })
    return out

# Baseline candidates; each keyword becomes the label shown in the results.
models = dict(
    moving_average=MovingAverageModel(window=3),
    arima=ARIMAModel(order=(1, 1, 1)),
    prophet=ProphetWrapper(yearly_seasonality=True),
)
# Backtest all candidates with a 60-row initial window and 12-step folds.
results = rolling_origin_eval(df, models, initial_train=60, horizon=12)
results

In [None]:
# 8. Results Table & Visualization
import pandas as pd
import matplotlib.pyplot as plt

# Rank models by sMAPE, best (lowest) first; rows with NaN metrics sort last.
res_df = pd.DataFrame(results).sort_values('smape')
print(res_df)

# Plot the full series for visual context.
plt.figure(figsize=(8,4))
plt.plot(df['ds'], df['y'], label='Actual', color='black', linewidth=1.2)
plt.title('Time Series (Synthetic)')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(alpha=0.3)
# The line carries a label, but it is only rendered once a legend is drawn.
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# 9. Optional Hyperparameter Tweaks (re-run cell to iterate)
# Adjust moving average window or ARIMA order; Prophet yearly seasonality toggle
# Moving-average window to try in this iteration.
NEW_WINDOW = 5
# Same three candidates as the baseline run, with adjusted settings.
models_tuned = dict(
    moving_average=MovingAverageModel(window=NEW_WINDOW),
    arima=ARIMAModel(order=(2, 1, 2)),
    prophet=ProphetWrapper(yearly_seasonality=True),
)
results_tuned = rolling_origin_eval(df, models_tuned, initial_train=60, horizon=12)
print(pd.DataFrame(results_tuned))