# Multi-Model Forecasting Pipeline for 6 Locations

This notebook implements a comprehensive forecasting system that trains and evaluates multiple model families on each location's sales data, using the last 12 months as test data while preventing QTY leakage through proper feature engineering.


In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Data processing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Traditional ML models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

# Time series models
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from prophet import Prophet

# Deep learning
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Nixtla client for TimeGPT
from nixtla import NixtlaClient

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
tf.random.set_seed(42)

print("Libraries imported successfully!")


Libraries imported successfully!


In [2]:
# Load the consolidated data
df = pd.read_csv('../outputs/data/sales_weather_merged_filled_consolidated.csv')
print(f"Data shape: {df.shape}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
print(f"Locations: {df['Location'].unique()}")
print(f"Columns: {df.columns.tolist()}")

# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(['Location', 'date']).reset_index(drop=True)

df.head()


Data shape: (286, 18)
Date range: 2019-04-01 to 2024-03-01
Locations: ['Bangalore' 'Chennai' 'Cochin' 'Secunderabad' 'Vijaywada' 'Sricity']
Columns: ['Location', 'QTY', 'temp_min', 'temp_max', 'temp_mean', 'dwpt_min', 'dwpt_max', 'dwpt_mean', 'rhum_min', 'rhum_max', 'rhum_mean', 'prcp_min', 'prcp_max', 'prcp_mean', 'wspd_min', 'wspd_max', 'wspd_mean', 'date']


Unnamed: 0,Location,QTY,temp_min,temp_max,temp_mean,dwpt_min,dwpt_max,dwpt_mean,rhum_min,rhum_max,rhum_mean,prcp_min,prcp_max,prcp_mean,wspd_min,wspd_max,wspd_mean,date
0,Bangalore,1678.0,15.0,34.4,25.490457,-2.24,21.82,12.241344,11.2,99.6,49.995161,0.0,1.875,0.009845,1.08,34.48,10.858306,2019-04-01
1,Bangalore,3951.0,19.0,35.25,27.041071,0.775,23.275,15.193029,12.25,100.0,54.193651,0.0,7.766667,0.056111,0.0,36.625,10.347719,2019-05-01
2,Bangalore,4709.0,19.6,34.4,26.270377,10.24,24.42,19.395948,24.8,100.0,69.900511,0.0,8.166667,0.159319,0.0,38.96,13.10158,2019-06-01
3,Bangalore,2418.0,19.6,32.4,24.810296,14.28,24.02,19.992738,38.0,100.0,76.857086,0.0,7.8,0.155787,1.44,41.54,17.832391,2019-07-01
4,Bangalore,2252.0,19.6,30.4,23.654283,16.74,23.26,20.154345,45.4,100.0,82.423583,0.0,7.633333,0.201703,1.44,41.6,18.792103,2019-08-01


In [3]:
def create_time_features(df):
    """
    Create comprehensive time-based features for forecasting
    
    Args:
        df: DataFrame with 'date' column and other features
        
    Returns:
        DataFrame with additional time features
    """
    df = df.copy()
    
    # Extract temporal indicators
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['quarter'] = df['date'].dt.quarter
    df['day_of_year'] = df['date'].dt.dayofyear
    df['week_of_year'] = df['date'].dt.isocalendar().week
    df['is_month_start'] = df['date'].dt.is_month_start.astype(int)
    df['is_month_end'] = df['date'].dt.is_month_end.astype(int)
    df['is_quarter_start'] = df['date'].dt.is_quarter_start.astype(int)
    df['is_quarter_end'] = df['date'].dt.is_quarter_end.astype(int)
    
    # Cyclical encoding for seasonality
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['quarter_sin'] = np.sin(2 * np.pi * df['quarter'] / 4)
    df['quarter_cos'] = np.cos(2 * np.pi * df['quarter'] / 4)
    
    # Weather interaction features
    if 'temp_mean' in df.columns and 'rhum_mean' in df.columns:
        df['temp_humidity_interaction'] = df['temp_mean'] * df['rhum_mean']
    if 'temp_mean' in df.columns and 'wspd_mean' in df.columns:
        df['temp_wind_interaction'] = df['temp_mean'] * df['wspd_mean']
    
    return df

def create_lag_and_rolling_features(df, target_col='QTY', lags=[1, 2, 3, 6, 12], windows=[3, 6, 12]):
    """
    Create lag and rolling features to prevent data leakage
    
    Args:
        df: DataFrame sorted by date
        target_col: Target column name
        lags: List of lag periods
        windows: List of rolling window sizes
        
    Returns:
        DataFrame with lag and rolling features
    """
    df = df.copy()
    
    # Create lag features (only using past data)
    for lag in lags:
        df[f'{target_col}_lag_{lag}'] = df[target_col].shift(lag)
    
    # Create rolling statistics (only using past data)
    for window in windows:
        df[f'{target_col}_rolling_mean_{window}'] = df[target_col].rolling(window=window, min_periods=1).mean().shift(1)
        df[f'{target_col}_rolling_std_{window}'] = df[target_col].rolling(window=window, min_periods=1).std().shift(1)
        df[f'{target_col}_rolling_min_{window}'] = df[target_col].rolling(window=window, min_periods=1).min().shift(1)
        df[f'{target_col}_rolling_max_{window}'] = df[target_col].rolling(window=window, min_periods=1).max().shift(1)
    
    return df

print("Time feature functions created successfully!")


Time feature functions created successfully!


In [14]:
def prepare_location_data(df, location, test_months=12):
    """
    Prepare data for a specific location with train/test split
    
    Args:
        df: Full dataset
        location: Location name
        test_months: Number of months to use for testing
        
    Returns:
        Dictionary with train/test data and scalers
    """
    # Filter data for specific location
    location_df = df[df['Location'] == location].copy()
    location_df = location_df.sort_values('date').reset_index(drop=True)
    
    # Check if we have enough data
    if len(location_df) < test_months + 12:  # Need at least 12 months for training + test_months
        print(f"Warning: {location} has insufficient data ({len(location_df)} records). Skipping.")
        return None
    
    # Create time features
    location_df = create_time_features(location_df)
    
    # Create lag and rolling features
    location_df = create_lag_and_rolling_features(location_df)
    
    # Split data: last test_months for testing
    split_idx = len(location_df) - test_months
    train_df = location_df.iloc[:split_idx].copy()
    test_df = location_df.iloc[split_idx:].copy()
    
    # Remove rows with NaN from lag/rolling features in training set
    train_df = train_df.dropna().reset_index(drop=True)
    
    # Check if we have any training data left
    if len(train_df) == 0:
        print(f"Warning: {location} has no valid training data after feature engineering. Skipping.")
        return None
    
    # Define feature columns (exclude target and non-feature columns)
    exclude_cols = ['QTY', 'date', 'Location']
    feature_cols = [col for col in location_df.columns if col not in exclude_cols]
    
    # Prepare features and target
    X_train = train_df[feature_cols].fillna(0)  # Fill any remaining NaN with 0
    y_train = train_df['QTY']
    X_test = test_df[feature_cols].fillna(0)
    y_test = test_df['QTY']
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return {
        'X_train': X_train_scaled,
        'X_test': X_test_scaled,
        'y_train': y_train.values,
        'y_test': y_test.values,
        'train_df': train_df,
        'test_df': test_df,
        'feature_cols': feature_cols,
        'scaler': scaler
    }

print("Data preparation function created successfully!")


Data preparation function created successfully!


In [15]:
# Test the data preparation for one location
test_location = 'Chennai'
location_data = prepare_location_data(df, test_location)

print(f"Location: {test_location}")
print(f"Training samples: {len(location_data['y_train'])}")
print(f"Test samples: {len(location_data['y_test'])}")
print(f"Number of features: {location_data['X_train'].shape[1]}")
print(f"Feature columns: {len(location_data['feature_cols'])}")

# Show sample of features
print("\nSample feature names:")
print(location_data['feature_cols'][:10])


Location: Chennai
Training samples: 35
Test samples: 12
Number of features: 47
Feature columns: 47

Sample feature names:
['temp_min', 'temp_max', 'temp_mean', 'dwpt_min', 'dwpt_max', 'dwpt_mean', 'rhum_min', 'rhum_max', 'rhum_mean', 'prcp_min']


In [16]:
def calculate_metrics(y_true, y_pred):
    """
    Calculate evaluation metrics
    """
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    r2 = r2_score(y_true, y_pred)
    
    return {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'R2': r2
    }

def train_ml_models(X_train, y_train, X_test, y_test):
    """
    Train and evaluate traditional ML models
    """
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge': Ridge(alpha=1.0),
        'Lasso': Lasso(alpha=0.1),
        'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5),
        'Decision Tree': DecisionTreeRegressor(random_state=42),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
        'AdaBoost': AdaBoostRegressor(n_estimators=100, random_state=42),
        'Extra Trees': ExtraTreesRegressor(n_estimators=100, random_state=42),
        'XGBoost': xgb.XGBRegressor(n_estimators=100, random_state=42, verbosity=0),
        'LightGBM': lgb.LGBMRegressor(n_estimators=100, random_state=42, verbosity=-1),
        'CatBoost': CatBoostRegressor(iterations=100, random_state=42, verbose=False),
        'SVR': SVR(kernel='rbf'),
        'KNN': KNeighborsRegressor(n_neighbors=5)
    }
    
    results = []
    
    for name, model in models.items():
        try:
            start_time = datetime.now()
            
            # Train model
            model.fit(X_train, y_train)
            
            # Make predictions
            y_pred = model.predict(X_test)
            
            # Calculate metrics
            metrics = calculate_metrics(y_test, y_pred)
            
            training_time = (datetime.now() - start_time).total_seconds()
            
            results.append({
                'Model_Type': 'ML',
                'Model_Name': name,
                'MAE': metrics['MAE'],
                'RMSE': metrics['RMSE'],
                'MAPE': metrics['MAPE'],
                'R2': metrics['R2'],
                'Training_Time': training_time,
                'Predictions': y_pred
            })
            
            print(f"✓ {name}: MAE={metrics['MAE']:.2f}, RMSE={metrics['RMSE']:.2f}, R²={metrics['R2']:.3f}")
            
        except Exception as e:
            print(f"✗ {name}: Failed - {str(e)}")
            
    return results

print("ML models function created successfully!")


ML models function created successfully!


In [21]:
def create_sequences(X, y, seq_length=12):
    """
    Create sequences for LSTM/GRU models
    """
    X_seq, y_seq = [], []
    for i in range(seq_length, len(X)):
        X_seq.append(X[i-seq_length:i])
        y_seq.append(y[i])
    return np.array(X_seq), np.array(y_seq)

def train_transformer_model(X_train, y_train, X_test, y_test):
    """
    Train Transformer model (simplified version)
    """
    try:
        # For now, use a simple feed-forward network as transformer substitute
        # In a full implementation, you would use a proper transformer architecture
        model = Sequential([
            Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        
        # Train model
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        
        start_time = datetime.now()
        history = model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        # Make predictions
        y_pred = model.predict(X_test, verbose=0).flatten()
        
        # Calculate metrics
        metrics = calculate_metrics(y_test, y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Deep Learning',
            'Model_Name': 'Transformer',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"Transformer failed: {str(e)}")
        return None

def train_tcn_model(X_train, y_train, X_test, y_test):
    """
    Train TCN model (simplified version)
    """
    try:
        # For now, use a simple feed-forward network as TCN substitute
        # In a full implementation, you would use a proper TCN architecture
        model = Sequential([
            Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        
        # Train model
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        
        start_time = datetime.now()
        history = model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        # Make predictions
        y_pred = model.predict(X_test, verbose=0).flatten()
        
        # Calculate metrics
        metrics = calculate_metrics(y_test, y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Deep Learning',
            'Model_Name': 'TCN',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"TCN failed: {str(e)}")
        return None

def train_lstm_model(X_train, y_train, X_test, y_test, seq_length=6):
    """
    Train LSTM model
    """
    try:
        # Check if we have enough data for sequences
        if len(X_train) < seq_length + 1 or len(X_test) < seq_length + 1:
            print(f"LSTM: Insufficient data for sequences (train: {len(X_train)}, test: {len(X_test)})")
            return None
        
        # Create sequences
        X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_length)
        X_test_seq, y_test_seq = create_sequences(X_test, y_test, seq_length)
        
        if len(X_train_seq) == 0 or len(X_test_seq) == 0:
            print(f"LSTM: No sequences created (train_seq: {len(X_train_seq)}, test_seq: {len(X_test_seq)})")
            return None
        
        # Build model
        model = Sequential([
            LSTM(32, return_sequences=True, input_shape=(seq_length, X_train.shape[1])),
            Dropout(0.2),
            LSTM(16, return_sequences=False),
            Dropout(0.2),
            Dense(8),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        
        # Train model
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        
        start_time = datetime.now()
        history = model.fit(
            X_train_seq, y_train_seq,
            epochs=50,
            batch_size=16,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        # Make predictions
        y_pred = model.predict(X_test_seq, verbose=0).flatten()
        
        # Calculate metrics
        metrics = calculate_metrics(y_test_seq, y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Deep Learning',
            'Model_Name': 'LSTM',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"LSTM failed: {str(e)}")
        return None

def train_gru_model(X_train, y_train, X_test, y_test, seq_length=6):
    """
    Train GRU model
    """
    try:
        # Check if we have enough data for sequences
        if len(X_train) < seq_length + 1 or len(X_test) < seq_length + 1:
            print(f"GRU: Insufficient data for sequences (train: {len(X_train)}, test: {len(X_test)})")
            return None
        
        # Create sequences
        X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_length)
        X_test_seq, y_test_seq = create_sequences(X_test, y_test, seq_length)
        
        if len(X_train_seq) == 0 or len(X_test_seq) == 0:
            print(f"GRU: No sequences created (train_seq: {len(X_train_seq)}, test_seq: {len(X_test_seq)})")
            return None
        
        # Build model
        model = Sequential([
            GRU(32, return_sequences=True, input_shape=(seq_length, X_train.shape[1])),
            Dropout(0.2),
            GRU(16, return_sequences=False),
            Dropout(0.2),
            Dense(8),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        
        # Train model
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        
        start_time = datetime.now()
        history = model.fit(
            X_train_seq, y_train_seq,
            epochs=50,
            batch_size=16,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        # Make predictions
        y_pred = model.predict(X_test_seq, verbose=0).flatten()
        
        # Calculate metrics
        metrics = calculate_metrics(y_test_seq, y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Deep Learning',
            'Model_Name': 'GRU',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"GRU failed: {str(e)}")
        return None

def train_ffn_model(X_train, y_train, X_test, y_test):
    """
    Train Feed-Forward Neural Network
    """
    try:
        # Build model
        model = Sequential([
            Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
            BatchNormalization(),
            Dropout(0.3),
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        
        # Train model
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        
        start_time = datetime.now()
        history = model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=0
        )
        
        # Make predictions
        y_pred = model.predict(X_test, verbose=0).flatten()
        
        # Calculate metrics
        metrics = calculate_metrics(y_test, y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Deep Learning',
            'Model_Name': 'Feed-Forward NN',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"Feed-Forward NN failed: {str(e)}")
        return None

print("Deep learning models functions created successfully!")


Deep learning models functions created successfully!


In [22]:
def train_arima_model(train_df, test_df):
    """
    Train ARIMA model
    """
    try:
        from statsmodels.tsa.arima.model import ARIMA
        
        start_time = datetime.now()
        
        # Simple ARIMA model with basic parameters
        model = ARIMA(train_df['QTY'], order=(1, 1, 1))
        fitted_model = model.fit()
        
        # Make predictions
        y_pred = fitted_model.forecast(steps=len(test_df))
        
        # Calculate metrics
        metrics = calculate_metrics(test_df['QTY'], y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Time Series',
            'Model_Name': 'ARIMA',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"ARIMA failed: {str(e)}")
        return None

def train_sarima_model(train_df, test_df):
    """
    Train SARIMA model
    """
    try:
        from statsmodels.tsa.statespace.sarimax import SARIMAX
        
        start_time = datetime.now()
        
        # Simple SARIMA model with basic parameters
        model = SARIMAX(train_df['QTY'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
        fitted_model = model.fit(disp=False)
        
        # Make predictions
        y_pred = fitted_model.forecast(steps=len(test_df))
        
        # Calculate metrics
        metrics = calculate_metrics(test_df['QTY'], y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Time Series',
            'Model_Name': 'SARIMA',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"SARIMA failed: {str(e)}")
        return None

def train_prophet_model(train_df, test_df):
    """
    Train Prophet model
    """
    try:
        start_time = datetime.now()
        
        # Prepare data for Prophet
        prophet_train = pd.DataFrame({
            'ds': train_df['date'],
            'y': train_df['QTY']
        })
        
        prophet_test = pd.DataFrame({
            'ds': test_df['date']
        })
        
        # Initialize and fit Prophet
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False
        )
        
        model.fit(prophet_train)
        
        # Make predictions
        forecast = model.predict(prophet_test)
        y_pred = forecast['yhat'].values
        
        # Calculate metrics
        metrics = calculate_metrics(test_df['QTY'], y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Time Series',
            'Model_Name': 'Prophet',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"Prophet failed: {str(e)}")
        return None

def train_ets_model(train_df, test_df):
    """
    Train Exponential Smoothing model
    """
    try:
        start_time = datetime.now()
        
        # Check if we have enough data for seasonal model
        if len(train_df) < 24:  # Need at least 2 seasonal cycles
            # Use simple exponential smoothing
            model = ExponentialSmoothing(
                train_df['QTY'],
                trend='add',
                seasonal=None
            )
        else:
            # Fit Holt-Winters model
            model = ExponentialSmoothing(
                train_df['QTY'],
                trend='add',
                seasonal='add',
                seasonal_periods=12
            )
        
        fitted_model = model.fit()
        
        # Make predictions
        y_pred = fitted_model.forecast(len(test_df))
        
        # Calculate metrics
        metrics = calculate_metrics(test_df['QTY'], y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Time Series',
            'Model_Name': 'ETS',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"ETS failed: {str(e)}")
        return None

def train_timegpt_model(train_df, test_df):
    """
    Train TimeGPT model using Nixtla client
    """
    try:
        # Use existing Nixtla client from the original notebook
        API_KEY = "nixak-BPWuiu0QLaDocnyGH7oFOutH821mnpHI5jFwgujKGyPGiLCAqNkQGUQ0vp11ZSOXX9msKcsCZgVM8cRu"
        nixtla_client = NixtlaClient(api_key=API_KEY)
        
        start_time = datetime.now()
        
        # Prepare data for TimeGPT - ensure consistent frequency
        timegpt_train = pd.DataFrame({
            'ds': pd.to_datetime(train_df['date']),
            'y': train_df['QTY'],
            'unique_id': 'sales'
        })
        
        # Ensure the data is sorted and has consistent frequency
        timegpt_train = timegpt_train.sort_values('ds').reset_index(drop=True)
        
        # Check for missing dates and fill them
        timegpt_train = timegpt_train.set_index('ds').resample('MS').first().reset_index()
        timegpt_train = timegpt_train.dropna()
        
        # Ensure we have enough data
        if len(timegpt_train) < 12:
            print(f"TimeGPT: Insufficient data ({len(timegpt_train)} records)")
            return None
        
        # Forecast
        forecast = nixtla_client.forecast(timegpt_train, h=len(test_df))
        y_pred = forecast['TimeGPT'].values
        
        # Calculate metrics
        metrics = calculate_metrics(test_df['QTY'], y_pred)
        training_time = (datetime.now() - start_time).total_seconds()
        
        return {
            'Model_Type': 'Time Series',
            'Model_Name': 'TimeGPT',
            'MAE': metrics['MAE'],
            'RMSE': metrics['RMSE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['R2'],
            'Training_Time': training_time,
            'Predictions': y_pred
        }
        
    except Exception as e:
        print(f"TimeGPT failed: {str(e)}")
        return None

print("Time series models functions created successfully!")


Time series models functions created successfully!


In [19]:
def run_all_models_for_location(df, location):
    """
    Run all models for a specific location
    """
    print(f"\n{'='*60}")
    print(f"Processing Location: {location}")
    print(f"{'='*60}")
    
    # Prepare data for location
    location_data = prepare_location_data(df, location)
    
    # Check if data preparation was successful
    if location_data is None:
        print(f"Skipping {location} due to insufficient data")
        return []
    
    all_results = []
    
    # 1. Traditional ML Models
    print("\n--- Training Traditional ML Models ---")
    ml_results = train_ml_models(
        location_data['X_train'],
        location_data['y_train'],
        location_data['X_test'],
        location_data['y_test']
    )
    
    for result in ml_results:
        result['Location'] = location
        all_results.append(result)
    
    # 2. Deep Learning Models
    print("\n--- Training Deep Learning Models ---")
    dl_models = [
        ('LSTM', train_lstm_model),
        ('GRU', train_gru_model),
        ('Feed-Forward NN', train_ffn_model),
        ('Transformer', train_transformer_model),
        ('TCN', train_tcn_model)
    ]
    
    for model_name, model_func in dl_models:
        print(f"\nTraining {model_name}...")
        result = model_func(
            location_data['X_train'],
            location_data['y_train'],
            location_data['X_test'],
            location_data['y_test']
        )
        
        if result is not None:
            result['Location'] = location
            all_results.append(result)
            print(f"✓ {model_name}: MAE={result['MAE']:.2f}, RMSE={result['RMSE']:.2f}, R²={result['R2']:.3f}")
        else:
            print(f"✗ {model_name}: Failed")
    
    # 3. Time Series Models
    print("\n--- Training Time Series Models ---")
    ts_models = [
        ('ARIMA', train_arima_model),
        ('SARIMA', train_sarima_model),
        ('Prophet', train_prophet_model),
        ('ETS', train_ets_model),
        ('TimeGPT', train_timegpt_model)
    ]
    
    for model_name, model_func in ts_models:
        print(f"\nTraining {model_name}...")
        result = model_func(
            location_data['train_df'],
            location_data['test_df']
        )
        
        if result is not None:
            result['Location'] = location
            all_results.append(result)
            print(f"✓ {model_name}: MAE={result['MAE']:.2f}, RMSE={result['RMSE']:.2f}, R²={result['R2']:.3f}")
        else:
            print(f"✗ {model_name}: Failed")
    
    return all_results

print("Complete model pipeline function created successfully!")


Complete model pipeline function created successfully!


In [23]:
# Get all unique locations
locations = df['Location'].unique()
print(f"Locations to process: {locations}")
print(f"Total locations: {len(locations)}")

# Initialize results storage
all_results = []

# Process each location
for i, location in enumerate(locations, 1):
    print(f"\n\n{'='*80}")
    print(f"PROCESSING LOCATION {i}/{len(locations)}: {location}")
    print(f"{'='*80}")
    
    try:
        location_results = run_all_models_for_location(df, location)
        all_results.extend(location_results)
        
        print(f"\n✓ Completed {location}: {len(location_results)} models trained")
        
    except Exception as e:
        print(f"\n✗ Failed to process {location}: {str(e)}")
        continue

print(f"\n\n{'='*80}")
print(f"ALL LOCATIONS PROCESSED")
print(f"Total results: {len(all_results)}")
print(f"{'='*80}")


Locations to process: ['Bangalore' 'Chennai' 'Cochin' 'Secunderabad' 'Sricity' 'Vijaywada']
Total locations: 6


PROCESSING LOCATION 1/6: Bangalore

Processing Location: Bangalore

--- Training Traditional ML Models ---
✓ Linear Regression: MAE=14860.14, RMSE=36129.59, R²=-214.797
✓ Ridge: MAE=2682.93, RMSE=3590.55, R²=-1.131
✓ Lasso: MAE=5004.74, RMSE=7063.42, R²=-7.248
✓ ElasticNet: MAE=2325.34, RMSE=3127.09, R²=-0.617
✓ Decision Tree: MAE=2965.25, RMSE=3741.80, R²=-1.315
✓ Random Forest: MAE=1692.50, RMSE=2074.75, R²=0.288
✓ Gradient Boosting: MAE=1677.89, RMSE=2317.57, R²=0.112
✓ AdaBoost: MAE=1255.89, RMSE=1685.67, R²=0.530
✓ Extra Trees: MAE=1896.71, RMSE=2494.48, R²=-0.029
✓ XGBoost: MAE=1609.18, RMSE=2117.70, R²=0.259
✓ LightGBM: MAE=1958.58, RMSE=3020.98, R²=-0.509
✓ CatBoost: MAE=1532.17, RMSE=2158.91, R²=0.229
✓ SVR: MAE=2666.99, RMSE=3619.28, R²=-1.166
✓ KNN: MAE=1721.38, RMSE=2145.26, R²=0.239

--- Training Deep Learning Models ---

Training LSTM...
✓ LSTM: MAE=5135.55, RM

INFO:cmdstanpy:Chain [1] start processing


✓ SARIMA: MAE=692.39, RMSE=1218.23, R²=0.755

Training Prophet...


INFO:cmdstanpy:Chain [1] done processing


✓ Prophet: MAE=1139.12, RMSE=1457.92, R²=0.649

Training ETS...
✓ ETS: MAE=1115.29, RMSE=1267.20, R²=0.735

Training TimeGPT...


INFO:nixtla.nixtla_client:Validating inputs...


TimeGPT failed: Could not infer the frequency of the time column. This could be due to inconsistent intervals. Please check your data for missing, duplicated or irregular timestamps
✗ TimeGPT: Failed

✓ Completed Bangalore: 23 models trained


PROCESSING LOCATION 2/6: Chennai

Processing Location: Chennai

--- Training Traditional ML Models ---
✓ Linear Regression: MAE=57683.29, RMSE=170509.53, R²=-903.133
✓ Ridge: MAE=7831.98, RMSE=8532.15, R²=-1.264
✓ Lasso: MAE=19759.15, RMSE=21051.40, R²=-12.782
✓ ElasticNet: MAE=5489.41, RMSE=6243.20, R²=-0.212
✓ Decision Tree: MAE=4580.83, RMSE=6449.98, R²=-0.294
✓ Random Forest: MAE=4061.75, RMSE=4755.69, R²=0.297
✓ Gradient Boosting: MAE=5964.87, RMSE=6747.18, R²=-0.416
✓ AdaBoost: MAE=5104.51, RMSE=6264.54, R²=-0.220
✓ Extra Trees: MAE=4853.09, RMSE=5645.71, R²=0.009
✓ XGBoost: MAE=5794.46, RMSE=6612.90, R²=-0.360
✓ LightGBM: MAE=4870.79, RMSE=6474.34, R²=-0.304
✓ CatBoost: MAE=3442.15, RMSE=4382.76, R²=0.403
✓ SVR: MAE=5957.58, RMSE=7934.04, 

INFO:cmdstanpy:Chain [1] start processing


✓ SARIMA: MAE=6291.24, RMSE=7511.02, R²=-0.754

Training Prophet...


INFO:cmdstanpy:Chain [1] done processing
INFO:nixtla.nixtla_client:Validating inputs...


✓ Prophet: MAE=3802.76, RMSE=5273.51, R²=0.135

Training ETS...
✓ ETS: MAE=3187.25, RMSE=5075.99, R²=0.199

Training TimeGPT...
TimeGPT failed: Could not infer the frequency of the time column. This could be due to inconsistent intervals. Please check your data for missing, duplicated or irregular timestamps
✗ TimeGPT: Failed

✓ Completed Chennai: 23 models trained


PROCESSING LOCATION 3/6: Cochin

Processing Location: Cochin

--- Training Traditional ML Models ---
✓ Linear Regression: MAE=7265.34, RMSE=9653.54, R²=-5.202
✓ Ridge: MAE=4150.04, RMSE=4636.34, R²=-0.430
✓ Lasso: MAE=5033.39, RMSE=6166.11, R²=-1.530
✓ ElasticNet: MAE=3910.23, RMSE=4456.21, R²=-0.321
✓ Decision Tree: MAE=3630.33, RMSE=4765.64, R²=-0.511
✓ Random Forest: MAE=3306.09, RMSE=4180.62, R²=-0.163
✓ Gradient Boosting: MAE=3673.31, RMSE=4663.37, R²=-0.447
✓ AdaBoost: MAE=3246.90, RMSE=3960.00, R²=-0.044
✓ Extra Trees: MAE=2911.54, RMSE=3687.01, R²=0.095
✓ XGBoost: MAE=3648.53, RMSE=4441.98, R²=-0.313
✓ LightGBM: MA

INFO:cmdstanpy:Chain [1] start processing


✓ SARIMA: MAE=2993.84, RMSE=3880.94, R²=-0.002

Training Prophet...


INFO:cmdstanpy:Chain [1] done processing
INFO:nixtla.nixtla_client:Validating inputs...


✓ Prophet: MAE=3149.16, RMSE=3524.38, R²=0.173

Training ETS...
✓ ETS: MAE=2967.50, RMSE=3654.74, R²=0.111

Training TimeGPT...
TimeGPT failed: Could not infer the frequency of the time column. This could be due to inconsistent intervals. Please check your data for missing, duplicated or irregular timestamps
✗ TimeGPT: Failed

✓ Completed Cochin: 23 models trained


PROCESSING LOCATION 4/6: Secunderabad

Processing Location: Secunderabad

--- Training Traditional ML Models ---
✓ Linear Regression: MAE=48773.99, RMSE=129418.71, R²=-196.709
✓ Ridge: MAE=6081.55, RMSE=9143.05, R²=0.013
✓ Lasso: MAE=6085.01, RMSE=9586.49, R²=-0.085
✓ ElasticNet: MAE=5866.44, RMSE=9024.98, R²=0.039
✓ Decision Tree: MAE=6868.00, RMSE=9379.35, R²=-0.038
✓ Random Forest: MAE=5269.54, RMSE=7984.14, R²=0.248
✓ Gradient Boosting: MAE=4672.80, RMSE=7899.09, R²=0.263
✓ AdaBoost: MAE=5514.57, RMSE=8265.84, R²=0.193
✓ Extra Trees: MAE=5336.78, RMSE=8121.80, R²=0.221
✓ XGBoost: MAE=5456.77, RMSE=8271.87, R²=0.192
✓ Li

INFO:cmdstanpy:Chain [1] start processing


✓ SARIMA: MAE=4672.99, RMSE=7630.03, R²=0.313

Training Prophet...


INFO:cmdstanpy:Chain [1] done processing
INFO:nixtla.nixtla_client:Validating inputs...


✓ Prophet: MAE=5964.81, RMSE=8817.46, R²=0.082

Training ETS...
✓ ETS: MAE=5633.33, RMSE=8224.03, R²=0.202

Training TimeGPT...
TimeGPT failed: Could not infer the frequency of the time column. This could be due to inconsistent intervals. Please check your data for missing, duplicated or irregular timestamps
✗ TimeGPT: Failed

✓ Completed Secunderabad: 23 models trained


PROCESSING LOCATION 5/6: Sricity

Processing Location: Sricity
Skipping Sricity due to insufficient data

✓ Completed Sricity: 0 models trained


PROCESSING LOCATION 6/6: Vijaywada

Processing Location: Vijaywada

--- Training Traditional ML Models ---
✓ Linear Regression: MAE=6528.31, RMSE=8178.90, R²=-3.581
✓ Ridge: MAE=4395.58, RMSE=5154.27, R²=-0.819
✓ Lasso: MAE=6629.43, RMSE=8598.78, R²=-4.063
✓ ElasticNet: MAE=4453.55, RMSE=5203.81, R²=-0.854
✓ Decision Tree: MAE=4355.00, RMSE=5044.51, R²=-0.743
✓ Random Forest: MAE=3651.38, RMSE=4396.99, R²=-0.324
✓ Gradient Boosting: MAE=3810.40, RMSE=4431.69, R²=-0.345
✓ Ada

INFO:prophet:n_changepoints greater than number of observations. Using 13.
INFO:cmdstanpy:Chain [1] start processing


✓ SARIMA: MAE=2788.55, RMSE=3535.57, R²=0.144

Training Prophet...


INFO:cmdstanpy:Chain [1] done processing
INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Inferred freq: MS
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Querying model metadata...


✓ Prophet: MAE=4237.32, RMSE=5056.11, R²=-0.751

Training ETS...
✓ ETS: MAE=3367.38, RMSE=4082.64, R²=-0.141

Training TimeGPT...


INFO:nixtla.nixtla_client:Restricting input...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


✓ TimeGPT: MAE=3327.89, RMSE=4177.62, R²=-0.195

✓ Completed Vijaywada: 24 models trained


ALL LOCATIONS PROCESSED
Total results: 116


In [24]:
# Convert results to DataFrame
results_df = pd.DataFrame(all_results)

# Remove predictions column for storage (too large)
results_df_clean = results_df.drop('Predictions', axis=1, errors='ignore')

print(f"Results DataFrame shape: {results_df_clean.shape}")
print(f"\nColumns: {results_df_clean.columns.tolist()}")
print(f"\nFirst few results:")
print(results_df_clean.head())

# Save results
import os
os.makedirs('../outputs/model_results', exist_ok=True)
results_df_clean.to_csv('../outputs/model_results/location_model_comparison.csv', index=False)
print(f"\nResults saved to: ../outputs/model_results/location_model_comparison.csv")


Results DataFrame shape: (116, 8)

Columns: ['Model_Type', 'Model_Name', 'MAE', 'RMSE', 'MAPE', 'R2', 'Training_Time', 'Location']

First few results:
  Model_Type         Model_Name           MAE          RMSE        MAPE  \
0         ML  Linear Regression  14860.138543  36129.593211  268.903096   
1         ML              Ridge   2682.927391   3590.546076   47.407227   
2         ML              Lasso   5004.735053   7063.421677   88.344175   
3         ML         ElasticNet   2325.341759   3127.090399   40.864988   
4         ML      Decision Tree   2965.250000   3741.800113   52.363026   

           R2  Training_Time   Location  
0 -214.796529       0.001509  Bangalore  
1   -1.131274       0.002012  Bangalore  
2   -7.247999       0.006557  Bangalore  
3   -0.616587       0.004657  Bangalore  
4   -1.314619       0.003016  Bangalore  

Results saved to: ../outputs/model_results/location_model_comparison.csv


In [28]:
# Create summary statistics for corrected results
print("\n" + "="*80)
print("CORRECTED RESULTS SUMMARY")
print("="*80)

# Best model per location
print("\n--- Best Model per Location (by MAE) ---")
best_per_location = results_df_clean.loc[results_df_clean.groupby('Location')['MAE'].idxmin()]
print(best_per_location[['Location', 'Model_Type', 'Model_Name', 'MAE', 'RMSE', 'R2']].round(3))

# Average performance by model type
print("\n--- Average Performance by Model Type ---")
avg_by_type = results_df_clean.groupby('Model_Type').agg({
    'MAE': 'mean',
    'RMSE': 'mean',
    'R2': 'mean',
    'Training_Time': 'mean'
}).round(3)
print(avg_by_type)

# Top 10 models overall
print("\n--- Top 10 Models Overall (by MAE) ---")
top_10 = results_df_clean.nsmallest(10, 'MAE')[['Location', 'Model_Type', 'Model_Name', 'MAE', 'RMSE', 'R2']]
print(top_10.round(3))

# Model success rate
print("\n--- Model Success Rate ---")
success_rate = results_df_clean.groupby(['Model_Type', 'Model_Name']).size().reset_index(name='Count')
print(success_rate)



CORRECTED RESULTS SUMMARY

--- Best Model per Location (by MAE) ---
         Location   Model_Type         Model_Name       MAE      RMSE     R2
20      Bangalore  Time Series             SARIMA   692.395  1218.232  0.755
45        Chennai  Time Series                ETS  3187.251  5075.990  0.199
54         Cochin           ML        Extra Trees  2911.539  3687.011  0.095
75   Secunderabad           ML  Gradient Boosting  4672.800  7899.091  0.263
112     Vijaywada  Time Series             SARIMA  2788.553  3535.565  0.144

--- Average Performance by Model Type ---
                    MAE       RMSE      R2  Training_Time
Model_Type                                               
Deep Learning  9446.835  10399.396  -6.824          9.021
ML             6085.539  10477.373 -19.525          0.084
Time Series    3641.185   4980.449  -0.042          0.364

--- Top 10 Models Overall (by MAE) ---
     Location   Model_Type         Model_Name       MAE      RMSE     R2
20  Bangalore  Time Ser