# Experiment with Arima model

In [1]:
!pip install kaggle wandb onnx -Uq
from google.colab import drive
drive.mount('/content/drive')

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive


In [2]:
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
! unzip walmart-recruiting-store-sales-forecasting.zip
!unzip train.csv.zip
!unzip features.csv.zip

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 636MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           
Archive:  train.csv.zip
  inflating: train.csv               
Archive:  features.csv.zip
  inflating: features.csv            


In [3]:
!pip install statsmodels mlflow dagshub scikit-learn pandas numpy matplotlib seaborn joblib -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.7/24.7 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Core libraries
import mlflow
import mlflow.sklearn
import dagshub
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import os

# ARIMA and time series
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings
import logging
logging.getLogger('statsmodels').setLevel(logging.WARNING)



In [9]:
import warnings
warnings.filterwarnings('ignore', message='No frequency information was provided')
warnings.filterwarnings('ignore', message='An unsupported index was provided')
warnings.filterwarnings('ignore', message='A date index has been provided, but it has no associated frequency')
warnings.filterwarnings('ignore', message='No supported index is available')
warnings.filterwarnings('ignore', category=FutureWarning, module='statsmodels')



In [10]:
class WalmartSimplePreprocessingPipeline:
    """
    Simplified preprocessing pipeline for ARIMA models
    ARIMA doesn't use external regressors, so we focus on:
    - Time series data preparation
    - Store-Dept grouping
    - Basic date features for context
    """

    def __init__(self):
        self.fitted = False
        self.outlier_thresholds = None

    def load_and_prepare_data(self):
        """Load and merge train.csv, stores.csv datasets (minimal for ARIMA)"""
        print("📊 Loading datasets...")

        # Load datasets
        train_df = pd.read_csv('train.csv')
        stores_df = pd.read_csv('stores.csv')

        print(f"   📈 Train data: {train_df.shape}")
        print(f"   🏪 Stores data: {stores_df.shape}")

        # Convert Date column to datetime
        train_df['Date'] = pd.to_datetime(train_df['Date'])

        # Merge with stores for Type information (useful for outlier detection)
        train_full = train_df.merge(stores_df, on='Store', how='left')

        print(f"   ✅ Merged data: {train_full.shape}")
        print(f"   📅 Date range: {train_full['Date'].min()} to {train_full['Date'].max()}")

        return train_full

    def create_temporal_split(self, df, train_ratio=0.8):
        """Create temporal split to prevent data leakage"""
        print(f"📅 Creating temporal split ({int(train_ratio*100)}/{int((1-train_ratio)*100)})...")

        # Sort by date to ensure temporal order
        df_sorted = df.sort_values('Date').reset_index(drop=True)

        # Find split point
        split_idx = int(len(df_sorted) * train_ratio)
        split_date = df_sorted.iloc[split_idx]['Date']

        # Create splits
        train_data = df_sorted.iloc[:split_idx].copy()
        val_data = df_sorted.iloc[split_idx:].copy()

        # Create split info dictionary
        split_info = {
            'split_date': split_date,
            'train_size': len(train_data),
            'val_size': len(val_data),
            'train_date_range': (train_data['Date'].min(), train_data['Date'].max()),
            'val_date_range': (val_data['Date'].min(), val_data['Date'].max())
        }

        print(f"   📊 Split date: {split_date}")
        print(f"   📈 Train: {len(train_data):,} records ({train_data['Date'].min()} to {train_data['Date'].max()})")
        print(f"   📉 Val: {len(val_data):,} records ({val_data['Date'].min()} to {val_data['Date'].max()})")

        return train_data, val_data, split_info

    def fit(self, train_data):
        """Fit the preprocessing pipeline on training data"""
        print("🔧 Fitting preprocessing pipeline on training data...")

        # Fit outlier removal thresholds on training data only
        # Separate thresholds for holiday vs non-holiday weeks since they have different patterns
        self.outlier_thresholds = {
            # Non-holiday weeks (regular business)
            'non_holiday': {
                'A': {'lower': -1000, 'upper': 35000},   # Type A stores - regular weeks
                'B': {'lower': -500, 'upper': 20000},    # Type B stores - regular weeks
                'C': {'lower': -200, 'upper': 12000}     # Type C stores - regular weeks
            },
            # Holiday weeks (higher sales expected - Super Bowl, Labor Day, Thanksgiving, Christmas)
            'holiday': {
                'A': {'lower': -1000, 'upper': 80000},   # Type A stores - holiday weeks (much higher)
                'B': {'lower': -500, 'upper': 50000},    # Type B stores - holiday weeks
                'C': {'lower': -200, 'upper': 30000}     # Type C stores - holiday weeks
            }
        }

        print("✅ Pipeline fitted on training data with holiday-aware outlier thresholds")
        self.fitted = True
        return self

    def transform(self, data, is_validation=False):
        """Transform data using fitted pipeline (minimal for ARIMA)"""
        if not self.fitted:
            raise ValueError("Pipeline must be fitted before transform!")

        print(f"🔄 Transforming {'validation' if is_validation else 'training'} data...")

        df = data.copy()

        # Step 1: Create basic date features (for context, not used in ARIMA model)
        df = self._create_basic_date_features(df)

        # Step 2: Remove outliers (only on training data)
        if not is_validation:
            df = self._remove_outliers(df)

        print(f"✅ Transform complete. Shape: {df.shape}")
        return df

    def fit_transform(self, train_data):
        """Fit and transform training data in one step"""
        return self.fit(train_data).transform(train_data, is_validation=False)

    def _create_basic_date_features(self, df):
        """Create basic date features for context (not used in ARIMA)"""
        df = df.copy()
        df['Month'] = df['Date'].dt.month
        df['DayOfWeek'] = df['Date'].dt.dayofweek
        df['WeekOfYear'] = df['Date'].dt.isocalendar().week
        df['IsWeekend'] = (df['DayOfWeek'] >= 5).astype(int)
        start_date = df['Date'].min()
        df['DaysFromStart'] = (df['Date'] - start_date).dt.days
        df['WeeksFromStart'] = df['DaysFromStart'] // 7
        return df

    def _remove_outliers(self, df):
        """Remove outliers from training data only - separate thresholds for holiday vs non-holiday weeks"""
        initial_len = len(df)
        df_clean = df.copy()

        non_holiday_removed = 0
        holiday_removed = 0

        # Apply different thresholds based on holiday status
        for store_type in ['A', 'B', 'C']:
            # Process non-holiday weeks
            non_holiday_mask = (df_clean['IsHoliday'] == False) & (df_clean['Type'] == store_type)

            if non_holiday_mask.any():
                thresholds = self.outlier_thresholds['non_holiday'][store_type]
                outlier_mask = (
                    (df_clean['Weekly_Sales'] < thresholds['lower']) |
                    (df_clean['Weekly_Sales'] > thresholds['upper'])
                )
                outliers_to_remove = non_holiday_mask & outlier_mask
                non_holiday_removed += outliers_to_remove.sum()
                df_clean = df_clean[~outliers_to_remove]

            # Process holiday weeks
            holiday_mask = (df_clean['IsHoliday'] == True) & (df_clean['Type'] == store_type)

            if holiday_mask.any():
                thresholds = self.outlier_thresholds['holiday'][store_type]
                outlier_mask = (
                    (df_clean['Weekly_Sales'] < thresholds['lower']) |
                    (df_clean['Weekly_Sales'] > thresholds['upper'])
                )
                outliers_to_remove = holiday_mask & outlier_mask
                holiday_removed += outliers_to_remove.sum()
                df_clean = df_clean[~outliers_to_remove]

        total_removed = initial_len - len(df_clean)

        print(f"   🗑️ Removed {total_removed:,} outliers from training data")
        print(f"      📅 Non-holiday outliers: {non_holiday_removed:,}")
        print(f"      🎉 Holiday outliers: {holiday_removed:,}")

        return df_clean


def setup_mlflow():
    """Setup MLflow and DagsHub tracking"""
    print("🔧 Setting up MLflow and DagsHub...")

    # End any active runs first
    try:
        mlflow.end_run()
    except:
        pass

    # Initialize DagsHub
    try:
        dagshub.init(
            repo_owner='konstantine25b',
            repo_name='Walmart-Recruiting---Store-Sales-Forecasting',
            mlflow=True
        )
        print("✅ DagsHub initialized successfully!")
    except Exception as e:
        print(f"⚠️ DagsHub init warning: {e}")

    # Set MLflow tracking URI
    mlflow.set_tracking_uri("https://dagshub.com/konstantine25b/Walmart-Recruiting---Store-Sales-Forecasting.mlflow")

    # Create unique experiment name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    experiment_name = f"Experiment_ARIMA_{timestamp}"

    try:
        experiment_id = mlflow.create_experiment(experiment_name)
        print(f"✅ Created new experiment: {experiment_name}")
    except mlflow.exceptions.MlflowException as e:
        if "already exists" in str(e):
            experiment = mlflow.get_experiment_by_name(experiment_name)
            experiment_id = experiment.experiment_id
            print(f"✅ Using existing experiment: {experiment_name}")
        else:
            # Fallback to default experiment
            experiment_name = "Default"
            mlflow.set_experiment(experiment_name)
            print(f"⚠️ Using default experiment due to: {e}")

    mlflow.set_experiment(experiment_name)

    print(f"✅ MLflow setup complete!")
    print(f"🔗 Tracking URI: {mlflow.get_tracking_uri()}")
    print(f"📊 Experiment: {experiment_name}")

    return experiment_name


def get_preprocessed_data():
    """
    Use preprocessing pipeline to get model-ready data

    Returns:
        train_data, val_data: Time series data ready for ARIMA
        split_info: Information about the temporal split
    """
    print("🔄 Getting preprocessed data using pipeline...")

    # Create the preprocessing pipeline
    pipeline = WalmartSimplePreprocessingPipeline()

    # Load raw data
    train_full = pipeline.load_and_prepare_data()

    # Create temporal split
    train_data, val_data, split_info = pipeline.create_temporal_split(train_full)

    # Fit and transform data using pipeline
    pipeline.fit(train_data)
    train_processed = pipeline.transform(train_data, is_validation=False)
    val_processed = pipeline.transform(val_data, is_validation=True)

    print(f"✅ Data preprocessing complete!")
    print(f"   📊 Training shape: {train_processed.shape}")
    print(f"   📊 Validation shape: {val_processed.shape}")

    return train_processed, val_processed, split_info


def calculate_wmae(y_true, y_pred, is_holiday, holiday_weight=5.0):
    """Calculate Weighted Mean Absolute Error (WMAE)"""
    abs_errors = np.abs(y_true - y_pred)
    weights = np.where(is_holiday, holiday_weight, 1.0)
    wmae = np.sum(weights * abs_errors) / np.sum(weights)
    return wmae


def check_stationarity(ts, title):
    """Check stationarity of time series using Augmented Dickey-Fuller test"""
    # Perform Augmented Dickey-Fuller test
    result = adfuller(ts.dropna())

    is_stationary = result[1] <= 0.05

    return {
        'title': title,
        'adf_statistic': result[0],
        'p_value': result[1],
        'critical_values': result[4],
        'is_stationary': is_stationary
    }


In [11]:
def find_arima_order(ts, max_p=1, max_d=1, max_q=1, max_time=5):
    """Simple ARIMA order - just use (1,1,1) without search"""
    # No hyperparameter search - just use simple ARIMA(1,1,1)
    return (1, 1, 1), None


def train_arima_models(train_data, val_data):
    """Train simple ARIMA(1,1,1) models for each Store-Dept combination"""
    print("📈 Training simple ARIMA(1,1,1) models for each Store-Dept combination...")
    print("   ⏰ No time limit - training all combinations")

    # Get unique combinations from training data
    train_combinations = train_data.groupby(['Store', 'Dept']).size().index.tolist()
    print(f"   📊 Training models for {len(train_combinations)} combinations")

    # Train all combinations with simple models
    print(f"   🎯 Training simple ARIMA(1,1,1) for all combinations")

    models = {}
    training_errors = {}
    model_orders = {}

    successful_models = 0
    failed_models = 0

    for i, (store, dept) in enumerate(train_combinations):
        try:
            # Filter data for this combination
            store_dept_data = train_data[
                (train_data['Store'] == store) &
                (train_data['Dept'] == dept)
            ].copy()

            # Skip if insufficient data
            if len(store_dept_data) < 10:  # Very minimal data requirement
                failed_models += 1
                continue

            # Sort by date and prepare time series with proper DatetimeIndex
            store_dept_data = store_dept_data.sort_values('Date')

            # Create proper time series with DatetimeIndex - let pandas infer frequency
            dates = pd.DatetimeIndex(store_dept_data['Date'])
            ts = pd.Series(
                store_dept_data['Weekly_Sales'].values,
                index=dates
            )

            # Check for constant series
            if ts.var() == 0 or ts.std() < 1e-6:
                failed_models += 1
                continue

            # Use simple ARIMA(1,1,1) - no hyperparameter search
            best_order = (1, 1, 1)

            # Fit simple ARIMA model
            model = ARIMA(ts, order=best_order)
            fitted_model = model.fit()  # Use default optimization

            models[(store, dept)] = fitted_model
            model_orders[(store, dept)] = best_order
            successful_models += 1

            # Progress updates every 200 models
            if i % 200 == 0:
                print(f"   ✅ Trained {i+1}/{len(train_combinations)} models ({successful_models} successful, {failed_models} failed)")

        except Exception as e:
            failed_models += 1
            if failed_models < 3:  # Only print first few errors
                print(f"   ⚠️ Failed to train model for Store {store}, Dept {dept}: {e}")

    print(f"✅ ARIMA training complete!")
    print(f"   🎯 Successful models: {successful_models}")
    print(f"   ❌ Failed models: {failed_models}")
    print(f"   📊 Coverage: {successful_models}/{len(train_combinations)} ({successful_models/len(train_combinations)*100:.1f}%)")

    return models, training_errors, model_orders


def make_arima_predictions(models, val_data, train_data=None):
    """Make predictions using ONLY trained ARIMA models - no fallbacks"""
    print("📈 Making ARIMA predictions (no fallbacks)...")

    predictions = []
    actuals = []
    holidays = []
    successful_predictions = 0
    skipped_predictions = 0

    # Get validation combinations
    val_combinations = val_data.groupby(['Store', 'Dept']).groups.keys()

    for store, dept in val_combinations:
        try:
            # Get validation data for this combination
            store_dept_val = val_data[
                (val_data['Store'] == store) &
                (val_data['Dept'] == dept)
            ].copy()

            # Sort by date
            store_dept_val = store_dept_val.sort_values('Date')

            # ONLY use combinations where we have trained ARIMA models
            if (store, dept) in models:
                # Use trained ARIMA model
                fitted_model = models[(store, dept)]

                # Make forecast with proper periods
                n_periods = len(store_dept_val)

                try:
                    # Use forecast method with steps parameter
                    forecast = fitted_model.forecast(steps=n_periods)

                    # Convert to list if it's a single value or array
                    if isinstance(forecast, (int, float)):
                        forecast_list = [forecast] * n_periods
                    elif hasattr(forecast, '__iter__'):
                        forecast_list = list(forecast)
                        # Ensure forecast matches validation length
                        if len(forecast_list) < n_periods:
                            last_val = forecast_list[-1] if forecast_list else 0
                            forecast_list.extend([last_val] * (n_periods - len(forecast_list)))
                        elif len(forecast_list) > n_periods:
                            forecast_list = forecast_list[:n_periods]
                    else:
                        # Skip this combination if forecast fails
                        skipped_predictions += len(store_dept_val)
                        continue

                    # Store results
                    predictions.extend(forecast_list)
                    actuals.extend(store_dept_val['Weekly_Sales'].tolist())
                    holidays.extend(store_dept_val['IsHoliday'].tolist())
                    successful_predictions += len(store_dept_val)

                except Exception as forecast_error:
                    # Skip this combination if forecasting fails
                    skipped_predictions += len(store_dept_val)
                    continue
            else:
                # Skip combinations without trained models
                skipped_predictions += len(store_dept_val)
                continue

        except Exception as e:
            # Skip this combination if any error occurs
            skipped_predictions += len(val_data[
                (val_data['Store'] == store) &
                (val_data['Dept'] == dept)
            ])
            continue

    print(f"✅ Predictions complete!")
    print(f"   🎯 ARIMA predictions: {successful_predictions}")
    print(f"   ⏭️ Skipped (no model): {skipped_predictions}")

    return np.array(predictions), np.array(actuals), np.array(holidays)


In [12]:
def calculate_training_wmae(models, train_data):
    """Calculate training WMAE on fitted values from trained ARIMA models"""
    print("📊 Calculating training WMAE on fitted values...")

    train_predictions = []
    train_actuals = []
    train_holidays = []

    for (store, dept), fitted_model in models.items():
        try:
            # Get training data for this combination
            store_dept_data = train_data[
                (train_data['Store'] == store) &
                (train_data['Dept'] == dept)
            ].copy()

            # Sort by date
            store_dept_data = store_dept_data.sort_values('Date')

            # Get fitted values from the model
            fitted_values = fitted_model.fittedvalues
            actual_values = store_dept_data['Weekly_Sales'].values
            holiday_values = store_dept_data['IsHoliday'].values

            # Align fitted values with actual values (fitted values might be shorter)
            if len(fitted_values) > 0 and len(fitted_values) <= len(actual_values):
                # Take the last N actual values to match fitted values length
                start_idx = len(actual_values) - len(fitted_values)
                aligned_actuals = actual_values[start_idx:]
                aligned_holidays = holiday_values[start_idx:]

                train_predictions.extend(fitted_values.values)
                train_actuals.extend(aligned_actuals)
                train_holidays.extend(aligned_holidays)

        except Exception as e:
            # Skip problematic models
            continue

    if len(train_predictions) > 0:
        train_wmae = calculate_wmae(
            np.array(train_actuals),
            np.array(train_predictions),
            np.array(train_holidays).astype(bool)
        )
        print(f"   📈 Training WMAE: ${train_wmae:,.2f}")
        return train_wmae
    else:
        print("   ⚠️ No training predictions available for WMAE calculation")
        return None


In [13]:
def main():
    """Main experiment execution"""
    print("🚀 Starting Experiment ARIMA: ARIMA Models for Walmart Sales Forecasting")
    print("=" * 80)

    # Setup MLflow tracking
    experiment_name = setup_mlflow()

    with mlflow.start_run(run_name="ARIMA_Walmart_Sales_Complete") as run:
        print(f"🔄 Starting MLflow run: {run.info.run_id}")

        # Log experiment metadata
        mlflow.log_param("experiment_type", "ARIMA_Individual_Models")
        mlflow.log_param("model_type", "ARIMA")
        mlflow.log_param("feature_engineering", "Minimal_Time_Series_Only")
        mlflow.log_param("data_split", "temporal_80_20")
        mlflow.log_param("external_regressors", "None")
        mlflow.log_param("outlier_removal", "Holiday_Aware_Thresholds")
        mlflow.log_param("holiday_weight_evaluation", "5x")

        # Log outlier thresholds for transparency
        mlflow.log_param("outlier_thresholds_non_holiday_A", "[-1000, 35000]")
        mlflow.log_param("outlier_thresholds_non_holiday_B", "[-500, 20000]")
        mlflow.log_param("outlier_thresholds_non_holiday_C", "[-200, 12000]")
        mlflow.log_param("outlier_thresholds_holiday_A", "[-1000, 80000]")
        mlflow.log_param("outlier_thresholds_holiday_B", "[-500, 50000]")
        mlflow.log_param("outlier_thresholds_holiday_C", "[-200, 30000]")

        try:
            # Step 1: Get preprocessed data
            print("\n📊 Step 1: Data preprocessing...")
            train_data, val_data, split_info = get_preprocessed_data()

            # Log data info
            mlflow.log_metric("train_samples", len(train_data))
            mlflow.log_metric("val_samples", len(val_data))
            mlflow.log_param("split_date", str(split_info['split_date']))

            # Log Store-Dept combination info
            train_combinations = set(zip(train_data['Store'], train_data['Dept']))
            val_combinations = set(zip(val_data['Store'], val_data['Dept']))

            mlflow.log_metric("train_combinations", len(train_combinations))
            mlflow.log_metric("val_combinations", len(val_combinations))
            mlflow.log_metric("missing_combinations", len(train_combinations - val_combinations))

            # Step 2: Train ARIMA models
            print("\n📈 Step 2: Training ARIMA models...")
            models, training_errors, model_orders = train_arima_models(train_data, val_data)

            # Log training info
            mlflow.log_metric("successful_models", len(models))
            mlflow.log_metric("avg_training_mae", np.mean(list(training_errors.values())) if training_errors else 0)

            # Log model order statistics
            if model_orders:
                orders = list(model_orders.values())
                avg_p = np.mean([o[0] for o in orders])
                avg_d = np.mean([o[1] for o in orders])
                avg_q = np.mean([o[2] for o in orders])

                mlflow.log_metric("avg_arima_p", avg_p)
                mlflow.log_metric("avg_arima_d", avg_d)
                mlflow.log_metric("avg_arima_q", avg_q)

            # Step 3: Make predictions
            print("\n📈 Step 3: Making predictions...")
            y_pred, y_true, is_holiday = make_arima_predictions(models, val_data, train_data)

            # Step 3.5: Calculate training WMAE
            print("\n📊 Step 3.5: Training performance...")
            train_wmae = calculate_training_wmae(models, train_data)

            # Step 4: Calculate metrics
            print("\n📊 Step 4: Calculating validation metrics...")

            # Validation metrics
            val_mae = mean_absolute_error(y_true, y_pred)
            val_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            val_r2 = r2_score(y_true, y_pred)
            val_wmae = calculate_wmae(y_true, y_pred, is_holiday)

            # Holiday breakdown
            holiday_mask = is_holiday.astype(bool)
            holiday_mae = mean_absolute_error(y_true[holiday_mask], y_pred[holiday_mask]) if holiday_mask.any() else 0
            non_holiday_mae = mean_absolute_error(y_true[~holiday_mask], y_pred[~holiday_mask]) if (~holiday_mask).any() else 0

            # Log all metrics
            mlflow.log_metric("val_wmae", val_wmae)
            mlflow.log_metric("val_mae", val_mae)
            mlflow.log_metric("val_rmse", val_rmse)
            mlflow.log_metric("val_r2", val_r2)
            mlflow.log_metric("holiday_mae", holiday_mae)
            mlflow.log_metric("non_holiday_mae", non_holiday_mae)
            mlflow.log_metric("holiday_samples", int(holiday_mask.sum()))
            mlflow.log_metric("non_holiday_samples", int((~holiday_mask).sum()))

            # Log training WMAE if available
            if train_wmae is not None:
                mlflow.log_metric("train_wmae", train_wmae)

            # Print results
            print("\n" + "=" * 60)
            print("🎯 EXPERIMENT ARIMA RESULTS SUMMARY")
            print("=" * 60)

            if train_wmae is not None:
                print("📊 Training Metrics:")
                print(f"   Training WMAE: ${train_wmae:,.2f}")
                print()

            print("📊 Validation Metrics:")
            print(f"   WMAE (Competition Metric): ${val_wmae:,.2f}")
            print(f"   MAE: ${val_mae:,.2f}")
            print(f"   RMSE: ${val_rmse:,.2f}")
            print(f"   R²: {val_r2:.4f}")

            print("\n📊 Holiday Breakdown:")
            print(f"   Holiday MAE: ${holiday_mae:,.2f} ({int(holiday_mask.sum())} samples)")
            print(f"   Non-Holiday MAE: ${non_holiday_mae:,.2f} ({int((~holiday_mask).sum())} samples)")

            print("\n📊 Model Statistics:")
            print(f"   Successful models trained: {len(models):,}")
            print(f"   Store-Dept combinations: {len(train_combinations):,}")
            print(f"   Average training MAE: ${np.mean(list(training_errors.values())):,.2f}" if training_errors else "   No training errors calculated")

            # Save model summary
            print("\n💾 Saving model artifacts...")
            model_summary = {
                'total_models': len(models),
                'model_orders': {f"{k[0]}_{k[1]}": v for k, v in model_orders.items()},
                'training_errors': {f"{k[0]}_{k[1]}": v for k, v in training_errors.items()},
                'validation_metrics': {
                    'wmae': val_wmae,
                    'mae': val_mae,
                    'rmse': val_rmse,
                    'r2': val_r2
                }
            }

            # Save summary to file and log as artifact
            import json
            with open('arima_model_summary.json', 'w') as f:
                json.dump(model_summary, f, indent=2)
            mlflow.log_artifact('arima_model_summary.json')

            print("✅ Experiment ARIMA completed successfully!")

            # Get experiment and run URLs
            experiment = mlflow.get_experiment_by_name(experiment_name)
            if experiment:
                experiment_id = experiment.experiment_id
                run_id = run.info.run_id
                base_url = "https://dagshub.com/konstantine25b/Walmart-Recruiting---Store-Sales-Forecasting.mlflow"

                print(f"🏃 View run at: {base_url}/#/experiments/{experiment_id}/runs/{run_id}")
                print(f"�� View experiment at: {base_url}/#/experiments/{experiment_id}")

            print(f"\n🎉 Experiment ARIMA: Individual ARIMA Models - COMPLETE!")

        except Exception as e:
            print(f"❌ Experiment failed: {e}")
            mlflow.log_param("error", str(e))
            raise


if __name__ == "__main__":
    main()


🚀 Starting Experiment ARIMA: ARIMA Models for Walmart Sales Forecasting
🔧 Setting up MLflow and DagsHub...


✅ DagsHub initialized successfully!
✅ Created new experiment: Experiment_ARIMA_20250715_091050
✅ MLflow setup complete!
🔗 Tracking URI: https://dagshub.com/konstantine25b/Walmart-Recruiting---Store-Sales-Forecasting.mlflow
📊 Experiment: Experiment_ARIMA_20250715_091050
🔄 Starting MLflow run: b1d1da1fa1fa496893067dc71ceb0b9e

📊 Step 1: Data preprocessing...
🔄 Getting preprocessed data using pipeline...
📊 Loading datasets...
   📈 Train data: (421570, 5)
   🏪 Stores data: (45, 3)
   ✅ Merged data: (421570, 7)
   📅 Date range: 2010-02-05 00:00:00 to 2012-10-26 00:00:00
📅 Creating temporal split (80/19)...
   📊 Split date: 2012-04-13 00:00:00
   📈 Train: 337,256 records (2010-02-05 00:00:00 to 2012-04-13 00:00:00)
   📉 Val: 84,314 records (2012-04-13 00:00:00 to 2012-10-26 00:00:00)
🔧 Fitting preprocessing pipeline on training data...
✅ Pipeline fitted on training data with holiday-aware outlier thresholds
🔄 Transforming training data...
   🗑️ Removed 60,884 outliers from training data
    



   ✅ Trained 201/3248 models (178 successful, 23 failed)
   ✅ Trained 401/3248 models (353 successful, 48 failed)
   ✅ Trained 601/3248 models (534 successful, 67 failed)
   ✅ Trained 801/3248 models (701 successful, 100 failed)
   ✅ Trained 1001/3248 models (865 successful, 136 failed)
   ✅ Trained 1201/3248 models (1043 successful, 158 failed)
   ✅ Trained 1601/3248 models (1386 successful, 215 failed)
   ✅ Trained 1801/3248 models (1548 successful, 253 failed)




   ✅ Trained 2201/3248 models (1899 successful, 302 failed)
   ✅ Trained 2401/3248 models (2069 successful, 332 failed)
   ✅ Trained 2601/3248 models (2241 successful, 360 failed)
   ✅ Trained 2801/3248 models (2406 successful, 395 failed)
   ✅ Trained 3201/3248 models (2730 successful, 471 failed)
✅ ARIMA training complete!
   🎯 Successful models: 2767
   ❌ Failed models: 481
   📊 Coverage: 2767/3248 (85.2%)

📈 Step 3: Making predictions...
📈 Making ARIMA predictions (no fallbacks)...
✅ Predictions complete!
   🎯 ARIMA predictions: 73344
   ⏭️ Skipped (no model): 10970

📊 Step 3.5: Training performance...
📊 Calculating training WMAE on fitted values...
   📈 Training WMAE: $1,556.27

📊 Step 4: Calculating validation metrics...

🎯 EXPERIMENT ARIMA RESULTS SUMMARY
📊 Training Metrics:
   Training WMAE: $1,556.27

📊 Validation Metrics:
   WMAE (Competition Metric): $2,011.67
   MAE: $1,976.46
   RMSE: $4,574.46
   R²: 0.8273

📊 Holiday Breakdown:
   Holiday MAE: $2,261.87 (2580 samples)
  