In [27]:
# Cell 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy import stats
import json
import os
from pathlib import Path

# Visualization settings
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
sns.set_palette("viridis")

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import xgboost as xgb
from datetime import datetime

# # Set visualization style
# plt.style.use('seaborn')
# sns.set_palette("husl")

In [28]:
def create_lag_features(data, target_col='AQI', n_lags=7):
    """Create lag features for the target column"""
    data = data.sort_values('Date')
    for i in range(1, n_lags + 1):
        data[f'{target_col}_lag_{i}'] = data[target_col].shift(i)
    return data

# Cell 2: Load pre-split data for each city
processed_dir = '../data/processed'
cities = ['bengaluru', 'chennai', 'delhi', 'hyderabad']

# Dictionary to store data for each city
city_data = {}

for city in cities:
    city_dir = f'{processed_dir}/{city.lower()}'
    city_data[city] = {
        'train': pd.read_csv(f'{city_dir}/train.csv'),
        'val': pd.read_csv(f'{city_dir}/val.csv'),
        'test': pd.read_csv(f'{city_dir}/test.csv')
    }
    
    # Convert date columns to datetime
    for split in ['train', 'val', 'test']:
        city_data[city][split]['Date'] = pd.to_datetime(city_data[city][split]['Date'])
        
        # Check if lag features exist, if not create them
        if 'AQI_lag_1' not in city_data[city][split].columns:
            print(f"Creating lag features for {city} {split} set")
            city_data[city][split] = create_lag_features(city_data[city][split])
    
    print(f"\n{city.title()} data loaded:")
    print(f"Train: {city_data[city]['train'].shape[0]} samples")
    print(f"Validation: {city_data[city]['val'].shape[0]} samples")
    print(f"Test: {city_data[city]['test'].shape[0]} samples")

Creating lag features for bengaluru train set
Creating lag features for bengaluru val set
Creating lag features for bengaluru test set

Bengaluru data loaded:
Train: 1241 samples
Validation: 287 samples
Test: 382 samples
Creating lag features for chennai train set
Creating lag features for chennai val set
Creating lag features for chennai test set

Chennai data loaded:
Train: 1224 samples
Validation: 283 samples
Test: 377 samples
Creating lag features for delhi train set
Creating lag features for delhi val set
Creating lag features for delhi test set

Delhi data loaded:
Train: 1299 samples
Validation: 300 samples
Test: 400 samples
Creating lag features for hyderabad train set
Creating lag features for hyderabad val set
Creating lag features for hyderabad test set

Hyderabad data loaded:
Train: 1222 samples
Validation: 282 samples
Test: 376 samples


In [29]:
# 1. First, let's load the preprocessed data for each city
def load_city_data(city_name):
    """Load preprocessed data for a specific city"""
    data_dir = "../data/processed"
    city_data = {}
    
    for split in ['train', 'val', 'test']:
        file_path = f"{data_dir}/{city_name}/{split}.csv"
        # print(file_path)
        if os.path.exists(file_path):
            city_data[split] = pd.read_csv(file_path)
            city_data[split]['date'] = pd.to_datetime(city_data[split]['Date'])
    
    return city_data



In [30]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

device = (
    torch.device("mps") 
    if torch.backends.mps.is_available() 
    else torch.device("cpu")
)
print(f"Using device: {device}")


Using device: cpu


In [31]:
class CNNRegressor(nn.Module):
    def __init__(self, input_channels=1, input_length=7):
        super(CNNRegressor, self).__init__()
        
        # Improved CNN architecture
        self.features = nn.Sequential(
            # First conv block
            nn.Conv1d(input_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            # Second conv block
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            # Third conv block
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            # Global average pooling
            nn.AdaptiveAvgPool1d(1)
        )
        
        # Fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x.squeeze()


In [32]:
def calculate_cnn_feature_importance(model, X_val, feature_names):
    """Calculate feature importance for CNN model using gradient-based method
    
    Args:
        model (CNNRegressor): Trained CNN model
        X_val (torch.Tensor): Validation input data
        feature_names (list): List of feature names
        
    Returns:
        dict: Dictionary mapping feature names to their importance scores
    """
    model.eval()
    X_val = X_val.requires_grad_(True)
    
    # Forward pass
    output = model(X_val)
    
    # Calculate gradients with respect to input
    output.backward(torch.ones_like(output))
    
    # Get absolute gradients and average across samples
    gradients = torch.abs(X_val.grad)
    importance = torch.mean(gradients, dim=0).squeeze()
    
    # Convert to dictionary format and move to CPU if needed
    feature_importance = dict(zip(feature_names, importance.detach().cpu().numpy()))
    
    return feature_importance

In [33]:
def plot_metrics_comparison(city, results):
    """Plot comparison of metrics for different models
    
    Args:
        city (str): Name of the city
        results (dict): Dictionary containing model metrics
        
    Returns:
        matplotlib.figure.Figure: Figure object containing the plots
    """
    # Define metrics and splits
    metrics = ['rmse', 'mae', 'r2']
    splits = ['val', 'test']
    
    # Create figure
    fig, axes = plt.subplots(1, len(metrics), figsize=(6*len(metrics), 6))
    if len(metrics) == 1:
        axes = [axes]
    
    # Plot each metric
    for i, metric in enumerate(metrics):
        ax = axes[i]
        
        # Collect values for each split
        values = {split: [] for split in splits}
        
        for split in splits:
            metric_key = f'{split}_{metric}'
            if metric_key in results:
                values[split].append(results[metric_key])
        
        # Plot bars
        x = np.arange(1)
        width = 0.35
        
        for j, split in enumerate(splits):
            if values[split]:
                ax.bar(x + j*width, values[split], width, label=split.capitalize())
        
        # Customize plot
        ax.set_xlabel('Model')
        ax.set_ylabel(metric.upper())
        ax.set_title(f'{metric.upper()} Comparison')
        ax.set_xticks(x + width/2)
        ax.set_xticklabels(['CNN'])
        ax.legend()
        ax.grid(True, alpha=0.3)
    
    plt.suptitle(f'Model Performance Comparison - {city.title()}', y=1.05)
    plt.tight_layout()
    return fig

In [34]:
def plot_feature_importance(city, feature_importance):
    """Plot feature importance for CNN model
    
    Args:
        city (str): Name of the city
        feature_importance (dict): Dictionary mapping features to their importance scores
        
    Returns:
        matplotlib.figure.Figure: Figure object containing the plot
    """
    if not feature_importance:
        print("No feature importance data available")
        return
    
    # Sort features by importance
    sorted_features = sorted(feature_importance.items(), key=lambda x: abs(x[1]), reverse=True)
    features, values = zip(*sorted_features)
    
    # Create figure
    fig, ax = plt.subplots(figsize=(15, 8))
    
    # Create bar plot
    bars = ax.barh(features, values)
    
    # Color bars based on sign
    for bar in bars:
        if bar.get_width() < 0:
            bar.set_color('red')
        else:
            bar.set_color('blue')
    
    # Add vertical line at x=0
    ax.axvline(x=0, color='black', linestyle='--', alpha=0.3)
    
    # Customize plot
    ax.set_title(f'Feature Importance - CNN ({city.title()})')
    ax.set_xlabel('Importance')
    ax.set_ylabel('Features')
    
    plt.tight_layout()
    return fig

In [35]:
def plot_predictions(city, results, data_split, actual_values):
    """Plot actual vs predicted values
    
    Args:
        city (str): Name of the city
        results (dict): Dictionary containing model predictions
        data_split (str): Either 'val' or 'test'
        actual_values (array-like): Actual target values
        
    Returns:
        matplotlib.figure.Figure: Figure object containing the plot
    """
    fig, ax = plt.subplots(figsize=(15, 8))
    
    # Plot actual values
    ax.plot(actual_values, label='Actual', color='black', alpha=0.7)
    
    # Plot predictions
    predictions = results['predictions'][data_split]
    ax.plot(predictions, label='CNN', alpha=0.7)
    
    # Customize plot
    ax.set_xlabel('Time')
    ax.set_ylabel('AQI')
    ax.set_title(f'Actual vs Predicted AQI - {data_split.capitalize()}')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    return fig

In [47]:
def save_cnn_results(city, results, feature_importance, model):
    """Save CNN model results and visualizations"""
    # Create results directory with a simple model name
    model_name = model.__class__.__name__  # This will give us just 'SmallCNNRegressor' or similar
    results_dir = f"./results/cnn_models/{model_name}/{city}"
    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(f"{results_dir}/visualizations", exist_ok=True)
    
    # Convert numpy arrays and float32 to native Python types
    metrics = {
        'val_rmse': float(results['val_metrics']['rmse']),
        'val_mae': float(results['val_metrics']['mae']),
        'val_r2': float(results['val_metrics']['r2']),
        'test_rmse': float(results['test_metrics']['rmse']),
        'test_mae': float(results['test_metrics']['mae']),
        'test_r2': float(results['test_metrics']['r2']),
        'model_config': {
            'name': model_name,
            'parameters': {
                'epochs': 200,
                'batch_size': 32,
                'learning_rate': 0.001
            },
            'feature_importance': {k: float(v) for k, v in feature_importance.items()}
        }
    }
    
    # Save metrics to JSON file
    with open(f"{results_dir}/cnn_results.json", 'w') as f:
        json.dump(metrics, f, indent=4)
    
    # Generate and save plots
    fig = plot_metrics_comparison(city, metrics)
    fig.savefig(f"{results_dir}/visualizations/metrics_comparison.png", bbox_inches='tight', dpi=300)
    plt.close(fig)
    
    fig = plot_feature_importance(city, feature_importance)
    fig.savefig(f"{results_dir}/visualizations/feature_importance.png", bbox_inches='tight', dpi=300)
    plt.close(fig)
    
    # Plot predictions for each split
    for split in ['val', 'test']:
        actual_values = results['actual_values'][split]
        fig = plot_predictions(city, results, split, actual_values)
        fig.savefig(f"{results_dir}/visualizations/predictions_{split}.png", bbox_inches='tight', dpi=300)
        plt.close(fig)

In [37]:
def prepare_cnn_data_torch(city_data):
    numeric_cols = city_data['train'].select_dtypes(include=[np.number]).columns
    numeric_cols = [col for col in numeric_cols if col != 'AQI']
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(city_data['train'][numeric_cols])
    X_val = scaler.transform(city_data['val'][numeric_cols])
    X_test = scaler.transform(city_data['test'][numeric_cols])
    
    X_train = torch.tensor(X_train[:, :, np.newaxis], dtype=torch.float32).permute(0, 2, 1)
    X_val = torch.tensor(X_val[:, :, np.newaxis], dtype=torch.float32).permute(0, 2, 1)
    X_test = torch.tensor(X_test[:, :, np.newaxis], dtype=torch.float32).permute(0, 2, 1)
    
    y_train = torch.tensor(city_data['train']['AQI'].values, dtype=torch.float32).unsqueeze(1)
    y_val = torch.tensor(city_data['val']['AQI'].values, dtype=torch.float32).unsqueeze(1)
    y_test = torch.tensor(city_data['test']['AQI'].values, dtype=torch.float32).unsqueeze(1)
    
    return (X_train, y_train), (X_val, y_val), (X_test, y_test), len(numeric_cols)


In [19]:
def train_and_evaluate_cnn_torch(city_data, city_name, epochs=200, batch_size=32, lr=0.001):
    """Train and evaluate CNN model with improved training process"""
    print(f"\n🔄 Preprocessing data for {city_name}...")
    
    # Get numeric columns (excluding AQI)
    numeric_cols = city_data['train'].select_dtypes(include=[np.number]).columns
    numeric_cols = [col for col in numeric_cols if col not in ['AQI']]
    
    # Handle missing values in features and target
    X_train = city_data['train'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_train = city_data['train']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    X_val = city_data['val'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_val = city_data['val']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    X_test = city_data['test'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_test = city_data['test']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    
    # Scale targets (important for neural networks)
    target_scaler = StandardScaler()
    y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1)).ravel()
    y_val_scaled = target_scaler.transform(y_val.values.reshape(-1, 1)).ravel()
    y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1)).ravel()
    
    # Convert to PyTorch tensors
    X_train = torch.FloatTensor(X_train_scaled).unsqueeze(1)
    y_train = torch.FloatTensor(y_train_scaled)
    X_val = torch.FloatTensor(X_val_scaled).unsqueeze(1)
    y_val = torch.FloatTensor(y_val_scaled)
    X_test = torch.FloatTensor(X_test_scaled).unsqueeze(1)
    y_test = torch.FloatTensor(y_test_scaled)
    
    # Create data loaders
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)
    
    # Initialize model
    print("🔧 Initializing model...")
    device = torch.device('cpu')
    input_len = X_train.shape[2]
    model = CNNRegressor(input_channels=1, input_length=input_len).to(device)
    
    # Use AdamW optimizer with weight decay
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    
    # Use MSE loss
    criterion = nn.MSELoss()
    
    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    
    # Training loop
    print("🏃 Starting training...")
    best_val_loss = float('inf')
    patience = 15
    patience_counter = 0
    
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        val_preds = []
        val_true = []
        
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_preds.extend(outputs.cpu().numpy())
                val_true.extend(batch_y.cpu().numpy())
        
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        
        # Learning rate scheduling
        scheduler.step(avg_val_loss)
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            best_model_state = model.state_dict()
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
            print(f"⚠️ Early stopping at epoch {epoch+1}")
            model.load_state_dict(best_model_state)
            break
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
    
    # Final evaluation
    print("📊 Evaluating model...")
    model.eval()
    
    # Get predictions
    def get_predictions(loader):
        preds = []
        true = []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                batch_X = batch_X.to(device)
                outputs = model(batch_X)
                preds.extend(outputs.cpu().numpy())
                true.extend(batch_y.numpy())
        return np.array(preds), np.array(true)
    
    val_preds, val_true = get_predictions(val_loader)
    test_preds, test_true = get_predictions(test_loader)
    
    # Inverse transform predictions and true values
    val_preds = target_scaler.inverse_transform(val_preds.reshape(-1, 1)).ravel()
    val_true = target_scaler.inverse_transform(val_true.reshape(-1, 1)).ravel()
    test_preds = target_scaler.inverse_transform(test_preds.reshape(-1, 1)).ravel()
    test_true = target_scaler.inverse_transform(test_true.reshape(-1, 1)).ravel()
    
    # Calculate metrics
    def calc_metrics(y_true, y_pred):
        return {
            'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
            'mae': mean_absolute_error(y_true, y_pred),
            'r2': r2_score(y_true, y_pred)
        }
    
    val_metrics = calc_metrics(val_true, val_preds)
    test_metrics = calc_metrics(test_true, test_preds)
    
    print(f"\n📍 Evaluation Results for {city_name} (PyTorch CNN)")
    print("\nValidation Metrics:")
    print(f"RMSE: {val_metrics['rmse']:.2f}")
    print(f"MAE: {val_metrics['mae']:.2f}")
    print(f"R²: {val_metrics['r2']:.2f}")
    
    print("\nTest Metrics:")
    print(f"RMSE: {test_metrics['rmse']:.2f}")
    print(f"MAE: {test_metrics['mae']:.2f}")
    print(f"R²: {test_metrics['r2']:.2f}")
    # Calculate feature importance
    feature_importance = calculate_cnn_feature_importance(model, X_val, numeric_cols)

    results = {
        'model': model,
        'val_metrics': val_metrics,
        'test_metrics': test_metrics,
        'predictions': {
            'val': val_preds,
            'test': test_preds
        },
        'actual_values': {
            'val': y_val.numpy(),
            'test': y_test.numpy()
        }
    }
    save_cnn_results(city_name, results, feature_importance)
    return results

In [22]:
# cnn_torch_results = train_and_evaluate_cnn_torch(city_data['delhi'], 'delhi')



🔄 Preprocessing data for delhi...
🔧 Initializing model...
🏃 Starting training...
Epoch [10/200], Train Loss: 0.2529, Val Loss: 0.0784
Epoch [20/200], Train Loss: 0.1955, Val Loss: 0.0695
Epoch [30/200], Train Loss: 0.1803, Val Loss: 0.0846
⚠️ Early stopping at epoch 33
📊 Evaluating model...

📍 Evaluation Results for delhi (PyTorch CNN)

Validation Metrics:
RMSE: 32.59
MAE: 25.21
R²: 0.92

Test Metrics:
RMSE: 34.40
MAE: 25.40
R²: 0.91


In [43]:
def train_and_evaluate_cnn_torch_v2(city_data, city_name, model, epochs=200, batch_size=32, lr=0.001):
    """Train and evaluate CNN model with improved training process - Version 2 with model parameter support"""
    print(f"\n🔄 Preprocessing data for {city_name}...")
    
    # Get numeric columns (excluding AQI)
    numeric_cols = city_data['train'].select_dtypes(include=[np.number]).columns
    numeric_cols = [col for col in numeric_cols if col not in ['AQI']]
    
    # Handle missing values in features and target
    X_train = city_data['train'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_train = city_data['train']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    X_val = city_data['val'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_val = city_data['val']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    X_test = city_data['test'][numeric_cols].fillna(method='ffill').fillna(method='bfill')
    y_test = city_data['test']['AQI'].fillna(method='ffill').fillna(method='bfill')
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    
    # Scale targets (important for neural networks)
    target_scaler = StandardScaler()
    y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1)).ravel()
    y_val_scaled = target_scaler.transform(y_val.values.reshape(-1, 1)).ravel()
    y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1)).ravel()
    
    # Convert to PyTorch tensors
    X_train = torch.FloatTensor(X_train_scaled).unsqueeze(1)
    y_train = torch.FloatTensor(y_train_scaled)
    X_val = torch.FloatTensor(X_val_scaled).unsqueeze(1)
    y_val = torch.FloatTensor(y_val_scaled)
    X_test = torch.FloatTensor(X_test_scaled).unsqueeze(1)
    y_test = torch.FloatTensor(y_test_scaled)
    
    # Create data loaders
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)
    
    # Initialize model
    print("🔧 Initializing model...")
    
    print(f"Using model: {model.__class__.__name__}")
    model = model.to(device)
    
    # Use AdamW optimizer with weight decay
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    
    # Use MSE loss
    criterion = nn.MSELoss()
    
    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    
    # Training loop
    print("🏃 Starting training...")
    best_val_loss = float('inf')
    patience = 15
    patience_counter = 0
    
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        val_preds = []
        val_true = []
        
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_preds.extend(outputs.cpu().numpy())
                val_true.extend(batch_y.cpu().numpy())
        
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        
        # Learning rate scheduling
        scheduler.step(avg_val_loss)
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            best_model_state = model.state_dict()
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
            print(f"⚠️ Early stopping at epoch {epoch+1}")
            model.load_state_dict(best_model_state)
            break
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
    
    # Final evaluation
    print("📊 Evaluating model...")
    model.eval()
    
    # Get predictions
    def get_predictions(loader):
        preds = []
        true = []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                batch_X = batch_X.to(device)
                outputs = model(batch_X)
                preds.extend(outputs.cpu().numpy())
                true.extend(batch_y.numpy())
        return np.array(preds), np.array(true)
    
    val_preds, val_true = get_predictions(val_loader)
    test_preds, test_true = get_predictions(test_loader)
    
    # Inverse transform predictions and true values
    val_preds = target_scaler.inverse_transform(val_preds.reshape(-1, 1)).ravel()
    val_true = target_scaler.inverse_transform(val_true.reshape(-1, 1)).ravel()
    test_preds = target_scaler.inverse_transform(test_preds.reshape(-1, 1)).ravel()
    test_true = target_scaler.inverse_transform(test_true.reshape(-1, 1)).ravel()
    
    # Calculate metrics
    def calc_metrics(y_true, y_pred):
        return {
            'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
            'mae': mean_absolute_error(y_true, y_pred),
            'r2': r2_score(y_true, y_pred)
        }
    
    val_metrics = calc_metrics(val_true, val_preds)
    test_metrics = calc_metrics(test_true, test_preds)
    
    print(f"\n📍 Evaluation Results for {city_name} (PyTorch CNN)")
    print("\nValidation Metrics:")
    print(f"RMSE: {val_metrics['rmse']:.2f}")
    print(f"MAE: {val_metrics['mae']:.2f}")
    print(f"R²: {val_metrics['r2']:.2f}")
    
    print("\nTest Metrics:")
    print(f"RMSE: {test_metrics['rmse']:.2f}")
    print(f"MAE: {test_metrics['mae']:.2f}")
    print(f"R²: {test_metrics['r2']:.2f}")
    
     # Calculate feature importance
    feature_importance = calculate_cnn_feature_importance(model, X_val, numeric_cols)

    results = {
        'model': model,
        'val_metrics': val_metrics,
        'test_metrics': test_metrics,
        'predictions': {
            'val': val_preds,
            'test': test_preds
        },
        'actual_values': {
            'val': y_val.numpy(),
            'test': y_test.numpy()
        }
    }
    save_cnn_results(city_name, results, feature_importance, model)
    return results

In [39]:
# Small CNN (our current model as baseline)
class SmallCNNRegressor(nn.Module):
    def __init__(self, input_channels=1, input_length=7):
        super(SmallCNNRegressor, self).__init__()
        
        # Small architecture [32, 64, 128] -> [64, 32, 1]
        self.features = nn.Sequential(
            nn.Conv1d(input_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.AdaptiveAvgPool1d(1)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x.squeeze()


# Medium CNN with more layers and units
class MediumCNNRegressor(nn.Module):
    def __init__(self, input_channels=1, input_length=7):
        super(MediumCNNRegressor, self).__init__()
        
        # Medium architecture [64, 128, 256, 512] -> [256, 128, 64, 1]
        self.features = nn.Sequential(
            nn.Conv1d(input_channels, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Conv1d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.AdaptiveAvgPool1d(1)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1)
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x.squeeze()

# Large CNN with even more layers and units
class LargeCNNRegressor(nn.Module):
    def __init__(self, input_channels=1, input_length=7):
        super(LargeCNNRegressor, self).__init__()
        
        # Large architecture [128, 256, 512, 1024, 2048] -> [1024, 512, 256, 128, 1]
        self.features = nn.Sequential(
            nn.Conv1d(input_channels, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Conv1d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Conv1d(512, 1024, kernel_size=3, padding=1),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Conv1d(1024, 2048, kernel_size=3, padding=1),
            nn.BatchNorm1d(2048),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.AdaptiveAvgPool1d(1)
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x.squeeze()

        

def train_model_variant(model_class, model_name, city_data, city_name, **kwargs):
    """Train a specific model variant"""
    # Default parameters
    params = {
        'epochs': 200,
        'batch_size': 32,
        'learning_rate': 0.001
    }
    params.update(kwargs)
    
    print(f"\n🔄 Training {model_name} for {city_name}...")
    model = model_class(input_channels=1)
    results = train_and_evaluate_cnn_torch_v2(
        city_data, 
        city_name,
        model=model,
        epochs=params['epochs'],
        batch_size=params['batch_size'],
        lr=params['learning_rate']
    )
    return results

# Example usage:
model_configs = {
    'small_cnn': {
        'model_class': SmallCNNRegressor,
        'params': {
            'batch_size': 32,
            'epochs': 200,
            'learning_rate': 0.001
        }
    },
    'medium_cnn': {
        'model_class': MediumCNNRegressor,
        'params': {
            'batch_size': 64,
            'epochs': 200,
            'learning_rate': 0.0005
        }
    },
    'large_cnn': {
        'model_class': LargeCNNRegressor,
        'params': {
            'batch_size': 32,
            'epochs': 200,
            'learning_rate': 0.0005
        }
    }
}



In [40]:
def load_city_data(city_name):
    """Load and prepare data for a specific city"""
    # First, let's search for the data files
    print(f"Loading data for {city_name}...")
    
    def create_lag_features(data, target_col='AQI', n_lags=7):
        """Create lag features for the target column"""
        data = data.sort_values('Date')
        for i in range(1, n_lags + 1):
            data[f'{target_col}_lag_{i}'] = data[target_col].shift(i)
        return data

    # Load data from the processed directory
    processed_dir = '../data/processed'
    city_dir = f'{processed_dir}/{city_name.lower()}'
    
    city_data = {}
    
    # Load train, validation, and test sets
    for split in ['train', 'val', 'test']:
        file_path = f'{city_dir}/{split}.csv'
        try:
            data = pd.read_csv(file_path)
            data['Date'] = pd.to_datetime(data['Date'])
            
            # Create lag features if they don't exist
            if 'AQI_lag_1' not in data.columns:
                data = create_lag_features(data)
            
            city_data[split] = data
            print(f"Loaded {split} set: {len(data)} samples")
        except FileNotFoundError:
            print(f"Warning: Could not find {file_path}")
            return None
    
    return city_data



In [49]:
# Train all variants
results = {}
cities = ["delhi", "bengaluru", "chennai", "hyderabad"]

for city in cities:
    city_data = load_city_data(city)
    for model_name, config in model_configs.items():
        results[model_name] = train_model_variant(
            config['model_class'],
            model_name,
            city_data,
            city,
            **config['params']
        )

Loading data for delhi...
Loaded train set: 1299 samples
Loaded val set: 300 samples
Loaded test set: 400 samples

🔄 Training small_cnn for delhi...

🔄 Preprocessing data for delhi...
🔧 Initializing model...
Using model: SmallCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.3076, Val Loss: 0.2369
Epoch [20/200], Train Loss: 0.2104, Val Loss: 0.1015
Epoch [30/200], Train Loss: 0.1927, Val Loss: 0.0861
Epoch [40/200], Train Loss: 0.1758, Val Loss: 0.0900
⚠️ Early stopping at epoch 41
📊 Evaluating model...

📍 Evaluation Results for delhi (PyTorch CNN)

Validation Metrics:
RMSE: 34.18
MAE: 26.55
R²: 0.91

Test Metrics:
RMSE: 33.81
MAE: 24.87
R²: 0.91

🔄 Training medium_cnn for delhi...

🔄 Preprocessing data for delhi...
🔧 Initializing model...
Using model: MediumCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.2203, Val Loss: 0.1189
Epoch [20/200], Train Loss: 0.2003, Val Loss: 0.0720
Epoch [30/200], Train Loss: 0.1783, Val Loss: 0.0758
Epoch [40/200], Tra

KeyboardInterrupt: 

In [25]:
# # Train all variants
# results = {}
# for model_name, config in model_configs.items():
#     results[model_name] = train_model_variant(
#         config['model_class'],
#         model_name,
#         city_data,
#         'bengaluru',
#         **config['params']
#     )


🔄 Training small_cnn for bengaluru...

🔄 Preprocessing data for bengaluru...
🔧 Initializing model...
Using model: SmallCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.5075, Val Loss: 0.1078
Epoch [20/200], Train Loss: 0.4118, Val Loss: 0.0739
Epoch [30/200], Train Loss: 0.3857, Val Loss: 0.0640
Epoch [40/200], Train Loss: 0.3019, Val Loss: 0.0638
Epoch [50/200], Train Loss: 0.3236, Val Loss: 0.0609
Epoch [60/200], Train Loss: 0.2905, Val Loss: 0.0674
Epoch [70/200], Train Loss: 0.3362, Val Loss: 0.0637
Epoch [80/200], Train Loss: 0.3008, Val Loss: 0.0727
⚠️ Early stopping at epoch 90
📊 Evaluating model...

📍 Evaluation Results for bengaluru (PyTorch CNN)

Validation Metrics:
RMSE: 14.86
MAE: 11.23
R²: 0.69

Test Metrics:
RMSE: 16.75
MAE: 13.16
R²: 0.78

🔄 Training medium_cnn for bengaluru...

🔄 Preprocessing data for bengaluru...
🔧 Initializing model...
Using model: MediumCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.4627, Val Loss: 0.0714
Epoch [

In [26]:
# # Train all variants
# results = {}
# for model_name, config in model_configs.items():
#     results[model_name] = train_model_variant(
#         config['model_class'],
#         model_name,
#         city_data,
#         'chennai',
#         **config['params']
#     )


🔄 Training small_cnn for chennai...

🔄 Preprocessing data for chennai...
🔧 Initializing model...
Using model: SmallCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.5495, Val Loss: 0.1582
Epoch [20/200], Train Loss: 0.4220, Val Loss: 0.0642
Epoch [30/200], Train Loss: 0.2935, Val Loss: 0.0779
⚠️ Early stopping at epoch 40
📊 Evaluating model...

📍 Evaluation Results for chennai (PyTorch CNN)

Validation Metrics:
RMSE: 14.92
MAE: 11.57
R²: 0.68

Test Metrics:
RMSE: 16.73
MAE: 13.11
R²: 0.78

🔄 Training medium_cnn for chennai...

🔄 Preprocessing data for chennai...
🔧 Initializing model...
Using model: MediumCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.4508, Val Loss: 0.0827
Epoch [20/200], Train Loss: 0.3624, Val Loss: 0.0568
Epoch [30/200], Train Loss: 0.3611, Val Loss: 0.0608
Epoch [40/200], Train Loss: 0.3165, Val Loss: 0.0466
⚠️ Early stopping at epoch 42
📊 Evaluating model...

📍 Evaluation Results for chennai (PyTorch CNN)

Validation Metrics:
RM

In [None]:
# # Train all variants
# results = {}
# for model_name, config in model_configs.items():
#     results[model_name] = train_model_variant(
#         config['model_class'],
#         model_name,
#         city_data,
#         'hyderabad',
#         **config['params']
#     )


🔄 Training small_cnn for hyderabad...

🔄 Preprocessing data for hyderabad...
🔧 Initializing model...
Using model: SmallCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.2407, Val Loss: 0.1460
Epoch [20/200], Train Loss: 0.1944, Val Loss: 0.0876
⚠️ Early stopping at epoch 26
📊 Evaluating model...

📍 Evaluation Results for hyderabad (PyTorch CNN)

Validation Metrics:
RMSE: 38.95
MAE: 30.86
R²: 0.89

Test Metrics:
RMSE: 36.83
MAE: 27.83
R²: 0.90

🔄 Training medium_cnn for hyderabad...

🔄 Preprocessing data for hyderabad...
🔧 Initializing model...
Using model: MediumCNNRegressor
🏃 Starting training...
Epoch [10/200], Train Loss: 0.2466, Val Loss: 0.1416
Epoch [20/200], Train Loss: 0.1919, Val Loss: 0.0654
Epoch [30/200], Train Loss: 0.1764, Val Loss: 0.1036
⚠️ Early stopping at epoch 35
📊 Evaluating model...

📍 Evaluation Results for hyderabad (PyTorch CNN)

Validation Metrics:
RMSE: 32.71
MAE: 26.08
R²: 0.92

Test Metrics:
RMSE: 33.90
MAE: 25.55
R²: 0.91

🔄 Training large

KeyboardInterrupt: 