# Clean MLP Implementation for Store Sales Forecasting

A properly designed Multi-Layer Perceptron for time series forecasting with correct data handling, feature engineering, and model architecture.

In [None]:
import sys
import os
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import time
import json
import wandb
from typing import Dict, Tuple, List, Optional
warnings.filterwarnings('ignore')

sys.path.append('../src')
from evaluation.metrics import summary

# Select the compute device once; the model and batches are moved to it later.
_cuda_available = torch.cuda.is_available()
device = torch.device('cuda') if _cuda_available else torch.device('cpu')
print(f"Using device: {device}")

# Seed every random number generator used below with the same value.
_seed = 42
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(_seed)
if _cuda_available:
    torch.cuda.manual_seed(_seed)

# Input data and output artefact locations (relative to the working directory).
_project_root = Path('..')
DATA_DIR = _project_root / 'content' / 'data_processed'
RESULTS_DIR = _project_root / 'results' / 'neural_networks' / 'mlp'
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
class DataProcessor:
    """Fit and apply the preprocessing steps used before model training.

    The instance stores everything it fits so the same transformations can be
    re-applied to other splits and persisted with ``save_processors``:

    * ``scalers``  -- column name -> fitted ``StandardScaler``
    * ``encoders`` -- column name -> fitted ``LabelEncoder``
    * ``target_transform_params`` -- parameters of the target transform
    """

    # Sentinel class every encoder knows, used for values unseen during fit.
    _UNKNOWN = '<UNKNOWN>'

    def __init__(self):
        self.scalers = {}
        self.encoders = {}
        self.target_transform_params = {}

    def fit_target_transform(self, target_values: np.ndarray) -> np.ndarray:
        """Fit the shifted ``log1p`` target transform and apply it.

        When the minimum target is <= 0 the values are shifted by
        ``abs(min) + 1`` before ``log1p``; otherwise no shift is applied.

        Args:
            target_values: raw target values used to determine the shift.

        Returns:
            ``log1p(target_values + shift)``.
        """
        min_val = target_values.min()
        shift = abs(min_val) + 1 if min_val <= 0 else 0

        self.target_transform_params = {
            'method': 'log1p_shift',
            'shift': shift,
            'original_min': min_val,
            'original_max': target_values.max(),
            'original_mean': target_values.mean()
        }

        transformed = np.log1p(target_values + shift)
        print(f"Target transform: shift={shift:.3f}, original_range=[{min_val:.3f}, {target_values.max():.3f}]")
        print(f"Transformed range: [{transformed.min():.3f}, {transformed.max():.3f}]")

        return transformed

    def transform_target(self, target_values: np.ndarray) -> np.ndarray:
        """Apply the fitted target transform.

        Raises:
            KeyError: if ``fit_target_transform`` has not been called yet.
        """
        shift = self.target_transform_params['shift']
        return np.log1p(target_values + shift)

    def inverse_transform_target(self, transformed_values: np.ndarray) -> np.ndarray:
        """Map transformed targets back to the original scale.

        Raises:
            KeyError: if ``fit_target_transform`` has not been called yet.
        """
        shift = self.target_transform_params['shift']
        return np.expm1(transformed_values) - shift

    def add_time_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with calendar features derived from ``date``.

        Adds integer calendar parts, 0/1 calendar flags and sine/cosine
        encodings of month, day, day-of-week and day-of-year. If ``df`` has no
        ``date`` column the copy is returned unchanged. The input frame is
        never modified.
        """
        df = df.copy()

        if 'date' not in df.columns:
            return df

        # Remember what was there so the log line reports only what was added.
        original_columns = set(df.columns)

        df['date'] = pd.to_datetime(df['date'])
        dates = df['date'].dt

        # Basic time features
        df['year'] = dates.year
        df['month'] = dates.month
        df['day'] = dates.day
        df['dayofweek'] = dates.dayofweek
        df['quarter'] = dates.quarter
        df['dayofyear'] = dates.dayofyear
        df['weekofyear'] = dates.isocalendar().week

        # Binary features
        df['is_weekend'] = (df['dayofweek'] >= 5).astype(int)
        df['is_month_start'] = dates.is_month_start.astype(int)
        df['is_month_end'] = dates.is_month_end.astype(int)
        df['is_quarter_start'] = dates.is_quarter_start.astype(int)
        df['is_quarter_end'] = dates.is_quarter_end.astype(int)

        # Cyclical features: (source column, period) -> <column>_sin / <column>_cos
        # so that e.g. December and January end up close to each other.
        for source, period in (('month', 12), ('day', 31), ('dayofweek', 7), ('dayofyear', 365)):
            angle = 2 * np.pi * df[source] / period
            df[f'{source}_sin'] = np.sin(angle)
            df[f'{source}_cos'] = np.cos(angle)

        added = [c for c in df.columns if c not in original_columns]
        print(f"Added {len(added)} time features")
        return df

    def fit_categorical_encoders(self, df: pd.DataFrame, categorical_columns: List[str]):
        """Fit one ``LabelEncoder`` per listed column that exists in ``df``.

        Values are cast to ``str`` before fitting so the fitted classes have
        the same type as the ``'<UNKNOWN>'`` sentinel and as the values looked
        up in ``transform_categorical``; without the cast numeric categories
        would never match their own classes.
        """
        for col in categorical_columns:
            if col not in df.columns:
                continue
            encoder = LabelEncoder()
            # Add 'unknown' category to handle unseen values
            known_values = df[col].astype(str).unique().tolist()
            encoder.fit(known_values + [self._UNKNOWN])
            self.encoders[col] = encoder
            print(f"Encoder for {col}: {len(encoder.classes_)} classes")

    def transform_categorical(self, df: pd.DataFrame, categorical_columns: List[str]) -> pd.DataFrame:
        """Return a copy of ``df`` with the listed columns label-encoded.

        Columns without a fitted encoder (or missing from ``df``) are left
        untouched. Values not seen during fitting map to ``'<UNKNOWN>'``.
        """
        df = df.copy()

        for col in categorical_columns:
            encoder = self.encoders.get(col)
            if encoder is None or col not in df.columns:
                continue
            # Same str cast as in fit_categorical_encoders, then route
            # anything the encoder does not know to the sentinel class.
            values = df[col].astype(str)
            values = values.where(values.isin(encoder.classes_), self._UNKNOWN)
            df[col] = encoder.transform(values)

        return df

    def fit_numerical_scalers(self, df: pd.DataFrame, numerical_columns: List[str]):
        """Fit one ``StandardScaler`` per listed column that exists in ``df``."""
        for col in numerical_columns:
            if col in df.columns:
                scaler = StandardScaler()
                scaler.fit(df[[col]])
                self.scalers[col] = scaler
        print(f"Fitted scalers for {len(self.scalers)} numerical columns")

    def transform_numerical(self, df: pd.DataFrame, numerical_columns: List[str]) -> pd.DataFrame:
        """Return a copy of ``df`` with the listed columns scaled.

        Only columns that are present in ``df`` and have a fitted scaler are
        transformed.
        """
        df = df.copy()

        for col in numerical_columns:
            if col in df.columns and col in self.scalers:
                df[col] = self.scalers[col].transform(df[[col]]).flatten()

        return df

    def save_processors(self, path: Path):
        """Dump scalers, encoders and target-transform parameters with joblib.

        Args:
            path: directory in which ``data_processors.pkl`` is written.
        """
        import joblib

        processor_data = {
            'scalers': self.scalers,
            'encoders': self.encoders,
            'target_transform_params': self.target_transform_params
        }

        target_file = path / 'data_processors.pkl'
        joblib.dump(processor_data, target_file)
        print(f"Saved processors to {target_file}")

In [None]:
class SalesDataset(Dataset):
    """In-memory dataset of (feature row, target) pairs.

    Both tensors are converted to float32 and checked for NaN / Inf values at
    construction time, so bad data fails immediately rather than mid-training.
    """

    def __init__(self, features: torch.Tensor, targets: torch.Tensor):
        n_features, n_targets = len(features), len(targets)
        assert n_features == n_targets, "Features and targets must have same length"

        self.features = features.float()
        self.targets = targets.float()

        # (predicate, tensor, message) triples, evaluated in this order.
        sanity_checks = (
            (torch.isnan, self.features, "Features contain NaN values"),
            (torch.isnan, self.targets, "Targets contain NaN values"),
            (torch.isinf, self.features, "Features contain Inf values"),
            (torch.isinf, self.targets, "Targets contain Inf values"),
        )
        for is_bad, tensor, message in sanity_checks:
            assert not is_bad(tensor).any(), message

    def __len__(self):
        """Number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx``."""
        sample = self.features[idx]
        label = self.targets[idx]
        return sample, label

    @property
    def input_dim(self):
        """Size of the second dimension of the feature tensor."""
        return self.features.shape[1]

In [None]:
class MLP(nn.Module):
    """Fully connected regression network with a single scalar output.

    Each hidden block is ``Linear -> [BatchNorm1d] -> ReLU -> [Dropout]`` and
    the stack ends in ``Linear(prev_dim, 1)``.

    Args:
        input_dim: number of input features.
        hidden_dims: widths of the hidden layers; ``None`` selects
            ``[512, 256, 128]``. The list is copied, so later changes to the
            caller's list do not affect the model (and vice versa).
        dropout: dropout probability; no dropout layer is added when it is 0
            or negative.
        batch_norm: whether to insert ``BatchNorm1d`` after each hidden
            linear layer.
        skip_connections: stored on the instance only; the network built here
            does not use it.
    """

    def __init__(self, 
            input_dim: int, 
            hidden_dims: Optional[List[int]] = None, 
            dropout: float = 0.3, 
            batch_norm: bool = True,
            skip_connections: bool = False
        ):
        super().__init__()

        # A list literal as default would be shared by every instance; build a
        # fresh list per model instead.
        hidden_dims = [512, 256, 128] if hidden_dims is None else list(hidden_dims)

        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        self.dropout = dropout
        self.skip_connections = skip_connections
        self.activation = nn.ReLU()

        # Build network layers
        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))

            if batch_norm:
                layers.append(nn.BatchNorm1d(hidden_dim))

            # The same stateless ReLU module is reused in every block.
            layers.append(self.activation)

            if dropout > 0:
                layers.append(nn.Dropout(dropout))

            prev_dim = hidden_dim

        # Output layer
        layers.append(nn.Linear(prev_dim, 1))

        self.network = nn.Sequential(*layers)
        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for linear weights; zero biases; unit BatchNorm scale."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm1d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Run the network and drop the trailing size-1 output dimension."""
        return self.network(x).squeeze(-1)

    def get_num_parameters(self):
        """Return the number of trainable parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

In [None]:
from sklearn.metrics import mean_squared_log_error

class RMSLELoss(nn.Module):
    """Root mean squared logarithmic error as a differentiable loss.

    Predictions are clamped to be non-negative before the logarithm, so
    negative predictions contribute error but receive no gradient from the
    clamp. ``epsilon`` is added to both predictions and targets before
    ``log1p``.

    Args:
        epsilon: small constant added to predictions and targets.
    """

    # Lower bound applied to the mean squared log error before the square
    # root. The derivative of sqrt is infinite at 0, so a batch that is
    # predicted exactly would otherwise back-propagate NaN gradients.
    _MIN_MSE = 1e-12

    def __init__(self, epsilon=1e-6):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, predictions, targets):
        """Return the RMSLE between ``predictions`` and ``targets`` as a scalar tensor."""
        pred_clamped = torch.clamp(predictions, min=0) + self.epsilon

        log_pred = torch.log1p(pred_clamped)
        log_true = torch.log1p(targets + self.epsilon)

        mse_log = torch.mean((log_pred - log_true) ** 2)
        # Below the floor the clamp passes zero gradient, which keeps the
        # backward pass finite; above it the value is unchanged.
        rmsle = torch.sqrt(torch.clamp(mse_log, min=self._MIN_MSE))

        return rmsle
    

class Trainer:
    def __init__(self, model, train_loader, val_loader, test_loader, 
                 config: dict, device, use_wandb: bool = True):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.config = config
        self.device = device
        self.use_wandb = use_wandb
        
        # Initialize WandB
        if self.use_wandb:
            wandb.init(
                project="store-sales-forecasting",
                name=f"mlp-rmsle-{int(time.time())}",
                config=config,
                tags=["mlp", "neural-network", "time-series", "rmsle-loss"],
                reinit=True
            )
            print("WandB initialized successfully")
        
        # Loss and optimizer            
        self.criterion = RMSLELoss(epsilon=1e-6)
        print("Using RMSLE loss function")
        self.optimizer = optim.AdamW(
            model.parameters(), 
            lr=config['learning_rate'],
            weight_decay=config['weight_decay'],
            betas=(0.9, 0.95)
        )
        
        # Learning rate scheduler
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', factor=0.7, patience=5, min_lr=1e-6
        )
        
        # Training state
        self.best_val_loss = float('inf')
        self.best_model_state = None
        self.train_losses = []
        self.val_losses = []
        self.patience_counter = 0
    
    def train_epoch(self):
        """Train for one epoch"""
        self.model.train()
        total_loss = 0
        num_batches = 0
        
        for batch_idx, (features, targets) in enumerate(self.train_loader):
            features, targets = features.to(self.device), targets.to(self.device)
            
            # Forward pass
            self.optimizer.zero_grad()
            outputs = self.model(features)
            loss = self.criterion(outputs, targets)
            
            # Backward pass
            loss.backward()
            
            # Gradient clipping
            if self.config.get('gradient_clip', 0) > 0:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['gradient_clip'])
            
            self.optimizer.step()
            
            total_loss += loss.item()
            num_batches += 1
        
        return total_loss / num_batches
    
    def evaluate(self, dataloader):
        """Evaluate model on given dataloader"""
        self.model.eval()
        total_loss = 0
        predictions = []
        actuals = []
        
        with torch.no_grad():
            for features, targets in dataloader:
                features, targets = features.to(self.device), targets.to(self.device)
                outputs = self.model(features)
                loss = self.criterion(outputs, targets)
                
                total_loss += loss.item()
                predictions.extend(outputs.cpu().numpy())
                actuals.extend(targets.cpu().numpy())
        
        avg_loss = total_loss / len(dataloader)
        return avg_loss, np.array(predictions), np.array(actuals)
    
    def train(self):
        """Full training loop"""
        print("Starting training...")
        print(f"Model parameters: {self.model.get_num_parameters():,}")
        
        for epoch in range(self.config['epochs']):
            start_time = time.time()
            
            # Train
            train_loss = self.train_epoch()
            
            # Validate
            val_loss, val_preds, val_actuals = self.evaluate(self.val_loader)
            
            # Calculate metrics
            val_metrics = summary(val_actuals, val_preds) if val_preds.std() > 1e-8 else {'MAE': float('inf'), 'RMSE': float('inf'), 'RMSLE': float('inf'), 'MAPE': float('inf'), 'SMAPE': float('inf')}
            
            # Learning rate scheduling based on RMSLE loss
            self.scheduler.step(val_loss)
            
            # Early stopping and best model saving
            if val_loss < self.best_val_loss - self.config['min_delta']:
                self.best_val_loss = val_loss
                self.best_model_state = self.model.state_dict().copy()
                self.patience_counter = 0
                
                # Save best model
                self.save_checkpoint(epoch, val_metrics)
                status = "Best"
            else:
                self.patience_counter += 1
                status = f"{self.patience_counter}/{self.config['patience']}"
            
            # Logging
            epoch_time = time.time() - start_time
            lr = self.optimizer.param_groups[0]['lr']
            
            wandb.log({
                'epoch': epoch + 1,
                'train_rmsle_loss': train_loss,
                'val_rmsle_loss': val_loss,
                'val_mae': val_metrics['MAE'],
                'val_rmse': val_metrics['RMSE'],
                'val_rmsle': val_metrics['RMSLE'],
                'val_mape': val_metrics['MAPE'],
                'val_smape': val_metrics['SMAPE'],
                'learning_rate': lr,
                'epoch_time': epoch_time,
                'patience_counter': self.patience_counter
            })
            
            print(f"Epoch {epoch+1:3d}/{self.config['epochs']} | "
                f"Train RMSLE: {train_loss:.6f} | Val RMSLE: {val_loss:.6f} | "
                f"Val RMSLE Metric: {val_metrics['RMSLE']:.6f} | "
                f"LR: {lr:.2e} | Time: {epoch_time:.1f}s | {status}")
            
            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)
            
            # Early stopping
            if self.patience_counter >= self.config['patience']:
                print(f"Early stopping at epoch {epoch+1}")
                break
        
        # Load best model
        if self.best_model_state is not None:
            self.model.load_state_dict(self.best_model_state)
            print(f"\nLoaded best model with validation RMSLE loss: {self.best_val_loss:.6f}")
        
        # Finish WandB run
        if self.use_wandb:
            wandb.finish()
    
    def save_checkpoint(self, epoch, val_metrics):
        """Save model checkpoint"""
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'val_loss': self.best_val_loss,
            'val_metrics': val_metrics,
            'config': self.config,
            'train_losses': self.train_losses,
            'val_losses': self.val_losses
        }
        
        torch.save(checkpoint, RESULTS_DIR / 'best_model.pt')
    
    def final_evaluation(self):
        """Final evaluation on test set"""
        print("\n" + "="*50)
        print("FINAL EVALUATION")
        print("="*50)
        
        # Test evaluation
        test_loss, test_preds, test_actuals = self.evaluate(self.test_loader)
        
        # Calculate metrics
        test_metrics = summary(test_actuals, test_preds)
        
        print(f"Test RMSLE Loss: {test_loss:.6f}")
        print("\nTest Metrics (original scale):")
        for metric, value in test_metrics.items():
            print(f"  {metric}: {value:.6f}")
        
        return test_metrics, test_preds, test_actuals

In [None]:
# Configuration shared by the model, the data loaders and the trainer.
CONFIG = {
    # Model architecture
    'hidden_dims': [512, 256, 128, 64],
    'dropout': 0.3,
    # Informational only: MLP builds its layers with nn.ReLU and never reads
    # this key, so the recorded value has to say 'relu' to match the model
    # that is actually trained and logged.
    'activation': 'relu',
    'batch_norm': True,

    # Training
    'learning_rate': 1e-3,
    'weight_decay': 1e-4,
    'batch_size': 512,
    'epochs': 100,
    'patience': 15,      # epochs without improvement before early stopping
    'min_delta': 1e-5,   # minimum validation-loss decrease that counts as improvement
    'gradient_clip': 1.0,  # max gradient norm; values <= 0 disable clipping

    # Data
    'categorical_columns': ['store_nbr', 'family', 'city', 'state', 'type', 'cluster'],
    'target_column': 'sales'
}

print("Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")

In [None]:
# Read the train / validation / test parquet files from DATA_DIR.
print("Loading preprocessed data...")
train_df, val_df, test_df = (
    pd.read_parquet(DATA_DIR / f'{_split}.parquet')
    for _split in ('train', 'val', 'test')
)

# Report the size of each split.
print("\nData shapes:")
for _label, _frame in (('Train', train_df), ('Validation', val_df), ('Test', test_df)):
    print(f"  {_label}: {_frame.shape}")

_column_names = list(train_df.columns)
print(f"\nColumns: {_column_names}")

In [None]:
print("Preparing data for MLP training...")

# Everything except the target (and the raw date, when present) is a feature.
target_col = CONFIG['target_column']
exclude_cols = [target_col]
if 'date' in train_df.columns:
    exclude_cols.append('date')
feature_cols = [name for name in train_df.columns if name not in exclude_cols]

# Split the features into the configured categorical columns and the rest.
# NOTE(review): the columns are presumably already integer-encoded / scaled by
# an earlier preprocessing step; nothing in this cell encodes or scales them.
categorical_cols = [name for name in CONFIG['categorical_columns'] if name in feature_cols]
numerical_cols = [name for name in feature_cols if name not in categorical_cols]

print("\nData preparation:")
print(f"  Target column: {target_col}")
print(f"  Total feature columns: {len(feature_cols)}")
print(f"  Categorical columns (encoded): {len(categorical_cols)} - {categorical_cols}")
print(f"  Numerical columns (scaled): {len(numerical_cols)}")
print(f"  Excluded columns: {exclude_cols}")


def _split_xy(frame):
    """Return the (features, target) float32 arrays of one data split."""
    features = frame[feature_cols].values.astype(np.float32)
    target = frame[target_col].values.astype(np.float32)
    return features, target


X_train, y_train = _split_xy(train_df)
X_val, y_val = _split_xy(val_df)
X_test, y_test = _split_xy(test_df)

print("\nFinal data shapes:")
print(f"  X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"  X_val: {X_val.shape}, y_val: {y_val.shape}")
print(f"  X_test: {X_test.shape}, y_test: {y_test.shape}")

# Value ranges of the categorical columns, printed for reference only.
print("\nCategorical variable ranges (for reference):")
for col in categorical_cols:
    if col not in train_df.columns:
        continue
    column_values = train_df[col]
    min_val = column_values.min()
    max_val = column_values.max()
    unique_count = column_values.nunique()
    print(f"  {col}: range=[{min_val}, {max_val}], unique_values={unique_count}")



In [None]:
# Wrap the NumPy arrays in tensors, datasets and batch loaders.
print("Creating datasets and data loaders...")

# Convert to tensors (float32 throughout)
(
    X_train_tensor, y_train_tensor,
    X_val_tensor, y_val_tensor,
    X_test_tensor, y_test_tensor,
) = (
    torch.tensor(_array, dtype=torch.float32)
    for _array in (X_train, y_train, X_val, y_val, X_test, y_test)
)

# Create datasets; SalesDataset validates lengths and NaN/Inf on construction.
train_dataset = SalesDataset(X_train_tensor, y_train_tensor)
val_dataset = SalesDataset(X_val_tensor, y_val_tensor)
test_dataset = SalesDataset(X_test_tensor, y_test_tensor)

print("Dataset sizes:")
print(f"  Train: {len(train_dataset)} samples")
print(f"  Validation: {len(val_dataset)} samples")
print(f"  Test: {len(test_dataset)} samples")
print(f"  Input dimension: {train_dataset.input_dim}")

# Options shared by all three loaders; only the training loader shuffles.
_loader_options = dict(
    batch_size=CONFIG['batch_size'],
    num_workers=0,
    pin_memory=device.type == 'cuda',
)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

print("\nData loaders:")
print(f"  Train batches: {len(train_loader)}")
print(f"  Validation batches: {len(val_loader)}")
print(f"  Test batches: {len(test_loader)}")

# Pull one batch to confirm the pipeline works; it is reused for the model smoke test.
print("\nTesting data loading...")
sample_batch = next(iter(train_loader))
_batch_features, _batch_targets = sample_batch[0], sample_batch[1]
print(f"  Sample batch shapes: {_batch_features.shape}, {_batch_targets.shape}")
print("  Data loading successful!")

In [None]:
# Build the network from CONFIG and move it to the selected device.
print("Creating model...")

model = MLP(
    train_dataset.input_dim,
    hidden_dims=CONFIG['hidden_dims'],
    dropout=CONFIG['dropout'],
    batch_norm=CONFIG['batch_norm'],
)
model = model.to(device)

print("Model created:")
print(f"  Parameters: {model.get_num_parameters():,}")
print(f"  Device: {device}")

# Print model architecture
print("\nModel architecture:")
print(model)

# Smoke test: push the first five rows of the sample batch through the model
# in eval mode and without gradient tracking.
model.eval()
sample_input = sample_batch[0][:5].to(device)
with torch.no_grad():
    sample_output = model(sample_input)

print("\nForward pass test:")
print(f"  Input shape: {sample_input.shape}")
print(f"  Output shape: {sample_output.shape}")
print(f"  Output range: [{sample_output.min():.3f}, {sample_output.max():.3f}]")
print("  Forward pass successful!")

In [None]:
# Wire the model and the three loaders into a Trainer (WandB logging enabled).
_trainer_arguments = dict(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    config=CONFIG,
    device=device,
    use_wandb=True,
)
trainer = Trainer(**_trainer_arguments)

# Run the full training loop.
trainer.train()

In [None]:
# Final evaluation on the held-out test split.
test_metrics, test_preds, test_actuals = trainer.final_evaluation()

# Save final results.
# json cannot encode NumPy scalar types such as np.float32. The value types
# returned by summary() are not visible here, so every metric is coerced to a
# built-in float for the file; test_metrics itself is left untouched.
results = {
    'model_type': 'MLP_Preprocessed_Data',
    'config': CONFIG,
    'model_parameters': model.get_num_parameters(),
    'test_metrics': {name: float(value) for name, value in test_metrics.items()},
    'data_source': 'preprocessed_parquet_files'
}

with open(RESULTS_DIR / 'results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print(f"\nResults saved to {RESULTS_DIR / 'results.json'}")

In [None]:
# Visualization: a 2x2 grid of diagnostic plots followed by a text summary.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax_curves, ax_scatter), (ax_residuals, ax_hist) = axes

# 1. Training curves (one point per completed epoch)
for _series, _legend in ((trainer.train_losses, 'Training RMSLE'),
                         (trainer.val_losses, 'Validation RMSLE')):
    ax_curves.plot(_series, label=_legend, alpha=0.8)
ax_curves.set_xlabel('Epoch')
ax_curves.set_ylabel('RMSLE Loss')
ax_curves.set_title('Training Progress - RMSLE Loss')
ax_curves.legend()
ax_curves.grid(True, alpha=0.3)

# 2. Predictions vs Actuals, with the identity line as reference
ax_scatter.scatter(test_actuals, test_preds, alpha=0.6, s=1)
min_val = min(test_actuals.min(), test_preds.min())
max_val = max(test_actuals.max(), test_preds.max())
ax_scatter.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
ax_scatter.set_xlabel('Actual Sales')
ax_scatter.set_ylabel('Predicted Sales')
ax_scatter.set_title('Predictions vs Actuals')
ax_scatter.grid(True, alpha=0.3)

# 3. Residuals (actual minus predicted) against the prediction
residuals = test_actuals - test_preds
ax_residuals.scatter(test_preds, residuals, alpha=0.6, s=1)
ax_residuals.axhline(y=0, color='r', linestyle='--', lw=2)
ax_residuals.set_xlabel('Predicted Sales')
ax_residuals.set_ylabel('Residuals')
ax_residuals.set_title('Residuals Plot')
ax_residuals.grid(True, alpha=0.3)

# 4. Error distribution
ax_hist.hist(residuals, bins=50, alpha=0.7, density=True)
ax_hist.axvline(x=0, color='r', linestyle='--', lw=2)
ax_hist.set_xlabel('Residuals')
ax_hist.set_ylabel('Density')
ax_hist.set_title('Residuals Distribution')
ax_hist.grid(True, alpha=0.3)

# Save the figure before showing it.
_plot_path = RESULTS_DIR / 'evaluation_plots.png'
plt.tight_layout()
plt.savefig(_plot_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"📊 Plots saved to {_plot_path}")

# Summary statistics
_rule = '=' * 50
print("\n📈 FINAL PERFORMANCE SUMMARY")
print(_rule)
print(f"Model: MLP with {model.get_num_parameters():,} parameters")
print(f"Architecture: {CONFIG['hidden_dims']}")
print("")
print("Test Metrics (Original Scale):")
for metric, value in test_metrics.items():
    print(f"  {metric:6s}: {value:10.6f}")
print("")
print("Prediction Quality:")
print(f"  Actual range:    [{test_actuals.min():8.2f}, {test_actuals.max():8.2f}]")
print(f"  Predicted range: [{test_preds.min():8.2f}, {test_preds.max():8.2f}]")
print(f"  Mean error:      {residuals.mean():8.2f}")
print(f"  Std error:       {residuals.std():8.2f}")
print(_rule)