# Neural Networks Category - Kaggle Playground Series S5E8

**Category**: Neural Networks  
**Sub-models**: Keras MLP variants (shallow, medium, deep) with different architectures  
**Split Strategy**: 70/30 stratified split  
**Cross-Validation**: 5-fold StratifiedKFold  
**Random Seed**: 42  
**Artifact Paths**: ../outputs/neural_nets/ (relative to the notebook's working directory)  

This notebook compares different neural network architectures using Keras/TensorFlow.

In [None]:
# Bootstrap installation and imports
%pip install numpy pandas scikit-learn matplotlib tensorflow --quiet

import json
import os
import pickle
import random
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# calibration_curve lives in sklearn.calibration, not sklearn.metrics
from sklearn.calibration import calibration_curve
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    roc_auc_score, average_precision_score, f1_score, accuracy_score,
    precision_score, recall_score, log_loss, roc_curve, precision_recall_curve,
    confusion_matrix
)
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

# TensorFlow imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

warnings.filterwarnings('ignore')

# Create the artifact root and seed every RNG in use for reproducibility.
# The directory is '../outputs/neural_nets' because that is where the later
# cells write their logs, models, figures and summary; a bare 'outputs' here
# would only leave an unused directory next to the notebook.
SEED = 42
os.makedirs('../outputs/neural_nets', exist_ok=True)
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

print("Neural Networks Category - Setup Complete")
print(f"TensorFlow version: {tf.__version__}")

In [None]:
# Load the competition CSVs and carve out the train pool / hold-out split
train_df = pd.read_csv('../playground-series-s5e8/train.csv')
test_df = pd.read_csv('../playground-series-s5e8/test.csv')

# Every column except the row id and the label is used as a feature,
# in the order it appears in the training file.
non_feature_cols = ('id', 'target')
feature_cols = [name for name in train_df.columns if name not in non_feature_cols]
X, y = train_df[feature_cols], train_df['target']

# 70/30 hold-out, stratified on the label so both parts keep the class ratio
X_train_pool, X_test_holdout, y_train_pool, y_test_holdout = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Fold generator reused by the evaluation loop
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
print(f"Data loaded: {X_train_pool.shape} train, {X_test_holdout.shape} test")
print(f"Features: {len(feature_cols)}")

# Preprocessing for neural networks: median-impute, then standardise.
# Both transformers are fitted on the train pool only and merely applied to
# the hold-out set.
# NOTE(review): median imputation and scaling assume every feature column is
# numeric - confirm against the competition schema.
imputer = SimpleImputer(strategy='median')
scaler = StandardScaler()

train_imputed = imputer.fit_transform(X_train_pool)
X_train_processed = scaler.fit_transform(train_imputed)

holdout_imputed = imputer.transform(X_test_holdout)
X_test_processed = scaler.transform(holdout_imputed)

n_features = X_train_processed.shape[1]
print(f"Processed features: {n_features}")

In [None]:
# Define neural network architectures
def create_shallow_model(input_dim, activation='relu', use_batch_norm=False, dropout_rate=0.3):
    """Build an uncompiled shallow MLP: 64 -> 32 -> 1 (sigmoid).

    Args:
        input_dim: Number of input features.
        activation: Activation used by every hidden Dense layer.
        use_batch_norm: If True, add BatchNormalization after each hidden
            Dense layer.
        dropout_rate: Dropout rate applied after each hidden layer; a value
            of 0 or less disables dropout.

    Returns:
        An uncompiled ``keras.Sequential`` model with a single sigmoid output.
    """
    # Declare the input explicitly; passing `input_shape=` to the first Dense
    # layer triggers a deprecation warning on recent Keras releases.
    model = keras.Sequential([keras.Input(shape=(input_dim,))])

    # Each hidden stage is Dense -> (BatchNorm) -> (Dropout), in that order.
    for units in (64, 32):
        model.add(layers.Dense(units, activation=activation))
        if use_batch_norm:
            model.add(layers.BatchNormalization())
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='sigmoid'))
    return model

def create_medium_model(input_dim, activation='relu', use_batch_norm=False, dropout_rate=0.3):
    """Build an uncompiled medium MLP: 256 -> 128 -> 64 -> 1 (sigmoid).

    Args:
        input_dim: Number of input features.
        activation: Activation used by every hidden Dense layer.
        use_batch_norm: If True, add BatchNormalization after each hidden
            Dense layer.
        dropout_rate: Dropout rate applied after each hidden layer; a value
            of 0 or less disables dropout.

    Returns:
        An uncompiled ``keras.Sequential`` model with a single sigmoid output.
    """
    # Declare the input explicitly; passing `input_shape=` to the first Dense
    # layer triggers a deprecation warning on recent Keras releases.
    model = keras.Sequential([keras.Input(shape=(input_dim,))])

    # Each hidden stage is Dense -> (BatchNorm) -> (Dropout), in that order.
    for units in (256, 128, 64):
        model.add(layers.Dense(units, activation=activation))
        if use_batch_norm:
            model.add(layers.BatchNormalization())
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='sigmoid'))
    return model

def create_deep_model(input_dim, activation='relu', use_batch_norm=False, dropout_rate=0.3):
    """Build an uncompiled deep MLP: 512 -> 256 -> 128 -> 64 -> 1 (sigmoid).

    Args:
        input_dim: Number of input features.
        activation: Activation used by every hidden Dense layer.
        use_batch_norm: If True, add BatchNormalization after each hidden
            Dense layer.
        dropout_rate: Dropout rate applied after each hidden layer; a value
            of 0 or less disables dropout.

    Returns:
        An uncompiled ``keras.Sequential`` model with a single sigmoid output.
    """
    # Declare the input explicitly; passing `input_shape=` to the first Dense
    # layer triggers a deprecation warning on recent Keras releases.
    model = keras.Sequential([keras.Input(shape=(input_dim,))])

    # Each hidden stage is Dense -> (BatchNorm) -> (Dropout), in that order.
    for units in (512, 256, 128, 64):
        model.add(layers.Dense(units, activation=activation))
        if use_batch_norm:
            model.add(layers.BatchNormalization())
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='sigmoid'))
    return model

# Define model configurations
# Each variant name maps to the builder function plus the settings that the
# evaluation loop forwards to it (activation, batch-norm flag, dropout rate).
models_config = {
    'MLP_shallow_relu': dict(
        create_fn=create_shallow_model,
        activation='relu',
        batch_norm=False,
        dropout=0.3,
    ),
    'MLP_shallow_relu_bn': dict(
        create_fn=create_shallow_model,
        activation='relu',
        batch_norm=True,
        dropout=0.3,
    ),
    'MLP_medium_relu': dict(
        create_fn=create_medium_model,
        activation='relu',
        batch_norm=False,
        dropout=0.3,
    ),
    'MLP_medium_swish': dict(
        create_fn=create_medium_model,
        activation='swish',
        batch_norm=True,
        dropout=0.5,
    ),
    'MLP_deep_relu': dict(
        create_fn=create_deep_model,
        activation='relu',
        batch_norm=True,
        dropout=0.5,
    ),
}

print(f"Configured {len(models_config)} neural network variants:")
for name in models_config:
    print(f"  - {name}")

In [None]:
# Helper functions
def get_probabilities(model, X):
    """Run ``model.predict`` silently on ``X`` and return the output as a 1-D array."""
    raw_predictions = model.predict(X, verbose=0)
    return raw_predictions.flatten()

def compute_metrics(y_true, y_prob, threshold=0.5):
    """Score predicted probabilities against binary labels.

    Args:
        y_true: Ground-truth binary labels.
        y_prob: Predicted positive-class probabilities (array-like supporting
            ``>=`` comparison and ``.astype``).
        threshold: Probabilities at or above this value count as positive.

    Returns:
        Dict with keys ``roc_auc``, ``average_precision``, ``f1``,
        ``accuracy``, ``precision``, ``recall`` and ``logloss``.
    """
    hard_labels = (y_prob >= threshold).astype(int)

    # Threshold-free metrics are computed from the raw probabilities.
    scores = {
        'roc_auc': roc_auc_score(y_true, y_prob),
        'average_precision': average_precision_score(y_true, y_prob),
    }

    # Threshold-dependent metrics are computed from the hard labels.
    label_scorers = (
        ('f1', f1_score),
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    for key, scorer in label_scorers:
        scores[key] = scorer(y_true, hard_labels)

    scores['logloss'] = log_loss(y_true, y_prob)
    return scores

def find_best_threshold(y_true, y_prob):
    """Return the decision threshold that maximises Youden's J (TPR - FPR).

    Args:
        y_true: Ground-truth binary labels.
        y_prob: Predicted positive-class scores.

    Returns:
        The selected threshold as a built-in ``float``.
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    j_scores = tpr - fpr

    # roc_curve prepends a sentinel threshold for the "predict nothing" point
    # (np.inf on current scikit-learn, max(score) + 1 on older releases).
    # It is not a usable cut-off, so it is excluded from the search whenever
    # at least one real threshold exists.
    first_real = 1 if len(thresholds) > 1 else 0
    best_idx = first_real + int(np.argmax(j_scores[first_real:]))

    # Return a plain float: the thresholds inherit the dtype of y_prob, and a
    # NumPy float32 cannot be written by json.dump.
    return float(thresholds[best_idx])

print("Helper functions defined")

In [None]:
# Main evaluation loop
def _build_compiled_model(config, input_dim):
    """Build and compile a fresh network for one entry of models_config."""
    # Many models are created in this loop; clearing Keras' global state
    # first keeps memory from growing with every new model.
    keras.backend.clear_session()

    model = config['create_fn'](
        input_dim=input_dim,
        activation=config['activation'],
        use_batch_norm=config['batch_norm'],
        dropout_rate=config['dropout']
    )
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        # Name the metric explicitly. With the bare string 'AUC' Keras picks
        # an auto-generated name that can become 'auc_1', 'auc_2', ... for
        # later models, which breaks monitor='val_auc' and history['auc'].
        metrics=[keras.metrics.AUC(name='auc')]
    )
    return model


results = {}
n_folds = cv.get_n_splits()

for model_name, config in models_config.items():
    print(f"\nEvaluating: {model_name}")

    model_dir = f"../outputs/neural_nets/{model_name}"
    for subdir in ('logs', 'models', 'figures'):
        os.makedirs(f"{model_dir}/{subdir}", exist_ok=True)

    # CV analysis
    cv_metrics = []
    cv_roc_curves = []
    cv_pr_curves = []
    cv_thresholds = []
    cv_histories = []

    for fold_idx, (train_idx, val_idx) in enumerate(cv.split(X_train_pool, y_train_pool)):
        print(f"  Fold {fold_idx + 1}/{n_folds}")

        # Fit the imputer/scaler on this fold's training rows only, so no
        # statistics from the validation rows leak into training.
        fold_imputer = SimpleImputer(strategy='median')
        fold_scaler = StandardScaler()
        X_fold_train = fold_scaler.fit_transform(
            fold_imputer.fit_transform(X_train_pool.iloc[train_idx])
        )
        X_fold_val = fold_scaler.transform(
            fold_imputer.transform(X_train_pool.iloc[val_idx])
        )
        y_fold_train = y_train_pool.iloc[train_idx].values
        y_fold_val = y_train_pool.iloc[val_idx].values

        # Create and compile model
        model = _build_compiled_model(config, X_fold_train.shape[1])

        # Stop on validation AUC and keep the best weights; halve the
        # learning rate when the plateau callback's monitored value stalls.
        callbacks = [
            EarlyStopping(patience=10, restore_best_weights=True, monitor='val_auc', mode='max'),
            ReduceLROnPlateau(patience=5, factor=0.5, min_lr=1e-6)
        ]

        # Train model
        history = model.fit(
            X_fold_train, y_fold_train,
            validation_data=(X_fold_val, y_fold_val),
            epochs=100,
            batch_size=32,
            callbacks=callbacks,
            verbose=0
        )

        cv_histories.append(history.history)

        # Predict on validation fold
        y_val_prob = get_probabilities(model, X_fold_val)

        # Best threshold for this fold, stored as a plain float so it stays
        # JSON-serialisable once averaged.
        best_threshold = float(find_best_threshold(y_fold_val, y_val_prob))
        cv_thresholds.append(best_threshold)

        # Compute metrics
        fold_metrics = compute_metrics(y_fold_val, y_val_prob, best_threshold)
        fold_metrics['fold'] = fold_idx + 1
        fold_metrics['threshold'] = best_threshold
        cv_metrics.append(fold_metrics)

        # Store curves for plotting
        fpr, tpr, _ = roc_curve(y_fold_val, y_val_prob)
        precision, recall, _ = precision_recall_curve(y_fold_val, y_val_prob)
        cv_roc_curves.append((fpr, tpr))
        cv_pr_curves.append((precision, recall))

        print(f"    AUC: {fold_metrics['roc_auc']:.4f}, AP: {fold_metrics['average_precision']:.4f}")

    # Train final model on the full train pool (preprocessed with the
    # pool-level imputer/scaler, which is also what the hold-out set uses)
    print("  Training final model...")
    final_model = _build_compiled_model(config, X_train_processed.shape[1])

    # No validation data here, so no early stopping: a fixed epoch budget
    final_history = final_model.fit(
        X_train_processed, y_train_pool.values,
        epochs=50,  # Reduced epochs for final model
        batch_size=32,
        verbose=0
    )

    # Test evaluation at the mean of the per-fold thresholds.
    # float() keeps the value JSON-serialisable for test_metrics.json.
    mean_threshold = float(np.mean(cv_thresholds))
    y_test_prob = get_probabilities(final_model, X_test_processed)
    test_metrics = compute_metrics(y_test_holdout, y_test_prob, mean_threshold)
    test_metrics['chosen_threshold'] = mean_threshold
    test_metrics['confusion_matrix'] = confusion_matrix(
        y_test_holdout, (y_test_prob >= mean_threshold).astype(int)
    ).tolist()

    print(f"  Test AUC: {test_metrics['roc_auc']:.4f}")

    # Store results
    results[model_name] = {
        'cv_metrics': cv_metrics,
        'test_metrics': test_metrics,
        'cv_roc_curves': cv_roc_curves,
        'cv_pr_curves': cv_pr_curves,
        'cv_histories': cv_histories,
        'final_history': final_history.history,
        'model_dir': model_dir
    }

    # Save artifacts
    cv_df = pd.DataFrame(cv_metrics)
    cv_df.to_csv(f"{model_dir}/logs/cv_metrics.csv", index=False)

    with open(f"{model_dir}/logs/test_metrics.json", 'w') as f:
        json.dump(test_metrics, f, indent=2)

    # Save model as a single .keras file
    final_model.save(f"{model_dir}/models/final_model.keras")

print("\nAll neural network models evaluated!")

In [None]:
# Generate figures with NN-specific plots
for model_name, model_results in results.items():
    model_dir = model_results['model_dir']

    print(f"Generating figures for {model_name}...")

    # 1. Learning curves (training history of the final model)
    final_history = model_results['final_history']
    # Keras can record the AUC metric under a suffixed name ('auc_1', ...)
    # when many models are built in one session, so look the key up instead
    # of hard-coding 'auc'.
    auc_key = next((key for key in final_history if key.startswith('auc')), None)

    plt.figure(figsize=(12, 4))

    # Loss plot
    plt.subplot(1, 2, 1)
    plt.plot(final_history['loss'], label='Training Loss')
    plt.title(f'Training Loss - {model_name}')
    plt.xlabel('Epoch')
    plt.ylabel('Binary Crossentropy')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # AUC plot (left empty if no AUC metric was recorded)
    plt.subplot(1, 2, 2)
    if auc_key is not None:
        plt.plot(final_history[auc_key], label='Training AUC')
        plt.legend()
    plt.title(f'Training AUC - {model_name}')
    plt.xlabel('Epoch')
    plt.ylabel('AUC')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f"{model_dir}/figures/learning_curve.png", dpi=200, bbox_inches='tight')
    plt.close()

    # 2. ROC Curve: average the per-fold curves on a common FPR grid
    plt.figure(figsize=(8, 6))
    mean_fpr = np.linspace(0, 1, 100)
    tprs = []
    for fpr, tpr in model_results['cv_roc_curves']:
        interp_tpr = np.interp(mean_fpr, fpr, tpr)
        # Several ROC points can share fpr == 0; pin the first sample so
        # every interpolated curve starts at the origin.
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)

    mean_tpr = np.mean(tprs, axis=0)
    std_tpr = np.std(tprs, axis=0)
    mean_cv_auc = np.mean([fold['roc_auc'] for fold in model_results['cv_metrics']])

    plt.plot(mean_fpr, mean_tpr, 'b-',
             label=f'Mean ROC (AUC = {mean_cv_auc:.3f})')
    # Keep the +/- 1 std band inside the valid [0, 1] range
    plt.fill_between(
        mean_fpr,
        np.maximum(mean_tpr - std_tpr, 0),
        np.minimum(mean_tpr + std_tpr, 1),
        alpha=0.2
    )
    plt.plot([0, 1], [0, 1], 'k--', label='Random')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve - {model_name}')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(f"{model_dir}/figures/roc_cv.png", dpi=200, bbox_inches='tight')
    plt.close()

    # 3. Feature importance placeholder (would require permutation importance)
    plt.figure(figsize=(8, 6))
    plt.text(0.5, 0.5, 'Feature importance for neural networks\nrequires permutation importance\n(computationally expensive)',
             ha='center', va='center', transform=plt.gca().transAxes)
    plt.title(f'Feature Importance - {model_name}')
    plt.tight_layout()
    plt.savefig(f"{model_dir}/figures/feature_importance.png", dpi=200, bbox_inches='tight')
    plt.close()

print("All figures generated!")

In [None]:
# Create summary: one row per model with hold-out scores and CV AUC spread
summary_data = []
for model_name, model_results in results.items():
    holdout_scores = model_results['test_metrics']
    fold_aucs = [fold['roc_auc'] for fold in model_results['cv_metrics']]

    summary_row = {
        'model': model_name,
        'test_auc': holdout_scores['roc_auc'],
        'test_ap': holdout_scores['average_precision'],
        'test_f1': holdout_scores['f1'],
        'cv_auc_mean': np.mean(fold_aucs),
        'cv_auc_std': np.std(fold_aucs),
        'artifacts_path': model_results['model_dir'],
    }
    summary_data.append(summary_row)

# Rank models by hold-out AUC, best first, and persist the table
summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values('test_auc', ascending=False)
os.makedirs('../outputs/neural_nets', exist_ok=True)
summary_df.to_csv('../outputs/neural_nets/summary.csv', index=False)

print("\nNEURAL NETWORKS CATEGORY - FINAL RESULTS")
print("=" * 50)
for row in summary_df.itertuples(index=False):
    print(f"{row.model:25s} | AUC: {row.test_auc:.4f} | AP: {row.test_ap:.4f}")

# After sorting, the first row is the best model by hold-out AUC
best_row = summary_df.iloc[0]
print(f"\nBest Model: {best_row['model']} (AUC: {best_row['test_auc']:.4f})")
print("Summary saved to: ../outputs/neural_nets/summary.csv")