# In [None]:
# 06_neural_network_fraud_detection.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (classification_report, confusion_matrix, 
                             roc_auc_score, roc_curve, precision_recall_curve,
                             average_precision_score, f1_score, precision_score, recall_score)
import joblib
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: seed both NumPy's and TensorFlow's global generators
# with the same fixed value.
for _seed_fn in (np.random.seed, tf.random.set_seed):
    _seed_fn(42)

# Plot appearance. The matplotlib style sheet is applied first so that the
# seaborn palette set afterwards is the colour cycle that ends up active.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Banner: title line followed by a 50-character rule.
print("=== Fraud Detection: Neural Network Classifier ===", "=" * 50, sep="\n")

# STEP 1: Load and Prepare Data
print("\n1. Loading and preparing data...")

# Read the cleaned dataset from disk.
df = pd.read_csv('data/processed/cc_cleaned.csv')
print(f"Dataset shape: {df.shape}")

# 'Class' is the target column; everything else is a candidate feature.
y = df['Class']
X = df.drop(columns='Class')

# Restrict the feature matrix to numeric columns and remember their names.
X = X.select_dtypes(include=[np.number])
numerical_features = X.columns.tolist()

print(f"Using {len(numerical_features)} numerical features")
print(f"Class distribution: {y.value_counts().to_dict()}")

# STEP 2 + STEP 3: Scaling and Train-Validation-Test Split
#
# The scaler must learn its mean/variance from the TRAINING rows only.
# Fitting it on the full dataset (as was done before) lets statistics of the
# validation and test rows leak into the features the model is trained on and
# into the scaler that is persisted later, which makes the held-out metrics
# optimistic.
#
# To fit on training rows we need to know the partition before scaling, so
# row positions are split first. The split parameters (test_size,
# random_state, stratification labels) are unchanged from the previous
# version; only the array being partitioned differs.
row_positions = np.arange(len(X))

# First split: train+validation vs test
idx_temp, idx_test, y_temp, y_test = train_test_split(
    row_positions, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Second split: train vs validation
idx_train, idx_val, y_train, y_val = train_test_split(
    idx_temp, y_temp,
    test_size=0.2,
    random_state=42,
    stratify=y_temp
)

# STEP 2: Data Scaling (Critical for Neural Networks)
print("\n2. Scaling data for neural network...")

scaler = StandardScaler()
scaler.fit(X.iloc[idx_train])      # statistics come from training rows only
X_scaled = scaler.transform(X)     # the same transform is applied to every row

print("‚úì Data scaled using StandardScaler")
print(f"Scaled data shape: {X_scaled.shape}")

# STEP 3: Train-Validation-Test Split
print("\n3. Creating train-validation-test split...")

# Materialise the partitions from the scaled matrix using the row positions
# chosen above. X_temp (train+validation) is kept for backward compatibility.
X_temp = X_scaled[idx_temp]
X_test = X_scaled[idx_test]
X_train = X_scaled[idx_train]
X_val = X_scaled[idx_val]

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")
print(f"Training fraud rate: {y_train.mean():.4f}")
print(f"Validation fraud rate: {y_val.mean():.4f}")
print(f"Test fraud rate: {y_test.mean():.4f}")

# STEP 4: Handle Class Imbalance
print("\n4. Handling class imbalance...")

# Per-class weights for the loss function
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' mode weights each class inversely to its frequency in y_train,
# so the rarer class contributes more to the loss per sample.
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)

# Map each class label to its weight (the form Keras expects in fit()).
class_weight_dict = {label: weight for label, weight in zip(classes, class_weights)}

print(f"Class weights: {class_weight_dict}")
fraud_to_legit_ratio = class_weight_dict[1] / class_weight_dict[0]
print(f"Fraud weight is {fraud_to_legit_ratio:.1f}x higher than legitimate weight")

# STEP 5: Build Neural Network Architecture
print("\n5. Building neural network architecture...")

# Drop any graph/state left over from previously built models.
keras.backend.clear_session()

# Each hidden block is Dense(relu, L2=0.001) -> BatchNormalization -> Dropout.
# The table lists (units, dropout rate) per block, from input side to output side.
hidden_blocks = [(64, 0.3), (32, 0.3), (16, 0.2)]

# Input layer sized to the number of feature columns.
network_layers = [layers.Input(shape=(X_train.shape[1],))]

for n_units, drop_rate in hidden_blocks:
    network_layers += [
        layers.Dense(n_units, activation='relu',
                     kernel_regularizer=keras.regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(drop_rate),
    ]

# Single sigmoid unit: output is a probability for binary classification.
network_layers.append(layers.Dense(1, activation='sigmoid'))

model = keras.Sequential(network_layers)

# Metrics tracked during training; 'pr_auc' is the one the callbacks monitor.
tracked_metrics = [
    'accuracy',
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='pr_auc', curve='PR'),
]

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

print("‚úì Neural Network architecture built")
print("\nModel Summary:")
model.summary()

# STEP 6: Set up Callbacks
print("\n6. Setting up training callbacks...")

# Stop once validation PR-AUC has not improved for 20 epochs and roll the
# model back to the best weights seen. PR-AUC is monitored because it is
# more informative than accuracy on imbalanced data.
early_stopping = callbacks.EarlyStopping(
    monitor='val_pr_auc',
    mode='max',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 10 epochs without validation-loss
# improvement, never going below 1e-6.
lr_scheduler = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=1,
)

# Persist the model to disk whenever validation PR-AUC reaches a new maximum.
best_checkpoint = callbacks.ModelCheckpoint(
    'models/neural_network_best.h5',
    monitor='val_pr_auc',
    mode='max',
    save_best_only=True,
    verbose=0,
)

callbacks_list = [early_stopping, lr_scheduler, best_checkpoint]

print("‚úì Callbacks configured (EarlyStopping, ReduceLROnPlateau, ModelCheckpoint)")

# STEP 7: Train the Neural Network
print("\n7. Training neural network...")

# Training hyper-parameters; `epochs` is an upper bound because the
# callbacks passed to fit() may end training earlier.
batch_size = 128
epochs = 100

print("Training parameters:")
for summary_line in (
    f"‚Ä¢ Batch size: {batch_size}",
    f"‚Ä¢ Max epochs: {epochs}",
    f"‚Ä¢ Class weights applied: {class_weight_dict}",
):
    print(summary_line)

# Keyword arguments for fit(), gathered in one place for readability.
fit_options = {
    'validation_data': (X_val, y_val),
    'epochs': epochs,
    'batch_size': batch_size,
    'class_weight': class_weight_dict,   # re-weights the loss per class
    'callbacks': callbacks_list,
    'verbose': 1,
}

# `history` keeps the per-epoch metric values used by the plots and the report.
history = model.fit(X_train, y_train, **fit_options)

print("‚úì Neural network training completed")

# STEP 8: Load Best Model
print("\n8. Loading best model from training...")

# Re-load the checkpoint file written during training.
best_checkpoint_path = 'models/neural_network_best.h5'
best_model = keras.models.load_model(best_checkpoint_path)
print("‚úì Best model loaded")

# STEP 9: Make Predictions
print("\n9. Making predictions...")

# The network's predict() output is flattened to a 1-D vector of
# fraud probabilities, one per test row.
test_scores = best_model.predict(X_test, verbose=0)
y_pred_proba = test_scores.flatten()

# Hard labels at the default decision threshold of 0.5.
is_flagged = y_pred_proba >= 0.5
y_pred = is_flagged.astype(int)

print("‚úì Predictions generated")

# STEP 10: Evaluate Model Performance
print("\n10. Evaluating model performance...")

print("\nPerformance with default threshold (0.5):")
print("=" * 40)

# Label-based metrics use the 0.5-threshold predictions ...
accuracy = np.mean(y_pred == y_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# ... while the ranking metrics use the raw probabilities.
roc_auc = roc_auc_score(y_test, y_pred_proba)
avg_precision = average_precision_score(y_test, y_pred_proba)

# Labels carry their own padding so the values line up in the output.
metric_rows = (
    ("Accuracy:  ", accuracy),
    ("Precision: ", precision),
    ("Recall:    ", recall),
    ("F1-Score:  ", f1),
    ("ROC-AUC:   ", roc_auc),
    ("Avg Precision: ", avg_precision),
)
for metric_label, metric_value in metric_rows:
    print(f"{metric_label}{metric_value:.4f}")

# Confusion matrix: rows are true classes, columns are predicted classes.
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
actual_labels = ['Actual Legit', 'Actual Fraud']
predicted_labels = ['Predicted Legit', 'Predicted Fraud']
cm_df = pd.DataFrame(cm, index=actual_labels, columns=predicted_labels)
print(cm_df)

# STEP 11: Threshold Tuning for Fraud Detection
print("\n11. Threshold tuning analysis...")

# The decision threshold is a hyper-parameter, so it is tuned on the
# VALIDATION split. Picking it on the test set (as was done before) would
# bias the reported "optimal" figures, because the test data would then have
# been used to make a modelling choice.
y_val_proba = best_model.predict(X_val, verbose=0).flatten()

# Candidate thresholds 0.1, 0.2, ..., 0.8. linspace fixes the number of grid
# points explicitly; np.arange with a fractional step leaves the endpoint to
# floating-point rounding. Rounding keeps the stored values clean.
thresholds = np.round(np.linspace(0.1, 0.8, 8), 1)
results = []

for threshold in thresholds:
    y_pred_thresh = (y_val_proba >= threshold).astype(int)

    # zero_division=0 keeps the metrics defined when a threshold flags
    # nothing (precision) or when no positives are present (recall).
    precision_t = precision_score(y_val, y_pred_thresh, zero_division=0)
    recall_t = recall_score(y_val, y_pred_thresh, zero_division=0)
    f1_t = f1_score(y_val, y_pred_thresh, zero_division=0)

    # Business metrics. labels=[0, 1] pins the matrix to 2x2 so the
    # [0, 1] / [1, 1] lookups stay valid even if only one class shows up.
    cm_thresh = confusion_matrix(y_val, y_pred_thresh, labels=[0, 1])
    false_positives = cm_thresh[0, 1]
    true_positives = cm_thresh[1, 1]

    results.append({
        'threshold': threshold,
        'precision': precision_t,
        'recall': recall_t,
        'f1_score': f1_t,
        'false_positives': false_positives,
        'true_positives': true_positives,
        # False alarms raised per fraud caught; infinite if nothing is caught.
        'alerts_per_fraud': false_positives / true_positives if true_positives > 0 else np.inf
    })

results_df = pd.DataFrame(results)
print("\nPerformance across different thresholds (validation set):")
print(results_df[['threshold', 'precision', 'recall', 'f1_score', 'alerts_per_fraud']].round(4))

# Choose the threshold with the highest validation F1-score.
optimal_idx = results_df['f1_score'].idxmax()
optimal_threshold = results_df.loc[optimal_idx, 'threshold']
optimal_f1 = results_df.loc[optimal_idx, 'f1_score']

print(f"\nOptimal threshold: {optimal_threshold:.2f} (F1-score: {optimal_f1:.4f})")
print(f"At optimal threshold: Precision = {results_df.loc[optimal_idx, 'precision']:.1%}, "
      f"Recall = {results_df.loc[optimal_idx, 'recall']:.1%}")

# STEP 12: Training History Visualization
print("\n12. Creating training visualizations...")


def _finish_panel(panel, xlabel, ylabel, title):
    """Apply axis labels, title, legend and a light grid to one subplot."""
    panel.set_xlabel(xlabel)
    panel.set_ylabel(ylabel)
    panel.set_title(title)
    panel.legend()
    panel.grid(True, alpha=0.3)


# 2x3 grid: top row shows per-epoch training history, bottom row shows
# curves computed from the test-set predictions and the threshold table.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
epoch_log = history.history

# Plot 1: Loss curves
loss_panel = axes[0, 0]
loss_panel.plot(epoch_log['loss'], label='Training Loss')
loss_panel.plot(epoch_log['val_loss'], label='Validation Loss')
_finish_panel(loss_panel, 'Epoch', 'Loss', 'Training and Validation Loss')

# Plot 2: Accuracy curves
accuracy_panel = axes[0, 1]
accuracy_panel.plot(epoch_log['accuracy'], label='Training Accuracy')
accuracy_panel.plot(epoch_log['val_accuracy'], label='Validation Accuracy')
_finish_panel(accuracy_panel, 'Epoch', 'Accuracy', 'Training and Validation Accuracy')

# Plot 3: Precision and recall per epoch (series order fixes the line colours)
pr_history_panel = axes[0, 2]
for history_key, series_label in (
    ('precision', 'Training Precision'),
    ('val_precision', 'Validation Precision'),
    ('recall', 'Training Recall'),
    ('val_recall', 'Validation Recall'),
):
    pr_history_panel.plot(epoch_log[history_key], label=series_label)
_finish_panel(pr_history_panel, 'Epoch', 'Score', 'Training and Validation Precision/Recall')

# Plot 4: ROC Curve, with the diagonal as the random-classifier baseline
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_panel = axes[1, 0]
roc_panel.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.4f})')
roc_panel.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
_finish_panel(roc_panel, 'False Positive Rate', 'True Positive Rate (Recall)',
              'ROC Curve - Neural Network')

# Plot 5: Precision-Recall Curve
precision_vals, recall_vals, thresholds_pr = precision_recall_curve(y_test, y_pred_proba)
pr_panel = axes[1, 1]
pr_panel.plot(recall_vals, precision_vals, color='blue', lw=2,
              label=f'PR curve (AP = {avg_precision:.4f})')
_finish_panel(pr_panel, 'Recall', 'Precision', 'Precision-Recall Curve - Neural Network')

# Plot 6: Threshold Analysis, with the chosen threshold marked by a vertical line
threshold_panel = axes[1, 2]
for table_column, series_label in (
    ('precision', 'Precision'),
    ('recall', 'Recall'),
    ('f1_score', 'F1-Score'),
):
    threshold_panel.plot(results_df['threshold'], results_df[table_column], 'o-',
                         label=series_label)
threshold_panel.axvline(optimal_threshold, color='red', linestyle='--',
                        label=f'Optimal threshold: {optimal_threshold:.2f}')
_finish_panel(threshold_panel, 'Threshold', 'Score', 'Threshold Tuning Analysis')

plt.tight_layout()
plt.show()

# STEP 13: Compare with Previous Models
print("\n13. Comparison with previous models...")

try:
    # First row of each earlier model's saved performance report.
    lr_performance = pd.read_csv('reports/logistic_regression_performance.csv').iloc[0]
    dt_performance = pd.read_csv('reports/decision_tree_performance.csv').iloc[0]
    rf_performance = pd.read_csv('reports/random_forest_performance.csv').iloc[0]

    # Report column names, in the same order as the 'Metric' labels below.
    report_keys = ['recall', 'precision', 'f1_score', 'roc_auc', 'avg_precision']

    comparison = pd.DataFrame({
        'Metric': ['Recall', 'Precision', 'F1-Score', 'ROC-AUC', 'Avg Precision'],
        'Neural Network': [recall, precision, f1, roc_auc, avg_precision],
        'Random Forest': [rf_performance[key] for key in report_keys],
        'Decision Tree': [dt_performance[key] for key in report_keys],
        'Logistic Regression': [lr_performance[key] for key in report_keys],
    })

    print("\nModel Comparison:")
    print("=" * 70)
    print(comparison.round(4))

    # Grouped bar chart of the first four metrics (rows 0-3 of the table),
    # one bar per model within each metric group.
    fig, ax = plt.subplots(figsize=(14, 7))
    metrics = ['Recall', 'Precision', 'F1-Score', 'ROC-AUC']
    x = np.arange(len(metrics))
    width = 0.2

    # Offsets (in bar widths) that centre the four bars on each tick.
    bar_layout = (
        ('Neural Network', -1.5),
        ('Random Forest', -0.5),
        ('Decision Tree', 0.5),
        ('Logistic Regression', 1.5),
    )
    for model_name, offset in bar_layout:
        ax.bar(x + width * offset, comparison.loc[:3, model_name], width, label=model_name)

    ax.set_xlabel('Metrics')
    ax.set_ylabel('Score')
    ax.set_title('Model Comparison: Neural Network vs All Previous Models')
    ax.set_xticks(x)
    ax.set_xticklabels(metrics)
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

except FileNotFoundError as e:
    # The comparison is optional: skip it when an earlier report is missing.
    print(f"‚ö† Previous model results not found: {e}")

# STEP 14: Save Model and Results
print("\n14. Saving model and results...")

import os

# Ensure the output folders exist before anything is written to them.
for output_dir in ('models', 'reports/figures'):
    os.makedirs(output_dir, exist_ok=True)

# Persist the full model
final_model_path = 'models/neural_network_final.h5'
best_model.save(final_model_path)
print("‚úì Model saved: models/neural_network_final.h5")

# Persist the fitted scaler so new data can be transformed the same way
scaler_path = 'models/neural_network_scaler.pkl'
joblib.dump(scaler, scaler_path)
print("‚úì Scaler saved: models/neural_network_scaler.pkl")

# One-row summary of the run: test-set metrics plus training metadata.
epoch_log = history.history
performance_report = dict(
    accuracy=accuracy,
    precision=precision,
    recall=recall,
    f1_score=f1,
    roc_auc=roc_auc,
    avg_precision=avg_precision,
    optimal_threshold=optimal_threshold,
    n_epochs_trained=len(epoch_log['loss']),
    best_val_pr_auc=max(epoch_log['val_pr_auc']),
    architecture='64-32-16-1',
    class_weight_fraud=class_weight_dict[1],
)

performance_df = pd.DataFrame([performance_report])
performance_df.to_csv('reports/neural_network_performance.csv', index=False)
print("‚úì Performance report saved: reports/neural_network_performance.csv")

# Per-threshold metrics table from the threshold tuning step
results_df.to_csv('reports/neural_network_threshold_analysis.csv', index=False)

# STEP 15: Final Summary


def _print_section(heading, lines, prefix=""):
    """Print a blank line and `heading`, then every entry of `lines` with `prefix` prepended."""
    print(f"\n{heading}")
    for line in lines:
        print(f"{prefix}{line}")


_BULLET = "‚Ä¢ "
_RULE = "=" * 50

print("\n" + _RULE)
print("FINAL SUMMARY - NEURAL NETWORK")
print(_RULE)

_print_section("üìä MODEL PERFORMANCE:", [
    f"Recall (Frauds Caught): {recall:.1%}",
    f"Precision (Accuracy of Fraud Alerts): {precision:.1%}",
    f"F1-Score: {f1:.4f}",
    f"ROC-AUC: {roc_auc:.4f}",
], prefix=_BULLET)

# Counts read from the default-threshold confusion matrix
# (rows = actual class, columns = predicted class).
frauds_caught = cm[1, 1]  # True Positives
frauds_missed = cm[1, 0]  # False Negatives
false_alarms = cm[0, 1]   # False Positives

_print_section("üéØ BUSINESS IMPACT:", [
    f"Frauds detected: {frauds_caught}/{frauds_caught + frauds_missed} ({recall:.1%})",
    f"False alarms: {false_alarms} legitimate transactions flagged",
    f"Optimal threshold: {optimal_threshold:.2f}",
], prefix=_BULLET)

_print_section("üß† NEURAL NETWORK CHARACTERISTICS:", [
    "Architecture: 64-32-16-1 (3 hidden layers)",
    f"Training epochs: {len(history.history['loss'])}",
    f"Best validation PR-AUC: {max(history.history['val_pr_auc']):.4f}",
    f"Class weight for fraud: {class_weight_dict[1]:.1f}x",
], prefix=_BULLET)

_print_section("üîç KEY INSIGHTS:", [
    "Deep learning can capture complex fraud patterns",
    "Neural networks excel at learning feature interactions",
    "Regularization techniques prevent overfitting on rare fraud cases",
], prefix=_BULLET)

_print_section("üèÜ COMPARISON HIGHLIGHTS:", [
    "Expected to match or exceed Random Forest performance",
    "Can learn non-linear patterns that tree-based models miss",
    "More parameters but better feature representation",
], prefix=_BULLET)

# The next-steps list is numbered, so it is printed without the bullet prefix.
_print_section("‚úÖ NEXT STEPS:", [
    "1. Consider ensemble of Neural Network + Random Forest",
    "2. Implement real-time scoring with TensorFlow Serving",
    "3. Monitor model drift and retrain periodically",
    "4. Explore deep learning architectures (Autoencoders, LSTMs for sequences)",
])

print("\n‚úì Neural Network implementation completed successfully!")