In [1]:
# =============================================================================
# NOTEBOOK 03: MODEL INFERENCE - NON-PCA PRIMARY (SPARKOV)
# =============================================================================
# Author: Muhammad Ali Tahir
# MS Data Science Program, Superior University Lahore
# Dataset: Sparkov Credit Card Transactions (Interpretable Features)
# =============================================================================

# %% [markdown]
# # 3. Model Inference - Sparkov Fraud Detection
#
# This notebook:
# - Loads the trained Neural Network model
# - Makes predictions on the test dataset
# - Saves raw probabilities for evaluation

# %% [markdown]
# ## 3.1 Import Libraries

# %%
import pandas as pd
import numpy as np
import pickle
import json
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

print("Libraries imported successfully!")

# Device configuration: use the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f"Using device: {DEVICE}")

# %% [markdown]
# ## 3.2 Load Model Architecture

# %%
class FraudDetectionNN(nn.Module):
    """Fully connected network that maps a feature vector to one output value.

    Every hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; the final
    stage is a single Linear unit with no activation, so ``forward`` returns
    the raw output of that layer (the notebook applies ``torch.sigmoid`` to it
    at inference time).

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Width of each hidden stage, in order.
        dropout_rate: Probability passed to every ``nn.Dropout``.
    """

    def __init__(self, input_size, hidden_sizes=[256, 128, 64], dropout_rate=0.3):
        super().__init__()

        # widths[i] -> widths[i + 1] describes hidden stage i.
        widths = [input_size, *hidden_sizes]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.BatchNorm1d(fan_out))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(dropout_rate))

        # Output layer: one value per sample.
        stack.append(nn.Linear(widths[-1], 1))

        # Attribute name and module order define the state_dict keys
        # ("network.<index>...."), so both must stay as they are for saved
        # weights to load.
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its output."""
        return self.network(x)

# %% [markdown]
# ## 3.3 Load Saved Artifacts

# %%
# Load model configuration
with open('../../models/non_pca_primary/model_config.json', 'r') as config_file:
    model_config = json.load(config_file)

config_banner = "=" * 60
print(config_banner)
print("MODEL CONFIGURATION")
print(config_banner)
for config_line in (
    f"Input size: {model_config['input_size']}",
    f"Hidden sizes: {model_config['hidden_sizes']}",
    f"Dropout rate: {model_config['dropout_rate']}",
    f"Best F1 (training): {model_config['best_f1']:.4f}",
    f"Epochs trained: {model_config['epochs_trained']}",
):
    print(config_line)

# %%
# Load encoders
# NOTE(review): pickle.load runs arbitrary code from the file it reads; only
# load artifacts that were written by a trusted training run.
with open('../../models/non_pca_primary/encoders.pkl', 'rb') as encoders_file:
    encoders = pickle.load(encoders_file)

gender_mapping = encoders['gender']
n_category_cols = len(encoders['category_cols'])
n_states = len(encoders['state'])

print("\n" + "=" * 60)
print("ENCODERS LOADED")
print("=" * 60)
print(f"Gender mapping: {gender_mapping}")
print(f"Category columns: {n_category_cols} categories")
print(f"State encoding: {n_states} states")

# %%
# Load scaler (same pickle caveat as for the encoders)
with open('../../models/non_pca_primary/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

print("\n✓ Scaler loaded")

# %%
# Load model: rebuild the architecture from the saved config, then restore
# the trained weights into it.
model = FraudDetectionNN(
    input_size=model_config['input_size'],
    hidden_sizes=model_config['hidden_sizes'],
    dropout_rate=model_config['dropout_rate']
)

# map_location lets weights saved on one device load onto DEVICE.
saved_weights = torch.load('../../models/non_pca_primary/nn_model.pth', map_location=DEVICE)
model.load_state_dict(saved_weights)
model = model.to(DEVICE)
model.eval()

parameter_count = sum(p.numel() for p in model.parameters())
print("\n✓ Model loaded and set to evaluation mode")
print(f"Total parameters: {parameter_count:,}")

# %% [markdown]
# ## 3.4 Define Preprocessing Function

# %%
def preprocess_for_inference(df, encoders, scaler, feature_columns=None):
    """
    Preprocess new data for inference using saved encoders and scaler.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw transaction rows. The columns 'trans_date_trans_time', 'dob',
        'gender', 'state' and 'category' are read directly; 'is_fraud' is
        optional. The input frame is not modified.
    encoders : dict
        Keys read here: 'gender' (value -> int mapping), 'state'
        (state -> float mapping), 'state_default' (value used for states
        missing from the mapping) and 'category_cols' (ordered list of
        one-hot column names).
    scaler : object
        Anything exposing ``transform``; it is applied to the numeric columns
        in the fixed order listed in the body.
        NOTE(review): that order presumably has to match the order used when
        the scaler was fitted -- confirm against the training notebook.
    feature_columns : sequence of str, optional
        Final column order of the returned features. When omitted, the
        notebook-level ``model_config['feature_columns']`` is used, which is
        what the original implementation always did.

    Returns
    -------
    X : pandas.DataFrame
        float32 feature frame with exactly ``feature_columns``, in that order.
    y : numpy.ndarray or None
        float32 labels taken from 'is_fraud', or None if that column is absent.

    Raises
    ------
    ValueError
        If 'gender' contains a value that ``encoders['gender']`` cannot map.
    KeyError
        If a requested feature column is missing after preprocessing.
    """
    if feature_columns is None:
        # Backward-compatible fallback to the global loaded from model_config.json.
        feature_columns = model_config['feature_columns']
    feature_columns = list(feature_columns)

    df = df.copy()

    # ========== 1. TEMPORAL FEATURES ==========
    df['trans_date_trans_time'] = pd.to_datetime(df['trans_date_trans_time'])
    df['hour'] = df['trans_date_trans_time'].dt.hour
    df['day_of_week'] = df['trans_date_trans_time'].dt.dayofweek
    df['month'] = df['trans_date_trans_time'].dt.month

    # ========== 2. AGE CALCULATION ==========
    # Whole years approximated as elapsed days // 365 (leap days ignored).
    df['dob'] = pd.to_datetime(df['dob'])
    df['age'] = (df['trans_date_trans_time'] - df['dob']).dt.days // 365

    # ========== 3. DROP UNNECESSARY COLUMNS ==========
    drop_cols = ['Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'merchant',
                 'first', 'last', 'street', 'city', 'zip', 'lat', 'long',
                 'job', 'dob', 'trans_num', 'unix_time', 'merch_lat', 'merch_long']
    df = df.drop(columns=[col for col in drop_cols if col in df.columns])

    # ========== 4. ENCODE CATEGORICAL FEATURES ==========
    # Gender: fail with a readable message instead of an int-cast error on NaN.
    gender_codes = df['gender'].map(encoders['gender'])
    unmapped_gender = gender_codes.isna()
    if unmapped_gender.any():
        unknown_values = sorted(df.loc[unmapped_gender, 'gender'].astype(str).unique())
        raise ValueError(
            f"Unmapped gender value(s) {unknown_values}; "
            f"known values: {list(encoders['gender'])}"
        )
    df['gender'] = gender_codes.astype(int)

    # State: Target Encoding (states missing from the mapping get the default)
    df['state_encoded'] = df['state'].map(encoders['state']).fillna(encoders['state_default']).astype(float)
    df = df.drop(columns=['state'])

    # Category: One-Hot Encoding. reindex adds an all-zero column for every
    # expected category absent from this data, drops categories not in the
    # expected list, and fixes the column order in one step.
    category_dummies = pd.get_dummies(df['category'], prefix='cat').reindex(
        columns=encoders['category_cols'], fill_value=0
    )

    df = pd.concat([df.reset_index(drop=True), category_dummies.reset_index(drop=True)], axis=1)
    df = df.drop(columns=['category'])

    # ========== 5. EXTRACT TARGET IF EXISTS ==========
    y = None
    if 'is_fraud' in df.columns:
        y = df['is_fraud'].values.astype(np.float32)
        df = df.drop(columns=['is_fraud'])

    # ========== 6. ENSURE NUMERIC ==========
    X = df.astype(np.float32)

    # ========== 7. SCALE NUMERIC FEATURES ==========
    numeric_cols = ['amt', 'city_pop', 'age', 'hour', 'day_of_week', 'month', 'state_encoded']
    numeric_cols = [col for col in numeric_cols if col in X.columns]
    X[numeric_cols] = scaler.transform(X[numeric_cols])

    # ========== 8. ENSURE CORRECT COLUMN ORDER ==========
    missing_features = [col for col in feature_columns if col not in X.columns]
    if missing_features:
        raise KeyError(
            f"Preprocessed data is missing expected feature column(s): {missing_features}"
        )
    X = X[feature_columns].astype(np.float32)

    return X, y

# %% [markdown]
# ## 3.5 Load Test Data

# %%
print("Loading test dataset...")
test_df = pd.read_csv('../../data/non_pca_primary/fraudTest.csv')

print(f"Test set shape: {test_df.shape}")
# Mean of the 0/1 label column, shown as a percentage.
test_fraud_rate_pct = test_df['is_fraud'].mean()*100
print(f"Test set fraud rate: {test_fraud_rate_pct:.2f}%")

# %%
# Preprocess test data with the encoders and scaler loaded above
print("\nPreprocessing test data...")
X_test, y_test = preprocess_for_inference(test_df, encoders, scaler)

feature_names = list(X_test.columns)
print(f"Preprocessed features shape: {X_test.shape}")
print(f"Feature columns: {feature_names}")

# %% [markdown]
# ## 3.6 Create DataLoader

# %%
class InferenceDataset(Dataset):
    """Label-free dataset that serves one float32 feature row per index.

    Accepts either an object exposing ``.values`` (e.g. a pandas DataFrame)
    or an array that supports ``.astype``.
    """

    def __init__(self, X):
        # Unwrap to the underlying array when the input carries one.
        features = getattr(X, 'values', X)
        self.X = torch.FloatTensor(features.astype(np.float32))

    def __len__(self):
        """Number of rows held by the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the feature tensor stored at position ``idx``."""
        return self.X[idx]

# %%
BATCH_SIZE = 2048  # Larger batch for faster inference
test_dataset = InferenceDataset(X_test)
# shuffle=False keeps predictions in the same row order as X_test / y_test.
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

n_test_batches = len(test_loader)
print(f"Batch size: {BATCH_SIZE}")
print(f"Number of batches: {n_test_batches}")

# %% [markdown]
# ## 3.7 Run Inference

# %%
print("\n" + "=" * 60)
print("RUNNING INFERENCE")
print("=" * 60)

# One probability array per batch; joined once after the loop instead of
# extending a Python list element by element.
batch_probabilities = []

model.eval()
with torch.no_grad():
    for batch_idx, X_batch in enumerate(test_loader):
        X_batch = X_batch.to(DEVICE)

        # Forward pass. squeeze(1) drops only the output column, so a final
        # batch holding a single row stays 1-D. A bare squeeze() would turn
        # it into a 0-d tensor, which cannot be iterated or concatenated.
        outputs = model(X_batch).squeeze(1)
        probabilities = torch.sigmoid(outputs)

        batch_probabilities.append(probabilities.cpu().numpy())

        # Progress
        if (batch_idx + 1) % 50 == 0:
            print(f"Processed batch {batch_idx + 1}/{len(test_loader)}")

if batch_probabilities:
    all_probabilities = np.concatenate(batch_probabilities)
else:
    # Empty test set: keep the array type and dtype the later cells expect.
    all_probabilities = np.empty(0, dtype=np.float32)

print(f"\n✓ Inference complete!")
print(f"Total predictions: {len(all_probabilities):,}")

# %% [markdown]
# ## 3.8 Prediction Statistics

# %%
print("\n" + "=" * 60)
print("PREDICTION STATISTICS")
print("=" * 60)

prob_min = all_probabilities.min()
prob_max = all_probabilities.max()
print(f"Probability range: {prob_min:.6f} to {prob_max:.6f}")
print(f"Probability mean: {all_probabilities.mean():.6f}")
print(f"Probability median: {np.median(all_probabilities):.6f}")
print(f"Probability std: {all_probabilities.std():.6f}")

# Distribution of probabilities: each entry pairs the printed label with a
# boolean mask selecting the predictions that fall in that bucket.
probability_buckets = [
    ("  < 0.1:  ", all_probabilities < 0.1),
    ("  0.1-0.3: ", (all_probabilities >= 0.1) & (all_probabilities < 0.3)),
    ("  0.3-0.5: ", (all_probabilities >= 0.3) & (all_probabilities < 0.5)),
    ("  0.5-0.7: ", (all_probabilities >= 0.5) & (all_probabilities < 0.7)),
    ("  0.7-0.9: ", (all_probabilities >= 0.7) & (all_probabilities < 0.9)),
    ("  >= 0.9: ", all_probabilities >= 0.9),
]

print(f"\nProbability distribution:")
for bucket_label, bucket_mask in probability_buckets:
    # sum() of the mask is the count; mean() is the share of all predictions.
    print(f"{bucket_label}{bucket_mask.sum():,} ({bucket_mask.mean()*100:.2f}%)")

# %% [markdown]
# ## 3.9 Quick Prediction Preview (Default Threshold 0.5)

# %%
DEFAULT_THRESHOLD = 0.5
# 1 where the probability reaches the threshold, 0 otherwise.
y_pred_default = (all_probabilities >= DEFAULT_THRESHOLD).astype(int)

print("\n" + "=" * 60)
print(f"PREDICTIONS AT DEFAULT THRESHOLD ({DEFAULT_THRESHOLD})")
print("=" * 60)

n_predicted_fraud = y_pred_default.sum()
n_predicted_legit = (y_pred_default == 0).sum()
print(f"Predicted Fraud: {n_predicted_fraud:,}")
print(f"Predicted Non-Fraud: {n_predicted_legit:,}")

# Ground-truth comparison is only possible when the test data had labels.
if y_test is not None:
    n_actual_fraud = y_test.sum()
    n_actual_legit = (y_test == 0).sum()
    print(f"\nActual Fraud: {n_actual_fraud:,.0f}")
    print(f"Actual Non-Fraud: {n_actual_legit:,.0f}")

    # Quick metrics
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    accuracy = accuracy_score(y_test, y_pred_default)
    # zero_division=0 is passed to the three ratio metrics below.
    precision = precision_score(y_test, y_pred_default, zero_division=0)
    recall = recall_score(y_test, y_pred_default, zero_division=0)
    f1 = f1_score(y_test, y_pred_default, zero_division=0)

    print(f"\nQuick Metrics (threshold={DEFAULT_THRESHOLD}):")
    for metric_label, metric_value in (
        ("  Accuracy:  ", accuracy),
        ("  Precision: ", precision),
        ("  Recall:    ", recall),
        ("  F1 Score:  ", f1),
    ):
        print(f"{metric_label}{metric_value:.4f}")

# %% [markdown]
# ## 3.10 Save Predictions

# %%
# Create predictions dataframe: one row per test sample, in loader order
prediction_columns = {
    'probability': all_probabilities,
    'prediction_default': y_pred_default
}
if y_test is not None:
    # Ground truth is only included when the test data had labels.
    prediction_columns['actual'] = y_test

predictions_df = pd.DataFrame(prediction_columns)

# Save predictions
predictions_df.to_csv('../../outputs/non_pca_primary/neural_network/test_predictions.csv', index=False)
print("\n✓ Saved: test_predictions.csv")

# %%
# Save raw probabilities as numpy array (for faster loading in evaluation)
np.save('../../outputs/non_pca_primary/neural_network/test_probabilities.npy', all_probabilities)
print("✓ Saved: test_probabilities.npy")

# Labels are written only when they exist.
if y_test is not None:
    np.save('../../outputs/non_pca_primary/neural_network/test_labels.npy', y_test)
    print("✓ Saved: test_labels.npy")

# %% [markdown]
# ## 3.11 Inference Summary

# %%
print("\n" + "=" * 70)
print("INFERENCE SUMMARY")
print("=" * 70)

# Describe the architecture from the loaded config rather than hard-coding
# it, so the summary cannot drift from the model that was actually run.
architecture_text = " → ".join(
    [f"{model_config['input_size']} features"]
    + [str(size) for size in model_config['hidden_sizes']]
    + ["1"]
)

# y_test is None when the test data had no 'is_fraud' column; every other
# cell guards for that, so the summary must not call y_test.mean() blindly.
if y_test is not None:
    fraud_rate_text = f"{y_test.mean()*100:.2f}%"
    labels_file_line = "\n- outputs/non_pca_primary/neural_network/test_labels.npy"
else:
    fraud_rate_text = "N/A (test data has no labels)"
    labels_file_line = ""  # test_labels.npy is not written without labels

print(f"""
Dataset: Non-PCA Primary (Sparkov)
Model: Neural Network ({architecture_text})

Test Set:
- Total samples: {len(all_probabilities):,}
- Actual fraud rate: {fraud_rate_text}

Predictions (threshold=0.5):
- Predicted frauds: {y_pred_default.sum():,}
- Predicted non-frauds: {(y_pred_default == 0).sum():,}

Files saved:
- outputs/non_pca_primary/neural_network/test_predictions.csv
- outputs/non_pca_primary/neural_network/test_probabilities.npy{labels_file_line}

Next step: Run 04_non_pca_primary_evaluation.ipynb for:
- Threshold optimization
- Detailed metrics at various thresholds
- Confusion matrix & ROC curves
- Final model performance analysis
""")

# %% [markdown]
# ## 3.12 Sample Predictions

# %%
# Show sample predictions
print("\n" + "=" * 60)
print("SAMPLE PREDICTIONS (First 20)")
print("=" * 60)

# assign() returns a new frame, so predictions_df keeps full precision.
sample_df = predictions_df.head(20).assign(
    probability=lambda frame: frame['probability'].round(4)
)
print(sample_df.to_string(index=False))

# %%
# Show high-confidence fraud predictions
print("\n" + "=" * 60)
print("HIGH-CONFIDENCE FRAUD PREDICTIONS (prob >= 0.9)")
print("=" * 60)

is_high_confidence = predictions_df['probability'] >= 0.9
high_conf_fraud = predictions_df[is_high_confidence]
print(f"Count: {len(high_conf_fraud):,}")

# The hit rate needs labels and a non-empty selection (avoids dividing by 0).
if y_test is not None and len(high_conf_fraud) > 0:
    actual_fraud_in_high_conf = high_conf_fraud['actual'].sum()
    high_conf_hit_pct = actual_fraud_in_high_conf/len(high_conf_fraud)*100
    print(f"Actually fraud: {actual_fraud_in_high_conf:,.0f} ({high_conf_hit_pct:.1f}%)")

# %%
# Show low-confidence predictions (uncertain)
print("\n" + "=" * 60)
print("UNCERTAIN PREDICTIONS (0.4 <= prob <= 0.6)")
print("=" * 60)

# Both bounds are inclusive, matching the banner text above.
is_uncertain = (predictions_df['probability'] >= 0.4) & (predictions_df['probability'] <= 0.6)
uncertain = predictions_df[is_uncertain]
uncertain_share_pct = len(uncertain)/len(predictions_df)*100
print(f"Count: {len(uncertain):,} ({uncertain_share_pct:.2f}% of total)")

if y_test is not None and len(uncertain) > 0:
    actual_fraud_in_uncertain = uncertain['actual'].sum()
    uncertain_fraud_pct = actual_fraud_in_uncertain/len(uncertain)*100
    print(f"Actually fraud: {actual_fraud_in_uncertain:,.0f} ({uncertain_fraud_pct:.1f}%)")

print("\n" + "=" * 70)
print("INFERENCE COMPLETE!")
print("=" * 70)

Libraries imported successfully!
Using device: cuda
MODEL CONFIGURATION
Input size: 22
Hidden sizes: [256, 128, 64]
Dropout rate: 0.3
Best F1 (training): 0.2134
Epochs trained: 11

ENCODERS LOADED
Gender mapping: {'M': 1, 'F': 0}
Category columns: 14 categories
State encoding: 51 states

✓ Scaler loaded

✓ Model loaded and set to evaluation mode
Total parameters: 48,001
Loading test dataset...
Test set shape: (555719, 23)
Test set fraud rate: 0.39%

Preprocessing test data...
Preprocessed features shape: (555719, 22)
Feature columns: ['amt', 'gender', 'city_pop', 'hour', 'day_of_week', 'month', 'age', 'state_encoded', 'cat_entertainment', 'cat_food_dining', 'cat_gas_transport', 'cat_grocery_net', 'cat_grocery_pos', 'cat_health_fitness', 'cat_home', 'cat_kids_pets', 'cat_misc_net', 'cat_misc_pos', 'cat_personal_care', 'cat_shopping_net', 'cat_shopping_pos', 'cat_travel']
Batch size: 2048
Number of batches: 272

RUNNING INFERENCE
Processed batch 50/272
Processed batch 100/272
Processed b