In [5]:
from joblib import load
import pandas as pd
import numpy as np

# Step 1: Load the saved models and preprocessors
models = {}
model_names = ['HistGradientBoosting', 'Random_Forest', 'Logistic_Regression']
for name in model_names:
    models[name] = load(f"{name}_model.joblib")

le = load("label_encoder.joblib")      # Label encoder for 'type'
scaler = load("standard_scaler.joblib") # Standard scaler

print("Expected numerical features for scaler:", scaler.feature_names_in_)

# Step 2: Prepare new data for prediction
new_data = pd.DataFrame({
    'amount': [100.50, 5000.00, 25.30],
    'transaction_type': ['purchase', 'transfer', 'withdrawal'],
    'account_age_days': [30, 200, 10],  # Not used in model but kept for reference
    'newbalanceOrig': [1000.00, 2000.00, 500.00],
    'oldbalanceDest': [500.00, 10000.00, 200.00],
    'oldbalanceOrg': [1100.50, 7000.00, 525.30]
})

# Step 3: Handle unseen categories in transaction_type
known_classes = le.classes_
default_category = known_classes[0]
print(f"Known transaction types: {known_classes}")
print(f"Using default category '{default_category}' for unseen values")

new_data['transaction_type'] = new_data['transaction_type'].apply(
    lambda x: x if x in known_classes else default_category
)

# Encode categorical 'type'
new_data['type'] = le.transform(new_data['transaction_type'])

# Step 4: Add missing columns required by scaler
for col in scaler.feature_names_in_:
    if col not in new_data.columns:
        if col == 'isFlaggedFraud':
            new_data[col] = 0
        elif col == 'step':
            new_data[col] = 1
        elif col == 'newbalanceDest':
            new_data[col] = 0.0
        else:
            new_data[col] = 0.0  # default filler for numeric fields

# Step 5: Reorder columns to match scaler training
new_data = new_data.reindex(columns=scaler.feature_names_in_)

# Step 6: Scale features
scaled_features = scaler.transform(new_data)

# Step 7: Make predictions with each model
for name, model in models.items():
    predictions = model.predict(scaled_features)
    if hasattr(model, "predict_proba"):
        probabilities = model.predict_proba(scaled_features)[:, 1]
        print(f"\n{name} Predictions:")
        for i, (pred, prob) in enumerate(zip(predictions, probabilities)):
            label = 'Fraud' if pred == 1 else 'Non-Fraud'
            print(f"Transaction {i+1}: {label} (Fraud Probability: {prob:.4f})")
    else:
        print(f"\n{name} Predictions:")
        for i, pred in enumerate(predictions):
            label = 'Fraud' if pred == 1 else 'Non-Fraud'
            print(f"Transaction {i+1}: {label}")

# Step 8: Ensemble predictions (majority voting)
all_predictions = np.array([model.predict(scaled_features) for model in models.values()])
ensemble_predictions = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=all_predictions)

print("\nEnsemble Predictions (Majority Voting):")
for i, pred in enumerate(ensemble_predictions):
    label = 'Fraud' if pred == 1 else 'Non-Fraud'
    print(f"Transaction {i+1}: {label}")


Expected numerical features for scaler: ['step' 'type' 'amount' 'oldbalanceOrg' 'newbalanceOrig' 'oldbalanceDest'
 'newbalanceDest' 'isFlaggedFraud']
Known transaction types: ['CASH_IN' 'CASH_OUT' 'DEBIT' 'PAYMENT' 'TRANSFER']
Using default category 'CASH_IN' for unseen values

HistGradientBoosting Predictions:
Transaction 1: Fraud (Fraud Probability: 1.0000)
Transaction 2: Fraud (Fraud Probability: 1.0000)
Transaction 3: Fraud (Fraud Probability: 1.0000)


[Parallel(n_jobs=14)]: Using backend ThreadingBackend with 14 concurrent workers.
[Parallel(n_jobs=14)]: Done  13 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 134 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 337 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 620 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 985 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 1430 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 1957 tasks      | elapsed:    0.2s
[Parallel(n_jobs=14)]: Done 2000 out of 2000 | elapsed:    0.2s finished
[Parallel(n_jobs=14)]: Using backend ThreadingBackend with 14 concurrent workers.
[Parallel(n_jobs=14)]: Done  13 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 134 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 337 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 620 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 985 tasks      | elapsed:    0.1s
[Parallel(n_jobs=


Random_Forest Predictions:
Transaction 1: Non-Fraud (Fraud Probability: 0.4315)
Transaction 2: Non-Fraud (Fraud Probability: 0.4315)
Transaction 3: Non-Fraud (Fraud Probability: 0.4315)

Logistic_Regression Predictions:
Transaction 1: Non-Fraud (Fraud Probability: 0.1003)
Transaction 2: Non-Fraud (Fraud Probability: 0.1078)
Transaction 3: Non-Fraud (Fraud Probability: 0.1002)

Ensemble Predictions (Majority Voting):
Transaction 1: Non-Fraud
Transaction 2: Non-Fraud
Transaction 3: Non-Fraud


[Parallel(n_jobs=14)]: Done 134 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 337 tasks      | elapsed:    0.0s
[Parallel(n_jobs=14)]: Done 620 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 985 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 1430 tasks      | elapsed:    0.1s
[Parallel(n_jobs=14)]: Done 1957 tasks      | elapsed:    0.2s
[Parallel(n_jobs=14)]: Done 2000 out of 2000 | elapsed:    0.2s finished
