In [11]:
import pandas as pd
import numpy as np
import joblib
from datetime import datetime

# Restore the persisted artifacts: the random-forest classifier, the feature
# scaler, and one label encoder per categorical column.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code - only point these paths at artifact files you trust.
_ENCODER_FILES = (
    ('transaction_type', 'label_encoder_transaction_type.pkl'),
    ('location', 'label_encoder_location.pkl'),
    ('device_id', 'label_encoder_device_id.pkl'),
    ('ip_address', 'label_encoder_ip_address.pkl'),
)

rf_model = joblib.load('fraud_detection_rf_model.pkl')
scaler = joblib.load('scaler.pkl')
# Keyed by column name; built in the same order the files are listed above.
label_encoders = {
    column: joblib.load(pickle_path) for column, pickle_path in _ENCODER_FILES
}

# Helper: encode one categorical value, warning when it was never seen in training
def _encode_label(encoder, value, column):
    """
    Return the integer code `encoder` assigns to `value`.

    If `value` is not among `encoder.classes_`, fall back to the code of the
    encoder's first class (the original best-effort behaviour) and emit a
    RuntimeWarning so the substitution is visible to the caller.
    """
    import warnings

    if value in encoder.classes_:
        return encoder.transform([value])[0]

    fallback_class = encoder.classes_[0]
    warnings.warn(
        f"Unseen value {value!r} for '{column}'; "
        f"encoding it as {fallback_class!r} instead.",
        RuntimeWarning,
    )
    return encoder.transform([fallback_class])[0]


# Function to preprocess a single input transaction
def preprocess_input(transaction_dict):
    """
    Preprocess a single transaction input for prediction.

    Input: Dictionary with raw transaction data. It must contain the keys
           'user_id', 'transaction_amount', 'transaction_time',
           'transaction_type', 'location', 'device_id', 'ip_address' and
           'is_mobile'; any additional keys are ignored.
    Output: Preprocessed numpy array (one row, nine columns in the order of
            `expected_cols` below) ready for model prediction.
    Raises: KeyError if any required key is missing.
    Warns:  RuntimeWarning when a categorical value is not known to its label
            encoder and the encoder's first class is substituted.

    Relies on the module-level `label_encoders` dict and `scaler` object.
    """
    required_fields = ['user_id', 'transaction_amount', 'transaction_time',
                       'transaction_type', 'location', 'device_id',
                       'ip_address', 'is_mobile']
    missing = [field for field in required_fields if field not in transaction_dict]
    if missing:
        # Fail early with a readable message instead of a bare pandas KeyError.
        raise KeyError(f"transaction_dict is missing required field(s): {missing}")

    # Convert dictionary to a one-row DataFrame
    df_input = pd.DataFrame([transaction_dict])

    # Extract time-based features, then discard the raw timestamp
    timestamps = pd.to_datetime(df_input['transaction_time'])
    df_input['hour'] = timestamps.dt.hour
    df_input['day_of_week'] = timestamps.dt.dayofweek
    df_input = df_input.drop(columns=['transaction_time'])

    # Encode categorical variables using the loaded label encoders.
    # `le` and `col` are bound as lambda defaults so the callable does not
    # depend on the loop variables after the iteration moves on.
    for col, le in label_encoders.items():
        df_input[col] = df_input[col].apply(
            lambda x, le=le, col=col: _encode_label(le, x, col)
        )

    # Ensure columns match training data (excluding transaction_id and is_fraud).
    # The explicit copy keeps the assignment below from writing into a
    # column-subset selection of the wider frame when extra keys were supplied.
    expected_cols = ['user_id', 'transaction_amount', 'transaction_type', 'location',
                     'device_id', 'ip_address', 'is_mobile', 'hour', 'day_of_week']
    df_input = df_input[expected_cols].copy()

    # Scale numerical features.
    # NOTE(review): this column order must match the order the scaler was
    # fitted with - confirm against the training notebook.
    numerical_cols = ['transaction_amount', 'user_id', 'hour', 'day_of_week']
    df_input[numerical_cols] = scaler.transform(df_input[numerical_cols])

    return df_input.to_numpy()

# Demo: score one hand-written transaction end to end.
# Edit the values below to try other inputs; the keys are the raw fields
# that preprocess_input reads.
sample_transaction = {
    'user_id': 103,
    'transaction_amount': 400.00,
    'transaction_time': '2/20/2022 7:00:00',  # parsed by pandas; 07:00 on 20 Feb 2022
    'transaction_type': 'payment',
    'location': 'Visakhapatnam',
    'device_id': 'device_103',
    'ip_address': '192.168.0.103',
    'is_mobile': 1,
}

# Turn the raw dict into the feature row the model consumes.
processed_input = preprocess_input(sample_transaction)

# Hard class label plus the probability taken from column 1 of predict_proba.
prediction = rf_model.predict(processed_input)
probability = rf_model.predict_proba(processed_input)[:, 1]

# Report the input, the predicted label and the fraud probability.
print(f"Sample Transaction: {sample_transaction}")
print(f"Prediction (0 = Non-Fraud, 1 = Fraud): {prediction[0]}")
print(f"Probability of Fraud: {probability[0]:.2%}")

# Human-readable verdict derived from the predicted label.
print(
    "This transaction is predicted to be FRAUDULENT."
    if prediction[0] == 1
    else "This transaction is predicted to be NON-FRAUDULENT."
)

Sample Transaction: {'user_id': 103, 'transaction_amount': 400.0, 'transaction_time': '2/20/2022 7:00:00', 'transaction_type': 'payment', 'location': 'Visakhapatnam', 'device_id': 'device_103', 'ip_address': '192.168.0.103', 'is_mobile': 1}
Prediction (0 = Non-Fraud, 1 = Fraud): 0
Probability of Fraud: 1.00%
This transaction is predicted to be NON-FRAUDULENT.


