In [90]:
# train_fraud_model.py

import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall

# Load dataset
data = pd.read_csv('data.csv')  # Replace with your file path

# Features and target
X = data[['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
          'Merchant_Type', 'Location_Distance', 'Transaction_Frequency',
          'Is_International', 'Device_Type']]
y = data['Class']

# Define numeric and categorical features
numeric_features = ['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
                    'Location_Distance', 'Transaction_Frequency', 'Is_International']
categorical_features = ['Merchant_Type', 'Device_Type']

# Preprocessing pipeline
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Fit and transform features
X_processed = preprocessor.fit_transform(X)

# Save the preprocessor
joblib.dump(preprocessor, 'preprocessor.pkl')

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.5, stratify=y, random_state=42
)

# Compute class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Define the model
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

# Early stopping
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=2,
    restore_best_weights=True
)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    class_weight=class_weight_dict,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
results = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {results[1]:.4f}')
print(f'Test Precision: {results[2]:.4f}')
print(f'Test Recall: {results[3]:.4f}')

# Save the model
model.save('fraud_modelx.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.9718
Test Precision: 0.9743
Test Recall: 0.9692


In [106]:
# predict_fraud.py

import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model

# Define feature names
numeric_features = ['Hour_of_Day', 'Amount', 'V1', 'V2', 'V3', 'V4', 'V5',
                    'Location_Distance', 'Transaction_Frequency', 'Is_International']
categorical_features = ['Merchant_Type', 'Device_Type']

# Load the trained model and preprocessor
model = load_model('fraud_modelx.h5')
preprocessor = joblib.load('preprocessor.pkl')

# Risk level helper
def get_risk_level(score):
    if score >= 0.85:
        return 'High Risk'
    elif score >= 0.5:
        return 'Medium Risk'
    elif score >= 0.2:
        return 'Low Risk'
    else:
        return 'Very Low Risk'

# Prediction function
def predict_fraud(user_input: dict):
    """
    Predict if a transaction is fraud or not.
    user_input: dict with keys matching the model features
    """
    df = pd.DataFrame([user_input])
    X_processed = preprocessor.transform(df)
    prediction = model.predict(X_processed)[0][0]
    result = 'Fraud' if prediction >= 0.5 else 'Not Fraud'
    risk_level = get_risk_level(prediction)
    print(f"Prediction Score: {prediction:.4f} ({prediction*100:.2f}%) => {result}")
    print(f"Risk Level: {risk_level}")
    return result

# Example usage
if __name__ == '__main__':
    input_data = {
        'Hour_of_Day': 10,
        'Amount': 30,
        'V1': 0.2,
        'V2': -0.1,
        'V3': 0.3,
        'V4': -0.1,
        'V5': 0,
        'Merchant_Type': 'Retail',
        'Location_Distance': 5,
        'Transaction_Frequency': 1,
        'Is_International': 0,
        'Device_Type': 'Desktop'
    }

    predict_fraud(input_data)


Prediction Score: 0.1969 (19.69%) => Not Fraud
Risk Level: Very Low Risk
