# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
import joblib

from sklearn.metrics import accuracy_score

# Train and persist a random-forest fraud classifier.
#
# Steps: load the CSV, select the model features, split into train/test,
# fit the preprocessing on the training rows only, train the forest, save the
# fitted preprocessor and model with joblib, and print hold-out accuracy.

# Load dataset
df = pd.read_csv("Fraud Detection Dataset.csv")

# Define relevant features (exclude identifiers)
features = [
    'Transaction_Amount', 'Transaction_Type', 'Time_of_Transaction', 'Device_Used',
    'Location', 'Previous_Fraudulent_Transactions', 'Account_Age',
    'Number_of_Transactions_Last_24H', 'Payment_Method'
]
X = df[features]
y = df['Fraudulent']

# Define categorical and numerical columns
categorical_cols = ['Transaction_Type', 'Device_Used', 'Location', 'Payment_Method']
numerical_cols = [
    'Transaction_Amount', 'Time_of_Transaction', 'Previous_Fraudulent_Transactions',
    'Account_Age', 'Number_of_Transactions_Last_24H'
]

# Categorical columns: fill gaps with the literal 'missing' category, then
# one-hot encode. handle_unknown='ignore' encodes categories not seen during
# fit as all zeros instead of raising at transform time.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Numerical columns: fill gaps with the column mean learned during fit.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean'))
])

# Combine into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_pipeline, categorical_cols),
        ('num', num_pipeline, numerical_cols)
    ])

# Split the raw data BEFORE fitting any preprocessing. Fitting the imputers
# and encoder on the full dataset would let statistics from the test rows
# leak into training and make the hold-out score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn imputation values and categories from the training rows only, then
# apply that same fitted transformation to the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full feature matrix encoded with the train-fitted preprocessor; retained in
# case later code references X_preprocessed.
X_preprocessed = preprocessor.transform(X)

# Train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Save the preprocessor and model. Inference code must load both and call
# preprocessor.transform(...) before rf_model.predict(...).
joblib.dump(preprocessor, 'preprocessor.pkl')
joblib.dump(rf_model, 'fraud_detection_model.pkl')

# Evaluate the model on the held-out rows.
# NOTE(review): if the 'Fraudulent' classes are imbalanced, accuracy alone can
# look good for a model that rarely flags fraud; consider also reporting
# precision/recall.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")