# CREDIT CARD FRAUD DETECTION

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Load the dataset
data_path = ("C:\\Users\\admin\\Desktop\\fraud.csv")
df = pd.read_csv(data_path)

In [3]:
# Data Preprocessing
# Exclude non-numeric columns from X
X = df.drop(["is_fraud", "trans_date_trans_time", "merchant", "category", "first", "last", "gender", "street", "city", "state", "zip", "job", "dob", "trans_num"], axis=1)
y = df["is_fraud"]  # Target variable

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Feature Scaling (standardization)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Model 1: Logistic Regression
logistic_reg = LogisticRegression(random_state=42)
logistic_reg.fit(X_train, y_train)
y_pred_logistic = logistic_reg.predict(X_test)


In [7]:
# Model 2: Decision Trees
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)
y_pred_dt = decision_tree.predict(X_test)


In [8]:
random_forest = RandomForestClassifier(random_state=42, n_jobs=-1)

In [9]:
# Model Evaluation
def evaluate_model(model_name, y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred)
    class_report = classification_report(y_true, y_pred)
    
    print(f"------ {model_name} Model ------")
    print(f"Accuracy: {accuracy:.4f}")
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", class_report)

In [10]:
def evaluate_model(model_name, y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    confusion = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred)
    
    print(f"Evaluation for {model_name}:")
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:")
    print(confusion)
    print("Classification Report:")
    print(report)

In [None]:
# Now you can use the evaluate_model function to evaluate your models
evaluate_model("Logistic Regression", y_test, y_pred_logistic)
evaluate_model("Decision Trees", y_test, y_pred_dt)
evaluate_model("Random Forests", y_test, y_pred_dt)

Evaluation for Logistic Regression:
Accuracy: 1.00
Confusion Matrix:
[[110683     35]
 [   426      0]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    110718
           1       0.00      0.00      0.00       426

    accuracy                           1.00    111144
   macro avg       0.50      0.50      0.50    111144
weighted avg       0.99      1.00      0.99    111144

Evaluation for Logistic Regression:
Accuracy: 1.00
Confusion Matrix:
[[110683     35]
 [   426      0]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    110718
           1       0.00      0.00      0.00       426

    accuracy                           1.00    111144
   macro avg       0.50      0.50      0.50    111144
weighted avg       0.99      1.00      0.99    111144

Evaluation for Logistic Regression:
Accuracy: 1.00
Confusion Matrix:
[[110683     35]
 [   4