In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [2]:
train_df = pd.read_csv('fraudTrain.csv')
test_df = pd.read_csv('fraudTest.csv')

In [3]:
full_df = pd.concat([train_df, test_df], axis=0)

In [4]:
full_df = full_df.drop(columns=['Unnamed: 0', 'trans_date_trans_time', 'cc_num'])

In [5]:
for col in full_df.select_dtypes(include=['object']).columns:
    full_df[col] = LabelEncoder().fit_transform(full_df[col])

In [6]:
train_processed = full_df.iloc[:len(train_df)]
test_processed = full_df.iloc[len(train_df):]

In [7]:
X_train = train_processed.drop('is_fraud', axis=1)
y_train = train_processed['is_fraud']
X_test = test_processed.drop('is_fraud', axis=1)
y_test = test_processed['is_fraud']

In [8]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier()
}

In [10]:
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\n{name}")
    print(confusion_matrix(y_test, preds))
    print(classification_report(y_test, preds))
    print("ROC AUC Score:", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))


Logistic Regression
[[553227    347]
 [  2145      0]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    553574
           1       0.00      0.00      0.00      2145

    accuracy                           1.00    555719
   macro avg       0.50      0.50      0.50    555719
weighted avg       0.99      1.00      0.99    555719

ROC AUC Score: 0.831879859011191

Decision Tree
[[550816   2758]
 [   884   1261]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    553574
           1       0.31      0.59      0.41      2145

    accuracy                           0.99    555719
   macro avg       0.66      0.79      0.70    555719
weighted avg       1.00      0.99      0.99    555719

ROC AUC Score: 0.791448308736693
