In [22]:
# Import libraries and constants
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_fscore_support

# Seed handed to train_test_split (random_state) so the split is reproducible across runs.
RANDOM_STATE = 42

In [17]:
# Load Data
# Read the raw CSV and normalise column dtypes in one chained expression:
#   - Class  -> int8
#   - Amount -> numeric; entries that cannot be parsed become NaN (errors='coerce')
#   - Time   -> numeric; entries that cannot be parsed become NaN (errors='coerce')
df = (
    pd.read_csv('./data/creditcard.csv')
    .astype({'Class': 'int8'})
    .assign(
        Amount=lambda frame: pd.to_numeric(frame['Amount'], errors='coerce'),
        Time=lambda frame: pd.to_numeric(frame['Time'], errors='coerce'),
    )
)

In [18]:
# Basic sanity
# Frame dimensions, followed by the dtypes of the first five columns.
print(df.shape, df.dtypes.head(), sep='\n')
# Class balance: absolute counts, then relative proportions.
print(
    df['Class'].value_counts(normalize=False),
    df['Class'].value_counts(normalize=True),
)
# Summary statistics for the two non-anonymised columns, including the upper tail.
print(df.loc[:, ['Time', 'Amount']].describe(percentiles=[0.5, 0.9, 0.99]))

(284807, 31)
Time    float64
V1      float64
V2      float64
V3      float64
V4      float64
dtype: object
Class
0    284315
1       492
Name: count, dtype: int64 Class
0    0.998273
1    0.001727
Name: proportion, dtype: float64
                Time         Amount
count  284807.000000  284807.000000
mean    94813.859575      88.349619
std     47488.145955     250.120109
min         0.000000       0.000000
50%     84692.000000      22.000000
90%    157640.400000     203.000000
99%    170560.940000    1017.970000
max    172792.000000   25691.160000


In [19]:
# Train/test split (stratified)
# Features are restricted to the Amount and Time columns; the label is Class.
X = df.loc[:, ['Amount', 'Time']].copy()
y = df.loc[:, 'Class']
# Hold out 20% of the rows, stratifying on y so both partitions keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=RANDOM_STATE,
)

In [21]:
# Scaling
# Learn the per-column mean/std from the training rows only, then apply those
# same statistics to both partitions so nothing from the test set reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
# Naive Baseline
# Score every test row as 0.0, i.e. always predict the negative class. The
# resulting numbers are the floor that any real model has to beat.
y_pred_proba_zero = np.zeros(len(y_test))

# Threshold-free metrics consume the continuous scores directly.
print("ROC-AUC:", roc_auc_score(y_test, y_pred_proba_zero))         # constant scores cannot rank anything -> 0.5
print("PR-AUC :", average_precision_score(y_test, y_pred_proba_zero)) # equals positive class prevalence

# Precision/recall/F1 are defined on hard labels, not scores. Threshold the
# scores explicitly (0.5 cut-off) instead of passing the score vector itself, so
# this cell keeps working once the zeros are replaced by real probabilities.
y_pred_zero = (y_pred_proba_zero >= 0.5).astype(int)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_test, y_pred_zero, average='binary', zero_division=0
)
# No positive predictions are made, so all three fall back to 0.0 via zero_division=0.
print("Precision/Recall/F1:", prec, rec, f1)

ROC-AUC: 0.5
PR-AUC : 0.0017204452090867595
Precision/Recall/F1: 0.0 0.0 0.0
