In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [3]:
# 1. Load the data (Kaggle credit-card dataset).
# The default below is the original author's local path, kept so existing
# runs behave the same. On any other machine, set the CREDITCARD_CSV
# environment variable to the location of the CSV instead of editing this cell.
DATA_PATH = (
    os.environ.get("CREDITCARD_CSV")  # an unset or empty value falls through to the default
    or r"C:\Users\khj98\Documents\python\sk쉴더스루키즈\data\USV2_creditcard.csv"
)
df = pd.read_csv(DATA_PATH)

In [4]:
# 2. Split the frame into the label column and the feature columns.
y = df["Class"]  # held back; only used later for evaluation
X = df.drop("Class", axis=1)

# 3. Standardize every feature (time and amount are on very different scales).
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [5]:
# ---------------------------
# 🧪 1. Isolation Forest
# ---------------------------
print("=== Isolation Forest ===")

# Fixed seed so the fitted forest (and therefore the report) is repeatable.
iso_model = IsolationForest(
    n_estimators=100,
    contamination=0.0017,
    random_state=42,
)

# fit_predict marks outliers with -1; recode so that 1 = flagged, 0 = not flagged,
# which lines up with the 0/1 values in `y`.
iso_preds = (iso_model.fit_predict(X_scaled) == -1).astype(int)

iso_cm = confusion_matrix(y, iso_preds)
print(iso_cm)
iso_report = classification_report(y, iso_preds, digits=4)
print(iso_report)

=== Isolation Forest ===
[[283955    360]
 [   367    125]]
              precision    recall  f1-score   support

           0     0.9987    0.9987    0.9987    284315
           1     0.2577    0.2541    0.2559       492

    accuracy                         0.9974    284807
   macro avg     0.6282    0.6264    0.6273    284807
weighted avg     0.9974    0.9974    0.9974    284807



In [6]:

# ---------------------------
# 🧪 2. DBSCAN
# ---------------------------
print("\n=== DBSCAN ===")

# n_jobs=-1 asks scikit-learn to use all available cores.
dbscan = DBSCAN(
    eps=2,
    min_samples=5,
    n_jobs=-1,
)

# DBSCAN labels noise points as -1; treat those as 1 (fraudulent transaction)
# and every clustered point as 0.
db_preds = (dbscan.fit_predict(X_scaled) == -1).astype(int)

db_cm = confusion_matrix(y, db_preds)
print(db_cm)
db_report = classification_report(y, db_preds, digits=4)
print(db_report)


=== DBSCAN ===
[[230399  53916]
 [    71    421]]
              precision    recall  f1-score   support

           0     0.9997    0.8104    0.8951    284315
           1     0.0077    0.8557    0.0154       492

    accuracy                         0.8104    284807
   macro avg     0.5037    0.8330    0.4552    284807
weighted avg     0.9980    0.8104    0.8936    284807

