In [101]:
import warnings
warnings.filterwarnings('ignore')

In [99]:
from collections import Counter
from sklearn.datasets import make_classification
import pandas as pd

# Generate an imbalanced binary dataset: 1000 samples, 10 features (5 informative),
# with class weights of 0.99 / 0.01. The fixed random_state keeps the data reproducible.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5,
                           n_redundant=0, n_classes=2, n_clusters_per_class=1,
                           weights=[0.99, 0.01], flip_y=0, random_state=42)

# Show the per-class sample counts (a real f-string instead of an f-prefix mixed with %-formatting).
print(f"Original dataset shape {Counter(y)}")
# Original dataset shape Counter({0: 990, 1: 10})

Original dataset shape Counter({0: 990, 1: 10})


In [102]:
from imblearn.under_sampling import RandomUnderSampler

# Resample with RandomUnderSampler using a fixed seed; the expected per-class
# counts after resampling are recorded in the comment under the print call.
rus = RandomUnderSampler(random_state=42)
X_resampled_under, y_resampled_under = rus.fit_resample(X, y)

# Real f-string instead of an f-prefix mixed with %-formatting; printed text is unchanged.
print(f"Resampled dataset shape {Counter(y_resampled_under)}")
# Resampled dataset shape Counter({0: 10, 1: 10})

Resampled dataset shape Counter({0: 10, 1: 10})


In [103]:
from imblearn.over_sampling import SMOTE

# Resample with SMOTE using a fixed seed; the expected per-class counts after
# resampling are recorded in the comment under the print call.
smote = SMOTE(random_state=42)
X_resampled_smote, y_resampled_smote = smote.fit_resample(X, y)

# Real f-string instead of an f-prefix mixed with %-formatting; printed text is unchanged.
print(f"Resampled dataset shape {Counter(y_resampled_smote)}")
# Resampled dataset shape Counter({0: 990, 1: 990})

Resampled dataset shape Counter({0: 990, 1: 990})


In [106]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Hold out 30% of the samples for evaluation; the seed makes the split reproducible.
# NOTE(review): the split is not stratified, so with a 99:1 class ratio the number of
# minority samples in the test set is left to chance — consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit two otherwise-identical liblinear classifiers:
#   1. without class weights
#   2. with class_weight='balanced'
model_no_weight = LogisticRegression(solver='liblinear').fit(X_train, y_train)
model_with_weight = LogisticRegression(
    solver='liblinear', class_weight='balanced'
).fit(X_train, y_train)

# Predict on the same held-out set with both models.
pred_no_weight = model_no_weight.predict(X_test)
pred_with_weight = model_with_weight.predict(X_test)

# Print the per-class reports in the same order as before; zero_division=0 is
# passed through to classification_report unchanged.
print("--- No Class Weight ---")
print(classification_report(y_test, pred_no_weight, zero_division=0))
print("\n--- With Class Weight ('balanced') ---")
print(classification_report(y_test, pred_with_weight, zero_division=0))

--- No Class Weight ---
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       297
           1       1.00      0.33      0.50         3

    accuracy                           0.99       300
   macro avg       1.00      0.67      0.75       300
weighted avg       0.99      0.99      0.99       300


--- With Class Weight ('balanced') ---
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       297
           1       0.50      0.33      0.40         3

    accuracy                           0.99       300
   macro avg       0.75      0.66      0.70       300
weighted avg       0.99      0.99      0.99       300

