## 위스콘신 유방암 데이터를 통해 로지스틱회귀 분류 문제 해결

In [12]:
# 필요모듈 import
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# 데이터, 모델링
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# 전처리
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 평가지표
from sklearn.metrics import accuracy_score, roc_auc_score

In [13]:
# 데이터 준비
cancer = load_breast_cancer()

In [14]:
# 스케일링 (정규분포)
data_scaled = StandardScaler().fit_transform(cancer.data)

In [15]:
# train-test split
X_train, X_test, y_train, y_test = train_test_split(data_scaled, cancer.target, test_size=0.3, random_state=0)

In [16]:
lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
lr_preds = lr_clf.predict(X_test)

accuracy_score(y_test, lr_preds), roc_auc_score(y_test, lr_preds)

(0.9766081871345029, 0.9715608465608465)

In [17]:
"""
panalty :  규제 방식
C       :  Regularzaion Strength

"""

params = {'penalty': ['l2','l1'],
         'C':[0.01, 0.1, 1, 5, 10]}

grid_clf = GridSearchCV(lr_clf, params, scoring='accuracy', cv=3)
grid_clf.fit(data_scaled,cancer.target)

GridSearchCV(cv=3, estimator=LogisticRegression(),
             param_grid={'C': [0.01, 0.1, 1, 5, 10], 'penalty': ['l2', 'l1']},
             scoring='accuracy')

In [18]:
grid_clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [19]:
grid_clf.best_score_

0.975392184164114