# 로지스틱 회귀(Logistic Regression)
- 이름은 회귀이지만 분류 모델
- 가볍고 빠르며 성능이 뛰어나다

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

cancer = load_breast_cancer()
scaler = StandardScaler()
data_scaled = scaler.fit_transform(cancer.data)

X_train, X_test, y_train, y_test = train_test_split(data_scaled, cancer.target, test_size=0.3)

lr = LogisticRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)

print('Score of accuracy: ', accuracy_score(y_test, pred))
print('Score of roc_auc_score: ', roc_auc_score(y_test, pred))

Score of accuracy:  0.9649122807017544
Score of roc_auc_score:  0.9588661551577153


## 하이퍼 파라미터 조정
- 로지스틱 회귀의 클래스 파라미터
    - penalty: 규제의 유형(L1, L2)
    - C: alpha의 역수

In [4]:
from sklearn.model_selection import GridSearchCV

params = {
    'penalty': ['l2', 'l1'],
    'C': [0.01, 0.1, 1, 5, 10],
    'solver': ['liblinear']
}

grid_clf = GridSearchCV(lr, param_grid=params, scoring='accuracy', cv=3)
grid_clf.fit(data_scaled, cancer.target)
print(grid_clf.best_params_, grid_clf.best_score_)

{'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'} 0.9789103690685413
