# 회귀분석 연습

In [None]:
import os
import pandas as pd
import numpy as np
import hds
from plt_rcs import *

## Wine 데이터 -> 로지스틱 회귀

### 데이터 불러오기

In [None]:
# Show the current working directory before it is changed below.
os.getcwd()

In [None]:
# Switch to the data directory. The path is relative, so it is resolved
# against whatever the working directory is when this cell runs; running the
# cell a second time applies the move again.
os.chdir('../../data')

In [None]:
# First entry of the directory listing whose name contains 'Wine'
# (raises IndexError when nothing matches).
[i for i in os.listdir() if 'Wine' in i][0]

In [None]:
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
objs = pd.read_pickle('WhiteWine.pkl')

In [None]:
# Inject every item of the loaded object into the notebook's global
# namespace, using its keys as variable names.
# NOTE(review): presumably objs is a dict of name -> object; confirm.
globals().update(objs)

In [None]:
# IPython magic: list the variables currently defined in the session.
%whos

In [None]:
X_train, X_valid, y_train, y_valid = X_train, X_valid, y_train, y_valid

### 데이터 표준화

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# 표준화 객체
scaler = StandardScaler()

In [None]:
# 표준화
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)

In [None]:
pd.DataFrame(data=X_train).describe().round(3)

In [None]:
pd.DataFrame(data=X_valid).describe().round(3)

### 로지스틱 회귀 모델

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
model_logit = LogisticRegression(C=np.inf, max_iter=1000, random_state=0, class_weight='balanced')

In [None]:
model_logit.fit(X=X_train, y=y_train)

In [None]:
print('Train :', model_logit.score(X=X_train, y=y_train))
print('Valid :', model_logit.score(X=X_valid, y=y_valid))

### 릿지 모델

In [None]:
model_ridge = LogisticRegression(l1_ratio=0, max_iter=1000, random_state=0, solver='sag', C=0.1, class_weight='balanced')

In [None]:
model_ridge.fit(X=X_train, y=y_train)

In [None]:
print('Train :', model_ridge.score(X=X_train, y=y_train))
print('Valid :', model_ridge.score(X=X_valid, y=y_valid))

### 라쏘 모델

In [None]:
model_lasso = LogisticRegression(l1_ratio=1, max_iter=1000, random_state=0, solver='saga', class_weight='balanced', C=0.1)

In [None]:
model_lasso.fit(X=X_train, y=y_train)

In [None]:
print('Train :', model_lasso.score(X=X_train, y=y_train))
print('Valid :', model_lasso.score(X=X_valid, y=y_valid))

In [None]:
pd.DataFrame(
    data={
        'Logit': model_logit.coef_[0],
        'Ridge': model_ridge.coef_[0],
        'Lasso': model_lasso.coef_[0],
    }
)

### 예측값 생성

In [None]:
y_pred_logit = model_logit.predict(X=X_valid)
y_pred_ridge = model_ridge.predict(X=X_valid)
y_pred_lasso = model_lasso.predict(X=X_valid)

In [None]:
hds.stat.clfmetrics(y_true=y_valid, y_pred=y_pred_logit)

In [None]:
hds.stat.clfmetrics(y_true=y_valid, y_pred=y_pred_ridge)

In [None]:
hds.stat.clfmetrics(y_true=y_valid, y_pred=y_pred_lasso)

### 예측 확률 생성

In [None]:
y_prob_logit = model_logit.predict_proba(X=X_valid)
y_prob_ridge = model_ridge.predict_proba(X=X_valid)
y_prob_lasso = model_lasso.predict_proba(X=X_valid)

In [None]:
hds.plot.roc_curve(y_true=y_valid, y_prob=y_prob_logit, color='red')
hds.plot.roc_curve(y_true=y_valid, y_prob=y_prob_ridge, color='green')
hds.plot.roc_curve(y_true=y_valid, y_prob=y_prob_lasso, color='blue')