# Linear Regression

---

In [None]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# 1 선형 회귀

|모델 | 설명|
|:--- |:---|
|일반 선형 회귀 | 예측값과 측정값의 오차를 최소화할 수 있도록 회귀 계수를 최적화|
|릿지(Ridge) | 선형 회귀에 L2 규제를 추가한 모델. L2 규제는 상대적으로 큰 회귀 계수값을 더 작게 만드는 규제 모델|
|라쏘(Lasso) | 선형 회귀에 L1 규제를 추가한 모델. L1 규제는 영향력이 작은 피처의 회귀 계수값을 0으로 만드는 규제 모델(피처 선택 기능)|
|엘라스틱넷(ElasticNet) | L2 규제와 L1 규제를 결합한 모델. 주로 피처가 많은 데이터 셋에 적용|
|로지스틱 회귀(Logistic Regression) | 분류에 사용되는 선형 회귀 모델|

### 1.1 선형 회귀 적용

In [None]:
# Visual Python: Data Analysis > File
# Read ./data/boston.csv into a DataFrame; the relative path is resolved against
# the kernel's current working directory.
df = pd.read_csv('./data/boston.csv')
# Bare last expression: the notebook renders the frame as the cell output.
df

In [None]:
# Visual Python: Machine Learning > Data Split
from sklearn.model_selection import train_test_split

# Predictor columns fed to the regressor; `target` is the response column.
boston_features = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']

# Pin the shuffle seed so that Restart & Run All reproduces the same split, and
# therefore the same metrics and coefficients in the cells that follow.
# The default train/test proportions are left unchanged.
X_train, X_test, y_train, y_test = train_test_split(
    df[boston_features], df['target'], random_state=42
)

In [None]:
# Visual Python: Machine Learning > Regressor
from sklearn.linear_model import LinearRegression

# Unfitted linear regression estimator with default settings; it is fitted in the next cell.
model = LinearRegression()

In [None]:
# Visual Python: Machine Learning > Fit/Predict
# Fit the regressor on the training split. The call is the last expression,
# so its return value is shown as the cell output.
model.fit(X_train, y_train)

In [None]:
# Visual Python: Machine Learning > Fit/Predict
# Predict on the held-out split; `pred` is consumed by the evaluation cells below.
pred = model.predict(X_test)
# Display the predictions as the cell output.
pred

In [None]:
# Visual Python: Machine Learning > Evaluation
from sklearn import metrics

In [None]:
# Visual Python: Machine Learning > Evaluation
from IPython.display import display, Markdown

In [None]:
# Visual Python: Machine Learning > Evaluation
# R squared: score of the predictions against the held-out targets
r2 = metrics.r2_score(y_test, pred)
print('R squared: {}'.format(r2))

In [None]:
# Visual Python: Machine Learning > Evaluation
# MAE (Mean Absolute Error) between held-out targets and predictions
mae = metrics.mean_absolute_error(y_test, pred)
print('MAE: {}'.format(mae))

In [None]:
# Visual Python: Machine Learning > Evaluation
# RMSE (Root Mean Squared Error): square root of the mean squared error
mse = metrics.mean_squared_error(y_test, pred)
print('RMSE: {}'.format(mse**0.5))

In [None]:
# Visual Python: Machine Learning > Evaluation
# Regression plot
display(Markdown('### Regression plot'))
plt.scatter(y_test, pred)
plt.xlabel('y_test')
plt.ylabel('pred')
plt.show()

#### Information

In [None]:
# Intercept
# Visual Python: Machine Learning > Model Info
# Read the fitted intercept from the regression model and display it.
intercepts = model.intercept_
intercepts

In [None]:
# Regression coefficients
# Visual Python: Machine Learning > Model Info
# Read the fitted coefficients from the regression model and display them;
# `coef` is reused by the sorting cell below.
coef = model.coef_
coef

In [None]:
# Label every coefficient with its feature name before sorting: a bare sorted
# list of numbers cannot be traced back to the column each value belongs to.
# `feature_names_in_` is available because the model was fitted on a DataFrame
# with string column names (requires scikit-learn >= 1.0).
pd.Series(coef, index=model.feature_names_in_).sort_values(ascending=False)

#### 모델 저장 및 불러오기

In [None]:
# Visual Python: Machine Learning > Save/Load
import joblib

In [None]:
# Visual Python: Machine Learning > Save/Load
# Serialize the fitted regression model to ./data/model.pkl (relative to the
# kernel's working directory); the value returned by dump is shown as the cell output.
joblib.dump(model, './data/model.pkl')

In [None]:
# Visual Python: Machine Learning > Save/Load
# Restore the model from the file written by the previous cell.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
model_load = joblib.load('./data/model.pkl')

In [None]:
# Visual Python: Machine Learning > Model Info
# Read the intercept from the reloaded model and display it.
# This rebinds `intercepts`, which an earlier cell assigned from the in-memory model.
intercepts = model_load.intercept_
intercepts

#### Cross validation

In [None]:
# Visual Python: Machine Learning > Model Info
from sklearn.model_selection import cross_val_score

# Predictors and response taken from the full frame (not from the train/test split)
cv_columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
cv_X = df[cv_columns]
cv_y = df['target']

# The scorer name asks for the *negated* RMSE, one value per fold
scores = cross_val_score(model, cv_X, cv_y, scoring='neg_root_mean_squared_error')
scores

In [None]:
# RMSE
# The scores were requested as negated RMSE, so flip the sign to get per-fold RMSE values.
scores * -1

In [None]:
# Mean RMSE
# Average of the sign-flipped per-fold scores, rounded to two decimals.
round((scores * -1).mean(), 2)

# 2 로지스틱 회귀

- Sigmoid function
- Softmax function

### 2.1 데이터 로딩

In [None]:
# Visual Python: Machine Learning > Data Sets
from sklearn.datasets import load_breast_cancer

ldata = load_breast_cancer()
# Build the feature table and append the labels as a `target` column in one expression
df_ldata = (
    pd.DataFrame(data=ldata.data, columns=ldata.feature_names)
    .assign(target=ldata.target)
)
df_ldata

In [None]:
# Visual Python: Data Analysis > Subset
# Feature columns, grouped by the "mean" / "error" / "worst" prefix families
feature_columns = [
    'mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness',
    'mean compactness', 'mean concavity', 'mean concave points', 'mean symmetry',
    'mean fractal dimension',
    'radius error', 'texture error', 'perimeter error', 'area error', 'smoothness error',
    'compactness error', 'concavity error', 'concave points error', 'symmetry error',
    'fractal dimension error',
    'worst radius', 'worst texture', 'worst perimeter', 'worst area', 'worst smoothness',
    'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry',
    'worst fractal dimension',
]
# All rows, only the listed columns (everything except `target`)
X = df_ldata.loc[:, feature_columns]
X

In [None]:
# Visual Python: Data Analysis > Subset
# Select the `target` column for all rows as the label vector, then display it.
y = df_ldata.loc[:, 'target']
y

### 2.2 스케일링

In [None]:
# Visual Python: Machine Learning > Data Prep
from sklearn.preprocessing import StandardScaler

# Unfitted scaler with default settings; it is fitted in the next cell.
scaler = StandardScaler()

In [None]:
# Visual Python: Machine Learning > Fit/Predict
# Fit the scaler on X and transform X in a single step.
# NOTE(review): the scaler is fitted on the full X *before* the train/test split
# in the next section, so the scaling statistics include the test rows (data leakage).
# Consider splitting first and fitting the scaler on the training part only.
X_scaled = scaler.fit_transform(X)
# Display the scaled values as the cell output.
X_scaled

### 2.3 데이터 분할

In [None]:
# Visual Python: Machine Learning > Data Split
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the split, and every metric computed from it,
# is reproducible on Restart & Run All.
# stratify=y keeps the class proportions of `y` the same in the train and test parts,
# which matters for a classification target.
# This rebinds X_train / X_test / y_train / y_test from the regression section.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, random_state=42, stratify=y
)

### 2.4 로지스틱 회귀 적용

In [None]:
# Visual Python: Machine Learning > Classifier
from sklearn.linear_model import LogisticRegression

# Unfitted logistic regression classifier with default settings; it is fitted in the next cell.
model_l = LogisticRegression()

In [None]:
# Visual Python: Machine Learning > Fit/Predict
# Fit the classifier on the (scaled) training split. The call is the last
# expression, so its return value is shown as the cell output.
model_l.fit(X_train, y_train)

In [None]:
# Visual Python: Machine Learning > Fit/Predict
# Predict labels for the held-out split.
# This rebinds `pred`, which previously held the regression predictions.
pred = model_l.predict(X_test)
# Display the predicted labels as the cell output.
pred

In [None]:
# Visual Python: Machine Learning > Evaluation
from IPython.display import display, Markdown

In [None]:
# Visual Python: Machine Learning > Evaluation
# Confusion Matrix
display(Markdown('### Confusion Matrix'))
# Cross-tabulate true labels (rows) against predicted labels (columns), with totals
confusion = pd.crosstab(y_test, pred, margins=True)
display(confusion)

In [None]:
# Visual Python: Machine Learning > Evaluation
# Classification report
# `metrics` is the sklearn module imported in the regression evaluation section above.
# The report is a text block, hence print() rather than a bare expression.
print(metrics.classification_report(y_test, pred))

### 2.5 Grid Search(로지스틱 회귀) 적용

In [None]:
# Visual Python: Machine Learning > Pipeline
# [1] GridSearch
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

# Candidate values for the regularization parameter C; candidates are ranked by accuracy
param_grid = {'C': [0.01,0.1,1,10]}
gs = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=param_grid,
    scoring='accuracy',
)

# [2] Fit
gs.fit(X_train, y_train)

# [3] Predict
pred = gs.predict(X_test)

# [4] Evaluation
from sklearn import metrics
from IPython.display import display, Markdown
# Confusion Matrix
display(Markdown('### Confusion Matrix'))
gs_confusion = pd.crosstab(y_test, pred, margins=True)
display(gs_confusion)
# Classification report
gs_report = metrics.classification_report(y_test, pred)
print(gs_report)

In [None]:
# Visual Python: Machine Learning > Model Info
# Read the best estimator found by the grid search and display it.
best_estimator = gs.best_estimator_
best_estimator

In [None]:
# Visual Python: Machine Learning > Model Info
# Read the best cross-validated score from the grid search and display it
# (the search was configured with scoring='accuracy').
best_score = gs.best_score_
best_score

In [None]:
# Visual Python: Machine Learning > Model Info
# Read the best parameter setting from the grid search and display it
# (the grid only varied 'C').
best_params = gs.best_params_
best_params

---

In [None]:
# End of file