# 1. Simple Logistic Regression w/ Python

<b>1) Logistic Regression을 실행하기 위한 package 불러오기</b>

In [None]:
#from IPython.display import Image  # image 설명을 위한 code
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

<b>2) Data 정의</b>

In [None]:
# Toy dataset: the integers 0..9 as a single feature column, with binary labels
# (the first four samples belong to class 0, the remaining six to class 1).
x = np.arange(10)[:, np.newaxis]
y = np.array([0] * 4 + [1] * 6)

In [None]:
# Display the feature matrix: the integers 0-9 arranged as one column.
x

In [None]:
# Display the binary target vector (four 0s followed by six 1s).
y

<b>3) 로지스틱 회귀모델 만들기</b>

### class sklearn.linear_model.LogisticRegression(penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

In [None]:
# Create the classifier with the liblinear solver and fit it on the toy data.
model = LogisticRegression(solver='liblinear')
model.fit(x, y)
# Alternative: LogisticRegression().fit(x, y) would use the default solver instead of liblinear.

In [None]:
# Class labels known to the fitted model (y contains only 0 and 1).
model.classes_

In [None]:
# Fitted coefficient (weight) for the single input feature.
model.coef_

In [None]:
# Fitted intercept (bias) term.
model.intercept_

<b>4) 모델 성능 확인</b>

In [None]:
# Predict a class label for every training sample and display the predictions.
y_pred = model.predict(x)
y_pred

In [None]:
# Predicted probability of each class for every sample in x.
model.predict_proba(x)

 - 각각 x instance의 0, 1일 확률을 보여준다.

In [None]:
# Show the illustration for this step; the image is read from the
# ../input/lgkoreauniv dataset directory, so that path must exist.
from IPython.display import Image
Image('../input/lgkoreauniv/logistic1.JPG')

When you have nine out of ten observations classified correctly, the accuracy of your model is equal to 9/10=0.9, which you can obtain with .score():

In [None]:
# Mean accuracy of the model on the same data it was trained on.
model.score(x, y)

- Confusion Matrix로 결과 확인하기 

In [None]:
# Show the confusion-matrix illustration; the image is read from the
# ../input/lgkoreauniv dataset directory, so that path must exist.
from IPython.display import Image

Image("../input/lgkoreauniv/logistic2.JPG")

 -- <b>True negatives</b> in the upper-left position<br>
 -- <b>False negatives</b> in the lower-left position<br>
 -- <b>False positives</b> in the upper-right position<br>
 -- <b>True positives</b> in the lower-right position<br>

In [None]:
# Confusion matrix: rows are the actual classes, columns the predicted classes.
confusion_matrix(y, model.predict(x))

In [None]:
# Draw the 2x2 confusion matrix as a heat map and write each count into its cell.
cm = confusion_matrix(y, model.predict(x))

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)

# Label the columns with the predicted class and the rows with the actual class.
ax.set_xticks((0, 1))
ax.set_xticklabels(('Predicted 0s', 'Predicted 1s'))
ax.set_yticks((0, 1))
ax.set_yticklabels(('Actual 0s', 'Actual 1s'))

# Inverted y-limits keep row 0 at the top, matching the printed matrix.
ax.set_ylim(1.5, -0.5)

# Text coordinates are (x, y) = (column, row).
for row, col in np.ndindex(2, 2):
    ax.text(col, row, cm[row, col], ha='center', va='center', color='red', fontsize=20)
plt.show()

In [None]:
# Print the text report of per-class precision, recall, F1-score and support.
print(classification_report(y, model.predict(x)))

# Multi-class Logistic Regression in Python

<b>1) Logistic Regression을 실행하기 위한 package 불러오기</b>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits # 내장된 dataset
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split # dataset 분류
from sklearn.preprocessing import StandardScaler # 데이터 정규화

<b>2) Data 정의</b>

In [None]:
# Load the digits dataset bundled with scikit-learn as a (features, labels) pair.
x, y = load_digits(return_X_y=True)

In [None]:
# Display the feature matrix.
x

In [None]:
# Inspect one sample (index 1), its number of features, and the total number of samples.
print("x1 :", x[1])
print("x1 수: ", len(x[1]))
print("x 수: ", len(x))

In [None]:
# Inspect the label vector and the number of labels.
print("y: ", y)
print("y 수: ", len(y))

<b>3) Data 나누기</b>

In [None]:
# Hold out 20% of the samples as a test set; random_state=0 makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

<b>4) Scale Data</b> 

- 로지스틱 회귀에 사용하는 입력 데이터를 표준화하는 것을 선호함
- bias를 조금 허용하고 variance를 줄이기 위해 정규화, 모델의 성능 향상에 도움이 됨

In [None]:
# Standardize the features (mean = 0, sd = 1).
# The scaler is fitted on the training data only; x_train is overwritten with the
# scaled values, so re-running this cell would scale already-scaled data.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

In [None]:
# Display the standardized training features.
x_train

<b> 5) 모델 생성 및 학습</b>

In [None]:
# One-vs-rest logistic regression for the multiclass digits problem.
#
# The one-vs-rest scheme is requested by wrapping a binary LogisticRegression in
# OneVsRestClassifier (one binary classifier per class). Recent scikit-learn
# releases (1.5+) deprecate the `multi_class='ovr'` argument and warn when the
# liblinear solver is applied directly to a multiclass target, so the wrapper
# keeps the same scheme without relying on the deprecated path.
# NOTE: per-class estimators are available via `model.estimators_`; the wrapper
# itself does not expose `coef_` / `intercept_`.
#
# C is the inverse regularization strength: a larger C means weaker
# regularization, a more complex fit, and a higher risk of overfitting.
from sklearn.multiclass import OneVsRestClassifier

model = OneVsRestClassifier(
    LogisticRegression(solver='liblinear', C=0.05, random_state=0)
)
model.fit(x_train, y_train)

<b> 6) 모델 평가 </b>

In [None]:
# Scale the test set with the statistics learned from the training set
# (transform only, no re-fit). x_test is overwritten, so run this cell once.
x_test = scaler.transform(x_test)

In [None]:
# Predict labels for the held-out test samples.
y_pred = model.predict(x_test)

In [None]:
# Mean accuracy on the training set.
model.score(x_train, y_train)

In [None]:
# Mean accuracy on the held-out test set.
model.score(x_test, y_test)

In [None]:
# Confusion matrix on the test set: rows are actual labels, columns are predicted labels.
confusion_matrix(y_test, y_pred)

In [None]:
# Plot the test-set confusion matrix as an annotated heat map.
# The number of classes is read from the matrix itself instead of being
# hard-coded, so the cell keeps working if the label set changes.
cm = confusion_matrix(y_test, y_pred)
n_classes = cm.shape[0]

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(cm)
ax.grid(False)
ax.set_xlabel('Predicted outputs', fontsize=10, color='black')
ax.set_ylabel('Actual outputs', fontsize=10, color='black')
ax.xaxis.set(ticks=range(n_classes))
ax.yaxis.set(ticks=range(n_classes))
# Inverted y-limits keep row 0 at the top, matching the printed matrix.
ax.set_ylim(n_classes - 0.5, -0.5)
# Write each count into its cell; text coordinates are (x, y) = (column, row).
for (i, j), count in np.ndenumerate(cm):
    ax.text(j, i, count, ha='center', va='center', color='red')
plt.show()

## Q. 각 class의 Recall을 구하고, 어떤 class의 recall이 가장 높은지 구해보세요.