## LogisticRegression
---
- 인공신경망의 기본 개념과 동일
- Regression이지만 다중분류로 사용
- 선형모델 기반으로 y=wx+b 그대로 사용
- y=wx+b의 결과 => 분류 변환 함수 => sigmoid()

In [17]:
# 모듈 로딩 --------------------------------------------
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from scipy.special import softmax, expit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
data, target=load_iris(as_frame=True, return_X_y=True)
type(data), type(target)

(pandas.core.frame.DataFrame, pandas.core.series.Series)

In [4]:
print(data.info(), data.head(), sep='\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


In [5]:
print(target.info(), target.head(), sep='\n\n')

<class 'pandas.core.series.Series'>
RangeIndex: 150 entries, 0 to 149
Series name: target
Non-Null Count  Dtype
--------------  -----
150 non-null    int32
dtypes: int32(1)
memory usage: 728.0 bytes
None

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int32


In [6]:
print(target.unique(), target.value_counts(), sep='\n\n')

[0 1 2]

0    50
1    50
2    50
Name: target, dtype: int64


### [2] 학습 데이터준비

In [7]:
# 데이터 => petal_lengh, petal_width
# 타겟 => 0, 1, 2
data=data.loc[:, data.columns[2:]]
data.head()

Unnamed: 0,petal length (cm),petal width (cm)
0,1.4,0.2
1,1.4,0.2
2,1.3,0.2
3,1.5,0.2
4,1.4,0.2


In [9]:
# 학습용, 테스트용 데이터 분리
X_TRAIN, X_TEST, Y_TRAIN, Y_TEST=train_test_split(
data, target, stratify=target, test_size=0.2, random_state=42)

In [10]:
print(X_TRAIN.shape, X_TEST.shape)

(120, 2) (30, 2)


In [12]:
print(Y_TRAIN.value_counts(), Y_TEST.value_counts(),sep='\n\n')

0    40
2    40
1    40
Name: target, dtype: int64

0    10
2    10
1    10
Name: target, dtype: int64


In [36]:
model=LogisticRegression()

In [37]:
model.fit(X_TRAIN, Y_TRAIN)

LogisticRegression()

In [38]:
print('분류 종류 :', model.classes_)
print('가중치 값 :', model.coef_)
print('절편 값 :', model.intercept_)

분류 종류 : [0 1 2]
가중치 값 : [[-2.57480323 -1.08787617]
 [ 0.11136326 -0.84266177]
 [ 2.46343997  1.93053794]]
절편 값 : [ 10.41582694   2.8335323  -13.24935924]


In [39]:
model.score(X_TEST, Y_TEST)

0.9666666666666667

In [40]:
# 결정함수 y=wx+b
ret=model.decision_function([[1.4, 0.2]])
ret



array([[ 6.59352718,  2.82090851, -9.41443569]])

In [41]:
# 시그모이드 함수 expit(ax+b) / 소프트맥스 softmax(ax+b) 후 결과 확인
# 결과값이 predict_proba 한 것과 같음
# predict_proba() 함수의 양성의 결과와 같음
sigValue2=softmax(ret)
print(np.round(sigValue2, 5))
np.round(model.predict_proba([[1.4, 0.2]]), 5)

[[0.97752 0.02248 0.     ]]




array([[0.97752, 0.02248, 0.     ]])

In [42]:
print('가중치 값 :', model.coef_)

# 3쌍이 나오는 이유 : 분류값이 0, 1, 2로 세 가지라서
# 한 쌍에 2개씩 나오는 이유 : 변수(특성)을 2개 넣었으니까

가중치 값 : [[-2.57480323 -1.08787617]
 [ 0.11136326 -0.84266177]
 [ 2.46343997  1.93053794]]


In [58]:
new_data=[5.2, 2.3]
ret=model.decision_function([new_data])
ret



array([[-5.47526506,  1.47449917,  4.00076589]])

In [55]:
for i in range(3):
    y=model.coef_[i][0]*new_data[0]+\
    model.coef_[i][1]*new_data[1]+model.intercept_[i]
    print('y =', np.round(y, 3))
    print('softmax =>', np.round(softmax(y), 3))

y = -5.475
softmax => 1.0
y = 1.474
softmax => 1.0
y = 4.001
softmax => 1.0


In [59]:
np.round(softmax(ret), 3)

array([[0.   , 0.074, 0.926]])