## 목표 : 선형 모델 기반의 분류 모델 구현
- 데이터 : sklearn.datasets의 iris
- 피쳐 : 2개
- 타겟 : 3개

### (1) 모듈 로딩 & 데이터 준비 <hr>

In [3]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# 데이터 로딩 + Bunch 타입으로 dict와 유사한 scikit-learn 클래스
dataXy = load_iris(return_X_y=True)
print(type(dataXy))

<class 'tuple'>


In [5]:
# 데이터와 타겟을 DataFrame 형식으로 로딩
data = load_iris(return_X_y=True, as_frame=True)
print(type(dataXy), len(dataXy), dataXy)

<class 'tuple'> 2 (array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0

In [6]:
data = load_iris(as_frame=True)
print(data.keys(), data['data'], sep='\n')

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9            

In [7]:
# 2가지 분류
# featureDF = data['data'][:100]
# targetDF = data['target'][:100]

# 3가지 분류

featureDF = data['data']
targetDF = data['target']

### (2) 모델 학습 진행 <hr>

In [8]:
# 모듈 로딩
# from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PolynomialFeatures

In [9]:
# 모델 인스턴스 생성
model = LogisticRegression(max_iter=1000)
model.fit(featureDF, targetDF)

In [10]:
# 학습 후 결정된 모델 파라미터 확인
print('classes_ : ', model.classes_)
print('feature_name_in_ : ', model.feature_names_in_)
print('n_iter_ : ', model.n_iter_) # 자동 중단된 횟수
print('coef_ : ', model.coef_) # 타겟 갯수에 따라 n행 (2개일 때는 1개만 알면 나머지는 자동 처리)
print('intercept_ : ', model.intercept_) #타겟 갯수에 따라 n개 (2개일 때는 1개만 알면 나머지는 자동 처리)

classes_ :  [0 1 2]
feature_name_in_ :  ['sepal length (cm)' 'sepal width (cm)' 'petal length (cm)'
 'petal width (cm)']
n_iter_ :  [130]
coef_ :  [[-0.42333655  0.96718172 -2.51725526 -1.0793699 ]
 [ 0.53430021 -0.3215421  -0.20640337 -0.94418398]
 [-0.11096366 -0.64563963  2.72365863  2.02355388]]
intercept_ :  [  9.84952339   2.23795871 -12.0874821 ]


In [11]:
model.score(featureDF, targetDF)

0.9733333333333334

In [12]:
featureDF.head(1), featureDF.iloc[[-1]]

(   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                5.1               3.5                1.4               0.2,
      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 149                5.9               3.0                5.1               1.8)

In [13]:
model.predict(featureDF.iloc[[0]])
# 0 => setosa

array([0])

In [14]:
model.predict(featureDF.iloc[[-1]])
# 2 => verginica

array([2])

타겟 라벨  [0 1 2]


NameError: name 'encoder' is not defined

In [16]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(targetDF)

model.classes_, encoder.inverse_transform(model.classes_)

(array([0, 1, 2]), array([0, 1, 2]))

In [17]:
print("타겟 라벨 ", model.classes_)
labels=encoder.inverse_transform

타겟 라벨  [0 1 2]


In [None]:
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
conplot = ConfusionMatrixDisplay(ret_matrix)
conplot.plot()
plt.show()