## 분류(Classification)

In [1]:
# Load the bundled iris dataset; later cells read its .data and .target arrays.
from sklearn.datasets import load_iris
iris = load_iris()

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=2021
)

### 로지스틱 회귀(Logistic Regression)

In [4]:
# Create the model (seeded so results are repeatable)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=2021)

In [5]:
# Train the model on the training split
lr.fit(X_train, y_train)

LogisticRegression(random_state=2021)

In [6]:
# Predict class labels for the held-out test split
pred_lr = lr.predict(X_test)

In [9]:
# Evaluate the model: fraction of test labels predicted correctly.
# accuracy_score imported here is reused by the later model cells.
from sklearn.metrics import accuracy_score
acc_lr = accuracy_score(y_test, pred_lr)
acc_lr

1.0

In [10]:
# Hyperparameters of the model (only random_state was set explicitly)
lr.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 2021,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [11]:
# Inspect the result - learned weights, one row per class and one column per feature
lr.coef_

array([[-0.35256841,  0.9416541 , -2.35260875, -1.03252526],
       [ 0.47778198, -0.32798694, -0.13606798, -0.91562913],
       [-0.12521357, -0.61366716,  2.48867673,  1.94815438]])

In [12]:
# Inspect the result - learned bias terms, one per class
lr.intercept_

array([  8.77223458,   2.1108009 , -10.88303547])

In [13]:
# Size of the held-out test set as (rows, feature columns)
X_test.shape

(30, 4)

In [14]:
# First three test rows, all feature columns
X_test[:3,:]

array([[4.4, 3.2, 1.3, 0.2],
       [4.6, 3.6, 1. , 0.2],
       [5.5, 2.4, 3.8, 1.1]])

In [15]:
# The linear scores are X @ Weight.T + bias (one score per class); predict_proba
# turns those scores into probabilities with a softmax, so each row below sums to 1.
lr.predict_proba(X_test)[:3]

array([[9.84720678e-01, 1.52792689e-02, 5.27592356e-08],
       [9.94362205e-01, 5.63778845e-03, 7.00370406e-09],
       [3.13586690e-02, 9.48883820e-01, 1.97575113e-02]])

In [16]:
# Predicted class labels for the first three test rows
pred_lr[:3]

array([0, 0, 1])

### 결정 트리(Decision Tree)

In [18]:
from sklearn.tree import DecisionTreeClassifier

# Build and train a seeded decision tree in one step (fit returns the estimator),
# then score its predictions on the held-out test split.
dtc = DecisionTreeClassifier(random_state=2021).fit(X_train, y_train)
pred_dt = dtc.predict(X_test)
acc_dt = accuracy_score(y_true=y_test, y_pred=pred_dt)
acc_dt

1.0

In [19]:
# Hyperparameters of the decision tree (only random_state was set explicitly)
dtc.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 2021,
 'splitter': 'best'}

In [20]:
# Importance score of each input feature in the fitted tree, one value per feature
dtc.feature_importances_

array([0.01672241, 0.        , 0.05436015, 0.92891744])

### 서포트 벡터 머신(Support Vector Machine)

In [22]:
from sklearn.svm import SVC

# Build and train a seeded support vector classifier in one step (fit returns
# the estimator), then score its predictions on the held-out test split.
svc = SVC(random_state=2021).fit(X_train, y_train)
pred_sv = svc.predict(X_test)
acc_sv = accuracy_score(y_true=y_test, y_pred=pred_sv)
acc_sv

1.0

In [23]:
# Hyperparameters of the support vector classifier (only random_state was set explicitly)
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 2021,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

### 랜덤 포레스트(Random Forest)

In [25]:
from sklearn.ensemble import RandomForestClassifier

# Build and train a seeded random forest in one step (fit returns the estimator),
# then score its predictions on the held-out test split.
rfc = RandomForestClassifier(random_state=2021).fit(X_train, y_train)
pred_rf = rfc.predict(X_test)
acc_rf = accuracy_score(y_true=y_test, y_pred=pred_rf)
acc_rf

0.9333333333333333

In [26]:
# Hyperparameters of the random forest (only random_state was set explicitly)
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 2021,
 'verbose': 0,
 'warm_start': False}

### K-최근접 이웃(K-Nearest Neighbors)

In [27]:
from sklearn.neighbors import KNeighborsClassifier

# Build and train a k-nearest-neighbors classifier with default settings in one
# step (fit returns the estimator), then score it on the held-out test split.
knn = KNeighborsClassifier().fit(X_train, y_train)
pred_kn = knn.predict(X_test)
acc_kn = accuracy_score(y_true=y_test, y_pred=pred_kn)
acc_kn

0.9666666666666667

In [28]:
# Hyperparameters of the k-nearest-neighbors classifier (all left at their defaults)
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

### 결과 비교

In [30]:
import pandas as pd

# Put the true test labels next to every model's predictions, one column per
# model, so disagreements can be spotted row by row.
df = pd.DataFrame(
    {
        'y_test': y_test,
        'lr': pred_lr,
        'dt': pred_dt,
        'svc': pred_sv,
        'rf': pred_rf,
        'knn': pred_kn,
    }
)
df

Unnamed: 0,y_test,lr,dt,svc,rf,knn
0,0,0,0,0,0,0
1,0,0,0,0,0,0
2,1,1,1,1,1,1
3,0,0,0,0,0,0
4,0,0,0,0,0,0
5,0,0,0,0,0,0
6,0,0,0,0,0,0
7,0,0,0,0,0,0
8,0,0,0,0,0,0
9,0,0,0,0,0,0
