### OvR & OvO 모듈 활용

(1) 모듈 로딩 & 데이터 준비

In [1]:
# 모듈 로딩
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

In [2]:
# Load the fish dataset from its CSV file
data_file = '../data/fish.csv'
fishDF = pd.read_csv(filepath_or_buffer=data_file)

# Bare last expression so the notebook renders the loaded frame
fishDF

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.5200,4.0200
1,Bream,290.0,26.3,31.2,12.4800,4.3056
2,Bream,340.0,26.5,31.1,12.3778,4.6961
3,Bream,363.0,29.0,33.5,12.7300,4.4555
4,Bream,430.0,29.0,34.0,12.4440,5.1340
...,...,...,...,...,...,...
154,Smelt,12.2,12.2,13.4,2.0904,1.3936
155,Smelt,13.4,12.4,13.5,2.4300,1.2690
156,Smelt,12.2,13.0,13.8,2.2770,1.2558
157,Smelt,19.7,14.3,15.2,2.8728,2.0672


(2) 데이터셋 준비

(2-1) 피처/타깃 분리

In [3]:
# The first column is taken as the target; every remaining column is a feature.
label_col, *feature_cols = fishDF.columns
featureDF = fishDF[feature_cols]
targetDF = fishDF[label_col]

(2-2) 학습용/테스트용 데이터셋 준비

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Stratify on the target and pin the seed so the partition is repeatable;
# split sizes are left at the library defaults.
split_options = dict(stratify=targetDF, random_state=11)
X_train, X_test, y_train, y_test = train_test_split(featureDF, targetDF, **split_options)

(3) 학습 진행

In [8]:
# Base estimator handed to both the OvO and OvR wrappers below
model = LogisticRegression(
    solver='liblinear',
)

(3-1) OvO 기반 학습 진행

In [9]:
# Wrap the base estimator in a one-vs-one scheme and train it
ovoModel = OneVsOneClassifier(estimator=model)
ovoModel.fit(X=X_train, y=y_train)

In [10]:
# Inspect the attributes the fitted OvO model exposes
for attr_name in ('classes_', 'feature_names_in_', 'estimators_'):
    print(f'{attr_name} : {getattr(ovoModel, attr_name)}')

classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
estimators_ : (LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear'))


In [11]:
# Evaluate the OvO model on the train and test splits
train_score = ovoModel.score(X_train, y_train)
test_score = ovoModel.score(X_test, y_test)
print(f'[Train Score] {train_score}\n[Test Score] {test_score}')

[Train Score] 0.957983193277311
[Test Score] 0.925


In [12]:
# Predict labels for the first two test rows
ovoModel.predict(X_test.iloc[:2])

array(['Bream', 'Parkki'], dtype=object)

In [13]:
# Decision scores for the same first two test rows
ovoModel.decision_function(X_test.iloc[:2])

array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

(3-2) OvR 기반 학습 진행

In [14]:
# Wrap the same base estimator in a one-vs-rest scheme and train it
ovrModel = OneVsRestClassifier(estimator=model)
ovrModel.fit(X=X_train, y=y_train)

In [17]:
# Evaluate the OvR model on the train and test splits
train_score = ovrModel.score(X_train, y_train)
test_score = ovrModel.score(X_test, y_test)
print(f'[Train Score] {train_score}\n[Test Score] {test_score}')

[Train Score] 0.9495798319327731
[Test Score] 0.975
