# Loading Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier

# Reading the data

In [2]:
# Load the Iris dataset from the CSV file in the working directory and preview the first rows
iris = pd.read_csv(filepath_or_buffer = "Iris.csv")
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Encode the species labels as the integers 1, 2 and 3.
# Values that are not keys of the mapping (for example the integer codes left
# behind by a previous run of this cell) are passed through unchanged, so
# re-running the cell no longer replaces the whole column with NaN.
species_codes = {'Iris-setosa': 1,
                 'Iris-versicolor': 2,
                 'Iris-virginica': 3}
iris['Species'] = iris['Species'].map(lambda label: species_codes.get(label, label))
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,1
1,2,4.9,3.0,1.4,0.2,1
2,3,4.7,3.2,1.3,0.2,1
3,4,4.6,3.1,1.5,0.2,1
4,5,5.0,3.6,1.4,0.2,1


# Defining X and Y

In [4]:
# Feature matrix: the four sepal/petal measurement columns
X = iris.loc[:, ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
# Target vector: the species column
Y = iris.loc[:, 'Species']

# Cross-Validation Strategy

In [5]:
# Stratified 5-fold splitter, repeated once, seeded so the folds are the same on every run
skf = RepeatedStratifiedKFold(
    n_repeats = 1,
    n_splits = 5,
    random_state = 42,
)

# One-vs-One with Random Forest

In [8]:
# Pipeline: min-max scale the features, then fit a shallow random forest.
# random_state is fixed so that the cross-validated accuracy printed below is
# reproducible from run to run (the forest is otherwise randomly seeded).
md1 = Pipeline([('transformation', MinMaxScaler()), 
                ('RF', RandomForestClassifier(n_estimators = 500, 
                                             max_depth = 3,
                                             random_state = 42))])


# Wrap the pipeline in a one-vs-one scheme and score it by accuracy on the
# folds produced by the `skf` splitter.
md1_cv = cross_val_score(OneVsOneClassifier(estimator = md1, n_jobs = -1), 
                         X,
                         Y, 
                         cv = skf, 
                         scoring = 'accuracy', 
                         n_jobs = -1)

print(f"The average 5-folds accuracy of the one-vs-one + RF is {md1_cv.mean()}")

The average 5-folds accuracy of the one-vs-one + RF is 0.9600000000000002


# One-vs-One with SVM

In [9]:
# Pipeline: min-max scale the features, then fit a support vector classifier
# with its default settings. The second step is named 'SVM' to match the
# estimator it holds (it was previously labelled 'RF').
md2 = Pipeline([('transformation', MinMaxScaler()), 
                ('SVM', SVC())])


# Wrap the pipeline in a one-vs-one scheme and score it by accuracy on the
# folds produced by the `skf` splitter.
md2_cv = cross_val_score(OneVsOneClassifier(estimator = md2, n_jobs = -1), 
                         X,
                         Y, 
                         cv = skf, 
                         scoring = 'accuracy', 
                         n_jobs = -1)

print(f"The average 5-folds accuracy of the one-vs-one + SVM is {md2_cv.mean()}")

The average 5-folds accuracy of the one-vs-one + SVM is 0.9533333333333334


In [10]:
# From the above results, one-vs-one + RF has a slightly higher mean accuracy (0.960) than one-vs-one + SVM (0.953).