### Classification: Iris pipeline example

*Jure Žabkar*

*Thu, 25 April 2024*

---

In [1]:
#from sklearn.datasets import load_iris
from sklearn import tree
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [2]:
# Location of the raw Iris table, relative to the notebook's working directory.
IRIS_CSV_PATH = 'datasets/Iris.csv'

# Read the raw table once; later cells derive their frames from `iris`.
iris = pd.read_csv(IRIS_CSV_PATH)

In [3]:
# Remove the "Id" column (presumably a row identifier, not a measurement) so it
# cannot be used as a feature.
new_iris = iris.drop(columns="Id")

# Build features and target from the Id-free frame, selecting by column NAME.
# Positional slicing of the raw frame (columns[1:-1] / columns[-1]) only works
# while "Id" is the first column and "Species" the last; name-based selection
# keeps X free of "Id" and y equal to "Species" regardless of column order.
X = new_iris.drop(columns="Species")
y = new_iris["Species"]

# Display the remaining column names as the cell output.
new_iris.columns

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. `stratify=y` keeps the class
# proportions of y in both parts; the fixed random_state makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=4,
)

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [6]:
# Seed shared by every estimator that has a random component, so that
# Restart & Run All reproduces the same scores. The value 4 is the same seed
# that is passed to train_test_split.
RANDOM_STATE = 4

# Candidate classifiers compared in the cells below. Estimators without a
# stochastic component (k-NN, linear SVC without probability estimates,
# Gaussian naive Bayes, QDA) need no seed.
# NOTE(review): outputs captured before this seed was added came from unseeded
# runs and may differ slightly after re-execution.
models = [
    DecisionTreeClassifier(criterion='entropy', random_state=RANDOM_STATE),
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=RANDOM_STATE
    ),
    MLPClassifier(max_iter=1000, random_state=RANDOM_STATE),
    AdaBoostClassifier(random_state=RANDOM_STATE),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

In [7]:
# Cross-validation: score every candidate with 5-fold CV on the training split
# only, leaving the held-out test split untouched.
from sklearn.model_selection import cross_val_score

for clf in models:
    scores = cross_val_score(clf, X_train, y_train, cv=5)
    fold_mean, fold_std = scores.mean(), scores.std()
    # Keep only the class name: the estimator's repr up to the first "(".
    clf_name = str(clf).split('(')[0]
    print(f"{fold_mean:.2f} ± {fold_std:.2f}\t{clf_name}")

0.93 ± 0.02	DecisionTreeClassifier
0.97 ± 0.02	KNeighborsClassifier
0.96 ± 0.03	SVC
0.95 ± 0.03	RandomForestClassifier
0.98 ± 0.02	MLPClassifier
0.90 ± 0.04	AdaBoostClassifier
0.95 ± 0.04	GaussianNB
0.97 ± 0.02	QuadraticDiscriminantAnalysis


In [8]:
# Final check: train each candidate on the whole training split and report its
# score on the held-out test split. Fitting happens in place, so the objects
# in `models` are trained estimators after this cell has run.
for clf in models:
    # fit() returns the fitted estimator, so scoring can be chained onto it.
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    print(f"{score:.2f}\t{clf}")

0.97	DecisionTreeClassifier(criterion='entropy')
0.97	KNeighborsClassifier(n_neighbors=3)
0.97	SVC(C=0.025, kernel='linear')
0.93	RandomForestClassifier(max_depth=5, max_features=1, n_estimators=10)
0.93	MLPClassifier(max_iter=1000)
0.97	AdaBoostClassifier()
1.00	GaussianNB()
0.93	QuadraticDiscriminantAnalysis()
