## Classification model comparison (Iris dataset)

In [1]:
import pandas as pd

In [2]:
# Print the installed pandas version string.
print(f'pandas  version = {pd.__version__}')

pandas  version = 0.24.1


In [3]:
# Load the Iris measurements straight from the hosted CSV.
df = pd.read_csv('https://github.com/prasertcbs/basic-dataset/raw/master/iris.csv')
# Preview five random rows; the fixed seed makes the preview identical on every re-run.
df.sample(5, random_state=7)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
93,5.0,2.3,3.3,1.0,versicolor
39,5.1,3.4,1.5,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
86,6.7,3.1,4.7,1.5,versicolor
79,5.7,2.6,3.5,1.0,versicolor


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import metrics

In [5]:
# Split the four measurement columns (features) and the species column (target)
# into train and test parts; random_state pins the shuffle so the split is repeatable.
test_size = .3
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns],
    df['species'],
    test_size=test_size,
    random_state=7,
)

In [6]:
# Steps 1 + 2: choose the estimator and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
model = KNeighborsClassifier().fit(X_train, y_train)
# Step 3: predict labels for the test split.
y_pred = model.predict(X_test)
# Step 4: score on the test split; as the last expression it becomes the cell output.
model.score(X_test, y_test)

0.9111111111111111

In [7]:
# Steps 1 + 2: choose the estimator and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
# NOTE(review): no solver is passed here, while the comparison loop further down
# uses solver='lbfgs' -- confirm whether the two are meant to differ.
model = LogisticRegression().fit(X_train, y_train)
# Step 3: predict labels for the test split.
y_pred = model.predict(X_test)
# Step 4: score on the test split; as the last expression it becomes the cell output.
model.score(X_test, y_test)



0.8888888888888888

In [8]:
# Steps 1 + 2: choose the estimator and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
model = GaussianNB().fit(X_train, y_train)
# Step 3: predict labels for the test split.
y_pred = model.predict(X_test)
# Step 4: score on the test split; as the last expression it becomes the cell output.
model.score(X_test, y_test)

0.8888888888888888

In [9]:
# Fit every candidate classifier on the same train/test split and collect its
# test score so the models can be compared side by side.
#
# The tree ensembles are randomised; without a seed their scores drift between
# runs and the comparison table is not reproducible. SEED pins them.
SEED = 7

# Each entry is [estimator instance, display name].
algo = [
    [KNeighborsClassifier(n_neighbors=10), 'KNeighborsClassifier'],
    [LogisticRegression(solver='lbfgs'), 'LogisticRegression'],
    [GaussianNB(), 'GaussianNB'],
    [GradientBoostingClassifier(random_state=SEED), 'GradientBoostingClassifier'],
    [RandomForestClassifier(random_state=SEED), 'RandomForestClassifier'],
    [AdaBoostClassifier(random_state=SEED), 'AdaBoostClassifier'],
]

# Rows of [score, classifier name], in the same order as `algo`.
model_score = []
for model, name in algo:
    model.fit(X_train, y_train)  # step 2: fit
    y_pred = model.predict(X_test)  # step 3: predict
    score = model.score(X_test, y_test)  # step 4: score
    model_score.append([score, name])
    print(f'{name} score = {score}')
    # Per-class breakdown: where the errors are, plus precision/recall/F1.
    print(metrics.confusion_matrix(y_test, y_pred))
    print(metrics.classification_report(y_test, y_pred))
    print('-' * 100)
print(model_score)



KNeighborsClassifier score = 0.9555555555555556
[[12  0  0]
 [ 0 16  0]
 [ 0  2 15]]
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        12
  versicolor       0.89      1.00      0.94        16
   virginica       1.00      0.88      0.94        17

   micro avg       0.96      0.96      0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

----------------------------------------------------------------------------------------------------
LogisticRegression score = 0.8444444444444444
[[12  0  0]
 [ 0 11  5]
 [ 0  2 15]]
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        12
  versicolor       0.85      0.69      0.76        16
   virginica       0.75      0.88      0.81        17

   micro avg       0.84      0.84      0.84        45
   macro avg       0.87      0.86      0.86        45
weighted avg       0.85 



In [10]:
# Turn the collected [score, classifier] rows into a table with named columns.
score_columns = ['score', 'classifier']
dscore = pd.DataFrame(data=model_score, columns=score_columns)
dscore

Unnamed: 0,score,classifier
0,0.955556,KNeighborsClassifier
1,0.844444,LogisticRegression
2,0.888889,GaussianNB
3,0.888889,GradientBoostingClassifier
4,0.888889,RandomForestClassifier
5,0.866667,AdaBoostClassifier


In [11]:
# Show the classifiers ordered from highest to lowest score (returns a new frame; dscore is unchanged).
dscore.sort_values(by='score', ascending=False)

Unnamed: 0,score,classifier
0,0.955556,KNeighborsClassifier
2,0.888889,GaussianNB
3,0.888889,GradientBoostingClassifier
4,0.888889,RandomForestClassifier
5,0.866667,AdaBoostClassifier
1,0.844444,LogisticRegression
