In [31]:
from sklearn.datasets import load_iris

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [33]:
data = load_iris()
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [34]:
print(data.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [35]:
data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [36]:
df = pd.DataFrame(data.data)

In [37]:
df.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [38]:
data.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [39]:
df.columns = data.feature_names

In [40]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [41]:
data.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [42]:
y = pd.DataFrame(data.target)

In [43]:
y.head()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0


In [44]:
df.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

In [45]:
df.isna().sum

<bound method NDFrame._add_numeric_operations.<locals>.sum of      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                False             False              False             False
1                False             False              False             False
2                False             False              False             False
3                False             False              False             False
4                False             False              False             False
..                 ...               ...                ...               ...
145              False             False              False             False
146              False             False              False             False
147              False             False              False             False
148              False             False              False             False
149              False             False              False             False

[

In [46]:
from sklearn.model_selection import train_test_split

In [47]:
X_train, X_test, Y_train, Y_test = train_test_split(df,y, test_size=0.25,random_state=42)

In [48]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

### MODELS

In [49]:
from sklearn.linear_model import LogisticRegression
classifier_log = LogisticRegression(random_state=0)
classifier_log.fit(X_train,Y_train[0])

LogisticRegression(random_state=0)

In [50]:
from sklearn.neighbors import KNeighborsClassifier
classifier_knn = KNeighborsClassifier(n_neighbors=5)
classifier_knn.fit(X_train,Y_train[0])

KNeighborsClassifier()

In [51]:
from sklearn.svm import SVC
classifier_svc = SVC(kernel='linear',random_state=0)
classifier_svc.fit(X_train,Y_train[0])

SVC(kernel='linear', random_state=0)

In [52]:
from sklearn.naive_bayes import GaussianNB
classifier_gnb = GaussianNB()
classifier_gnb.fit(X_train,Y_train[0])

GaussianNB()

In [53]:
from sklearn.tree import DecisionTreeClassifier
classifier_dtc = DecisionTreeClassifier(criterion='entropy',random_state=0)
classifier_dtc.fit(X_train,Y_train)

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [54]:
from sklearn.ensemble import RandomForestClassifier
classifier_rfc = RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
classifier_rfc.fit(X_train,Y_train[0])

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [55]:
classifier_list = [classifier_log, classifier_svc, classifier_knn, classifier_gnb, classifier_dtc, classifier_rfc]

In [56]:
from sklearn.metrics import confusion_matrix, classification_report

In [58]:
def accuracy(X_test,Y_test,classifier):
    print(f"\nClassifier\n{classifier}")
    Y_pred = classifier.predict(X_test)
    correct = 0
    total = len(Y_pred)
    for i,j in zip(Y_pred,Y_test[0]):
        if i==j:
            correct += 1
    acc = (correct/total)*100
    print("\n" + str(acc) + "\n" + "-"*30 + "\n")
    print("\n" + "="*50 + "\n" + classification_report(Y_test,Y_pred) + "\n")
    cm = confusion_matrix(Y_test,Y_pred)
    print(f"Confusion Matrix\n{cm}\n")

In [59]:
for clf in classifier_list:
    accuracy(X_test,Y_test,clf)


Classifier
LogisticRegression(random_state=0)

97.36842105263158
------------------------------


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.91      0.95        11
           2       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38


Confusion Matrix
[[15  0  0]
 [ 0 10  1]
 [ 0  0 12]]


Classifier
SVC(kernel='linear', random_state=0)

97.36842105263158
------------------------------


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.91      0.95        11
           2       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97    