In [1]:
from sklearn.datasets import load_iris
import pandas as pd

In [2]:
# Load the Iris dataset bundled with scikit-learn.
iris_bunch = load_iris()

# DESCR holds the dataset's human-readable description text.
print(iris_bunch.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [3]:
# Build a DataFrame from the feature matrix, labelling the columns with the feature names.
iris_df = pd.DataFrame(data=iris_bunch.data, columns=iris_bunch.feature_names)

In [4]:
# Append the target labels as a "Class" column (integer codes 0, 1 and 2).
iris_df["Class"] = iris_bunch.target

In [5]:
# Count the rows per class to check how balanced the dataset is.
iris_df["Class"].value_counts()

0    50
1    50
2    50
Name: Class, dtype: int64

In [6]:
# Separate the label column from the predictors (every column except "Class").
y = iris_df["Class"]
X = iris_df.drop(columns=["Class"])

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=21,
)

In [8]:
# Report the dimensions of every split, in the order the splits were created.
for _name, _split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"Shape of {_name} is : ", _split.shape)


Shape of X_train is :  (112, 4)
Shape of X_test is :  (38, 4)
Shape of y_train is :  (112,)
Shape of y_test is :  (38,)


In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
knn_model = KNeighborsClassifier(n_neighbors=4) # --> n_neighbors is the K value (number of neighbours that vote); scikit-learn's default is 5.
# K is set to 4 using a "number of classes + 1" rule of thumb (3 target classes + 1 = 4).
# NOTE(review): with three classes an even K can still produce tied votes; consider validating K empirically.

In [11]:
knn_model.fit(X_train,y_train) # Supervised learning: fit on the training features together with their labels

KNeighborsClassifier(n_neighbors=4)

In [12]:
# Predict class labels for the held-out test features.
y_pred_test_knn = knn_model.predict(X_test)

In [13]:
from sklearn.metrics import accuracy_score

print("The accuracy score of the model on test data is :")
print(accuracy_score(y_test,y_pred_test_knn)) # --> arguments are (actual labels, predicted labels)

The accuracy score of the model on test data is :
0.9473684210526315


In [14]:
from sklearn.metrics import classification_report

In [15]:
# Per-class precision, recall and F1 for the n_neighbors=4 model on the test split.
print(classification_report(y_test,y_pred_test_knn))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.93      0.93      0.93        15
           2       0.90      0.90      0.90        10

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38



### Let's check whether a higher K value affects the accuracy score.

In [16]:
knn_model_10 = KNeighborsClassifier(n_neighbors=10) # --> n_neighbors is the K value; here K is raised to 10.
# A larger K than the earlier n_neighbors=4 model, so the two test accuracies can be compared.

In [17]:
knn_model_10.fit(X_train,y_train) # Supervised learning: fit on the training features together with their labels

KNeighborsClassifier(n_neighbors=10)

In [18]:
# Predict class labels for the held-out test features with the K=10 model.
y_pred_test_knn_10 = knn_model_10.predict(X_test)

In [19]:
 from sklearn.metrics import accuracy_score

print("The accuracy score of the model on test data is :")
print(accuracy_score(y_test,y_pred_test_knn_10)) # --> Actual values & Predicted value

The accuracy score of the model on test data is :
0.8947368421052632


In [20]:
# Per-class precision, recall and F1 for the n_neighbors=10 model on the test split.
print(classification_report(y_test,y_pred_test_knn_10))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.82      0.93      0.87        15
           2       0.88      0.70      0.78        10

    accuracy                           0.89        38
   macro avg       0.90      0.88      0.88        38
weighted avg       0.90      0.89      0.89        38



### Here, we can infer that a higher K value affects the accuracy: raising K from 4 to 10 lowered the test accuracy from about 0.95 to about 0.89.

### Decision Tree Classifier

In [21]:
from sklearn.tree import DecisionTreeClassifier
# Fix random_state so the fitted tree is reproducible: scikit-learn permutes the features
# at every split, so an unseeded tree can differ between runs when candidate splits tie.
# The seed matches the one used for train_test_split.
dt_model = DecisionTreeClassifier(random_state=21) # Instantiating the estimator object (no depth limit)
dt_model.fit(X_train,y_train)
y_predict = dt_model.predict(X_test)

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
# accuracy_score expects (y_true, y_pred): pass the actual labels first, then the predictions.
accuracy = accuracy_score(y_test, y_predict)

print("The Prediction accuracy score of the Decision Tree model on test data is :", accuracy)

The Prediction accuracy score of the Decision Tree model on test data is : 0.9210526315789473


In [24]:
# Score the tree on the data it was trained on, for comparison with the test accuracy.
y_predict_train = dt_model.predict(X_train)

# accuracy_score expects (y_true, y_pred): actual labels first, then the predictions.
accuracy = accuracy_score(y_train, y_predict_train)

print("The Prediction accuracy score of the Decision Tree model on train data is :", accuracy)



The Prediction accuracy score of the Decision Tree model on train data is : 1.0


### Pruning

In [25]:
from sklearn.tree import DecisionTreeClassifier
# Depth-limited tree (max_depth=2). random_state is fixed so the fit is reproducible:
# scikit-learn permutes the features at every split, so an unseeded tree can vary between runs.
dt_model4 = DecisionTreeClassifier(max_depth=2, random_state=21) # Instantiating the estimator object
dt_model4.fit(X_train,y_train)
y_predict4 = dt_model4.predict(X_test)

In [26]:
# accuracy_score expects (y_true, y_pred): pass the actual labels first, then the predictions.
accuracy = accuracy_score(y_test, y_predict4)
print("The Prediction accuracy score of the Decision Tree model on test data is :", accuracy)


The Prediction accuracy score of the Decision Tree model on test data is : 0.8421052631578947


In [27]:
# Score the depth-limited tree on its own training data.
y_predict_train = dt_model4.predict(X_train)

# accuracy_score expects (y_true, y_pred): actual labels first, then the predictions.
accuracy = accuracy_score(y_train, y_predict_train)

print("The Prediction accuracy score of the Decision Tree model on train data is :", accuracy)



The Prediction accuracy score of the Decision Tree model on train data is : 0.9821428571428571


### We can infer that pruning (limiting the tree depth) decreased the training accuracy.

### Naive Bayes Classifier - GaussianNB

In [28]:
from sklearn.naive_bayes import GaussianNB


In [29]:
# Gaussian Naive Bayes estimator, constructed with its default settings.
gnb_model = GaussianNB()

In [30]:
# Fit on the training features together with their labels.
gnb_model.fit(X_train,y_train)

GaussianNB()

In [31]:
# Predict class labels for the held-out test features.
y_pred_test_gnb = gnb_model.predict(X_test)

In [32]:
# accuracy_score expects (y_true, y_pred): pass the actual labels first, then the predictions.
accuracy = accuracy_score(y_test, y_pred_test_gnb)
print("The Prediction accuracy score of the GaussianNB model on test data is :", accuracy)


The Prediction accuracy score of the GaussianNB model on test data is : 0.9210526315789473


### End of Decision Tree, KNN & Naive Bayes Classifier