# Classification of Iris flower

In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pickle

In [55]:
from sklearn.datasets import load_iris

Get data from sklearn datasets

In [56]:
iris = load_iris()
dir(iris)
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


Load data into Pandas dataframe

In [57]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [58]:
df.rename(columns={"sepal length (cm)":"sepal_length_in_cm", 
                    "sepal width (cm)":"sepal_width_in_cm", 
                    "petal length (cm)":"petal_length_in_cm",
                    "petal width (cm)":"petal_width_in_cm"}, inplace=True)

df.head()

Unnamed: 0,sepal_length_in_cm,sepal_width_in_cm,petal_length_in_cm,petal_width_in_cm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [59]:
df['target'] = iris.target
print(iris.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [60]:
df.head()

Unnamed: 0,sepal_length_in_cm,sepal_width_in_cm,petal_length_in_cm,petal_width_in_cm,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [61]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [62]:
X.shape, y.shape, X_train.shape, y_train.shape, X_test.shape, y_test.shape

((150, 4), (150,), (120, 4), (120,), (30, 4), (30,))

## KNN Classifier


In [63]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

#### Find the optimal value of k

In [64]:
cross_valid_scores = []

for k in range(1, 100):
    knn = KNeighborsClassifier(n_neighbors=k, algorithm='brute')
    scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')
    cross_valid_scores.append(scores.mean())

In [65]:
print("Optimal k with cross-validation: ", np.argmax(cross_valid_scores))
optimal_n = np.argmax(cross_valid_scores)

Optimal k with cross-validation:  7


In [66]:
model_knn = KNeighborsClassifier(n_neighbors=optimal_n, algorithm='brute')
model_knn.fit(X_train, y_train)

In [67]:
y_pred = model_knn.predict(X_test)
acc = accuracy_score(y_test, y_pred) * 100

print("Accuracy = ",acc)

Accuracy =  100.0


In [68]:
model_knn.score(X_test, y_test)

1.0

Save the model as pkl file

In [69]:
pickle.dump(model_knn, open('iris_knn.pkl', 'wb'))

In [70]:
X_test

Unnamed: 0,sepal_length_in_cm,sepal_width_in_cm,petal_length_in_cm,petal_width_in_cm
114,5.8,2.8,5.1,2.4
62,6.0,2.2,4.0,1.0
33,5.5,4.2,1.4,0.2
107,7.3,2.9,6.3,1.8
7,5.0,3.4,1.5,0.2
100,6.3,3.3,6.0,2.5
40,5.0,3.5,1.3,0.3
86,6.7,3.1,4.7,1.5
76,6.8,2.8,4.8,1.4
71,6.1,2.8,4.0,1.3


## Random Forest Classifier

In [71]:
from sklearn.ensemble import RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators=40)
model_rf.fit(X_train, y_train)

In [72]:
model_rf.score(X_test, y_test)

1.0

Save the model as pkl file

In [73]:
pickle.dump(model_rf, open('iris_rf.pkl', 'wb'))

## SVM Classifier

In [74]:
from sklearn.svm import SVC

In [75]:
model_svm = SVC(kernel='linear', random_state=0)
model_svm.fit(X_train, y_train)
model_svm.score(X_test, y_test)

1.0

Save the model as pkl file

In [76]:
pickle.dump(model_svm, open('iris_svm.pkl', 'wb'))

## Logistic Regression Classifier

In [77]:
from sklearn.linear_model import LogisticRegression

In [78]:
model_lr = LogisticRegression(random_state=0, max_iter=200)
model_lr.fit(X_train, y_train)
model_lr.score(X_test, y_test)

1.0

Save the model as pkl file

In [79]:
pickle.dump(model_svm, open('iris_lr.pkl', 'wb'))