In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import r2_score, confusion_matrix, multilabel_confusion_matrix,classification_report
from sklearn.metrics import precision_score, recall_score, accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the dataset from the CSV file next to the notebook and preview
# the first three rows.
df = pd.read_csv(filepath_or_buffer="iris.csv")
df.head(n=3)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa


In [3]:
# Per-column count of missing values (isnull is an alias of isna).
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [4]:
# Remove the Id column so it is not part of the feature set built below.
# Re-assigning (instead of inplace=True) together with errors="ignore" keeps
# this cell idempotent: the in-place drop raised KeyError as soon as the cell
# was executed a second time, because "Id" was already gone.
df = df.drop(columns="Id", errors="ignore")

In [5]:
# Preview the first three rows to confirm which columns remain.
df.head(n=3)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


# 3. Building Model

In [6]:
# Feature matrix: every column except the label.
x = df.drop(columns=["Species"])
# Target vector: the species label.
y = df["Species"]

In [7]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# One-vs-rest logistic regression.
# LogisticRegression(multi_class='ovr') is deprecated since scikit-learn 1.5
# and scheduled for removal; wrapping the estimator in OneVsRestClassifier is
# the replacement recommended by the deprecation message.
# NOTE(review): lr_clf is now the wrapper object - predict / predict_proba /
# classes_ are still available, but the per-class models live under
# lr_clf.estimators_ rather than lr_clf.coef_. Confirm no consumer of the
# pickled model reads coef_ directly.
lr_clf = OneVsRestClassifier(LogisticRegression())
lr_clf.fit(x_train, y_train)

# Predict labels for the held-out rows and peek at a few of them.
y_pred = lr_clf.predict(x_test)
y_pred[25:29]

array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       'Iris-virginica'], dtype=object)

# 4. Model Evaluation

In [8]:
# Confusion matrix on the held-out split: rows are the true classes,
# columns are the predicted classes.
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion_matrix is:\n", conf_mat)

Confusion_matrix is:
 [[10  0  0]
 [ 0  8  2]
 [ 0  0 10]]


In [9]:
# One 2x2 matrix per class (that class vs. the rest), laid out by
# scikit-learn as [[TN, FP], [FN, TP]].
multi_conf_mat = multilabel_confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion_matrix is:\n", multi_conf_mat)

Confusion_matrix is:
 [[[20  0]
  [ 0 10]]

 [[20  0]
  [ 2  8]]

 [[18  2]
  [ 0 10]]]


In [10]:
# Per-class precision / recall / F1 on the held-out split.
class_report = classification_report(y_test, y_pred)
# Print the label on its own line: when the label and the report share one
# print call, the label prefixes the report's header row and shifts it out of
# alignment with the table body.
print("classification_report is:")
print(class_report)

classification_report is:                   precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      0.80      0.89        10
 Iris-virginica       0.83      1.00      0.91        10

       accuracy                           0.93        30
      macro avg       0.94      0.93      0.93        30
   weighted avg       0.94      0.93      0.93        30



# 5. Testing user input

In [12]:
# Show the first feature row as a column so the expected input names and
# their order are easy to read.
x.head(n=1).transpose()

Unnamed: 0,0
SepalLengthCm,5.1
SepalWidthCm,3.5
PetalLengthCm,1.4
PetalWidthCm,0.2


In [16]:
# Hand-picked sample measurements used to smoke-test the trained model;
# the order mirrors the feature columns.
SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm = 7.4, 2.2, 4.4, 0.92

In [17]:
# Allocate a zero-filled vector with one slot per feature column.
test_arr = np.zeros(x.shape[1])
test_arr

array([0., 0., 0., 0.])

In [19]:
# Fill the vector in place, in feature-column order.
test_arr[:] = (SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm)

In [20]:
# Display the populated input vector to verify the values before predicting.
test_arr

array([7.4 , 2.2 , 4.4 , 0.92])

In [23]:
# The model was fitted on a DataFrame, so predict on a one-row DataFrame that
# carries the same column names in the same order. Passing a bare array makes
# scikit-learn warn that X "does not have valid feature names" and removes
# its ability to check that the inputs line up with the training columns.
test_df = pd.DataFrame([test_arr], columns=x.columns)
lr_clf.predict(test_df)[0]



'Iris-versicolor'

# 6. Exporting the data for Flask

In [24]:
import pickle

# Serialise the fitted classifier to disk.
# NOTE: only unpickle this file from a trusted location - pickle.load can
# execute arbitrary code.
with open("logistic_regression.pkl", mode="wb") as model_file:
    pickle.dump(lr_clf, model_file)

In [26]:
# Record the feature names in training order under the "columns" key.
project_data = dict(columns=x.columns.tolist())
project_data

{'columns': ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']}

In [28]:
import json

# Write the metadata dictionary to disk as JSON.
with open("project_data.json", mode="w") as meta_file:
    meta_file.write(json.dumps(project_data))