# Logistic Regression: Iris Flower Classification

In [26]:
import json
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2, so this
# import fails on current releases; it is not used in the cells visible here.
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")

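## Problem statement

Predict the species of an iris flower (setosa, versicolor or virginica) from its sepal and petal measurements using logistic regression.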

## Data Gathering

In [27]:
# Read the Iris dataset from the CSV file in the working directory
# and preview its first rows.
df = pd.read_csv("iris.csv")
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


## EDA

In [28]:
# Print column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [29]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [30]:
# Remove the row-identifier column; it is a running index, not a feature.
# Rebinding instead of inplace=True, together with errors="ignore", keeps this
# cell idempotent: re-running it no longer raises KeyError once "Id" is gone.
df = df.drop(columns="Id", errors="ignore")

In [31]:
# Count rows per species label to check how balanced the classes are.
df['Species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

## Model Training

In [32]:
# Target: the species label of each row.
y = df["Species"]
# Features: every remaining column.
x = df.drop(columns="Species")

In [33]:
# Hold out 30% of the rows for testing; stratify on the label so each species
# keeps the same share in both splits, and fix the seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=2,
    stratify=y,
)


In [34]:
# Build a logistic-regression classifier with scikit-learn's default settings
model = LogisticRegression()

# Train it on the training split (the fitted estimator is the cell's output)
model.fit(x_train, y_train)



## Model Evaluation

In [35]:
# Predict a species label for every row of the held-out test split
y_pred = model.predict(x_test)

# Display the predicted labels
y_pred

array(['Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       'Iris-setosa', 'Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [36]:
# Evaluation on the held-out test split
y_pred = model.predict(x_test)
divider = "-" * 65

# Compute all three metrics first, then print them in a fixed order.
cnf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
clf_report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", cnf_matrix)
print(divider)
print("Accuracy Score:", accuracy)
print(divider)
print("Classification report:\n", clf_report)

Confusion Matrix:
 [[15  0  0]
 [ 0 15  0]
 [ 0  0 15]]
-----------------------------------------------------------------
Accuracy Score: 1.0
-----------------------------------------------------------------
Classification report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       1.00      1.00      1.00        15
 Iris-virginica       1.00      1.00      1.00        15

       accuracy                           1.00        45
      macro avg       1.00      1.00      1.00        45
   weighted avg       1.00      1.00      1.00        45



In [37]:
# Evaluation on the training split, for comparison with the test-split metrics
y_pred_train = model.predict(x_train)
divider = "-" * 65

# Compute all three metrics first, then print them in a fixed order.
cnf_matrix = confusion_matrix(y_train, y_pred_train)
accuracy = accuracy_score(y_train, y_pred_train)
clf_report = classification_report(y_train, y_pred_train)

print("Confusion Matrix:\n", cnf_matrix)
print(divider)
print("Accuracy Score:", accuracy)
print(divider)
print("Classification report:\n", clf_report)

Confusion Matrix:
 [[35  0  0]
 [ 0 32  3]
 [ 0  1 34]]
-----------------------------------------------------------------
Accuracy Score: 0.9619047619047619
-----------------------------------------------------------------
Classification report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        35
Iris-versicolor       0.97      0.91      0.94        35
 Iris-virginica       0.92      0.97      0.94        35

       accuracy                           0.96       105
      macro avg       0.96      0.96      0.96       105
   weighted avg       0.96      0.96      0.96       105



## Test single row

In [38]:
# Keep the feature-column names, in the order the model was trained on
column = x.columns
column

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], dtype='object')

In [39]:
# Show the first test row transposed, so each feature appears on its own line.
x_test.head(1).T

Unnamed: 0,82
SepalLengthCm,5.8
SepalWidthCm,2.7
PetalLengthCm,3.9
PetalWidthCm,1.2


In [40]:

# Hand-entered measurements for one flower, named after the feature columns.
# NOTE(review): PetalLengthCm = 8.9 is above the dataset maximum of 6.9 reported
# by df.describe(), so the model is extrapolating here — confirm this is intended.
SepalLengthCm = 6.5
SepalWidthCm = 2.8
PetalLengthCm = 8.9
PetalWidthCm = 2.3

In [41]:
# Pack the four measurements into one vector, in the same order as the
# training feature columns.
array = np.array(
    [
        SepalLengthCm,
        SepalWidthCm,
        PetalLengthCm,
        PetalWidthCm,
    ]
)
array

array([6.5, 2.8, 8.9, 2.3])

In [42]:
# The model was fitted on a DataFrame, so it remembers the feature names.
# Predicting on a bare list drops those names: scikit-learn >= 1.0 then warns
# that X has no valid feature names (hidden here by the global warnings filter)
# and the call silently relies on positional order. Wrapping the sample in a
# one-row DataFrame with the training columns makes the mapping explicit.
sample = pd.DataFrame([array], columns=column)

# predict() returns one label per row; take the label of the single row.
prediction = model.predict(sample)[0]
prediction

'Iris-virginica'

In [43]:

# Collect the feature-column names as a plain list under the "column" key
project_data = {"column": column.tolist()}
project_data

{'column': ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']}

In [44]:
# Write the feature-column list to Iris.json.
# json is imported in the notebook's top import cell rather than mid-notebook;
# the encoding is stated explicitly so the file does not depend on the
# platform's default text encoding.
with open("Iris.json", "w", encoding="utf-8") as f:
    json.dump(project_data, f)

In [45]:
# Serialize the fitted model to Iris.pkl.
# pickle is imported in the notebook's top import cell rather than mid-notebook.
# NOTE: a pickled estimator should be loaded with the same scikit-learn version
# that produced it, and only from a trusted source (unpickling runs code).
with open("Iris.pkl", "wb") as f:
    pickle.dump(model, f)