In [1]:
import json
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,multilabel_confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier


In [2]:
# Load the Iris measurements from the CSV in the working directory,
# then show the resulting frame as the cell output.
df = pd.read_csv("Iris.csv")
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
# Print the frame summary: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [7]:
# Count the rows per species to check how balanced the classes are.
df['Species'].value_counts()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64

In [5]:
# Encode the species names as integer class labels (0, 1, 2).
#
# The result is assigned back to the column rather than calling
# `df['Species'].replace(..., inplace=True)`: an in-place method on a selected
# column is chained assignment, which does not update `df` when pandas
# Copy-on-Write is active. `map` also avoids the deprecated object -> int
# downcast that `replace` performs.
species_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

# Guard so the cell can be re-run after the column has already been encoded
# (mapping integers through `species_codes` would otherwise produce NaN).
if not pd.api.types.is_integer_dtype(df['Species']):
    species_encoded = df['Species'].map(species_codes)
    # Fail early on a label that is not in the mapping instead of passing NaN on.
    if species_encoded.isna().any():
        raise ValueError("Species column contains labels missing from species_codes")
    df['Species'] = species_encoded.astype('int64')

In [7]:
# Separate the target from the model inputs.
# 'Id' is dropped along with the target, so it is not used as a feature.
y = df['Species']
x = df.drop(['Species', 'Id'], axis=1)
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int64

In [8]:
# Hold out 25% of the rows for testing. The split is stratified on the target
# and uses a fixed seed so it is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=24,
    stratify=y,
)

In [10]:
# One-vs-rest logistic regression: one binary classifier per species.
# `LogisticRegression(multi_class='ovr')` is deprecated (scikit-learn 1.5) and
# scheduled for removal; wrapping the estimator in OneVsRestClassifier is the
# replacement scikit-learn recommends for the same strategy.
lmodel = OneVsRestClassifier(LogisticRegression())
lmodel.fit(x_train, y_train)

In [12]:
# Evaluate the fitted model on the training split.

y_pred_train = lmodel.predict(x_train)

# Compute all metrics first, then print them in a fixed order.
cnf_matrix = confusion_matrix(y_train, y_pred_train)
accuracy = accuracy_score(y_train, y_pred_train)
clf_report = classification_report(y_train, y_pred_train)

train_sections = (
    ("Confusion Matrix:\n", cnf_matrix),
    ("Accuracy:", accuracy),
    ("Classification report :\n", clf_report),
)
for position, (heading, value) in enumerate(train_sections):
    # A row of asterisks separates consecutive sections (none before the first).
    if position > 0:
        print("*" * 80)
    print(heading, value)


Confusion Matrix:
 [[38  0  0]
 [ 0 33  4]
 [ 0  2 35]]
********************************************************************************
Accuracy: 0.9464285714285714
********************************************************************************
Classification report :
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.94      0.89      0.92        37
           2       0.90      0.95      0.92        37

    accuracy                           0.95       112
   macro avg       0.95      0.95      0.95       112
weighted avg       0.95      0.95      0.95       112



In [12]:
# Evaluate the fitted model on the held-out test split.

y_pred = lmodel.predict(x_test)

# Compute all metrics first, then print them in a fixed order.
cnf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
clf_report = classification_report(y_test, y_pred)

test_sections = (
    ("Confusion Matrix:\n", cnf_matrix),
    ("Accuracy:", accuracy),
    ("Classification report :\n", clf_report),
)
for position, (heading, value) in enumerate(test_sections):
    # A row of asterisks separates consecutive sections (none before the first).
    if position > 0:
        print("*" * 80)
    print(heading, value)


Confusion Matrix:
 [[12  0  0]
 [ 0 11  2]
 [ 0  0 13]]
********************************************************************************
Accuracy: 0.9473684210526315
********************************************************************************
Classification report :
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        12
Iris-versicolor       1.00      0.85      0.92        13
 Iris-virginica       0.87      1.00      0.93        13

       accuracy                           0.95        38
      macro avg       0.96      0.95      0.95        38
   weighted avg       0.95      0.95      0.95        38



In [13]:
# Serialize the fitted model to model.pkl in the working directory.
with open("model.pkl", mode='wb') as model_file:
    pickle.dump(lmodel, model_file)

In [14]:
# Metadata written to asset.json: the feature column names and the
# species-name -> integer-code mapping.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
species_to_code = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

d1 = {
    "columns": feature_columns,
    "result_values": species_to_code,
}

with open("asset.json", mode='w') as asset_file:
    json.dump(d1, asset_file)

In [18]:
# Sanity-check the fitted model on one hand-written sample.
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# with the same column names; passing a bare nested list makes scikit-learn
# warn that X does not have valid feature names.
sample = pd.DataFrame([[5.1, 3.5, 1.4, 0.2]], columns=x_train.columns)
lmodel.predict(sample)



array([0], dtype=int64)

In [16]:
# Show the dimensions of the training features and training target, one per line.
print(x_train.shape, y_train.shape, sep="\n")

(112, 4)
(112,)


In [16]:
# Display the metadata dictionary that was written to asset.json.
d1

{'columns': ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
 'result_values': {'Iris-setosa': 0,
  'Iris-versicolor': 1,
  'Iris-virginica': 2}}