In [1]:
# import all dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle

In [2]:
# keys exposed by the iris dataset bunch
iris_bunch = load_iris()
iris_bunch.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [3]:
# first five rows of the feature matrix
load_iris()['data'][:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [4]:
# names of the feature columns
load_iris()['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [5]:
# first five encoded class labels
load_iris()['target'][:5]

array([0, 0, 0, 0, 0])

In [6]:
# class names, indexed by the encoded target value
load_iris()['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [7]:
# dataframe for the iris dataset independent variables
# (load the bunch once and reuse it instead of calling load_iris() per attribute)
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [8]:
# append the encoded class labels as the `target` column
df = df.assign(target=load_iris().target)

In [9]:
# preview the first five rows
df.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [10]:
# replace target values with flower types
# Assign the mapped column back instead of calling `.replace(..., inplace=True)`
# on the `df1['target']` selection: that chained in-place call is deprecated in
# pandas 2.x and does not modify `df1` once Copy-on-Write is enabled.
df1 = df.copy()
df1['target'] = df1['target'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

In [11]:
# preview the labelled copy
df1.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [12]:
# number of rows per flower type
df1['target'].value_counts()

target
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

In [13]:
# rows whose encoded target is 0
df.loc[df['target'].eq(0)]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [14]:
# first five rows of the numeric-target frame
df.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [15]:
# last five rows of the numeric-target frame
df.tail(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2
149,5.9,3.0,5.1,1.8,2


In [16]:
# dependent variable (encoded class) and independent variables (all other columns)
y = df['target']
X = df.drop(columns=['target'])

In [17]:
# hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
)

In [18]:
# shapes in the order: train features, train labels, test features, test labels
splits = (X_train, y_train, X_test, y_test)
print(', '.join(str(part.shape) for part in splits))

(120, 4), (120,), (30, 4), (30,)


## Standardize Data

In [19]:
from sklearn.preprocessing import StandardScaler

In [20]:
# zero-mean / unit-variance scaler (defaults spelled out explicitly)
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [21]:
# Learn the scaling statistics from the training split only, then scale it.
# NOTE: this rebinds X_train, so re-running the cell would refit the scaler on
# already-scaled data; re-run the split cell first.
X_train = scaler.fit(X_train).transform(X_train)

In [22]:
# scale the test split with the already-fitted scaler (transform only, no refit)
X_test = scaler.transform(X_test)

## Logistic Regression

In [23]:
# Multinomial logistic regression. The `multi_class` argument is deprecated
# since scikit-learn 1.5; with the default lbfgs solver a multinomial model is
# already what gets fitted for a multi-class target, so the argument is dropped.
model_lgr = LogisticRegression()

In [24]:
# train the logistic regression on the training split
model_lgr.fit(X_train, y_train)

In [25]:
# persist the lgr model together with the scaler fitted above
lgr_bundle = {'model_lgr': model_lgr, 'scaler': scaler}
with open('logistic_regression_model.pkl', 'wb') as pkl_file:
    pickle.dump(lgr_bundle, pkl_file)

In [26]:
# predict the output for testing data; the bare name on the last line
# displays the predicted labels
prediction = model_lgr.predict(X_test)
prediction

array([0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 1, 0, 0, 2, 1, 2, 2, 2, 1, 1, 2, 2,
       1, 0, 1, 0, 0, 2, 0, 1])

In [38]:
# Classify one raw measurement (values in cm, same column order as X).
# The model was trained on standardized features, so the sample has to go
# through the same fitted scaler before prediction; feeding raw values gives
# a result that is only right by accident.
sample = pd.DataFrame([[6.7, 3.0, 5.2, 2.3]], columns=X.columns)
model_lgr.predict(scaler.transform(sample))[0]

2

In [27]:
# true labels of the test split, for comparison with the lgr predictions
y_test

38     0
66     1
115    2
117    2
89     1
136    2
68     1
94     1
84     1
28     0
90     1
11     0
40     0
126    2
79     1
144    2
123    2
124    2
76     1
59     1
132    2
105    2
53     1
17     0
64     1
18     0
13     0
116    2
9      0
81     1
Name: target, dtype: int32

In [28]:
# fraction of test rows the lgr model labelled correctly
accuracy_score(y_true=y_test, y_pred=prediction)

1.0

In [29]:
# confusion matrix for the lgr predictions (true labels first, predictions second)
confusion_matrix(y_true=y_test, y_pred=prediction)

array([[ 8,  0,  0],
       [ 0, 12,  0],
       [ 0,  0, 10]], dtype=int64)

## Support Vector Classifier

In [30]:
# support vector classifier, constructed with no arguments (library defaults)
model_svc = SVC()

In [31]:
# train the SVC on the same training split as the logistic regression
model_svc.fit(X_train, y_train)

In [32]:
# persist the SVC model together with the scaler fitted above
svc_bundle = {'model_svc': model_svc, 'scaler': scaler}
with open('svc_model.pkl', 'wb') as pkl_file:
    pickle.dump(svc_bundle, pkl_file)

In [33]:
# predict the test split with the SVC; the bare name on the last line
# displays the predicted labels
prediction_svc = model_svc.predict(X_test)
prediction_svc

array([0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 1, 0, 0, 2, 1, 2, 2, 2, 1, 1, 2, 2,
       1, 0, 1, 0, 0, 2, 0, 1])

In [34]:
# true labels of the test split, for comparison with the SVC predictions
y_test

38     0
66     1
115    2
117    2
89     1
136    2
68     1
94     1
84     1
28     0
90     1
11     0
40     0
126    2
79     1
144    2
123    2
124    2
76     1
59     1
132    2
105    2
53     1
17     0
64     1
18     0
13     0
116    2
9      0
81     1
Name: target, dtype: int32

In [35]:
# fraction of test rows the SVC labelled correctly
accuracy_score(y_true=y_test, y_pred=prediction_svc)

1.0

In [36]:
# confusion matrix for the SVC predictions, wrapped in a DataFrame for display
svc_confusion = confusion_matrix(y_test, prediction_svc)
pd.DataFrame(svc_confusion)

Unnamed: 0,0,1,2
0,8,0,0
1,0,12,0
2,0,0,10
