## Logistic Regression with Iris DataSet


Logistic regression is a statistical analysis method to predict a binary outcome, such as yes or no, based on prior observations of a data set. A logistic regression model predicts a dependent data variable by analyzing the relationship between one or more existing independent variables.

In [254]:
# Required Modules
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix


In [255]:
# Importing the dataset
dataset = pd.read_csv('iris_dataset.csv')
dataset.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [256]:
dataset.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [257]:
# Splitting the dataset into the Training set and Test set
X = dataset.iloc[:, [0,1,2, 3]].values
y = dataset.iloc[:, 4].values

In [258]:
print(X)

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.1 1.5 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

In [259]:
print(y)

['setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa' 'setosa'
 'setosa' 'setosa' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'versicolor' 'versicolor' 'versicolor'
 'versicolor' 'versicolor' 'v

In [260]:
# Split the data %80 for training and %20 for testing 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [261]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [262]:
LRclassifier = LogisticRegression(random_state = 0, solver='lbfgs', multi_class='auto')
LRclassifier.fit(X_train, y_train)

LogisticRegression(random_state=0)

In [263]:
# Predicting the Test set results
y_pred = LRclassifier.predict(X_test)
# Predict probabilities
probs_y=LRclassifier.predict_proba(X_test)
probs_y = np.round(probs_y, 2)

In [264]:
print(probs_y)

[[0.   0.03 0.97]
 [0.01 0.96 0.04]
 [1.   0.   0.  ]
 [0.   0.08 0.92]
 [0.98 0.02 0.  ]
 [0.   0.01 0.99]
 [0.98 0.02 0.  ]
 [0.01 0.71 0.28]
 [0.   0.73 0.27]
 [0.02 0.9  0.08]
 [0.   0.48 0.52]
 [0.02 0.76 0.22]
 [0.01 0.87 0.12]
 [0.   0.7  0.3 ]
 [0.01 0.77 0.22]
 [0.96 0.04 0.  ]
 [0.01 0.74 0.25]
 [0.02 0.89 0.09]
 [0.94 0.06 0.  ]
 [0.99 0.01 0.  ]
 [0.   0.18 0.82]
 [0.04 0.74 0.22]
 [0.98 0.02 0.  ]
 [0.96 0.04 0.  ]
 [0.   0.35 0.65]
 [1.   0.   0.  ]
 [0.99 0.01 0.  ]
 [0.02 0.88 0.1 ]
 [0.08 0.91 0.01]
 [0.97 0.03 0.  ]]


In [265]:
results = pd.DataFrame({'y_test':y_test.T, 'y_pred':y_pred.T,'setosa':probs_y[:,0].T,'versicolor':probs_y[:,1].T,'virginica':probs_y[:,2].T })
print(results)

        y_test      y_pred  setosa  versicolor  virginica
0    virginica   virginica    0.00        0.03       0.97
1   versicolor  versicolor    0.01        0.96       0.04
2       setosa      setosa    1.00        0.00       0.00
3    virginica   virginica    0.00        0.08       0.92
4       setosa      setosa    0.98        0.02       0.00
5    virginica   virginica    0.00        0.01       0.99
6       setosa      setosa    0.98        0.02       0.00
7   versicolor  versicolor    0.01        0.71       0.28
8   versicolor  versicolor    0.00        0.73       0.27
9   versicolor  versicolor    0.02        0.90       0.08
10   virginica   virginica    0.00        0.48       0.52
11  versicolor  versicolor    0.02        0.76       0.22
12  versicolor  versicolor    0.01        0.87       0.12
13  versicolor  versicolor    0.00        0.70       0.30
14  versicolor  versicolor    0.01        0.77       0.22
15      setosa      setosa    0.96        0.04       0.00
16  versicolor

In [266]:
# Pedal_Length is the most important feature
LRclassifier.coef_[0]

array([-0.99627888,  1.03951116, -1.81448575, -1.71089577])

In [267]:
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]


In [271]:
# Try a new flower
newflower = scaler.transform([[5.1,3.5,1.4,0.2]]) 
y_prednew = LRclassifier.predict(newflower)
print(y_prednew)

['setosa']


In [274]:
# Try a new flower, it should be versicolor but it gives virginica
newflower = scaler.transform([[6,2.7,5.1,1.6]]) 
y_prednew = LRclassifier.predict(newflower)
print(y_prednew)

['virginica']


In [275]:
# Predicting the train set results
y_pred = LRclassifier.predict(X_train)

cm = confusion_matrix(y_train, y_pred)
print(cm)

[[39  0  0]
 [ 0 34  3]
 [ 0  2 42]]
