# Logistic Regression for classifying Iris

In [85]:
# libraries import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Reading CSV dataset

In [86]:
iris_df = pd.read_csv("iris.data", sep=",", names=["A", "B", "C", "D", "Class"])
iris_df

Unnamed: 0,A,B,C,D,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [87]:
iris_df = iris_df[:100]
iris_df["Class"].value_counts()

Class
Iris-setosa        50
Iris-versicolor    50
Name: count, dtype: int64

In [88]:
iris_df = pd.get_dummies(data=iris_df, columns=["Class"])
iris_df = iris_df.iloc[:, :-1]
iris_df

Unnamed: 0,A,B,C,D,Class_Iris-setosa
0,5.1,3.5,1.4,0.2,True
1,4.9,3.0,1.4,0.2,True
2,4.7,3.2,1.3,0.2,True
3,4.6,3.1,1.5,0.2,True
4,5.0,3.6,1.4,0.2,True
...,...,...,...,...,...
95,5.7,3.0,4.2,1.2,False
96,5.7,2.9,4.2,1.3,False
97,6.2,2.9,4.3,1.3,False
98,5.1,2.5,3.0,1.1,False


In [89]:
def categorise(x):
    if x:
        return 1
    return 0

In [90]:
iris_df["Class_Iris-setosa"] = iris_df["Class_Iris-setosa"].apply(categorise)

In [91]:
iris_df["Class_Iris-setosa"].value_counts()

Class_Iris-setosa
1    50
0    50
Name: count, dtype: int64

### Splitting Dependent and Independent Variables

In [92]:
X = iris_df.iloc[:, :-1].values
y = iris_df.iloc[:, -1].values

### Splitting of Dataset into Test and Train

In [93]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [94]:
X_train

array([[5.3, 3.7, 1.5, 0.2],
       [4.6, 3.4, 1.4, 0.3],
       [5.7, 2.8, 4.1, 1.3],
       [5.8, 2.7, 3.9, 1.2],
       [6.8, 2.8, 4.8, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.6, 4.4, 1.2],
       [6.2, 2.2, 4.5, 1.5],
       [6.4, 3.2, 4.5, 1.5],
       [5.2, 3.5, 1.5, 0.2],
       [5.7, 3.8, 1.7, 0.3],
       [6.3, 3.3, 4.7, 1.6],
       [6.1, 2.9, 4.7, 1.4],
       [6.4, 2.9, 4.3, 1.3],
       [4.9, 3. , 1.4, 0.2],
       [5.9, 3. , 4.2, 1.5],
       [4.4, 3.2, 1.3, 0.2],
       [4.5, 2.3, 1.3, 0.3],
       [5. , 3.6, 1.4, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5. , 3.5, 1.3, 0.3],
       [4.4, 3. , 1.3, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [6.1, 3. , 4.6, 1.4],
       [5.2, 2.7, 3.9, 1.4],
       [5.1, 3.5, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.2, 3.4, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [4.8, 3.4, 1.6, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [6.9, 3

In [95]:
y_train

array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1])

### Feature Scaling

In [96]:
from sklearn.preprocessing import MinMaxScaler
MMS = MinMaxScaler()
X_train = MMS.fit_transform(X=X_train)
X_test = MMS.fit_transform(X_test)

In [97]:
X_train

array([[0.34615385, 0.70833333, 0.07692308, 0.05882353],
       [0.07692308, 0.58333333, 0.05128205, 0.11764706],
       [0.5       , 0.33333333, 0.74358974, 0.70588235],
       [0.53846154, 0.29166667, 0.69230769, 0.64705882],
       [0.92307692, 0.33333333, 0.92307692, 0.76470588],
       [0.23076923, 0.        , 0.58974359, 0.52941176],
       [0.42307692, 0.16666667, 0.66666667, 0.58823529],
       [0.42307692, 0.25      , 0.82051282, 0.64705882],
       [0.69230769, 0.08333333, 0.84615385, 0.82352941],
       [0.76923077, 0.5       , 0.84615385, 0.82352941],
       [0.30769231, 0.625     , 0.07692308, 0.05882353],
       [0.5       , 0.75      , 0.12820513, 0.11764706],
       [0.73076923, 0.54166667, 0.8974359 , 0.88235294],
       [0.65384615, 0.375     , 0.8974359 , 0.76470588],
       [0.76923077, 0.375     , 0.79487179, 0.70588235],
       [0.19230769, 0.41666667, 0.05128205, 0.05882353],
       [0.57692308, 0.41666667, 0.76923077, 0.82352941],
       [0.        , 0.5       ,

In [98]:
X_test

array([[0.29166667, 0.6       , 0.16216216, 0.21428571],
       [1.        , 0.45      , 1.        , 1.        ],
       [0.16666667, 0.5       , 0.08108108, 0.07142857],
       [0.58333333, 0.3       , 0.94594595, 0.85714286],
       [0.95833333, 0.4       , 0.91891892, 0.92857143],
       [0.29166667, 0.05      , 0.62162162, 0.64285714],
       [0.45833333, 0.85      , 0.08108108, 0.21428571],
       [0.75      , 0.3       , 1.        , 0.78571429],
       [0.91666667, 0.3       , 0.97297297, 1.        ],
       [0.58333333, 0.4       , 0.86486486, 0.78571429],
       [0.5       , 0.05      , 0.81081081, 0.85714286],
       [0.625     , 0.2       , 0.81081081, 0.78571429],
       [0.70833333, 0.35      , 0.94594595, 1.        ],
       [0.        , 0.4       , 0.02702703, 0.        ],
       [0.29166667, 0.6       , 0.13513514, 0.07142857],
       [0.20833333, 0.45      , 0.16216216, 0.07142857],
       [0.125     , 0.7       , 0.        , 0.07142857],
       [0.20833333, 0.6       ,

In [99]:
unique_val, counts = np.unique(y_train, return_counts=True)
for i in range(len(unique_val)):
    print(unique_val[i], " -> ", counts[i])

0  ->  38
1  ->  37


In [100]:
class Logistic_Regression:
    def __init__(self, learning_rate=0.01, iter=1000):
        self.learnRate = learning_rate
        self.epochs = iter

    def sigmoid(self, z):
        return 1/(1+np.exp(-z))

    def fit(self, X, y):
        no_samples = len(X)
        no_features = len(X[0])
        self.weights = np.zeros(no_features)
        self.bias = 0

        for i in range(self.epochs):
            # forward propagation
            predicted = self.predict(X)

            # backpropagation
            dw = (1/no_samples) * np.dot(X.T, (predicted - y))
            db = (1/no_samples) * np.sum(predicted - y)
            self.weights -= self.learnRate * dw
            self.bias -= self.learnRate * db

    def predict(self, X):
        # forward propagation
        weighted_sum = np.dot(X, self.weights) + self.bias
        predicted = self.sigmoid(weighted_sum)
        return predicted

    def test(self, X, y):  # passing test sets
        global y_predicted_category
        y_predicted = self.predict(X)
        y_predicted_category = []
        for _ in y_predicted:
            if _ > 0.5:
                y_predicted_category.append(1)
            else:
                y_predicted_category.append(0)
        y_predicted_category = np.array(y_predicted_category)
        print(y_predicted_category)
        print(y)
        print(y_predicted)
        correct_pred = 0
        for i in range(len(y)):
            if y[i] == y_predicted_category[i]:
                correct_pred += 1
        print("Accuracy: ", correct_pred/len(y))


In [101]:
Regressor = Logistic_Regression()
Regressor.fit(X_train, y_train)

In [102]:
print(Regressor.weights)
print(Regressor.bias)

[-0.40408166  0.73567234 -1.13636661 -1.02187534]
0.46668817431957194


In [103]:
Regressor.test(X_test, y_test)

[1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 1]
[1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 1]
[0.5955568  0.14621729 0.64612482 0.18255099 0.16528752 0.27334933
 0.64467065 0.17437392 0.14057195 0.22089423 0.18304671 0.20375173
 0.15991405 0.67485892 0.63730366 0.61213376 0.70227556 0.61644907
 0.69093158 0.62515692 0.56900357 0.19815996 0.62730591 0.19577031
 0.60059402]
Accuracy:  1.0


In [104]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score
matrix = confusion_matrix(y_true=y_test, y_pred=y_predicted_category)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted_category)
precision = precision_score(y_true=y_test, y_pred=y_predicted_category)
recall = recall_score(y_true=y_test, y_pred=y_predicted_category)
print(matrix)
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall", recall)

[[12  0]
 [ 0 13]]
Accuracy:  1.0
Precision:  1.0
Recall 1.0
