In [1]:
import numpy as np
import random
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris(as_frame=True)
from sklearn.preprocessing import add_dummy_feature
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix

In [8]:
class Simple_logistic_regression:
    def __init__(self) -> None:
        self.alpha = None
        self.x = None
        self.y = None
        self.weights = None
        self.coef_ = None
        self.intercept = None
        
    def find_sigma(self, x):
        return 1/(1+np.exp(-x))

    def fit(self, x, y, alpha=0.01):
        self.x, self.y, self.alpha = x, y, alpha
        costs = np.array([])
        params_iterated = []
        self.weights = np.array(random.sample(range(0,10), 2)).reshape(2, 1)
        strt_alpha = alpha
        for i in range(1000):
            if i == 0:
                continue
            elif i%10 == 0:
                self.alpha = strt_alpha/i
            rand_indx = np.random.randint(x.shape[0])
            sigma = self.find_sigma(x[rand_indx].reshape(1, 2) @ self.weights)
            cost = -(y[rand_indx]*np.log(sigma) + (1-y[rand_indx])*np.log(1-sigma))
            costs = np.append(cost, costs)
            params_iterated.append(self.weights.flatten())
            self.weights = self.weights - self.alpha*(-(y[rand_indx] - sigma)*x[rand_indx]).reshape(2, 1)
        params_iterated = np.array(params_iterated)
        min_indx = np.argmin(costs)
        self.intercept = params_iterated[min_indx][0]
        self.coef_ = params_iterated[min_indx][1]
        self.weights = params_iterated[min_indx].reshape(2,1)
        
    def predict(self, x):
        y = self.find_sigma(x @ self.weights)
        y[y>0.9945] = 1
        y[y<0.9945] = 0
        return y

In [9]:
x = iris.data[["petal width (cm)"]].values
y = iris.target_names[iris.target] == "virginica"
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=85)

In [10]:
scaler = StandardScaler()
traindata = scaler.fit_transform(x_train)
testdata = scaler.fit_transform(x_test)
x_train = add_dummy_feature(traindata)
x_test = add_dummy_feature(testdata)
x_test = x_test.reshape(x_test.shape[0], 2)
y_train = y_train.reshape(x_train.shape[0], 1)
y_test = y_test.reshape(x_test.shape[0], 1)

In [11]:
slr = Simple_logistic_regression()
slr.fit(x_train, y_train, alpha=0.04)

In [12]:
x_train_pred = slr.predict(x_train)
x_test_pred = slr.predict(x_test)

In [13]:
cm_train = confusion_matrix(y_train, x_train_pred)
cm_test = confusion_matrix(y_test, x_test_pred)
print(f"train matrix: {cm_train}")
print(f"test matrix: {cm_test}")

train matrix: [[75  0]
 [ 3 34]]
test matrix: [[23  2]
 [ 1 12]]


In [14]:
print(f"train precision : {precision_score(y_train, x_train_pred)}")
print(f"train recall : {recall_score(y_train, x_train_pred)}")
print(f"test precision : {precision_score(y_test, x_test_pred)}")
print(f"test recall : {recall_score(y_test, x_test_pred)}")

train precision : 1.0
train recall : 0.918918918918919
test precision : 0.8571428571428571
test recall : 0.9230769230769231
