In [194]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.special import expit

In [45]:
class perceptron:

    def __init__(self,epochs=1000,lr=0.005):
        self.epochs_ = epochs
        self.lr_ = lr
        self.beta_ = None

    def fit(self,x_train,y_train):
        x_train = x_train.to_numpy()
        y_train = y_train.to_numpy().reshape(-1,1)

        x_train = np.insert(x_train,0,1,axis=1)
        col = x_train.shape[1]
        rows = x_train.shape[0]
        
        beta = np.ones(col)
        beta[0] = 0

        for i in range(self.epochs_):
            idx = np.random.randint(rows)
            if (x_train[idx] @ beta) >= 0 and y_train[idx] == 0:
                for k in range(col):
                    beta[k] = beta[k] - self.lr_*x_train[idx,k]

            elif (x_train[idx] @ beta) < 0 and y_train[idx] == 1:
                for k in range(col):
                    beta[k] = beta[k] + self.lr_*x_train[idx,k]
            else:
                pass
                
        self.beta_ = beta

    def predict(self,x_test):
        x_test = x_test.to_numpy()
        x_test = np.insert(x_test,0,1,axis=1)

        yhat = x_test @ self.beta_
        yhat = np.where(yhat >= 0 ,1,0)
        return yhat

In [4]:
x,y = load_breast_cancer(return_X_y=True,as_frame=True)

In [5]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=3)

In [224]:
lr = LogisticRegression(max_iter=100000 , solver = 'sag' , penalty = None)

In [225]:
lr.fit(x_train,y_train)

In [226]:
lr.coef_

array([[ 2.30717592e-02, -5.15508060e-03,  1.06137033e-01,
         1.42893457e-02,  2.81908770e-05, -9.49058837e-04,
        -1.61384622e-03, -6.19038999e-04,  5.31601070e-05,
         8.95400101e-05,  1.14833406e-04, -1.30492780e-03,
        -3.78749717e-03, -4.75840983e-02, -1.15533708e-05,
        -2.73835702e-04, -3.52832733e-04, -7.47797162e-05,
        -3.98187037e-05, -1.73616701e-05,  2.40252460e-02,
        -3.30239810e-02,  7.91527400e-02, -3.12291035e-02,
        -1.07502970e-04, -3.59304565e-03, -4.62266124e-03,
        -1.17857936e-03, -5.31377087e-04, -1.76280817e-04]])

In [101]:
mylr = perceptron(epochs=10000,lr=0.000075)

In [102]:
mylr.fit(x_train,y_train)

In [103]:
mylr.beta_

array([ 0.0084    ,  1.06617917,  0.8901565 ,  1.297099  ,  0.407965  ,
        0.99987002,  0.99706371,  0.99503309,  0.99807518,  0.99986222,
        1.00011096,  0.99298556,  0.97785415,  0.93709196, -0.02798683,
        0.99990661,  0.99903332,  0.99877879,  0.9997253 ,  0.99973293,
        0.99990356,  1.058296  ,  0.780016  ,  1.1765425 , -0.72008   ,
        0.99947873,  0.99051227,  0.98775609,  0.99691584,  0.99876106,
        0.99939911])

In [227]:
y_pred_my = mylr.predict(x_test)

In [228]:
y_pred_sk = lr.predict(x_test)

In [229]:
accuracy_score(y_test,y_pred_sk)

0.9122807017543859

In [107]:
accuracy_score(y_test,y_pred_my)

0.9035087719298246

In [160]:
class my_log_reg:

    def __init__(self,epochs=1000,lr=0.005):
        self.epochs_ = epochs
        self.lr_ = lr
        self.beta_ = None

    def fit(self,x_train,y_train):
        x_train = x_train.to_numpy()
        y_train = y_train.to_numpy().reshape(-1,1)

        x_train = np.insert(x_train,0,1,axis=1)
        col = x_train.shape[1]
        rows = x_train.shape[0]
        
        beta = np.ones(col).reshape(-1,1)
        beta[0] = 0

        for i in range(self.epochs_):
            ytemp = x_train @ beta
            ytemp = np.clip(ytemp,-10,10)
            y_pred = 1/(1 + np.exp(-ytemp))
            diff = y_train - y_pred
            beta = beta + self.lr_*(x_train.T @ diff)
                
        self.beta_ = beta

    def predict(self,x_test):
        x_test = x_test.to_numpy()
        x_test = np.insert(x_test,0,1,axis=1)

        yhat = x_test @ self.beta_
        yhat = np.clip(yhat,-10,10)
        ypred = 1/(1+np.exp(-yhat))
        ypred = np.where(ypred >= 0.5 ,1,0)
        return ypred

In [189]:
mylrs = my_log_reg(epochs = 5000)

In [190]:
mylrs.fit(x_train,y_train)

In [191]:
y_pred_mylrs = mylrs.predict(x_test)

In [192]:
accuracy_score(y_test,y_pred_mylrs)

0.41228070175438597

In [195]:
class myLogReg:

    def __init__(self,max_iter = 1000,lr = 0.001):
        self.max_iter_ = max_iter
        self.lr_ = lr
        self.beta_ = None

    def fit(self,x_train,y_train):
        x_train = x_train.to_numpy()
        y_train = y_train.to_numpy().reshape(-1,1)

        x_train = np.insert(x_train,0,1,axis=1)
        xrow = x_train.shape[0]
        xcol = x_train.shape[1]
        beta = np.ones(xcol).reshape(-1,1)
        beta[0] = 0

        for i in range(self.max_iter_):
            xbeta = x_train @ beta 
            sigmoid = expit(xbeta)
            gradient = x_train.T @ (y_train - sigmoid)
            
            beta = beta + (self.lr_ * gradient)

        self.beta_ = beta
        
    def predict(self,x_test):
        x_test = x_test.to_numpy()
        x_test = np.insert(x_test , 0,1, axis =1)

        xbeta = x_test @ self.beta_
        ypred = expit(xbeta)
        ypred = np.where(ypred >= 0.5 , 1,0)
        return ypred 


In [217]:
mylog = myLogReg(max_iter = 100000 , lr =0.0005)

In [218]:
mylog.fit(x_train,y_train)

In [219]:
mylog.beta_

array([[ 7.83203554e+01],
       [ 6.12393172e+02],
       [ 1.54843413e+02],
       [ 1.67513206e+03],
       [ 4.99913340e-02],
       [-9.71672252e+00],
       [-6.19334084e+01],
       [-9.47339145e+01],
       [-3.52388734e+01],
       [-2.13144729e+01],
       [-1.93325716e+00],
       [ 1.22113233e+01],
       [ 1.73297498e+01],
       [-1.67432397e+02],
       [-3.92371267e+02],
       [-4.92294783e-01],
       [-1.45027240e+01],
       [-2.10057809e+01],
       [-3.74787211e+00],
       [-4.38705690e+00],
       [-1.10789963e-01],
       [ 6.14414356e+02],
       [-1.02911013e+03],
       [-2.11196353e+02],
       [-1.27525491e+02],
       [-2.32891134e+01],
       [-2.21656865e+02],
       [-2.79733236e+02],
       [-7.33807079e+01],
       [-7.45615471e+01],
       [-1.84010787e+01]])

In [220]:
ypred_mlr = mylog.predict(x_test)

In [221]:
accuracy_score(y_test,ypred_mlr)

0.9210526315789473