In [None]:
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import mnist
from sklearn.utils import shuffle

In [None]:
# Load the MNIST train/test split and promote both the images and the labels
# to float arrays so later arithmetic (standardization, loss) is done in
# floating point.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = x_train.astype(float), y_train.astype(float)
x_test, y_test = x_test.astype(float), y_test.astype(float)

In [None]:
# Display the dimensions of the raw training image array (samples, rows, cols).
x_train.shape

(60000, 28, 28)

In [None]:
# Reduce the task to binary classification: keep only samples labelled 0 or 1.
mask = (y_train == 0) | (y_train == 1)
x_train, y_train = x_train[mask], y_train[mask]

# Same filter for the held-out split.
mask = (y_test == 0) | (y_test == 1)
x_test, y_test = x_test[mask], y_test[mask]

In [None]:
# Standardize pixel intensities to zero mean / unit variance.
# The statistics are computed once, on the training split only, and then
# re-used for the test split so that both splits live on the same scale and no
# information from the test data leaks into preprocessing.
train_mean = np.mean(x_train)
train_std = np.std(x_train)
x_train = (x_train - train_mean) / train_std
x_test = (x_test - train_mean) / train_std

In [None]:
# Flatten every image into a single feature row: (n_samples, n_features).
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

In [None]:
# Permute each split with a fixed seed; images and labels are passed to the
# same call so every image stays paired with its label.
x_train, y_train = shuffle(
    x_train,
    y_train,
    random_state=42,
)
x_test, y_test = shuffle(
    x_test,
    y_test,
    random_state=42,
)

In [None]:
# Display how many training labels remain after keeping only digits 0 and 1.
y_train.shape

(12665,)

In [None]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``, computed stably.

    The naive formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here the exponential is only
    ever taken of a non-positive number, so it stays in ``(0, 1]``.

    Parameters
    ----------
    z : scalar or array-like
        Logits.

    Returns
    -------
    numpy.ndarray
        Values in ``[0, 1]`` with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # exp of a value <= 0: cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

In [None]:
def Logistic_regression(x, y, eta, iterations, batch_size):
    """Train binary logistic regression with mini-batch gradient descent.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, n_samples entries
        Binary targets (0 or 1); flattened to 1-D internally.
    eta : float
        Learning rate.
    iterations : int
        Maximum number of passes (epochs) over the data.
    batch_size : int
        Samples per mini-batch. The last batch of an epoch may be smaller;
        a value larger than ``n_samples`` results in full-batch updates.

    Returns
    -------
    (w, b) : tuple
        ``w`` has shape (n_features, 1), ``b`` has shape (1,).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    # Fixed seed so every run starts from the same initial parameters.
    np.random.seed(35)
    w = np.random.rand(x.shape[1], 1)
    b = np.random.rand(1)

    y = np.asarray(y, dtype=float).reshape(-1)
    num_samples = len(y)
    batch_size = int(batch_size)
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    tol = 0.0000001  # guards log(0) and doubles as the stopping threshold
    converged = False

    for _ in range(iterations):
        # Stepping by batch_size also visits the trailing partial batch.
        for start in range(0, num_samples, batch_size):
            x_mini = x[start:start + batch_size]
            y_mini = y[start:start + batch_size]

            phiz = sigmoid(np.dot(x_mini, w) + b).T  # shape (1, batch)
            loss = np.mean(
                -y_mini * np.log(phiz + tol)
                - (1 - y_mini) * np.log((1 - phiz) + tol)
            )

            residual = phiz - y_mini
            # Average over the samples actually in this batch so that the
            # weight and bias gradients use the same normalisation.
            w = w - eta * (np.dot(residual, x_mini) / len(y_mini)).T
            b = b - eta * np.mean(residual)

            # Test the loss that was just computed and stop training entirely.
            if loss <= tol:
                converged = True
                break
        if converged:
            break
    return w, b

In [None]:
def Logistic_regression_L1(x, y, eta, iterations, lamda, batch_size=None):
    """Train binary logistic regression with an L1 (lasso) penalty.

    The per-batch objective is the mean cross-entropy plus
    ``lamda / (2 * m) * sum(|w|)`` where ``m`` is the batch length.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, n_samples entries
        Binary targets (0 or 1); flattened to 1-D internally.
    eta : float
        Learning rate.
    iterations : int
        Maximum number of passes (epochs) over the data.
    lamda : float
        L1 regularisation strength.
    batch_size : int, optional
        Samples per mini-batch. ``None`` (default) means full batch, i.e. the
        number of samples in ``x``/``y`` as passed in.

    Returns
    -------
    (w, b) : tuple
        ``w`` has shape (n_features, 1), ``b`` has shape (1,).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    # Fixed seed so every run starts from the same initial parameters.
    np.random.seed(35)
    w = np.random.rand(x.shape[1], 1)
    b = np.random.rand(1)

    y = np.asarray(y, dtype=float).reshape(-1)
    num_samples = len(y)
    if num_samples == 0:
        return w, b
    # Resolve the default from the data actually passed in, not from a global.
    batch_size = num_samples if batch_size is None else int(batch_size)
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    tol = 0.0000001  # guards log(0) and doubles as the stopping threshold
    converged = False

    for _ in range(iterations):
        for start in range(0, num_samples, batch_size):
            x_mini = x[start:start + batch_size]
            y_mini = y[start:start + batch_size]
            m = len(y_mini)

            phiz = sigmoid(np.dot(x_mini, w) + b).T  # shape (1, batch)
            penalty = (lamda / (2 * m)) * np.sum(np.abs(w))
            loss = np.mean(
                -y_mini * np.log(phiz + tol)
                - (1 - y_mini) * np.log((1 - phiz) + tol)
            ) + penalty

            residual = phiz - y_mini
            # d|w|/dw = sign(w): the penalty pulls each weight towards zero
            # instead of shifting every weight by the same constant.
            grad_w = (np.dot(residual, x_mini).T + (lamda / 2) * np.sign(w)) / m
            w = w - eta * grad_w
            b = b - eta * np.mean(residual)

            # Test the loss that was just computed and stop training entirely.
            if loss <= tol:
                converged = True
                break
        if converged:
            break
    return w, b

In [None]:
def Logistic_regression_RMS(x, y, eta, iterations, B=0.9):
    """Train binary logistic regression with full-batch RMSprop updates.

    A running average of squared gradients scales each parameter's step. The
    average is bias-corrected by ``1 - B**t`` when it is *used*; the stored
    accumulator itself is left uncorrected so the correction does not compound
    from one step to the next.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, n_samples entries
        Binary targets (0 or 1); flattened to 1-D internally.
    eta : float
        Learning rate.
    iterations : int
        Maximum number of update steps.
    B : float, default 0.9
        Decay rate of the squared-gradient average, in ``[0, 1)``.

    Returns
    -------
    (w, b) : tuple
        ``w`` has shape (n_features, 1), ``b`` has shape (1,).

    Raises
    ------
    ValueError
        If ``B`` is outside ``[0, 1)``.
    """
    if not 0 <= B < 1:
        raise ValueError("B must be in [0, 1)")

    # Fixed seed so every run starts from the same initial parameters.
    np.random.seed(35)
    w = np.random.rand(x.shape[1], 1)
    b = np.random.rand(1)

    y = np.asarray(y, dtype=float).reshape(-1)
    num_samples = len(y)
    tol = 0.0000001  # guards log(0)/division and doubles as stop threshold
    sq_avg_w = 0.0
    sq_avg_b = 0.0

    for t in range(1, iterations + 1):
        phiz = sigmoid(np.dot(x, w) + b).T  # shape (1, n_samples)
        loss = np.mean(
            -y * np.log(phiz + tol) - (1 - y) * np.log((1 - phiz) + tol)
        )

        residual = phiz - y
        dw = (np.dot(residual, x) / num_samples).T
        db = np.mean(residual)

        sq_avg_w = B * sq_avg_w + (1 - B) * np.square(dw)
        sq_avg_b = B * sq_avg_b + (1 - B) * np.square(db)

        correction = 1 - B ** t  # compensates for the zero initialisation
        w = w - eta * dw / np.sqrt(sq_avg_w / correction + tol)
        b = b - eta * db / np.sqrt(sq_avg_b / correction + tol)

        if loss <= tol:
            break
    return w, b

In [None]:
def Logistic_regression_ADAM(x, y, eta, iterations, B=0.9, B2=None):
    """Train binary logistic regression with full-batch Adam updates.

    Keeps exponential moving averages of the gradient (first moment) and of
    the squared gradient (second moment). Both are bias-corrected when they
    are *used*; the stored accumulators are left uncorrected so the correction
    does not compound from one step to the next.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, n_samples entries
        Binary targets (0 or 1); flattened to 1-D internally.
    eta : float
        Learning rate.
    iterations : int
        Maximum number of update steps.
    B : float, default 0.9
        Decay rate of the first-moment average, in ``[0, 1)``.
    B2 : float, optional
        Decay rate of the second-moment average, in ``[0, 1)``. ``None``
        (default) re-uses ``B`` for both moments.

    Returns
    -------
    (w, b) : tuple
        ``w`` has shape (n_features, 1), ``b`` has shape (1,).

    Raises
    ------
    ValueError
        If ``B`` or ``B2`` is outside ``[0, 1)``.
    """
    beta2 = B if B2 is None else B2
    if not 0 <= B < 1 or not 0 <= beta2 < 1:
        raise ValueError("B and B2 must be in [0, 1)")

    # Fixed seed so every run starts from the same initial parameters.
    np.random.seed(35)
    w = np.random.rand(x.shape[1], 1)
    b = np.random.rand(1)

    y = np.asarray(y, dtype=float).reshape(-1)
    num_samples = len(y)
    tol = 0.0000001  # guards log(0)/division and doubles as stop threshold
    m_w, m_b = 0.0, 0.0  # first-moment (momentum) accumulators
    v_w, v_b = 0.0, 0.0  # second-moment (squared gradient) accumulators

    for t in range(1, iterations + 1):
        phiz = sigmoid(np.dot(x, w) + b).T  # shape (1, n_samples)
        loss = np.mean(
            -y * np.log(phiz + tol) - (1 - y) * np.log((1 - phiz) + tol)
        )

        residual = phiz - y
        dw = (np.dot(residual, x) / num_samples).T
        db = np.mean(residual)

        m_w = B * m_w + (1 - B) * dw
        m_b = B * m_b + (1 - B) * db
        v_w = beta2 * v_w + (1 - beta2) * np.square(dw)
        v_b = beta2 * v_b + (1 - beta2) * np.square(db)

        # Corrections compensate for the zero initialisation of the averages.
        m_corr = 1 - B ** t
        v_corr = 1 - beta2 ** t
        w = w - eta * (m_w / m_corr) / np.sqrt(v_w / v_corr + tol)
        b = b - eta * (m_b / m_corr) / np.sqrt(v_b / v_corr + tol)

        if loss <= tol:
            break
    return w, b

In [None]:
def test(w,b):
    z = np.dot(x_test,w)
    z = sigmoid(z)
    y_pred = np.where(z>0.5 ,1,0)
    y_pred=y_pred.reshape(-1)
    acc = np.mean(y_pred==y_test)
    return acc

In [None]:
# Train every variant with the same learning rate and epoch budget, then
# score each fitted model on the held-out split.
# NOTE(review): `iter` shadows the Python builtin of the same name; the name
# is kept here because other cells may read it.
iter = 700

# Plain mini-batch gradient descent with two batch sizes.
w1, b1 = Logistic_regression(x_train, y_train, eta=0.001, iterations=iter, batch_size=50)
w2, b2 = Logistic_regression(x_train, y_train, eta=0.001, iterations=iter, batch_size=1000)
# L1-regularised variant with a weak and a strong penalty.
w3, b3 = Logistic_regression_L1(x_train, y_train, eta=0.001, iterations=iter, lamda=0.001)
w4, b4 = Logistic_regression_L1(x_train, y_train, eta=0.001, iterations=iter, lamda=1000)
# Adaptive optimizers.
w5, b5 = Logistic_regression_RMS(x_train, y_train, eta=0.001, iterations=iter)
w6, b6 = Logistic_regression_ADAM(x_train, y_train, eta=0.001, iterations=iter)

acc1, acc2, acc3, acc4, acc5, acc6 = [
    test(weights, bias)
    for weights, bias in (
        (w1, b1),
        (w2, b2),
        (w3, b3),
        (w4, b4),
        (w5, b5),
        (w6, b6),
    )
]

  return (1/(1+np.exp(-z)))


In [None]:
# Print each model's test accuracy together with the accompanying remarks.
# NOTE(review): the remarks are fixed text; they are not derived from the
# accuracy values computed above.
report_lines = [
    f"accuracy of logistic regression with batch size 50 : {acc1!s}",
    f"accuracy of logistic regression with batch size 1000 : {acc2!s}",
    "we note that the bigger the batch size the bigger the accuracy because By using a ",
    "larger batch size, the model receives more diverse samples during each update step",
    " ",
    f"accuracy of logistic regression with L1 regulization with L=0.001 : {acc3!s}",
    f"accuracy of logistic regression with L1 regulization with L=1000 : {acc4!s}",
    "we note that the bigger the lamda the bigger the accuracy because we ",
    "depend more on the previous dw which makes the update value more strenched",
    "we note that lasso is better than normal logistic because Lasso regularization",
    "provides a sparse solution by setting coefficients to exactly zero. ",
    "This makes the model more interpretable,as the non-zero coefficients directly ",
    "indicate the important features that contribute to the predictions",
    " ",
    f"accuracy of logisitic regression with RMS optimizer : {acc5!s}",
    f"accuracy of logistic regression with ADAM optimizer : {acc6!s}",
    "we note that ADAM optimizer gives us the biggest accuracy then RMS then lasso then normal logistic regression ",
    "Adam combines adaptive learning rates and momentum, leading to faster convergence and robust performance.",
]
for report_line in report_lines:
    print(report_line)


accuracy of logistic regression with batch size 50 : 0.6127659574468085
accuracy of logistic regression with batch size 1000 : 0.6269503546099291
we note that the bigger the batch size the bigger the accuracy because By using a 
larger batch size, the model receives more diverse samples during each update step
 
accuracy of logistic regression with L1 regulization with L=0.001 : 0.6486997635933807
accuracy of logistic regression with L1 regulization with L=1000 : 0.6808510638297872
we note that the bigger the lamda the bigger the accuracy because we 
depend more on the previous dw which makes the update value more strenched
we note that lasso is better than normal logistic because Lasso regularization
 provides a sparse solution by setting coefficients to exactly zero. 
This makes the model more interpretable,as the non-zero coefficients directly 
indicate the important features that contribute to the predictions
 
accuracy of logisitic regression with RMS optimizer : 0.978250591016548