# Task Description
Implement Logistic Regression Model with L1 and L2 Regularization on an Admission_Test_Marks Dataset(dataset.txt):

https://github.com/navoneel1092283/logistic_regression

in Python from scratch i.e., without using Scikit-Learn or any other Machine Learning Toolbox.

The dataset contains marks obtained by students in 2 exams as features and the target label, 0/1 as whether they will be admitted to the university (1) or not (0).

[Use alpha = 0.001 ,number of iterations for Gradient Descent = 100000 and lamdba = 100].

[Don’t expect better results as the dataset is a very clean and simple one, just obtain the weight matrix and cost after 1 Lakh Iterations.]

In [1]:
import numpy as np 
import pandas as pd

In [2]:
data=pd.read_csv("dataset.txt",sep=",")

In [3]:
data.head()

Unnamed: 0,Marks_1,Marks_2,Result
0,34.62366,78.024693,0
1,30.286711,43.894998,0
2,35.847409,72.902198,0
3,60.182599,86.308552,1
4,79.032736,75.344376,1


# Splitting the data in X(train,test),y(train,test)

In [4]:
X=data.iloc[:,0:2].values

In [5]:
y=data.iloc[:,-1].values

In [6]:
X.shape

(100, 2)

In [7]:
y.shape

(100,)

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [9]:
X_train.shape

(80, 2)

In [10]:
y_train.shape

(80,)

# Creating a User defined class for Lasso Regularization(L1):

In [11]:
class Lasso:
    def __init__(self):
        weight=None
        b=None
        L1_reg=None
    def fit(self,x,y):
        n,w=X.shape
        lambda_1=100
        self.weight=np.zeros(w) #Creation of matrix of co-efficient
        lr=0.001
        self.b=0
        for i in range(0,99999):
            self.L1_reg=(lambda_1/(2*n)*sum(self.weight)) #L1 Regularization term
            eq=np.dot(x,self.weight)+self.b+self.L1_reg #Addition of Regularization term in the equation
            sig=1/(1+np.exp(-eq)) #calculation of Sigmoid function
            devw=(1/n) * np.dot(x.T,(sig-y)) #differentiation for W
            devb=(1/n) * np.sum(sig-y) #differentiation for b
            new_w=self.weight-(lr*devw)
            new_b=self.b-(lr*devb)
            if(abs(new_w-self.weight).any()<=0.001 and abs(new_b-self.b)<=0.001):
                break
            else:
                self.weight=new_w
                self.b=new_b
        return self.weight,self.b,self.L1_reg    
    def predict(self,x):
        eqn=np.dot(x,self.weight)+self.b+self.L1_reg
        pred=1/(1+np.exp(-eqn))
        pred_class=[]
        for j in pred:
            if j<0.5:
                x=0
                pred_class.append(x)
            else:
                x=1
                pred_class.append(x)
        return pred,pred_class

In [12]:
lr=Lasso()
lr.fit(X_train,y_train)

(array([0.03413579, 0.03942822]), -4.269102029111735, 0.036781770005670934)

In [13]:
y1,y_pred=lr.predict(X_test)

In [14]:
from sklearn.metrics import accuracy_score
print("Accuracy:",accuracy_score(y_pred,y_test))

Accuracy: 0.9


In [15]:
data={"Original":y_test,"Predicted":y_pred,"P(z)":y1}
new=pd.DataFrame(data)

In [16]:
new

Unnamed: 0,Original,Predicted,P(z)
0,0,1,0.656787
1,1,1,0.551048
2,1,1,0.590566
3,1,1,0.82657
4,1,1,0.84848
5,0,1,0.528805
6,1,1,0.831016
7,1,1,0.570013
8,1,1,0.657355
9,0,0,0.236157


# Creating a User defined class for Ridge Regularization(L2):

In [17]:
class Ridge:
    def __init__(self):
        weight=None
        b=None
        L2_reg=None
    def fit(self,x,y):
        n,w=X.shape
        lambda_1=100
        self.weight=np.zeros(w) #Creation of matrix of co-efficient
        lr=0.001
        self.b=0
        for i in range(0,99999):
            self.L2_reg=(lambda_1/(2*n)*sum(self.weight**2))
            eq=np.dot(x,self.weight)+self.b+self.L2_reg
            sig=1/(1+np.exp(-eq)) #calculation of Sigmoid function
            devw=(1/n) * np.dot(x.T,(sig-y)) #differentiation for W
            devb=(1/n) * np.sum(sig-y) #differentiation for b
            new_w=self.weight-(lr*devw)
            new_b=self.b-(lr*devb)
            if(abs(new_w-self.weight).any()<=0.001 and abs(new_b-self.b)<=0.001):
                break
            else:
                self.weight=new_w
                self.b=new_b
        return self.weight,self.b,self.L2_reg    
    def predict(self,x):
        eqn=np.dot(x,self.weight)+self.b+self.L2_reg
        pred=1/(1+np.exp(-eqn))
        pred_class=[]
        for j in pred:
            if j<0.5:
                x=0
                pred_class.append(x)
            else:
                x=1
                pred_class.append(x)
        return pred,pred_class

In [18]:
lr1=Ridge()
lr1.fit(X_train,y_train)

(array([0.03431571, 0.03962536]), -4.258399877886584, 0.001373851070611022)

In [19]:
y2,y_pred1=lr1.predict(X_test)

In [20]:
print("Accuracy:",accuracy_score(y_pred1,y_test))

Accuracy: 0.9


In [21]:
data={"Original":y_test,"Predicted":y_pred1,"P(z)":y2}
new1=pd.DataFrame(data)

In [22]:
new1

Unnamed: 0,Original,Predicted,P(z)
0,0,1,0.656787
1,1,1,0.550583
2,1,1,0.59034
3,1,1,0.827311
4,1,1,0.84922
5,0,1,0.528157
6,1,1,0.831742
7,1,1,0.569608
8,1,1,0.65742
9,0,0,0.234522
