# Task Description
Implement Logistic Regression Model on the following dataset:

https://github.com/navoneel1092283/logistic_regression

You have to implement the Algorithm from scratch in Python i.e., without using Scikit-Learn or any other Machine Learning Toolbox. The dataset contains marks obtained by students in 2 exams as features and the target label, 0/1 as whether they will be admitted to the university (1) or not (0).

[Use alpha = 0.001 and number of iterations for Gradient Descent = 100000].

In [1]:
import numpy as np
import pandas as pd

In [2]:
data=pd.read_csv("dataset.txt",sep=",")

In [3]:
data.head()

Unnamed: 0,Marks_1,Marks_2,Result
0,34.62366,78.024693,0
1,30.286711,43.894998,0
2,35.847409,72.902198,0
3,60.182599,86.308552,1
4,79.032736,75.344376,1


# Splitting the data in X(train,test),y(train,test)

In [4]:
X=data.iloc[:,0:2].values

In [5]:
X.shape

(100, 2)

In [6]:
y=data.iloc[:,-1].values

In [7]:
y.shape

(100,)

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [9]:
X_train.shape

(80, 2)

In [10]:
y_train.shape

(80,)

# Creating a User defined class for Logistic Regression:

In [36]:
class LogisticRegression:
    def __init__(self):
        weight=None
        b=None
    def fit(self,x,y):
        n,w=X.shape
        self.weight=np.zeros(w) #Creation of matrix of co-efficient
        lr=0.001
        self.b=0
        for i in range(0,99999):
            eq=np.dot(x,self.weight)+self.b
            sig=1/(1+np.exp(-eq)) #calculation of Sigmoid function
            devw=(1/n) * np.dot(x.T,(sig-y)) #differentiation for W
            devb=(1/n) * np.sum(sig-y) #differentiation for b
            new_w=self.weight-(lr*devw)
            new_b=self.b-(lr*devb)
            if(abs(new_w-self.weight).any()<=0.001 and abs(new_b-self.b)<=0.001):
                break
            else:
                self.weight=new_w
                self.b=new_b
        return self.weight,self.b    
    def predict(self,x):
        eqn=np.dot(x,self.weight)+self.b
        pred=1/(1+np.exp(-eqn))
        pred_class=[]
        for j in pred:
            if j<0.5:
                x=0
                pred_class.append(x)
            else:
                x=1
                pred_class.append(x)
        return pred,pred_class

In [37]:
lr=LogisticRegression()
lr.fit(X_train,y_train)

(array([0.0341979, 0.0395139]), -4.227800367746941)

In [38]:
y1,y2=lr.predict(X_test)

In [39]:
y2

[1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1]

In [40]:
data={"Original":y_test,"Predicted":y2,"P(z)":y1}
new=pd.DataFrame(data)
new

Unnamed: 0,Original,Predicted,P(z)
0,1,1,0.641812
1,1,1,0.862055
2,0,0,0.469726
3,1,1,0.554343
4,0,0,0.457911
5,0,0,0.388591
6,1,1,0.631591
7,0,1,0.532165
8,0,0,0.490084
9,1,1,0.831016


In [41]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y2,y_test))

0.95
