# LOGISTIC REGRESSION



In [5]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

### My own regression model

In [100]:
def initialise( features ) :
    w = np.zeros(( features , 1 ))
    b = 0
    return w , b

In [34]:
def sigmoid( z ) :
    return 1/( 1 + np.exp(-z) )

In [145]:
def fitLinearRegression( x , w , b ) :
    
    z = np.dot( w.T , x ) + b
    
    return z

In [141]:
def findGrad( a , x , y ) :
    m = x.shape[1]
    dz = a - y
    
    dw = (1/m)*np.dot( x , dz.T )
    db = (1/m)*np.sum( dz )
    
    return dw , db

In [237]:
def findCost( y , a ) :
    
    m = a.shape[1]
    cost = (-1/m)*np.sum( y*np.log(a) + (1-y)*np.log(1-a) )
    
    return cost

In [175]:
def predict( x , w , b ) :
    
    a = fitLinearRegression( x , w , b  )
    m = a.shape[1]
    y_predict = np.zeros( (1,m) )
    
    for i in range( m ) :
        if a[0][i] >= 0 :
            y_predict[0][i] = 0
        else :
            y_predict[0][i] = 1
    
    return y_predict

In [297]:
def printAccuracy( y_predict , y_real ) :
    acc = 100 - np.mean(np.abs(y_predict - y_real)) * 100
    print( "The accuracy is : ",acc,"%" )

In [315]:
def train( x , y , x_test , y_test , iterations = 1000 , learning_rate = 0.001 , printCost = True ) :

    w , b = initialise( x.shape[0] )  
    
    for i in range(iterations) :
        z = fitLinearRegression( x , w , b  )
        a = sigmoid( z )
        dw , db = findGrad( a , x , y )
        cost = findCost( y , a )
        if i%100 == 0 and printCost :
            print( "For i = ",i," : Cost = ",cost )
        w = w - learning_rate * dw
        b = b - learning_rate * db
        
    y_train_predict = predict( x , w , b )
    print("For training set : ")
    printAccuracy( y_train_predict , y )
    y_test_predict = predict( x_test , w , b )
    print("For test set : ")
    printAccuracy( y_test_predict , y_test )

### Main Function :

In [316]:
dataset = pd.read_csv('social.csv')

In [317]:
y = dataset['Purchased']

In [318]:
x = dataset.drop( ['User ID','Purchased','Gender'] , axis = 1 )
x.head()

Unnamed: 0,Age,EstimatedSalary
0,19,19000
1,35,20000
2,26,43000
3,27,57000
4,19,76000


In [319]:
x = np.array(x)
y = (np.array(y).T).reshape(400,1)
x = x.T
y = y.T 
print( x.shape , y.shape)

(2, 400) (1, 400)


In [320]:
x_train = x[ : , : 300 ]
x_test = x[ : , 300 : ]
y_train = y[ : , : 300 ]
y_test = y[ : , 300 : ]
print( x_train.shape  , x_test.shape , y_train.shape , y_test.shape)

(2, 300) (2, 100) (1, 300) (1, 100)


In [322]:
train( x_train , y_train , x_test , y_test)

For i =  0  : Cost =  0.8390769841801067
For i =  100  : Cost =  0.6931471805599453
For i =  200  : Cost =  0.6931471805599453
For i =  300  : Cost =  0.6931471805599453
For i =  400  : Cost =  0.6931471805599453
For i =  500  : Cost =  0.6931471805599453
For i =  600  : Cost =  0.6931471805599453
For i =  700  : Cost =  0.6931471805599453
For i =  800  : Cost =  0.6931471805599453
For i =  900  : Cost =  0.6931471805599453
For training set : 
The accuracy is :  73.0 %
For test set : 
The accuracy is :  38.0 %
