In [199]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler

In [200]:
def initialise_dataset():
    """Load the scikit-learn breast-cancer dataset with min-max scaled features.

    Returns:
        (X, Y): X is the 'data' entry of the loaded dataset after being passed
        through MinMaxScaler().fit_transform; Y is the untouched 'target' entry.
    """
    bunch = load_breast_cancer()
    # Scale the features only; the labels are returned exactly as loaded.
    features = MinMaxScaler().fit_transform(bunch['data'])
    labels = bunch['target']
    return features, labels

In [201]:
def initialise_dimensions(X,Y):
    """Return the problem sizes (m, n, k) for a design matrix and its labels.

    Args:
        X: 2-D array; rows are samples, columns are features.
        Y: label array, either 2-D with one row per sample or a flat 1-D vector.

    Returns:
        (m, n, k): m = X.shape[0], n = X.shape[1], and k = number of label
        columns. A 1-D Y is treated as a single column (k == 1) instead of
        raising IndexError on Y.shape[1].
    """
    m, n = X.shape[0], X.shape[1]
    # A flat label vector has no second axis; it represents one output column.
    k = Y.shape[1] if Y.ndim > 1 else 1
    return m, n, k

In [202]:
def initialise_weights(X,Y):
    """Create the starting weight row vector: all zeros, one entry per column of X.

    Y is accepted for signature symmetry with the other initialisers but is
    not read.

    Returns:
        Array of zeros with shape (1, X.shape[1]).
    """
    n_features = X.shape[1]
    return np.zeros((1, n_features))

In [203]:
def sigmoid_of(Z):
    """Element-wise logistic sigmoid 1 / (1 + exp(-Z)), computed without overflow.

    The textbook form evaluates exp(-Z), which overflows for large negative Z.
    Here only exp(-|Z|) is evaluated, which always lies in (0, 1]:
        Z >= 0:  1 / (1 + exp(-Z))
        Z <  0:  exp(Z) / (1 + exp(Z))
    Both branches are algebraically the same function.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        Sigmoid of Z with the same shape as Z (a NumPy scalar for scalar input).
    """
    Z = np.asarray(Z)
    decay = np.exp(-np.abs(Z))  # never larger than 1, so it cannot overflow
    out = np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return out[()] if out.ndim == 0 else out

In [204]:
def compute_cost(X,Y,m,n,theta):
    """Mean binary cross-entropy of the logistic model on (X, Y).

    With Z = np.dot(X, theta.T) and A = sigmoid(Z), the usual cost is
        -(1/m) * sum( Y*log(A) + (1-Y)*log(1-A) ).
    Evaluating it literally produces NaN as soon as A saturates to exactly 0
    or 1 (0 * log(0)). Substituting the sigmoid gives the equivalent form
        (1/m) * sum( log(1 + exp(Z)) - Y*Z ),
    which np.logaddexp evaluates without overflow or log(0).

    Args:
        X: (m, n) design matrix.
        Y: labels, shaped like Z; a flat 1-D vector is treated as a column so
           it does not broadcast against Z into an (m, m) matrix.
        m: number of samples used to average the cost.
        n: number of features (unused; kept for signature compatibility).
        theta: weight matrix with one row per output, shape (k, n).

    Returns:
        The scalar cost as a float.
    """
    Z = np.dot(X, theta.T)

    Y = np.asarray(Y)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    # log(1 + exp(Z)) == logaddexp(0, Z); stable for large |Z|.
    per_sample = np.logaddexp(0.0, Z) - np.multiply(Y, Z)

    return (1.0 / m) * np.sum(per_sample)
    

In [205]:
def compute_gradient(X,Y,m,n,theta):
    """Gradient of the mean cross-entropy cost with respect to theta.

    Computes (1/m) * np.dot((A - Y).T, X) where A = sigmoid_of(np.dot(X, theta.T)).

    Args:
        X: (m, n) design matrix.
        Y: labels, shaped like A; a flat 1-D vector is treated as a column.
           Without that, (m, 1) - (m,) would broadcast to an (m, m) matrix and
           the returned gradient would silently have the wrong shape.
        m: number of samples used to average the gradient.
        n: number of features (unused; kept for signature compatibility).
        theta: weight matrix with one row per output, shape (k, n).

    Returns:
        dw: array with the same shape as theta.
    """
    Z = np.dot(X, theta.T)
    A = sigmoid_of(Z)

    Y = np.asarray(Y)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    dw = (1.0 / m) * np.dot((A - Y).T, X)

    return dw
    
    
    

In [206]:
def _predict_labels(X, theta):
    """Return 0/1 predictions: sigmoid_of(np.dot(X, theta.T)) thresholded at 0.5."""
    return np.where(sigmoid_of(np.dot(X, theta.T)) > 0.5, 1, 0)


def train_logistic_reg(X,Y,learning_rate=0.05,iterations=10000,log_every=500):
    """Fit logistic-regression weights by batch gradient descent and report progress.

    Every `log_every` iterations the cost (evaluated on the weights *before*
    that iteration's update) and the training accuracy (evaluated *after* the
    update) are printed. After the last iteration the first 60 predicted and
    actual labels are printed.

    Args:
        X: (m, n) design matrix.
        Y: (m, 1) column of 0/1 labels (indexed as Y[:, 0] for the accuracy).
        learning_rate: gradient-descent step size.
        iterations: number of update steps.
        log_every: progress-report interval; 0 or None disables the reports.

    Returns:
        theta: the learned weights, as shaped by initialise_weights.
    """
    m, n, _ = initialise_dimensions(X, Y)
    theta = initialise_weights(X, Y)

    for p in range(iterations):
        log_now = bool(log_every) and p % log_every == 0

        # The cost is only needed for the report, so skip it on other iterations.
        if log_now:
            cost = compute_cost(X, Y, m, n, theta)

        # Gradient step.
        dw = compute_gradient(X, Y, m, n, theta)
        theta = theta - (learning_rate * dw)

        if log_now:
            y_pred = _predict_labels(X, theta)
            accuracy = (float(np.sum(y_pred[:,0] == Y[:,0]))/ m)* 100
            print ("Cost after iteration %i: %f | accuracy after iteration %i: %f" % (p, cost,p, accuracy))

    y_pred = _predict_labels(X, theta)
    # Single pre-formatted string: prints the same text under Python 2 and Python 3.
    print("\nPredicted Y : %s" % (y_pred.reshape(m)[:60],))
    print("\nActual Y : %s" % (Y.reshape(m)[:60],))

    return theta
    

In [207]:
# Load the scaled features and the labels.
X,Y=initialise_dataset()

# Prepend a column of ones so the first weight acts as the intercept term.
X=np.c_[np.ones(X.shape[0]),X]

# Labels as a column vector with one row per sample.
Y=Y.reshape(X.shape[0],1)

# Single pre-formatted string: prints the same text under Python 2 and Python 3.
print("%s %s" % (X.shape, Y.shape))

train_logistic_reg(X,Y)

(569, 31) (569, 1)
Cost after iteration 0: 0.693147 | accuracy after iteration 0: 65.026362
Cost after iteration 500: 0.373827 | accuracy after iteration 500: 92.618629
Cost after iteration 1000: 0.286767 | accuracy after iteration 1000: 92.970123
Cost after iteration 1500: 0.244831 | accuracy after iteration 1500: 94.024605
Cost after iteration 2000: 0.219297 | accuracy after iteration 2000: 94.376098
Cost after iteration 2500: 0.201683 | accuracy after iteration 2500: 94.727592
Cost after iteration 3000: 0.188563 | accuracy after iteration 3000: 94.727592
Cost after iteration 3500: 0.178272 | accuracy after iteration 3500: 95.079086
Cost after iteration 4000: 0.169901 | accuracy after iteration 4000: 95.254833
Cost after iteration 4500: 0.162904 | accuracy after iteration 4500: 95.957821
Cost after iteration 5000: 0.156932 | accuracy after iteration 5000: 96.309315
Cost after iteration 5500: 0.151751 | accuracy after iteration 5500: 96.485062
Cost after iteration 6000: 0.147196 | acc