In [1]:
#Read Datafile
import csv
import numpy as np
import pandas as pd

dataset = pd.read_csv('wdbc.dataset',header = None)
data = np.array(dataset)
# pd.map({'cat': 'kitten', 'dog': 'puppy'})

In [2]:
def normalization(X):
    
    #computing normalization
    X_norm = np.linalg.norm(X, axis=1, keepdims = True);
    
    #dividing main matrix by the normalized value
    X = X / X_norm;
    
    return X

In [3]:
np.random.shuffle(data)
#Splitting input matrix into train,validation and test set
X_train, X_valid, X_test = data[:456,2:], data[456:513,2:], data[513:,2:]

#Splitting labelled vector into train,validation and test set
Y_train, Y_valid, Y_test = data[:456,1:2], data[456:513,1:2], data[513:,1:2]

In [4]:
#normalizing input data
X_train = normalization(X_train)
X_valid = normalization(X_valid)
X_test = normalization(X_test)

#reshaping the input matrix
X_train = X_train.reshape(30,X_train.shape[0])
X_valid = X_valid.reshape(30,X_valid.shape[0])

In [5]:
#initialization of weights and bias
def initialization(dimension):
    w = np.zeros((dimension,1))
    b = 0.0

    return w,b   

In [6]:
def sigmoid(x):
    sig = 1/(1+np.exp(-x))
    return sig

In [7]:
def propogation(w,b,X,Y):
    
    #number of training examples
    m = X.shape[1]

    #calculating forward propogation
    a = sigmoid(np.dot(w.T,X) + b);
    
    cost = (-1/m) * np.sum(Y * np.log(a) + (1-Y) * np.log(1-a)) 
    
    #calculating back_prop
    dw = (1/m)* np.dot(X,(a-Y).T)
    db = (1/m)* np.sum(a-Y)

    grad_values = {"dw": dw,
                   "db": db}

    return cost,grad_values

In [8]:
def gradient_descent(w, b, X, Y, num_iterations, learning_rate):
    
    #creating a empty list for cost values
    cost_values = []
    
    #iterating for given number of iterations
    for i in range(num_iterations):
        
        #gradient descent calculation
        cost,grad_values = propogation(w, b, X, Y)
        
        dw = grad_values["dw"]
        db = grad_values["db"]
        
        #update w and b
        w = w - (learning_rate*dw)
        b = b - (learning_rate*db)

        # append cost to cost list and print after every 100 iterations
        if i % 100 == 0:
            cost_values.append(cost)
            print ("Cost after %i iteration : %f" %(i, cost))
            
        parameters = {"w": w,
                      "b": b}
    
        grad_values = {"dw": dw,
                       "db": db}
            
    return parameters, grad_values, cost_values  

In [9]:
def logistic_regression(w,b,X):
    
    m = X.shape[1]

    Y_prediction = np.zeros((1,m))

    a =  sigmoid(np.dot(w.T,X)+ b)

    for i in range(a.shape[1]):
        Y_prediction[0, i] = 1 if a[0, i] > 0.5 else 0
    
    assert(Y_prediction.shape == (1, m))
    return Y_prediction  

In [10]:
def model(X_train, X_valid, Y_train, Y_valid, num_iterations, learning_rate):
    
    # initialize parameters with 0
    w, b = initialization(X_train.shape[0])
    
    #Gradient_descent
    parameters, grad_values, cost_values = gradient_descent(w, b, X_train, Y_train, num_iterations, learning_rate)
    
    w = parameters["w"]
    b = parameters["b"]
    
    Y_pred_train = logistic_regression(w,b,X_train)
    print("Y_pred_train shape",Y_pred_train.shape)
    Y_pred_valid = logistic_regression(w,b,X_valid)
    print("Y_pred_valid shape",Y_pred_valid.shape)
    
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_pred_train - Y_train)) * 100))
    print("Validation accuracy: {} %".format(100 - np.mean(np.abs(Y_pred_valid - Y_valid)) * 100))
    
    model_data = {"costs": cost_values,
     "Y_prediction_train" : Y_pred_train, 
     "w" : w, 
     "b" : b,
     "learning_rate" : learning_rate,
     "num_iterations": num_iterations}
    
    return model_data

In [11]:
Final = model(X_train, X_valid, Y_train, Y_valid, num_iterations = 30000, learning_rate = 0.013)

Cost after 0 iteration : 316.075114
Cost after 100 iteration : 286.385265
Cost after 200 iteration : 275.273911
Cost after 300 iteration : 265.490446
Cost after 400 iteration : 256.865855
Cost after 500 iteration : 249.244852
Cost after 600 iteration : 242.488821
Cost after 700 iteration : 236.476536
Cost after 800 iteration : 231.103470
Cost after 900 iteration : 226.280335
Cost after 1000 iteration : 221.931297
Cost after 1100 iteration : 217.992147
Cost after 1200 iteration : 214.408566
Cost after 1300 iteration : 211.134586
Cost after 1400 iteration : 208.131242
Cost after 1500 iteration : 205.365426
Cost after 1600 iteration : 202.808924
Cost after 1700 iteration : 200.437614
Cost after 1800 iteration : 198.230799
Cost after 1900 iteration : 196.170656
Cost after 2000 iteration : 194.241776
Cost after 2100 iteration : 192.430791
Cost after 2200 iteration : 190.726058
Cost after 2300 iteration : 189.117397
Cost after 2400 iteration : 187.595879
Cost after 2500 iteration : 186.15364