In [87]:
import numpy as ny
import pandas as pa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [88]:
data_header = ['id','clump_thickness','uniformity_cell_size','uniformity_cell_shape','marginal_adhesion','epithelial_cell_size','bare_nuclei','bland_chromatin','normal_nucleoli','mitoses','class']
data_set = pa.read_csv('breast-cancer-wisconsin.data',names=data_header,encoding='latin-1')

drop_field = ['id'] 
data_set = data_set.drop(drop_field, axis=1)

data_set['class'] = data_set['class'].map({2: 0, 4: 1})

data_set.drop(data_set.loc[data_set['bare_nuclei']=='?'].index, inplace=True)

data_set = data_set.astype({"bare_nuclei": 'int64'})

data_set.head()

Unnamed: 0,clump_thickness,uniformity_cell_size,uniformity_cell_shape,marginal_adhesion,epithelial_cell_size,bare_nuclei,bland_chromatin,normal_nucleoli,mitoses,class
0,5,1,1,1,2,1,3,1,1,0
1,5,4,4,5,7,10,3,2,1,0
2,3,1,1,1,2,2,3,1,1,0
3,6,8,8,1,3,4,3,7,1,0
4,4,1,1,3,2,1,3,1,1,0


In [101]:
test_data = int(data_set.shape[0]*0.8) 
value_data = int(data_set.shape[0]*0.7)

test_Data_set = data_set[test_data:]
value_data = data_set[value_data:test_data]

data_set = data_set[:test_data]


y_train, x_train = data_set['class'], data_set.drop('class', axis=1)
value_y, value_x = value_data['class'], value_data.drop('class', axis=1)
y_test, x_test = test_Data_set['class'], test_Data_set.drop('class', axis=1)

In [90]:
x_train.head()

Unnamed: 0,clump_thickness,uniformity_cell_size,uniformity_cell_shape,marginal_adhesion,epithelial_cell_size,bare_nuclei,bland_chromatin,normal_nucleoli,mitoses
0,5,1,1,1,2,1,3,1,1
1,5,4,4,5,7,10,3,2,1
2,3,1,1,1,2,2,3,1,1
3,6,8,8,1,3,4,3,7,1
4,4,1,1,3,2,1,3,1,1


In [91]:
def sigmoid(x):
    return 1/(1+ny.exp(-x))

In [92]:
def newton_raph(current, y, x, regression=None):
    p = ny.array(sigmoid(x.dot(current[:,0])), ndmin=2).T 
    W = ny.diag((p*(1-p))[:,0]) 
    hessian = -1*ny.dot(x.T,p)
    #hessian = -1*ny.dot(x.T, p)
    gradient = x.T.dot(y-p) 
    

    if regression:
        step = ny.dot(ny.linalg.inv(hessian + regression*ny.eye(current.shape[0])), gradient)
    else:
        step = ny.dot(ny.linalg.inv(hessian), gradient)
        
    beta = current + step
    
    return beta


In [93]:
def convergence(bt_old, bt_new, tolerence, itr):
    coef = ny.abs(bt_old - bt_new)
    return not (ny.any(coef>tolerence) and itr < max_itr)

In [94]:
def test_mod(x, y, beta):
    prob = ny.array(sigmoid(x.dot(beta)))
    
    prob = ny.greater(prob, 0.5*ny.ones((prob.shape[1],1)))
    accuracy = ny.count_nonzero(ny.equal(prob, y))/prob.shape[0] * 100
    return accuracy

In [106]:
max_itr = 10
tolerence=0.1
regression_term = 1

bt_old, beta = ny.ones((9,1)), ny.zeros((9,1))
itr_cnt = 0
coef_cnverged = False

while not coef_cnverged:
    print('Iteration: {}'.format(itr_cnt))
    print('Accuracy: {}%'.format(test_mod(x_test, y_test.to_frame(), bt_old)))
    bt_old = beta
    beta = newton_raph(beta, y_train.to_frame(), x_train, regression_term)
    itr_cnt += 1
    coef_cnverged = convergence(bt_old, beta, tolerence, itr_cnt)

Iteration: 0
Accuracy: 21.818181818181817%
Iteration: 1
Accuracy: 78.18181818181819%
Iteration: 2
Accuracy: 90.0%
Iteration: 3
Accuracy: 84.54545454545455%
Iteration: 4
Accuracy: 26.36363636363636%
Iteration: 5
Accuracy: 89.0909090909091%
Iteration: 6
Accuracy: 63.63636363636363%
Iteration: 7
Accuracy: 83.63636363636363%
Iteration: 8
Accuracy: 26.36363636363636%
Iteration: 9
Accuracy: 90.0%


  return 1/(1+ny.exp(-x))
