In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import math
import pandas as pd

%matplotlib inline

In [2]:
# Load the dataset and pull out the ten "*_mean" measurement columns used as
# model inputs, plus the diagnosis label column.
data = pd.read_csv('data.csv')

radius = data['radius_mean']
texture = data['texture_mean']
perimeter = data['perimeter_mean']
area = data['area_mean']
smoothness = data['smoothness_mean']
compactness = data['compactness_mean']
concavity = data['concavity_mean']
concave_point = data['concave points_mean']
symmetry = data['symmetry_mean']
fractal = data['fractal_dimension_mean']

diagnosis = data['diagnosis']

# Build the (n_samples, 10) design matrix in one vectorised step instead of a
# Python loop over rows. The first four columns are divided by fixed constants
# so every feature ends up on a comparable, small scale for gradient descent;
# the remaining six columns are used as-is.
X_train = np.column_stack([
    radius / 100,
    texture / 100,
    perimeter / 1000,
    area / 1000,
    smoothness,
    compactness,
    concavity,
    concave_point,
    symmetry,
    fractal,
])

# Kept as a nested list for compatibility with code that still reads `feature`.
feature = X_train.tolist()

# Binary target: 1 when the diagnosis is 'M', 0 for any other value.
diagnosis = [1 if element == 'M' else 0 for element in diagnosis]
y_train = np.array(diagnosis)

print(X_train)
print(len(X_train))
print(X_train.shape)
print(y_train)
print(len(y_train))
print(y_train.shape)

[[0.1799  0.1038  0.1228  ... 0.1471  0.2419  0.07871]
 [0.2057  0.1777  0.1329  ... 0.07017 0.1812  0.05667]
 [0.1969  0.2125  0.13    ... 0.1279  0.2069  0.05999]
 ...
 [0.166   0.2808  0.1083  ... 0.05302 0.159   0.05648]
 [0.206   0.2933  0.1401  ... 0.152   0.2397  0.07016]
 [0.0776  0.2454  0.04792 ... 0.      0.1587  0.05884]]
569
(569, 10)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 0 1 1
 0 1 0 1 1 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0
 0 0 0 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 1
 0 1 0 0 0 1 0 0 1 1 0 1 1 1 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 0
 0 1 0 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0

In [3]:
def sigmoid(z):
    """
    Numerically stable logistic sigmoid, g(z) = 1 / (1 + exp(-z)).

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an ndarray with the
        same shape as ``z``, holding values in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it can never overflow, unlike the
    # naive exp(-z) which blows up for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))   (algebraically the same value)
    g = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged.
    return g[()]

In [4]:
def compute_cost(X, y, w, b):
    """
    Mean binary cross-entropy (logistic) cost over all samples.

    Args:
        X: 2-D array of shape (m, n), one row per sample.
        y: length-m array of 0/1 targets.
        w: length-n weight vector.
        b: scalar bias.

    Returns:
        Scalar cost: the average over samples of
        -y*log(f) - (1-y)*log(1-f), where f = sigmoid(X.w + b).
    """
    m, n = X.shape
    y = np.asarray(y)
    z = np.dot(X, w) + b

    # Work on the logits instead of log(sigmoid(z)): once the sigmoid
    # saturates to exactly 0 or 1 the naive form yields log(0) = -inf and
    # 0 * inf = nan. The identity below is algebraically the same loss,
    #   -y*log(f) - (1-y)*log(1-f) = max(z, 0) - y*z + log(1 + exp(-|z|)),
    # and stays finite for any finite z.
    losses = np.maximum(z, 0) - y * z + np.log1p(np.exp(-np.abs(z)))

    total_cost = np.sum(losses) / m
    return total_cost

In [5]:
def compute_gradient(X, y, w, b):
    """
    Gradient of the logistic-regression cost with respect to w and b.

    Args:
        X: 2-D array of shape (m, n), one row per sample.
        y: length-m array of 0/1 targets.
        w: length-n weight vector.
        b: scalar bias.

    Returns:
        (dj_db, dj_dw): the scalar gradient for the bias followed by the
        length-n gradient for the weights. Note the order: bias first.
    """
    m, n = X.shape

    # Prediction error for every sample at once, shape (m,).
    err = sigmoid(np.dot(X, w) + b) - y

    # Matrix products replace the per-sample / per-feature Python loops:
    # dj_dw[j] = (1/m) * sum_i err[i] * X[i, j]
    dj_dw = np.dot(X.T, err) / m
    dj_db = np.sum(err) / m

    return dj_db, dj_dw

In [6]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Run batch gradient descent for a fixed number of iterations.

    Args:
        X, y: training inputs and targets; passed through unchanged to
            ``cost_function`` and ``gradient_function``.
        w_in: initial weights.
        b_in: initial bias.
        cost_function: callable ``(X, y, w, b) -> cost``.
        gradient_function: callable ``(X, y, w, b) -> (dj_db, dj_dw)``.
        alpha: learning rate.
        num_iters: number of update steps to perform.

    Returns:
        (w, b, J_history, w_history) where ``J_history`` holds the cost after
        each of the first 100000 updates and ``w_history`` holds the weights
        at each progress report (about ten evenly spaced iterations plus the
        final one).
    """
    # Cap on recorded costs so very long runs do not grow the list unbounded.
    max_cost_history = 100000

    J_history = []
    w_history = []

    # Report roughly ten times over the run. Hoisted out of the loop because
    # it does not change; when num_iters is 0 the loop body never runs, so the
    # zero interval is never used as a modulus.
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in)

        # Rebinding (not in-place update) keeps the caller's arrays and the
        # snapshots already stored in w_history untouched.
        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db

        record = i < max_cost_history
        report = i % report_every == 0 or i == (num_iters - 1)

        # Evaluate the cost whenever it is either stored or printed, so the
        # progress line always shows the cost of the *current* parameters
        # even after the history cap has been reached.
        if record or report:
            cost = cost_function(X, y, w_in, b_in)

        if record:
            J_history.append(cost)

        if report:
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")

    return w_in, b_in, J_history, w_history

In [7]:

# Start from all-zero parameters. The weight vector is sized from the data so
# this cell keeps working if the set of feature columns changes.
initial_w = np.zeros(X_train.shape[1])
initial_b = 0

# Gradient-descent settings: number of update steps and learning rate.
iterations = 30000
alpha = 0.1

w, b, J_history, w_history = gradient_descent(
    X_train, y_train, initial_w, initial_b,
    compute_cost, compute_gradient, alpha, iterations,
)
print(f"b,w found by gradient descent: {b:0.2f},{w} ")

Iteration    0: Cost     0.69   
Iteration 3000: Cost     0.28   
Iteration 6000: Cost     0.24   
Iteration 9000: Cost     0.23   
Iteration 12000: Cost     0.22   
Iteration 15000: Cost     0.21   
Iteration 18000: Cost     0.20   
Iteration 21000: Cost     0.20   
Iteration 24000: Cost     0.19   
Iteration 27000: Cost     0.19   
Iteration 29999: Cost     0.19   
b,w found by gradient descent: -10.28,[ 0.22019661  5.77612877  0.40482236  9.41643851  1.78892862  6.99359421
 11.18388166  5.7815578   2.45638007  0.10310501] 


In [137]:
def predict(X, w, b):
    """
    Predict binary labels with a logistic-regression model.

    Args:
        X: 2-D array of shape (m, n), one row per sample.
        w: length-n weight vector.
        b: scalar bias.

    Returns:
        Float array of shape (m,) containing 1.0 where the model's
        probability is at least 0.5 and 0.0 otherwise.
    """
    m, n = X.shape

    # One matrix-vector product replaces the hand-written double loop.
    z = np.dot(X, w) + b

    # sigmoid(z) >= 0.5 exactly when z >= 0, so the logit is thresholded
    # directly; this avoids evaluating exp() at all and cannot overflow.
    p = (z >= 0).astype(float)
    return p

In [138]:
# Score the fitted model on the same data it was trained on and report the
# percentage of samples whose predicted label matches the true label.
p = predict(X_train, w, b)
print('Train Accuracy: %f' % (100 * np.mean(y_train == p)))

Train Accuracy: 91.564148


In [108]:
# Scratch check: show the zero-initialised weight vector and its shape.
initial_w = np.zeros(10)
print(initial_w, initial_w.shape, sep='\n')

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
(10,)
