In [53]:
!pip install scikit-learn



In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [65]:
data=pd.read_csv("breast-cancer.csv")

data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})
y=data.iloc[:,1]
x=data.iloc[0:,2:]
x=np.array(x)
y=np.array(y)
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)
X_train

array([[-1.15800874, -0.40948202, -1.1351621 , ..., -0.75548625,
        -0.01092926, -0.40485783],
       [-0.94499809,  0.62606285, -0.95474903, ..., -0.605352  ,
         0.10393316, -0.40596615],
       [ 0.57163772, -1.03080894,  0.50791498, ..., -0.02597997,
        -0.19859181, -0.76616896],
       ...,
       [-1.33239345, -0.22564372, -1.32422512, ..., -0.97581512,
        -0.72275273, -0.14329518],
       [-1.25173342, -0.24891439, -1.28674204, ..., -1.74506282,
        -1.60444316, -1.01720262],
       [-0.74334801,  1.07984094, -0.71872918, ..., -0.27523937,
        -1.2760337 ,  0.1869831 ]])

In [56]:
def fwb(z):
  p=1/(1+np.exp(-z))
  return p

In [81]:
def cost(X,y,w,b,lambdas):
  m,n=X.shape
  total_cost=0
  reg_cost=0
  for i in range(m):
    z=np.dot(w,X[i])+b
    fwb=1/(1+np.exp(-z))
    total_cost+=(-y[i]*np.log(fwb)-(1-y[i])*np.log(1-fwb))
  reg_cost = (lambdas / (2*m)) * np.sum(w**2)
  total_cost/=m
  total_cost+=reg_cost

  return total_cost



In [69]:
def gradient(X,y,w,b,lamdas):
  m,n=X.shape
  total_cost=0
  dw=np.zeros(n)
  db=0
  for i in range(m):
    z=np.dot(w,X[i])+b
    fwb=1/(1+np.exp(-z))
    err=fwb-y[i]
    for j in range(n):
      dw[j]=dw[j]+ err * X[i, j]
    db+=err
  dw/=2*m
  db/=2*m
  for j in range(n):
      dw[j] = dw[j] + (lamdas/m) * w[j]
  return dw,db


In [85]:
import math
def gredent_desent(X,y,w,b,alpha,iter,lambdas):
  m,n=X.shape
  hhis=[]
  phis=[]
  costs=0
  for i in range(iter):
    dw,db=gradient(X,y,w,b,lambdas)
    w=w-alpha*dw
    b=b-alpha*db
    if i<100000:      # prevent resource exhaustion
        costs =  cost(X, y, w, b, lambdas)
        hhis.append(costs)

    # Print cost every at intervals 10 times or as many iterations if < 10
    if i% math.ceil(iter/10) == 0 or i == (iter-1):
        phis.append(w)
        print(f"Iteration {i:4}: Cost {float(hhis[-1]):8.2f}   ")
  return w,b,hhis,phis


In [89]:
np.random.seed(1)
initial_w = 0.01 * (np.random.rand(30) - 0.5)
initial_b = -8

# Some gradient descent settings
iterations = 15000
alpha = 0.001

w,b, J_history,_ = gredent_desent(X_train ,y_train, initial_w, initial_b,alpha,iterations,0)

Iteration    0: Cost     2.90   
Iteration 1500: Cost     1.57   
Iteration 3000: Cost     1.03   
Iteration 4500: Cost     0.82   
Iteration 6000: Cost     0.71   
Iteration 7500: Cost     0.63   
Iteration 9000: Cost     0.58   
Iteration 10500: Cost     0.54   
Iteration 12000: Cost     0.51   
Iteration 13500: Cost     0.48   
Iteration 14999: Cost     0.46   


In [104]:
def pridict(X,w,b):
  m,n=X.shape
  yhat=np.zeros(m)
  for i in range(m):
    z=np.dot(w,X[i])*b
    g=fwb(z)
    if g<0.5:
      yhat[i]=1


  return yhat


In [107]:
yhat=pridict(X_test,w,b)
print(yhat)
y_test
print('Train Accuracy: %f'%(np.mean(yhat == yhat) * 100))

[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0.
 1. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0. 0.
 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1.
 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0.
 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.]
Train Accuracy: 100.000000
