In [1]:
from sklearn import datasets
import numpy as np

In [2]:
# Load the Wisconsin diagnostic breast-cancer dataset that ships with scikit-learn.
dataset = datasets.load_breast_cancer()

In [3]:
# Display the dataset description as a list with one entry per line of text.
dataset.DESCR.split('\n')

['.. _breast_cancer_dataset:',
 '',
 'Breast cancer wisconsin (diagnostic) dataset',
 '--------------------------------------------',
 '',
 '**Data Set Characteristics:**',
 '',
 '    :Number of Instances: 569',
 '',
 '    :Number of Attributes: 30 numeric, predictive attributes and the class',
 '',
 '    :Attribute Information:',
 '        - radius (mean of distances from center to points on the perimeter)',
 '        - texture (standard deviation of gray-scale values)',
 '        - perimeter',
 '        - area',
 '        - smoothness (local variation in radius lengths)',
 '        - compactness (perimeter^2 / area - 1.0)',
 '        - concavity (severity of concave portions of the contour)',
 '        - concave points (number of concave portions of the contour)',
 '        - symmetry',
 '        - fractal dimension ("coastline approximation" - 1)',
 '',
 '        The mean, standard error, and "worst" or largest (mean of the three',
 '        worst/largest values) of these features w

In [4]:
# Feature matrix and class labels, pulled out of the loaded dataset object.
x, y = dataset.data, dataset.target

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Fix the seed so the train/test split -- and every metric computed from it --
# is reproducible across kernel restarts. The default test fraction is kept.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [7]:
import sklearn.preprocessing as preprocessing

In [8]:
# Scaler object used to normalise the features before training.
scaler = preprocessing.StandardScaler()

In [9]:
# Fit the scaler on the training split only; the same fitted scaler is later
# applied to the test split, so test data never influences the scaling.
scaler.fit(x_train)

StandardScaler()

In [10]:
# Normalised copy of the training features (the raw x_train is left untouched).
x_train_norma = scaler.transform(
    x_train
)

In [11]:
# Sanity check: (number of training samples, number of features).
x_train.shape

(426, 30)

# Append a column of 1s as the last feature (intercept term)

In [12]:
# Append a constant column of ones to the raw training features so that the
# last weight of a linear model acts as the intercept.
x_train_with_1s = np.hstack(
    (x_train, np.ones((x_train.shape[0], 1)))
)

In [13]:
# Append a constant column of ones to the normalised training features so that
# the last weight of the model acts as the intercept.
x_train_norma_with_1s = np.hstack(
    (x_train_norma, np.ones((x_train_norma.shape[0], 1)))
)

In [14]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : float or array_like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z``: a scalar for scalar input, otherwise an
        array with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1], so it cannot overflow the way exp(-z) does for
    # large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: the equivalent exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays as they are.
    return out[()]

In [21]:
def fit(x,y,lr,epochs,log_every=50):
    """Train logistic-regression weights with batch gradient descent.

    Starts from an all-zero weight vector and applies ``train`` once per epoch,
    printing the value of ``cost`` periodically.

    Parameters
    ----------
    x : numpy.ndarray
        2-D design matrix; one weight is learned per column.
    y : array_like
        Labels, one per row of ``x``.
    lr : float
        Learning rate passed to ``train``.
    epochs : int
        Number of gradient-descent steps.
    log_every : int, optional
        Print the cost every ``log_every`` epochs (default 50, the original
        behaviour). A falsy value (0 or None) disables printing.

    Returns
    -------
    numpy.ndarray
        Learned weight vector of length ``x.shape[1]``.
    """
    m_and_c = np.zeros(x.shape[1])
    for epoch in range(epochs):
        m_and_c = train(x,y,m_and_c,lr)
        # Guard against log_every == 0, which would otherwise divide by zero.
        if log_every and epoch % log_every == 0:
            print(epoch,": ",cost(x,y,m_and_c))
    return m_and_c

In [16]:
def train(x,y,m_and_c,lr):
    """Perform one batch gradient-descent step for logistic regression.

    The gradient of the mean cross-entropy loss, ``-(1/N) * sum_i (y_i - h_i) * x_i``
    with ``h_i = sigmoid(x_i . w)``, is computed over all rows at once.

    Parameters
    ----------
    x : array_like
        2-D design matrix, one row per sample.
    y : array_like
        Labels, one per row of ``x``.
    m_and_c : array_like
        Current weight vector, one entry per column of ``x``. It is not
        modified; the updated weights are returned as a new array.
    lr : float
        Learning rate (step size).

    Returns
    -------
    numpy.ndarray
        Updated float weight vector.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # Work on a float view/copy so integer-typed weights are accepted and the
    # caller's array is never updated in place.
    weights = np.asarray(m_and_c, dtype=float)
    N = y.shape[0]
    if N == 0:
        # No samples means no gradient; hand back an independent copy.
        return weights.copy()
    h = sigmoid(x @ weights)
    grad = -(x.T @ (y - h)) / N
    return weights - lr * grad

In [17]:
def cost(x,y,m_and_c):
    """Total (summed, not averaged) binary cross-entropy of a logistic model.

    Parameters
    ----------
    x : array_like
        2-D design matrix, one row per sample.
    y : array_like
        Binary labels (0 or 1), one per row of ``x``.
    m_and_c : array_like
        Weight vector, one entry per column of ``x``.

    Returns
    -------
    numpy.float64
        ``sum_i [-y_i*log(h_i) - (1 - y_i)*log(1 - h_i)]`` with
        ``h_i = sigmoid(x_i . m_and_c)``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    z = x @ np.asarray(m_and_c, dtype=float)
    # Algebraically, -y*log(s(z)) - (1-y)*log(1-s(z)) == log(1 + exp(z)) - y*z.
    # Evaluating it in this form never takes log(0), so saturated predictions
    # give a finite loss instead of NaN/inf.
    return np.sum(np.logaddexp(0.0, z) - y * z)

In [22]:
# Train on the normalised, bias-augmented training set; the step size and the
# number of epochs are spelled out by keyword.
m_and_c = fit(
    x_train_norma_with_1s,
    y_train,
    lr=0.01,
    epochs=5000,
)

0 :  286.4745037388121
50 :  137.58097999041325
100 :  104.25158468648553
150 :  88.14639096674362
200 :  78.26338879008395
250 :  71.44496259961944
300 :  66.39912353358592
350 :  62.48487776828581
400 :  59.34312903221563
450 :  56.7551424805109
500 :  54.57924655189242
550 :  52.719251416194936
600 :  51.1073493410941
650 :  49.694249707322086
700 :  48.443191380737794
750 :  47.326153866624544
800 :  46.3213746370275
850 :  45.411673210646946
900 :  44.583290358327886
950 :  43.8250658176962
1000 :  43.12784412279052
1050 :  42.48403761253687
1100 :  41.88729990826783
1150 :  41.332278424855694
1200 :  40.81442434100798
1250 :  40.32984495736866
1300 :  39.87518774308096
1350 :  39.4475483626957
1400 :  39.044397054870636
1450 :  38.66351920146657
1500 :  38.30296697486352
1550 :  37.96101971111838
1600 :  37.63615121322572
1650 :  37.32700260097907
1700 :  37.03235963232661
1750 :  36.75113365400127
1800 :  36.48234551664389
1850 :  36.22511192594806
1900 :  35.978633806895466
195

In [23]:
def predict(x,m_and_c):
    """Predict a 0/1 class label for every row of ``x``.

    Parameters
    ----------
    x : array_like
        2-D design matrix, one row per sample.
    m_and_c : array_like
        Weight vector, one entry per column of ``x``.

    Returns
    -------
    numpy.ndarray
        Integer array with one label per row: 1 where
        ``sigmoid(x_i . m_and_c) >= 0.5``, otherwise 0. An empty ``x`` gives an
        empty integer array.
    """
    x = np.asarray(x, dtype=float)
    # One matrix-vector product replaces the per-row Python loop.
    probabilities = sigmoid(x @ m_and_c)
    return (probabilities >= 0.5).astype(int)

In [24]:
# Predictions on the training data itself, so any score computed from y_pred
# is a training score rather than an estimate of generalisation.
y_pred = predict(x_train_norma_with_1s,m_and_c)

In [36]:
def accuracy(y,y_pred):
    """Fraction of predictions that equal the true labels.

    Parameters
    ----------
    y : array_like
        True labels.
    y_pred : array_like
        Predicted labels; must have exactly the same shape as ``y``.

    Returns
    -------
    numpy.float64
        Number of matching entries divided by the number of samples
        (``y.shape[0]``).

    Raises
    ------
    ValueError
        If ``y`` and ``y_pred`` differ in shape. Without this check NumPy would
        silently broadcast (e.g. ``(n, 1)`` against ``(n,)``) and return a
        meaningless score.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    if y.shape != y_pred.shape:
        raise ValueError(
            "y and y_pred must have the same shape, got "
            + str(y.shape) + " and " + str(y_pred.shape)
        )
    N = y.shape[0]
    return (y == y_pred).sum()/N

In [37]:
# Training accuracy. Arguments follow the accuracy(y, y_pred) signature:
# ground truth first, predictions second.
accuracy(y_train, y_pred)

0.9859154929577465

In [38]:
# Scale the test features with the scaler that was fitted on the training split.
x_test_norma = scaler.transform(
    x_test
)

# Append a column of 1s as the last feature (intercept term)

In [39]:
# Append a constant column of ones to the normalised test features, matching
# the layout the weights were trained on.
x_test_norma_with_1s = np.hstack(
    (x_test_norma, np.ones((x_test_norma.shape[0], 1)))
)

In [40]:
# Class predictions for the held-out test split, using the weights learned on
# the training split.
y_test_pred = predict(x_test_norma_with_1s,m_and_c)

In [41]:
# Test accuracy. Arguments follow the accuracy(y, y_pred) signature:
# ground truth first, predictions second.
accuracy(y_test, y_test_pred)

0.9790209790209791