In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import warnings

In [2]:
# Blanket filter: silences every warning category for the rest of the session,
# including numerical and deprecation warnings raised by any library.
warnings.simplefilter("ignore")

In [3]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook form exponentiates ``-x``, which overflows to ``inf`` (and
    emits a RuntimeWarning) once ``x`` is a large negative number. This version
    only ever exponentiates a non-positive value, so every intermediate stays
    finite while the result is algebraically the same.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or numpy.ndarray
        Values in [0, 1] with the same shape as ``x``.
    """
    x = np.asarray(x)
    # exp of a non-positive number lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0 -> 1 / (1 + e^-x);  x < 0 -> e^x / (1 + e^x)  (same value, safe form)
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as they are.
    return out[()]

In [4]:
# Batch gradient descent for logistic regression
def gradientDecent(x, y, theta, alpha, num_iters):
    """Run a fixed number of full-batch gradient-descent updates on ``theta``.

    Parameters
    ----------
    x : feature matrix; must support ``np.dot(x, theta)`` and ``x.T``
    y : target vector; its length is used to average the gradient
    theta : starting weight vector (a new array is returned each step, the
        argument itself is not modified)
    alpha : learning rate
    num_iters : number of update steps to perform

    Returns
    -------
    The weight vector after ``num_iters`` updates.
    """
    n_samples = len(y)

    for _ in range(num_iters):
        # Linear scores, squashed into (0, 1) by the sigmoid
        scores = np.dot(x, theta)
        probabilities = sigmoid(scores)

        # Prediction error per sample, projected back onto the features
        residual = probabilities - y
        gradient = np.dot(x.T, residual)

        # Step against the averaged gradient
        theta = theta - (alpha / n_samples) * gradient

    return theta

In [5]:
# Load the breast-cancer dataset shipped with scikit-learn and print the
# human-readable description stored in its DESCR attribute.
dataset = load_breast_cancer()
print(dataset.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [6]:
# Separate features from labels, then split with test_size=0.3 and a fixed seed
X, y = dataset.data, dataset.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [7]:
# Show the train/test feature matrices, then the train/test label vectors

for split in (X_train, X_test, y_train, y_test):
    print(split)

[[1.149e+01 1.459e+01 7.399e+01 ... 7.431e-02 2.941e-01 9.180e-02]
 [1.049e+01 1.861e+01 6.686e+01 ... 6.528e-02 2.213e-01 7.842e-02]
 [1.225e+01 1.794e+01 7.827e+01 ... 8.211e-02 3.113e-01 8.132e-02]
 ...
 [9.436e+00 1.832e+01 5.982e+01 ... 5.052e-02 2.454e-01 8.136e-02]
 [9.720e+00 1.822e+01 6.073e+01 ... 0.000e+00 1.909e-01 6.559e-02]
 [1.151e+01 2.393e+01 7.452e+01 ... 9.653e-02 2.112e-01 8.732e-02]]
[[1.340e+01 2.052e+01 8.864e+01 ... 2.051e-01 3.585e-01 1.109e-01]
 [1.321e+01 2.525e+01 8.410e+01 ... 6.005e-02 2.444e-01 6.788e-02]
 [1.402e+01 1.566e+01 8.959e+01 ... 8.216e-02 2.136e-01 6.710e-02]
 ...
 [1.371e+01 1.868e+01 8.873e+01 ... 1.284e-01 2.849e-01 9.031e-02]
 [1.330e+01 2.157e+01 8.524e+01 ... 5.614e-02 2.637e-01 6.658e-02]
 [9.333e+00 2.194e+01 5.901e+01 ... 2.564e-02 2.435e-01 7.393e-02]]
[1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 1 0 0 0 1 1 1
 0 1 1 1 1 1 0 1 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 0 0 1 0 1
 0 1 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0

In [8]:
# Start from an all-zero weight vector with one entry per feature column, so the
# call keeps working if the number of features changes. The last two arguments
# are the learning rate (alpha) and the number of gradient-descent iterations.
optimised_theta = gradientDecent(X_train, y_train, np.zeros(X_train.shape[1]), 0.001, 750)
print(optimised_theta)

[ 2.66401007e-01  4.19615725e-01  1.56853940e+00  9.12108478e-01
  2.48813835e-03 -5.71666687e-04 -4.00201401e-03 -1.78832011e-03
  4.65843114e-03  1.98603839e-03  9.10848029e-05  2.93694851e-02
 -1.14880929e-02 -8.03458726e-01  1.73632089e-04 -3.40287042e-05
 -2.41698616e-04  1.07675425e-05  4.71287812e-04  5.96528021e-05
  2.79437200e-01  5.30260668e-01  1.58000451e+00 -1.09962425e+00
  3.16092593e-03 -2.92404638e-03 -7.77482431e-03 -1.85935065e-03
  6.64332895e-03  1.98721191e-03]


In [9]:
# Predicting the Test dataset

def prediction(x, theta):
    """Predict hard 0/1 class labels from a fitted weight vector.

    The logistic function is strictly increasing and equals 0.5 at 0, so
    "sigmoid(z) > 0.5" is the same decision as "z > 0". Thresholding the linear
    score directly avoids evaluating ``np.exp`` at all, which removes the
    overflow RuntimeWarning the explicit sigmoid produced for large negative
    scores.

    Parameters
    ----------
    x : feature matrix; must support ``np.dot(x, theta)``
    theta : weight vector

    Returns
    -------
    numpy.ndarray of ints
        1 where the linear score is strictly positive, 0 elsewhere (a score of
        exactly 0, i.e. probability 0.5, maps to 0 as before).
    """
    z = np.dot(x, theta)
    return np.where(z > 0, 1, 0)

# Label every row of the held-out test split using the fitted weights
y_pred = prediction(X_test, optimised_theta)
print(y_pred)

[0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 1 1 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 0 0 1 1
 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0
 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 0 0 0 1 1 1]


In [10]:
# Print the true test labels for visual comparison with the predicted labels
print(y_test)

[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0
 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 0 0 0 1 1 1]


In [11]:
# Evaluation: share of test rows whose predicted label equals the true label

hits = y_pred == y_test
accuracy = hits.sum() / len(X_test)

print(f"Accuracy of the Logistic Regression Model is {accuracy}")

Accuracy of the Logistic Regression Model is 0.9298245614035088
