In [1]:
# follow step by step from tutorial:
# https://github.com/python-engineer/MLfromscratch/tree/master/mlfromscratch

import numpy as numpy


class LogisticRegression:
    def __init__(self, lr=0.1, n_iters=50, thres=0.5):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.thres = thres

    def fit(self, X, y):
        """
        take training samples X, and labels y
        X is numpy n-D vector, size mxn
            m = # samples
            n = # features
        y is a 1-D row vector, also of size m
        involve the training step and SGD
        """
        print("fit method called")
        # unpack mxn shape
        n_samples, n_features = X.shape
        print("n_samples, n_features = ", n_samples, n_features)
        # initialize the weights
        self.weights = np.zeros(n_features)
        print("initial weights = ", self.weights)
        # optional bias
        self.bias = 0

        # gradient descent algorithm
        # iteratively update the weights
        for _ in range(self.n_iters):
            # print("--- {} iteration ---".format(_))
            # first apply the linear transformation
            # wx + b
            linear_trans = np.dot(X, self.weights) + self.bias
            # print("linear_trans = ", linear_trans)
            # print("linear_trans.shape = ", linear_trans.shape)
            # then apply the logistic/sigmoid function
            # sigmoid function outputs the estimated prob
            y_predicted = self._sigmoid(linear_trans)
            # print("y_predicted = ", y_predicted)
            # print("y_predicted.shape = ", y_predicted.shape)
            # print("y true = ", y)

            # update the weights with gradients
            # w = w - a dw
            # gradient of weights dw
            # sum of (x times difference(predicted_y - actual_y))
            # X.T transpose the mxn into nxm, to multiply the mx1 y
            dw = (1/n_samples) * np.dot(X.T, y_predicted-y)
            db = (1/n_samples) * np.sum(y_predicted-y)

            # gradient descent
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        print("predict method called")
        print("input X shape = ", X.shape)
        # first apply the linear transformation
        # wx + b
        # print(X.shape, self.weights.shape)
        # print(np.dot(X, self.weights))
        # print(self.bias)
        linear_trans = np.dot(X, self.weights) + self.bias
        # then apply the logistic/sigmoid function
        # sigmoid function outputs the estimated prob
        y_predicted = self._sigmoid(linear_trans)

        # discrete class based on the decision threshold
        y_predicted_cls = [1 if i > self.thres else 0 for i in y_predicted]

        return np.array(y_predicted_cls)
    
    def predict_proba(self, X):
        print("predict_proba method called")
        print("input X shape = ", X.shape)
        # first apply the linear transformation
        # wx + b
        # print(X.shape, self.weights.shape)
        # print(np.dot(X, self.weights))
        # print(self.bias)
        linear_trans = np.dot(X, self.weights) + self.bias
        # then apply the logistic/sigmoid function
        # sigmoid function outputs the estimated prob
        y_predicted = self._sigmoid(linear_trans)

        return np.array(y_predicted)

    def _sigmoid(self, logit):
        """sigmoid function"""
        return 1 / (1 + np.exp(-logit))



In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

In [3]:
# Test with sklearn's breast cancer dataset
bc = datasets.load_breast_cancer()
print("list of breast_cancer keys() =\n", list(bc.keys()))

# Class Distribution: 212 - Malignant, 357 - Benign
print("target_names = ", bc["target_names"])

print("DESCR = ")
print(bc["DESCR"])

list of breast_cancer keys() =
 ['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename']
target_names =  ['malignant' 'benign']
DESCR = 
.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mea

In [4]:
X, y = bc.data, bc.target
print("X.shape, X.dtype = ", X.shape, X.dtype)
print("y.shape, y.dtype = ", y.shape, y.dtype)

# Class Distribution: 212 - Malignant, 357 - Benign
np.testing.assert_equal(np.count_nonzero(y==1), 357)
# benign class is 1
np.testing.assert_equal(np.count_nonzero(y==0), 212)
# malignant class is 0

X.shape, X.dtype =  (569, 30) float64
y.shape, y.dtype =  (569,) int64


In [5]:
print("feature_names = ", bc["feature_names"])

feature_names =  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [6]:
# split the train and test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
print("X_train.shape, X_test.shape = ", X_train.shape, X_test.shape)
print("y_train.shape, y_test.shape = ", y_train.shape, y_test.shape)

X_train.shape, X_test.shape =  (455, 30) (114, 30)
y_train.shape, y_test.shape =  (455,) (114,)


In [8]:
# test our implemented log reg
clf = LogisticRegression(lr=0.1, n_iters=50)
clf.fit(X_train, y_train)

print("x_test's first 3 predicted probabilities = \n", clf.predict_proba(X_test)[:3])

# get actual predicted class
y_pred = clf.predict(X_test)

# show the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("cm =\n", cm)
target_names = ['Benign', 'Malignant']
print(classification_report(y_test, y_pred, target_names=target_names))

def regular_accuracy(y_true, y_pred):
    regular_accuracy = np.sum(y_true == y_pred) / len(y_true)
    return regular_accuracy

print("LogReg regular accuracy:", regular_accuracy(y_test, y_pred))

fit method called
n_samples, n_features =  455 30
initial weights =  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
predict_proba method called
input X shape =  (114, 30)
x_test's first 3 predicted probabilities = 
 [0. 0. 0.]
predict method called
input X shape =  (114, 30)
cm =
 [[43  0]
 [71  0]]
              precision    recall  f1-score   support

      Benign       0.38      1.00      0.55        43
   Malignant       0.00      0.00      0.00        71

    accuracy                           0.38       114
   macro avg       0.19      0.50      0.27       114
weighted avg       0.14      0.38      0.21       114

LogReg regular accuracy: 0.37719298245614036


  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# test our implemented log reg
clf = LogisticRegression(lr=0.001, n_iters=100)
clf.fit(X_train, y_train)

print("x_test's first 3 predicted probabilities = \n", clf.predict_proba(X_test)[:3])

# get actual predicted class
y_pred = clf.predict(X_test)

# show the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("cm =\n", cm)
target_names = ['Benign', 'Malignant']
print(classification_report(y_test, y_pred, target_names=target_names))

def regular_accuracy(y_true, y_pred):
    regular_accuracy = np.sum(y_true == y_pred) / len(y_true)
    return regular_accuracy

print("LogReg regular accuracy:", regular_accuracy(y_test, y_pred))

fit method called
n_samples, n_features =  455 30
initial weights =  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
predict_proba method called
input X shape =  (114, 30)
x_test's first 3 predicted probabilities = 
 [3.55583998e-120 0.00000000e+000 1.68730055e-264]
predict method called
input X shape =  (114, 30)
cm =
 [[43  0]
 [71  0]]
              precision    recall  f1-score   support

      Benign       0.38      1.00      0.55        43
   Malignant       0.00      0.00      0.00        71

    accuracy                           0.38       114
   macro avg       0.19      0.50      0.27       114
weighted avg       0.14      0.38      0.21       114

LogReg regular accuracy: 0.37719298245614036




In [10]:
# test our implemented log reg
clf = LogisticRegression(lr=0.001, n_iters=1000)
clf.fit(X_train, y_train)

print("x_test's first 3 predicted probabilities = \n", clf.predict_proba(X_test)[:3])

# get actual predicted class
y_pred = clf.predict(X_test)

# show the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("cm =\n", cm)
target_names = ['Benign', 'Malignant']
print(classification_report(y_test, y_pred, target_names=target_names))

def regular_accuracy(y_true, y_pred):
    regular_accuracy = np.sum(y_true == y_pred) / len(y_true)
    return regular_accuracy

print("LogReg regular accuracy:", regular_accuracy(y_test, y_pred))

fit method called
n_samples, n_features =  455 30
initial weights =  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]




predict_proba method called
input X shape =  (114, 30)
x_test's first 3 predicted probabilities = 
 [1.23888189e-019 1.61687073e-305 6.19260000e-126]
predict method called
input X shape =  (114, 30)
cm =
 [[43  0]
 [ 6 65]]
              precision    recall  f1-score   support

      Benign       0.88      1.00      0.93        43
   Malignant       1.00      0.92      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.96      0.95       114
weighted avg       0.95      0.95      0.95       114

LogReg regular accuracy: 0.9473684210526315


In [12]:
# test our implemented log reg
clf = LogisticRegression(lr=0.0001, n_iters=10000)
clf.fit(X_train, y_train)

print("x_test's first 3 predicted probabilities = \n", clf.predict_proba(X_test)[:3])

# get actual predicted class
y_pred = clf.predict(X_test)

# show the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("cm =\n", cm)
target_names = ['Benign', 'Malignant']
print(classification_report(y_test, y_pred, target_names=target_names))

def regular_accuracy(y_true, y_pred):
    regular_accuracy = np.sum(y_true == y_pred) / len(y_true)
    return regular_accuracy

print("LogReg regular accuracy:", regular_accuracy(y_test, y_pred))

fit method called
n_samples, n_features =  455 30
initial weights =  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
predict_proba method called
input X shape =  (114, 30)
x_test's first 3 predicted probabilities = 
 [3.66226631e-01 1.83722762e-25 1.17508136e-09]
predict method called
input X shape =  (114, 30)
cm =
 [[41  2]
 [ 2 69]]
              precision    recall  f1-score   support

      Benign       0.95      0.95      0.95        43
   Malignant       0.97      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114

LogReg regular accuracy: 0.9649122807017544
