In [12]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets

# Parameters

In [13]:
# Training design matrix: one row per observation, two numeric features per row.
X = np.array([
    [1129, 1.435],
    [1453, 1.601],
    [1656, 1.654],
    [1787, 1.803],
    [1611, 1.734],
])
# Binary class labels, aligned row-by-row with X.
y = [0, 0, 0, 1, 1]

# Observations that get classified once the coefficients have been fitted.
X_test = np.array([
    [1629, 1.635],
    [1853, 1.701],
    [1356, 1.454],
])

# Gradient-descent and decision settings.
n_iter = 3       # number of gradient-descent updates
lr = 0.01        # learning rate (step size of each update)
threshold = 0.4  # a probability above this value is classified as 1

# Standardize the continuous initial variables

In [14]:
# Wrap the raw feature matrix in a labelled DataFrame purely for display.
X_to_print = pd.DataFrame(data=X, columns=['x_1', 'x_2'])
print('The original data (i.e., matrix X):')
X_to_print

The original data (i.e., matrix X):


Unnamed: 0,x_1,x_2
0,1129.0,1.435
1,1453.0,1.601
2,1656.0,1.654
3,1787.0,1.803
4,1611.0,1.734


In [15]:
# Column-wise statistics of the training features. The sample standard
# deviation (ddof=1) is used; both values are reused later to scale the test set.
X_train_mean = X.mean(axis=0)
X_train_std = X.std(axis=0, ddof=1)

print(f'X_train_mean: \n{X_train_mean} \n')
print(f'X_train_std: \n{X_train_std} \n')

X_train_mean: 
[1527.2       1.6454] 

X_train_std: 
[2.52575137e+02 1.40535761e-01] 



In [16]:
# Z-score the training features with the statistics computed in the previous
# cell (X_train_mean / X_train_std) instead of recomputing them, so the training
# and test sets are guaranteed to be scaled by exactly the same mean and std.
X_std = (X - X_train_mean) / X_train_std
X_std_to_print = pd.DataFrame(X_std, columns=['x_1_std', 'x_2_std'])

# Sanity check: every standardised column should have mean ~0 and sample std ~1.
print('The standardised data (i.e., matrix X_std):')
for column in X_std_to_print.columns:
    print(f'{column} mean = {X_std_to_print[column].mean():.2f}')
    print(f'{column} std = {X_std_to_print[column].std():.2f}')
X_std_to_print

The standardised data (i.e., matrix X_std):
x_1_std mean = -0.00
x_1_std std = 1.00
x_2_std mean = -0.00
x_2_std std = 1.00


Unnamed: 0,x_1_std,x_2_std
0,-1.576561,-1.497128
1,-0.293774,-0.315934
2,0.509947,0.061194
3,1.028605,1.121423
4,0.331782,0.630445


# Compute coefficients

In [17]:
# 1. Start from all-zero coefficients: one weight per feature plus an intercept
n_samples, n_features = X_std.shape
weights = np.zeros(n_features)
b = 0
y = np.asarray(y)

# 2. Gradient descent over the full training set, n_iter updates in total
for step in range(n_iter):
    # Forward pass: linear score -> sigmoid probability
    linear_pred = X_std.dot(weights) + b
    probability = 1 / (1 + np.exp(-linear_pred))

    # Average gradient with respect to the weights and the intercept
    residual = probability - y
    partial_w = (1 / n_samples) * X_std.T.dot(residual)
    partial_d = (1 / n_samples) * residual.sum()

    # Move the coefficients one step against the gradient
    weights -= lr * partial_w
    b -= lr * partial_d

weights, b

(array([0.00812583, 0.01047453]), -0.0029925066216211892)

# Predict (test set)

In [18]:
# Scale the test features with the TRAINING mean/std (not the test set's own
# statistics) so they are expressed on the scale the coefficients were fitted on.
X_test_centered = X_test - X_train_mean
X_test_std = X_test_centered / X_train_std
print(f'STD X_test: \n{X_test_std} \n')

STD X_test: 
[[ 0.40304838 -0.07400252]
 [ 1.28991319  0.39562884]
 [-0.6778181  -1.36193094]] 



In [19]:
# Linear score of every standardised test row under the fitted coefficients ...
linear_pred = X_test_std.dot(weights) + b
# ... passed through the sigmoid to obtain the probability of class 1.
y_pred = 1.0 / (1.0 + np.exp(-linear_pred))
y_pred

array([0.49987686, 0.50290825, 0.49430876])

In [20]:
# Hard class labels: 1 when the predicted probability exceeds `threshold`, else 0.
[int(probability_i > threshold) for probability_i in y_pred]

[1, 1, 1]

# Test 1: reproduce the manual result with a reusable LogisticRegression class

In [21]:
class LogisticRegression:
    '''
    A class which implements a binary logistic regression model fitted with
    gradient descent.

    After `fit` has been called, `weights` holds one coefficient per feature
    and `bias` holds the intercept; both are None before that.
    '''
    def __init__(self, learning_rate=0.1, n_iterations=1000):
        '''
        :param learning_rate: float, step size of each gradient-descent update
        :param n_iterations: int, number of gradient-descent updates run by `fit`
        '''
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights, self.bias = None, None

    @staticmethod
    def _sigmoid(x):
        '''
        Private method, used to pass results of the line equation through the sigmoid function.

        :param x: float or array, prediction(s) made by the line equation
        :return: float or array, value(s) between 0 and 1
        '''
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _binary_cross_entropy(y, y_hat):
        '''
        Private method, used to calculate binary cross entropy value between actual classes
        and predicted probabilities.

        Probabilities are clipped to [eps, 1 - eps] before taking the logarithm, so a
        prediction of exactly 0 or 1 gives a large finite penalty when it is wrong
        and a negligible one when it is right (instead of log(0)).

        :param y: array, true class labels
        :param y_hat: array, predicted probabilities
        :return: float
        :raises ZeroDivisionError: if y is empty
        '''
        eps = 1e-15
        y = np.asarray(y, dtype=float)
        y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)
        total = np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
        # Plain-float division keeps the ZeroDivisionError on empty input.
        return -float(total) / len(y)

    def fit(self, X, y):
        '''
        Used to calculate the coefficient of the logistic regression model.

        :param X: array-like of shape (n_samples, n_features), features
        :param y: array-like with n_samples true values (0 or 1); a column vector is flattened
        :return: None
        '''
        # Accept lists as well as arrays, and flatten y so that a column vector
        # cannot broadcast against the 1-D probability vector.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape

        # 1. Initialize coefficients
        self.weights = np.zeros(n_features)
        self.bias = 0

        # 2. Perform gradient descent
        for _ in range(self.n_iterations):
            linear_pred = np.dot(X, self.weights) + self.bias
            probability = self._sigmoid(linear_pred)

            # Calculate derivatives (averaged over the samples)
            error = probability - y
            partial_w = (1 / n_samples) * np.dot(X.T, error)
            partial_d = (1 / n_samples) * np.sum(error)

            # Update the coefficients
            self.weights -= self.learning_rate * partial_w
            self.bias -= self.learning_rate * partial_d

    def predict_proba(self, X):
        '''
        Calculates prediction probabilities using the line equation
        passed through the sigmoid function. No threshold is applied here.

        :param X: array, features
        :return: array, prediction probabilities
        '''
        linear_pred = np.dot(X, self.weights) + self.bias
        return self._sigmoid(linear_pred)

    def predict(self, X, threshold=0.5):
        '''
        Makes class predictions by comparing the predicted probabilities with a threshold.

        :param X: array, features
        :param threshold: float, classification threshold; a probability strictly
            greater than it is classified as 1
        :return: list of int (0 or 1), one prediction per row of X
        '''
        probabilities = self.predict_proba(X)
        return [1 if p > threshold else 0 for p in probabilities]

In [22]:
# Re-run the same experiment through the class. The notebook-level `threshold`
# is passed explicitly (predict() otherwise defaults to 0.5) so the result is
# directly comparable with the manual prediction above.
model = LogisticRegression(learning_rate=lr, n_iterations=n_iter)
model.fit(X_std, np.asarray(y))
preds = model.predict(X_test_std, threshold=threshold)
preds

[1, 1, 1]