**Logistic Regression Coding**

*Using Gradient Descent Method*

In [12]:
import numpy as np

In [17]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def train_logistic_regression_gd(X, y, learning_rate, max_epochs):
    n, p = X.shape
    y = y.reshape((n, 1))
    # initialize weights and bias
    weights = np.zeros(shape=(p, 1))
    bias = np.zeros(shape=(1,1))
    # iterate epochs
    for i in range(max_epochs):
        y_pred = sigmoid(bias + np.dot(X, weights))
        # update gradients
        grad_weights = np.dot(X.T, (y_pred - y)) / n
        grad_bias = np.sum(y_pred - y)
        # update parameters
        weights -= learning_rate * grad_weights
        bias -= learning_rate * grad_bias
    return bias, weights

In [26]:
X = np.array([[1, 2], [2,3], [2, -1]])
y = np.array([1, 1, 0])
bias_hat, weights_hat = train_logistic_regression_gd(X, y, 0.01, 1000)
print(weights_hat)
print(bias_hat)

[[-0.41304598]
 [ 1.81750381]]
[[-0.08982288]]


**Serving Function**

In [27]:
def serve_logistic_regression(newx, weights, bias):
    y_pred = sigmoid(np.dot(newx, weights) + bias)
    return 1 if y_pred > 0.5 else 0

In [32]:
newx = np.array([-1,3])
serve_logistic_regression(newx, weights_hat, bias_hat)

1

**Multi-class Classification (Multinomial Logistic Regression)**

*(1) using softmax*
*(2) using cross-entropy*

In [80]:
def train_multinomial_logistic_regression_gd(X, y, k, learning_rate, max_epochs):
    n, p = X.shape
    # translate y using one hot encoding
    y_onehot = np.zeros((y.size, k))
    y_onehot[np.arange(y.size), y] = 1
    # initialize weights and bias
    weights = np.zeros(shape=(p, k))
    bias = np.zeros(shape=(1,k))
    # iterate epochs
    for i in range(max_epochs):
        # update prediction
        numerator = np.exp(bias + np.dot(X, weights))
        denominator = np.sum(numerator, axis=1)
        prob_pred = numerator / denominator.reshape((n,1))
#         y_pred = np.argmax(prob_pred, axis=1).reahspe((n,1))
        # update gradients
        grad_weights = np.dot(X.T, (prob_pred - y_onehot)) / n
        grad_bias = np.sum(prob_pred - y_onehot, axis = 0) / n
        # update parameters
        weights -= learning_rate * grad_weights
        bias -= learning_rate * grad_bias
    return bias, weights

In [81]:
X = np.array([[1, 2], [2,3], [2, -1], [-2,-3]])
y = np.array([1, 1, 0, 2])
k = 3

In [82]:
bias_hat, weights_hat = train_multinomial_logistic_regression_gd(X, y, 3, 0.01, 1000)
print(weights_hat)
print(bias_hat)

[[ 0.86002799  0.09827324 -0.95830122]
 [-0.64945917  1.32497387 -0.6755147 ]]
[[-0.0432596  0.1301616 -0.086902 ]]
