In [1]:

# WORKSHEET 6 PART I & II – SIGMOID AND SOFTMAX REGRESSION FROM SCRATCH

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:

# Sigmoid (Logistic) function
def logistic_function(x):
    """Apply the logistic (sigmoid) function ``1 / (1 + exp(-x))`` element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Lists and tuples are converted with ``np.asarray``.

    Returns
    -------
    numpy scalar or ndarray
        Sigmoid of ``x``; a NumPy scalar for scalar input, otherwise an array
        with the same shape as the input.

    Notes
    -----
    Only ``exp(-|x|)`` is ever evaluated, so the exponential cannot overflow
    for large-magnitude negative inputs (the naive formula computes
    ``exp(+large)`` there and emits an overflow warning).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Unwrap 0-d arrays so scalar input still yields a NumPy scalar.
    return result[()] if result.ndim == 0 else result


In [3]:

# Log loss for binary classification
def log_loss(y_true, y_pred):
    """Return the element-wise binary cross-entropy between labels and probabilities.

    ``y_pred`` is clipped to ``[1e-10, 1 - 1e-10]`` before taking logs so that
    predictions of exactly 0 or 1 do not produce ``log(0)``.
    """
    eps = 1e-10
    clipped = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -(positive_term + negative_term)


In [4]:

# Average cost function
def cost_function(y_true, y_pred):
    """Return the mean of the per-element binary log loss from ``log_loss``."""
    per_sample_loss = log_loss(y_true, y_pred)
    return np.mean(per_sample_loss)


In [5]:

# Logistic regression cost using parameters
def costfunction_logreg(X, y, w, b):
    """Return the average log loss of a logistic model with weights ``w`` and bias ``b``.

    The linear scores ``np.dot(X, w) + b`` are passed through
    ``logistic_function`` and compared against ``y`` via ``cost_function``.
    """
    probabilities = logistic_function(np.dot(X, w) + b)
    return cost_function(y, probabilities)


In [6]:

# Compute gradients for logistic regression
def compute_gradient(X, y, w, b):
    """Return ``(grad_w, grad_b)`` of the average log loss for logistic regression.

    ``X.shape[0]`` is used as the number of samples when averaging.
    """
    scale = 1 / X.shape[0]
    # Prediction error, shared by both gradients.
    residual = logistic_function(np.dot(X, w) + b) - y
    grad_w = scale * np.dot(X.T, residual)
    grad_b = scale * np.sum(residual)
    return grad_w, grad_b


In [7]:

# Gradient descent for logistic regression
def gradient_descent(X, y, w, b, alpha, n_iter):
    """Run batch gradient descent for logistic regression.

    Parameters
    ----------
    X, y : array-like
        Training inputs and targets, forwarded unchanged to
        ``compute_gradient`` and ``costfunction_logreg``.
    w, b : array-like / scalar
        Initial weights and bias. They are NOT modified in place, so the
        caller's initial values can be reused (e.g. when re-running a cell).
    alpha : float
        Learning rate (step size).
    n_iter : int
        Number of update steps.

    Returns
    -------
    (w, b, cost_history)
        The updated parameters and a list with the cost after each step.
    """
    cost_history = []
    for _ in range(n_iter):
        grad_w, grad_b = compute_gradient(X, y, w, b)
        # Rebind instead of `-=`: in-place subtraction would overwrite the
        # caller's arrays and fails outright for integer-dtype parameters.
        w = w - alpha * grad_w
        b = b - alpha * grad_b
        cost_history.append(costfunction_logreg(X, y, w, b))
    return w, b, cost_history


In [8]:

# Binary prediction using threshold
def prediction(X, w, b, threshold=0.5):
    """Return integer 0/1 labels: 1 where the predicted probability is >= ``threshold``."""
    scores = np.dot(X, w) + b
    is_positive = logistic_function(scores) >= threshold
    return is_positive.astype(int)


In [9]:

# Evaluation metrics for binary classification
def evaluate_classification(y_true, y_pred):
    """Compute confusion matrix, precision, recall and F1 for 0/1 labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; entries equal to 1 count as positive and
        entries equal to 0 as negative. Inputs are converted to flat NumPy
        arrays, so lists and column vectors are compared element by element.

    Returns
    -------
    dict
        ``"confusion_matrix"`` as ``[[TN, FP], [FN, TP]]`` plus
        ``"precision"``, ``"recall"`` and ``"f1_score"``. A metric whose
        denominator is zero is reported as 0.
    """
    # Coerce and flatten: a plain list compared with `== 1` is just False
    # (all counts silently 0), and (n, 1) vs (n,) would broadcast to (n, n).
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    actual_pos, actual_neg = (y_true == 1), (y_true == 0)
    pred_pos, pred_neg = (y_pred == 1), (y_pred == 0)

    # Plain Python ints keep the returned matrix readable when displayed.
    TP = int(np.sum(actual_pos & pred_pos))
    TN = int(np.sum(actual_neg & pred_neg))
    FP = int(np.sum(actual_neg & pred_pos))
    FN = int(np.sum(actual_pos & pred_neg))

    precision = TP / (TP + FP) if (TP + FP) else 0
    recall = TP / (TP + FN) if (TP + FN) else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0
    return {"confusion_matrix": [[TN, FP], [FN, TP]], "precision": precision, "recall": recall, "f1_score": f1}


In [10]:

# Softmax function for multiclass classification
def softmax(z):
    """Convert scores to probabilities with a numerically stable softmax.

    Parameters
    ----------
    z : array-like
        Scores. For input with two or more dimensions the normalisation runs
        along axis 1 (one distribution per row of a 2-D array). A 1-D input is
        treated as a single score vector and normalised as a whole.

    Returns
    -------
    ndarray
        Array of the same shape as ``z`` whose entries sum to 1 along the
        normalised axis.
    """
    z = np.asarray(z)
    # Keep axis=1 for batched input; fall back to the only axis for a vector.
    axis = 1 if z.ndim > 1 else -1
    # Subtracting the max leaves the result unchanged but keeps exp() bounded.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)


In [11]:

# Categorical cross-entropy loss
def loss_softmax(y_true, y_pred):
    """Return the summed categorical cross-entropy ``-sum(y_true * log(y_pred))``.

    ``1e-10`` is added to ``y_pred`` before the log so that a probability of
    exactly 0 does not produce ``-inf``. The result is a sum, not an average.
    """
    log_probs = np.log(y_pred + 1e-10)
    weighted_total = np.sum(y_true * log_probs)
    return -weighted_total


In [12]:

# Cost function for softmax regression
def cost_softmax(X, y, W, b):
    """Return the categorical cross-entropy of a softmax model averaged over ``X.shape[0]``.

    Scores ``np.dot(X, W) + b`` are turned into probabilities with ``softmax``;
    the summed loss comes from ``loss_softmax``.
    """
    probabilities = softmax(np.dot(X, W) + b)
    return loss_softmax(y, probabilities) / X.shape[0]


In [13]:

# Compute gradients for softmax regression
def compute_gradient_softmax(X, y, W, b):
    """Return ``(grad_W, grad_b)`` of the averaged softmax cross-entropy.

    ``X.shape[0]`` is used as the number of samples when averaging.
    """
    sample_count = X.shape[0]
    # Difference between predicted probabilities and targets, used by both gradients.
    error = softmax(np.dot(X, W) + b) - y
    grad_W = np.dot(X.T, error) / sample_count
    grad_b = np.sum(error, axis=0) / sample_count
    return grad_W, grad_b


In [14]:

# Gradient descent for softmax regression
def gradient_descent_softmax(X, y, W, b, alpha, n_iter):
    """Run batch gradient descent for softmax regression.

    Parameters
    ----------
    X, y : array-like
        Training inputs and targets, forwarded unchanged to
        ``compute_gradient_softmax`` and ``cost_softmax``.
    W, b : array-like
        Initial weights and biases. They are NOT modified in place, so the
        caller's initial values can be reused (e.g. when re-running a cell).
    alpha : float
        Learning rate (step size).
    n_iter : int
        Number of update steps.

    Returns
    -------
    (W, b, cost_history)
        The updated parameters and a list with the cost after each step.
    """
    cost_history = []
    for _ in range(n_iter):
        grad_W, grad_b = compute_gradient_softmax(X, y, W, b)
        # Rebind instead of `-=`: in-place subtraction would overwrite the
        # caller's arrays and fails outright for integer-dtype parameters.
        W = W - alpha * grad_W
        b = b - alpha * grad_b
        cost_history.append(cost_softmax(X, y, W, b))
    return W, b, cost_history


In [15]:

# Predict class labels using softmax
def predict_softmax(X, W, b):
    """Return, for each row of scores, the index of the class with the highest softmax probability."""
    class_probs = softmax(np.dot(X, W) + b)
    return np.argmax(class_probs, axis=1)
