In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale

In [2]:
sepsis = np.genfromtxt('sepsis.csv', delimiter=',', skip_header=1)

In [155]:
# initialize data
X = sepsis[:,:2]
y = sepsis[:,3]
y = np.reshape(y,(110204,1))
# get rid of rows containing missing values
y = y[~np.isnan(X).any(axis=1)]
X = X[~np.isnan(X).any(axis=1)]
#print(y)

In [156]:
# split into train, test
[X_train, X_test, y_train, y_test] = train_test_split(X, y, test_size = 0.2)
X_train = scale(X_train)
X_test = scale(X_test)


In [157]:
def σ(φ, w):
    '''
    Computes the sigmoid activation function, which maps input values to the range (0, 1), representing their probability of success.
    
    :param φ: numpy array
        Input data vector used in the computation. 
    :param w: numpy array
        Weight vector applied to the input φ. These weights scale the input φ to produce different probabilities.
    :return: float
        The probability result of the sigmoid activation function, between 0 and 1.
    '''
    return 1/(1 + np.exp((-φ.T.dot(w)).item()))

In [158]:
def binary_cross_entropy(Φ, y, w):
    '''
    Computes the binary cross-entropy loss for a logistic regression model.

    :param Φ: numpy array of shape (n, d)
        Input feature matrix, where `n` is the number of data points and `d` is the number of features.
        Each row `Φ[i]` represents the features for the i-th data point.
    :param y: numpy array of shape (n,1)
        True binary labels for the data points, where each `y[i]` is either 0 or 1.
    :param w: numpy array of shape (d, 1)
        Weight vector used in the model, where `d` is the number of features.
    :return: float
        The average binary cross-entropy loss over all data points.
    '''
    sum = 0
    n = Φ.shape[0]
    d = Φ.shape[1]
    for i in range(n):
        φ = np.reshape(Φ[i], (d,1))
        y_i = y[i]
        u = φ.T.dot(w)
        sum += (np.log(np.exp(u) + 1) - y_i * u)

    return (1/n) * sum

In [159]:
def binary_cross_entropy_grad(Φ, y, w):
    '''
    Computes the gradient of the binary cross entropy loss function with respect to the weight vector `w`.

    :param Φ: numpy array of shape (n, d)
        Input feature matrix, where `n` is the number of data points and `d` is the number of features.
        Each row `Φ[i]` represents a single data point.
    :param y: numpy array of shape (n,1)
        True labels for each data point, where each `y[i]` is either 0 or 1.
    :param w: numpy array of shape (d, 1)
        Weight vector used for the logistic regression model.
    :return: numpy array of shape (d, 1)
        The gradient of the logistic loss function with respect to `w`.
    '''
    sum = 0
    n = Φ.shape[0]
    d = Φ.shape[1]
    for i in range(n):
        φ = np.reshape(Φ[i], (d,1))
        y_i = y[i]
        sum += (σ(φ, w) - y_i) * φ

    return (1/n) * sum

In [160]:
def binary_cross_entropy_grad_l1(Φ, y, w, λ):
    '''
    Computes the gradient of the binary cross entropy loss function with L1 regularization.

    :param Φ: numpy array of shape (n, d)
        Input feature matrix, where `n` is the number of data points and `d` is the number of features.
    :param y: numpy array of shape (n,1)
        True labels for each data point, where each `y[i]` is either 0 or 1.
    :param w: numpy array of shape (d, 1)
        Weight vector used for the logistic regression model.
    :param λ: float
        Regularization strength (penalty term). Controls the influence of L1 regularization.
    :return: numpy array of shape (d, 1)
        The gradient of the logistic loss function with L1 regularization with respect to `w`.
    '''
    return binary_cross_entropy_grad(Φ, y, w) + λ * np.sign(w)

In [161]:
# initialize feature maps (just use linear with bias term for simplicity)
Φ_train = np.hstack((X_train, np.ones((X_train.shape[0],1))))
Φ_test = np.hstack((X_test, np.ones((X_test.shape[0],1))))

In [162]:
def gd(Φ, y, w, λ, η, iterations):
    '''
    Performs gradient descent optimization with L1-regularized binary cross-entropy loss.

    :param Φ: numpy array of shape (n, d)
        Input feature matrix, where `n` is the number of data points and `d` is the number of features.
        Each row `Φ[i]` corresponds to the features of one data point.
    :param y: numpy array of shape (n,1)
        True binary labels (0 or 1) for each data point.
    :param w: numpy array of shape (d, 1)
        Initial weight vector for the logistic regression model.
    :param λ: float
        Regularization strength (penalty term) for L1 regularization.
    :param η: float
        Learning rate, which controls the step size for weight updates.
    :param iterations: int
        Number of iterations to run the gradient descent optimization.
    :return: numpy array of shape (d, 1)
        Optimized weight vector after performing gradient descent.
    '''
    for i in range(iterations):
        u = binary_cross_entropy_grad_l1(Φ, y, w, λ)
        w = w - η * u
    return w

In [163]:
# Finally train on real sepsis data
w = np.zeros((Φ_train.shape[1],1))
w = gd(Φ_train, y_train, w, λ=0.001, η=0.001, iterations=10000)

In [167]:
def batch_predict(Φ, w):
    '''
    Computes binary predictions for a batch of data points using the sigmoid function.

    This function applies the sigmoid function to the dot product of input features and weights 
    for all data points in the batch. The resulting probabilities are rounded to 0 or 1, 
    producing binary predictions suitable for binary classification tasks.

    :param Φ: numpy array of shape (n, d)
        Input feature matrix, where `n` is the number of data points and `d` is the number of features.
        Each row `Φ[i]` corresponds to the features of one data point.
    :param w: numpy array of shape (d, 1)
        Weight vector used for prediction.
    :return: numpy array of shape (n, 1)
        Binary predictions (0 or 1) for each data point.
    '''
    yhats = []
    n = Φ.shape[0]
    d = Φ.shape[1]
    for i in range(n):
        φ = np.reshape(Φ[i], (d,1))
        yhats.append(np.round(σ(φ, w)))
        
    yhats = np.array(yhats)
    yhats = np.reshape(yhats,(n,1))
    return yhats


In [168]:
print("Accuracy score on test data:", accuracy_score(batch_predict(Φ_test, w), y_test))

Accuracy score on test data: 0.9245497028265505
