In [1]:
import numpy as np

In [2]:
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic regression cost-function (Cross-entropy loss)
def logistic_loss(X, y, weights):
    z = np.dot(X, weights)
    predictions = sigmoid(z)
    loss = -np.mean(y * np.log(predictions) + (1 - y) * np.log(1 - predictions))
    return loss

# Gradient of the logistic regression cost-function
def logistic_gradient(X, y, weights):
    predictions = sigmoid(np.dot(X, weights))
    gradient = np.dot(X.T, (predictions - y)) / y.size
    return gradient

# Function to add custom interactions between the variables
def add_custom_interactions(X, interaction_indices):
    interactions = np.array([X[:, i] * X[:, j] for i, j in interaction_indices]).T
    return np.hstack((X, interactions))

In [3]:
def adam_logistic_regression(X, y, alpha, beta1, beta2, epsilon=1e-8, max_iterations=1000, interaction_indices=None):
    # If interaction_indices is provided, augment the feature set with specified interaction terms
    if interaction_indices:
        X = add_custom_interactions(X, interaction_indices)

    # Initialize weights
    weights = np.zeros(X.shape[1])

    # Initialize first moment and second moment
    m = np.zeros(X.shape[1])
    v = np.zeros(X.shape[1])

    for t in range(1, max_iterations+1):
        # Compute the gradient
        g = logistic_gradient(X, y, weights)

        # Update moments
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * np.square(g)

        # Compute bias-corrected moments
        m_corrected = m / (1 - beta1 ** t)
        v_corrected = v / (1 - beta2 ** t)

        # Update weights
        weights -= alpha * m_corrected / (np.sqrt(v_corrected) + epsilon)

        # Print the loss value occasionally to monitor progress
        if t % 100 == 0 or t == 1:
            loss = logistic_loss(X, y, weights)
            print(f'Iteration {t}/{max_iterations} - Loss: {loss}')

    return weights

In [4]:
def generate_fake_data(n: int):
  X = np.random.normal(loc=0,scale=1,size=(n,3))
  Y = X[:,0] + (X[:,1] * X[:,2])
  Y = (Y > 1).astype(int)
  return X, Y

In [10]:
X, y = generate_fake_data(10)
print(X)

[[ 0.71671533  0.21360876  0.24112544]
 [ 0.52678433  2.48753786 -0.03087967]
 [ 0.66183741 -1.63411601 -0.75063294]
 [-0.42253157 -0.49435484 -0.79454838]
 [-1.40786671 -1.16401044  0.91079288]
 [-1.21231104 -0.56454689  1.69508072]
 [-3.37682423  0.02853939 -1.33770856]
 [ 0.6538612  -0.01890399 -0.65351566]
 [-2.34758436  0.57518585  0.71882664]
 [ 0.46121278  0.1281024   0.85440883]]


In [20]:
alpha = 0.001 #learning rate
beta1 = 0.9
beta2 = 0.999
interaction_indices = [(0, 1), (0, 2), (1, 2)]  # Interactions between X1*X2, X1*X3, and X2*X3

# Train the logistic regression model using ADAM with specified interactions
custom_weights = adam_logistic_regression(X, y, alpha, beta1, beta2, max_iterations=1000, interaction_indices=interaction_indices)
weights_without_interactions = adam_logistic_regression(X, y, alpha, beta1, beta2, max_iterations=1000)
print(weights_without_interactions)
print(custom_weights)

Iteration 1/1000 - Loss: 0.6922107392977853
Iteration 100/1000 - Loss: 0.6137356196056912
Iteration 200/1000 - Loss: 0.5585182759102753
Iteration 300/1000 - Loss: 0.520353318491456
Iteration 400/1000 - Loss: 0.49368645019302343
Iteration 500/1000 - Loss: 0.47466948121998015
Iteration 600/1000 - Loss: 0.4607526273654015
Iteration 700/1000 - Loss: 0.45026411667934296
Iteration 800/1000 - Loss: 0.442104281749302
Iteration 900/1000 - Loss: 0.4355446275138811
Iteration 1000/1000 - Loss: 0.4300991709822012
Iteration 1/1000 - Loss: 0.6925351897244209
Iteration 100/1000 - Loss: 0.6383995859226491
Iteration 200/1000 - Loss: 0.5964357537728319
Iteration 300/1000 - Loss: 0.5647682407833423
Iteration 400/1000 - Loss: 0.5410201098983268
Iteration 500/1000 - Loss: 0.5232444325698975
Iteration 600/1000 - Loss: 0.509931305525479
Iteration 700/1000 - Loss: 0.49994338600592475
Iteration 800/1000 - Loss: 0.4924354983095129
Iteration 900/1000 - Loss: 0.4867823381977229
Iteration 1000/1000 - Loss: 0.482520

In [22]:
from sklearn.metrics import balanced_accuracy_score
# Generate predictions from the custom logistic regression model
y_pred_probs_custom = sigmoid(np.dot(X_with_interactions, custom_weights))

# Convert probabilities to binary predictions using a threshold (e.g., 0.5)
y_pred_custom = (y_pred_probs_custom >= 0.5).astype(int)

# Calculate balanced accuracy for the custom model
bal_acc_custom = balanced_accuracy_score(y, y_pred_custom)
print(f"Balanced Accuracy (Custom Model): {bal_acc_custom}")

Balanced Accuracy (Custom Model): 0.8333333333333333
