# Numba-compatible Optimisation

> **Created** by Mario Boley on 2025-06-09

In the future, we might want to replace `scipy.optimize` with compiled `numba` code. This could be especially useful for Shahrzad's LLTBoost algorithm.

In [34]:
import numpy as np
from numba import njit

@njit
def generate_logreg_data(n, p, seed=0):
    """Draw a synthetic logistic-regression problem.

    The feature matrix and the generating coefficient vector are sampled from
    a standard normal distribution; each label is a Bernoulli draw whose
    success probability is the logistic function of the matching row's logit.

    Parameters
    ----------
    n : int
        Number of samples (rows of the feature matrix).
    p : int
        Number of features (columns of the feature matrix).
    seed : int, default 0
        Value passed to ``np.random.seed`` before any sampling.
        NOTE(review): when this function is compiled by Numba, the seed
        presumably applies to Numba's own generator state rather than
        NumPy's global one -- confirm against the Numba documentation.

    Returns
    -------
    tuple
        ``(features, labels, coef)`` with shapes ``(n, p)``, ``(n,)`` and
        ``(p,)``; ``labels`` holds the floats 0.0 and 1.0.
    """
    np.random.seed(seed)
    features = np.random.randn(n, p)
    coef = np.random.randn(p)
    success_prob = 1 / (1 + np.exp(-(features @ coef)))

    # Start from all-zero labels and switch an entry to 1.0 whenever the
    # uniform draw for that sample falls below its success probability.
    labels = np.zeros(n)
    for idx in range(n):
        if np.random.rand() < success_prob[idx]:
            labels[idx] = 1.0
    return features, labels, coef

# Small demo problem: 10 samples, 2 features, default seed (0).
x, y, beta = generate_logreg_data(10, 2)
# Show the feature matrix, the 0/1 labels and the generating coefficients.
x, y, beta

(array([[ 1.76405235,  0.40015721],
        [ 0.97873798,  2.2408932 ],
        [ 1.86755799, -0.97727788],
        [ 0.95008842, -0.15135721],
        [-0.10321885,  0.4105985 ],
        [ 0.14404357,  1.45427351],
        [ 0.76103773,  0.12167502],
        [ 0.44386323,  0.33367433],
        [ 1.49407907, -0.20515826],
        [ 0.3130677 , -0.85409574]]),
 array([0., 0., 0., 0., 1., 1., 0., 0., 0., 0.]),
 array([-2.55298982,  0.6536186 ]))

## Numba Implementation of Raw Newton

In [35]:
import numpy as np
from numba import njit

@njit
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

@njit
def numba_rawnewton_logreg(X, y, lam, max_iter=100, tol=1e-6):
    """Fit L2-penalised logistic regression with undamped ("raw") Newton steps.

    Starting from the zero vector, every iteration solves ``H @ delta = grad``
    and updates ``w -= delta``, where

    * ``grad = X.T @ (p - y) + 2 * lam * w``
    * ``H    = X.T @ diag(p * (1 - p)) @ X + 2 * lam * I``
    * ``p    = 1 / (1 + exp(-X @ w))``

    No line search or step damping is applied.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; its second dimension is the number of coefficients.
    y : 1-D array
        Targets, combined element-wise with the predicted probabilities
        (presumably 0/1 labels, one per row of ``X`` -- verify against caller).
    lam : float
        Strength of the ``lam * (w @ w)`` penalty.
    max_iter : int, default 100
        Maximum number of Newton steps.
    tol : float, default 1e-6
        Iteration stops once the Euclidean norm of a step falls below ``tol``.

    Returns
    -------
    1-D array
        Coefficient vector after the last step taken. If ``max_iter`` is
        exhausted first, the last iterate is returned without any signal.
    """
    d = X.shape[1]
    w = np.zeros(d)
    # The penalty's Hessian contribution does not depend on w: build it once.
    ridge = 2 * lam * np.eye(d)

    for _ in range(max_iter):
        z = X @ w
        # The logistic function is written inline on purpose: a later notebook
        # cell rebinds the global name `sigmoid` to a non-jitted function, and
        # a compiled function that looked that global up again (e.g. when it is
        # specialised for a new argument type) would no longer find a jitted
        # callee.
        p = 1 / (1 + np.exp(-z))
        grad = X.T @ (p - y) + 2 * lam * w
        S = p * (1 - p)
        H = X.T @ (X * S[:, None]) + ridge
        delta = np.linalg.solve(H, grad)
        w -= delta
        if np.linalg.norm(delta) < tol:
            break
    return w

# Fit the demo problem with lam = 0.1. This is the first call of the jitted
# solver in the notebook, so compilation presumably happens here.
numba_rawnewton_logreg(x, y, 0.1)

array([-2.86478563,  0.74649916])

## SciPy Implementation with Newton Conjugate Gradient (Newton-CG)

In [36]:
from scipy.optimize import minimize

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp``, so very
    negative inputs no longer overflow inside ``np.exp`` (which emitted a
    RuntimeWarning); the result simply tends to 0 there.

    Parameters
    ----------
    z : float or array
        Logit(s); arrays are processed element-wise.

    Returns
    -------
    float or array
        Values in ``[0, 1]`` with the same shape as ``z``.
    """
    # NOTE(review): this plain-Python definition rebinds the notebook-level
    # name `sigmoid`, which an earlier cell defined as an @njit function.
    return np.exp(-np.logaddexp(0.0, -z))

def logreg_objective(w, X, y, lam):
    """L2-penalised negative log-likelihood of logistic regression.

    Computes ``sum_i [y_i * log(1 + exp(-z_i)) + (1 - y_i) * log(1 + exp(z_i))]
    + lam * (w @ w)`` with ``z = X @ w``.

    The ``log(1 + exp(.))`` terms are evaluated with ``np.logaddexp``: the
    naive expression overflows to ``inf`` for large ``|z|``, and ``inf * 0``
    then turns the whole objective into ``nan`` even for samples that are
    classified perfectly.

    Parameters
    ----------
    w : 1-D array
        Coefficient vector.
    X : 2-D array
        Feature matrix with one column per coefficient.
    y : 1-D array
        Targets weighting the two log-terms (presumably 0/1 labels -- verify
        against caller).
    lam : float
        Penalty strength multiplying ``w @ w``.

    Returns
    -------
    float
        Objective value.
    """
    z = X @ w
    # logaddexp(0, t) == log(1 + exp(t)) without overflow for large t.
    neg_log_likelihood = np.sum(y * np.logaddexp(0.0, -z)
                                + (1 - y) * np.logaddexp(0.0, z))
    reg = lam * np.dot(w, w)
    return neg_log_likelihood + reg

def logreg_grad(w, X, y, lam):
    """Gradient of the penalised logistic objective with respect to ``w``.

    Returns ``X.T @ (sigmoid(X @ w) - y) + 2 * lam * w``.
    """
    residual = sigmoid(X @ w) - y
    return X.T @ residual + 2 * lam * w

def logreg_hess(w, X, y, lam):
    """Hessian of the penalised logistic objective with respect to ``w``.

    Returns ``X.T @ diag(p * (1 - p)) @ X + 2 * lam * I`` with
    ``p = sigmoid(X @ w)``. ``y`` is accepted for signature parity with the
    objective and gradient but is not used.
    """
    prob = sigmoid(X @ w)
    curvature = prob * (1 - prob)
    n_features = X.shape[1]
    # Scale each row of X by its curvature weight instead of building diag(.).
    weighted_rows = X * curvature[:, None]
    return X.T @ weighted_rows + 2 * lam * np.eye(n_features)

def scipy_newtoncg_logreg(X, y, lam, max_iter=None, tol=1e-6):
    """Fit L2-penalised logistic regression with SciPy's Newton-CG method.

    Minimises ``logreg_objective`` from a zero start, supplying the analytic
    gradient (``logreg_grad``) and Hessian (``logreg_hess``).

    Parameters
    ----------
    X : 2-D array
        Feature matrix; its second dimension sets the number of coefficients.
    y : 1-D array
        Targets, forwarded unchanged to the objective, gradient and Hessian.
    lam : float
        Penalty strength, forwarded unchanged as well.
    max_iter : int or None, default None
        Iteration cap passed as the ``maxiter`` option; ``None`` keeps
        SciPy's own default.
    tol : float, default 1e-6
        Passed as the ``xtol`` option of Newton-CG.

    Returns
    -------
    1-D array
        The solution vector ``res.x`` reported by ``minimize``.

    Warns
    -----
    RuntimeWarning
        If the optimiser reports ``success == False``; the last iterate is
        still returned.
    """
    options = {'xtol': tol, 'disp': False}
    if max_iter is not None:
        options['maxiter'] = max_iter

    res = minimize(fun=logreg_objective,
                   x0=np.zeros(X.shape[1]),
                   args=(X, y, lam),
                   method='Newton-CG',
                   jac=logreg_grad,
                   hess=logreg_hess,
                   options=options)

    if not res.success:
        # Imported lazily so the common (converged) path pays nothing for it.
        import warnings
        warnings.warn(f"Newton-CG did not converge: {res.message}",
                      RuntimeWarning, stacklevel=2)
    return res.x

# Same data and penalty as the Numba fit, to compare the coefficients.
scipy_newtoncg_logreg(x, y, 0.1)

array([-2.86478563,  0.74649916])

In [37]:
# Time the SciPy Newton-CG solver on the 10 x 2 demo problem.
%timeit scipy_newtoncg_logreg(x, y, 0.1)

264 μs ± 2.61 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [38]:
# Time the Numba Newton solver on the same problem. It was already called
# once above, so compilation time should not be part of this measurement
# -- TODO confirm.
%timeit numba_rawnewton_logreg(x, y, 0.1)

20.8 μs ± 170 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## Medium Size Problem

In [39]:
# Medium problem: 200 samples, 10 features, default seed (0).
x_med, y_med, beta_med = generate_logreg_data(200, 10)
# Show the generating coefficients for comparison with the fitted ones below.
beta_med

array([-1.53292105, -1.71197016,  0.04613506, -0.95837448, -0.08081161,
       -0.70385904, -0.7707843 , -0.48084534,  0.70358555,  0.92914515])

In [40]:
# Numba Newton fit on the medium problem with a weaker penalty (lam = 0.01).
numba_rawnewton_logreg(x_med, y_med, 0.01)

array([-2.11985398, -1.95074541, -0.06884512, -0.94531301, -0.06184398,
       -0.83223477, -0.90754435, -0.48258231,  1.00053492,  0.84389168])

In [41]:
# SciPy Newton-CG on the same inputs; compare with the Numba coefficients.
scipy_newtoncg_logreg(x_med, y_med, 0.01)

array([-2.11985398, -1.95074541, -0.06884512, -0.94531301, -0.06184398,
       -0.83223477, -0.90754435, -0.48258231,  1.00053492,  0.84389168])

In [42]:
# Time the SciPy Newton-CG solver on the 200 x 10 problem.
%timeit scipy_newtoncg_logreg(x_med, y_med, 0.01)

608 μs ± 6.16 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [43]:
# Time the Numba Newton solver on the 200 x 10 problem.
%timeit numba_rawnewton_logreg(x_med, y_med, 0.01)

56.9 μs ± 50.9 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
