In [206]:
import numpy as np
import pandas as pd
from numba import jit, njit
from scipy.special import expit, logit

# Read the space-separated data file; it has no header row.
df = pd.read_csv('spam.data', sep=' ', header=None, engine='pyarrow')

# All columns but the last are used as features; the last one is the label.
X = df.iloc[:, :-1].to_numpy()
Y = df.iloc[:, -1].to_numpy(dtype=np.int8)

# Column-wise standardisation is currently switched off, so the raw
# features are used as-is. Re-enable with:
# X_scale = (X-X.mean(axis=0))/X.std(axis=0)
X_scale = X

In [207]:
@njit()
def sigmoid(x: np.ndarray):
    """Evaluate the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    The value is formed as exp(-log(1 + exp(-x))), with the inner
    log-sum computed by ``np.logaddexp`` rather than by exponentiating
    ``-x`` directly.
    """
    log_one_plus_exp_neg_x = np.logaddexp(0, -x)
    return np.exp(-log_one_plus_exp_neg_x)

@njit()
def confusion_matrix(true, pred):
    '''Count how often each true class is predicted as each class.

    Parameters
    ----------
    true : 1-D integer array of ground-truth labels.
    pred : 1-D integer array of predicted labels, same length as ``true``.

    Returns
    -------
    A (K, K) ``np.int32`` array whose entry [i, j] is the number of samples
    with true label i and predicted label j. K is one more than the largest
    label found in either array, because labels are used directly as row and
    column indices (they are expected to be the integers 0..K-1).
    Empty input gives a (0, 0) array.

    Raises
    ------
    ValueError
        If the arrays differ in length or contain a negative label.

    This function avoids the dependency on sklearn; for labels 0..K-1 it is
    intended to give the same counts as ``sklearn.metrics.confusion_matrix``.
    '''
    n_samples = len(true)
    if len(pred) != n_samples:
        raise ValueError("true and pred must have the same length")
    if n_samples == 0:
        return np.zeros((0, 0), dtype=np.int32)

    if min(int(true.min()), int(pred.min())) < 0:
        raise ValueError("labels must be non-negative integers")

    # Size the matrix from the labels of BOTH arrays. Counting the distinct
    # values of `true` alone under-sizes it whenever `pred` holds a class that
    # `true` lacks (or labels are not contiguous), and compiled nopython code
    # does not bounds-check array indexing by default.
    K = max(int(true.max()), int(pred.max())) + 1
    result = np.zeros((K, K), dtype=np.int32)

    for i in range(n_samples):
        result[int(true[i]), int(pred[i])] += 1

    return result

In [208]:
@njit()
def discriminant_func(X: np.ndarray, S_inv: np.ndarray, mu_j: np.ndarray, pi_j: float):
    """Linear discriminant score of every row of X for one class.

    Evaluates  X S^-1 mu_j  -  1/2 mu_j^T S^-1 mu_j  +  log(pi_j), where
    ``S_inv`` is the inverse covariance, ``mu_j`` the class mean (passed as
    a column vector by the caller in this file) and ``pi_j`` the class prior.
    """
    linear_term = X @ S_inv @ mu_j
    half_mu_row = 1/2 * mu_j.T
    quadratic_term = half_mu_row @ S_inv @ mu_j
    log_prior = np.log(pi_j)
    return linear_term - quadratic_term + log_prior

@njit()
def qda_func(X: np.ndarray, S: np.ndarray, mu_j: np.ndarray, pi_j: float):
    """Quadratic discriminant score of every row of X for one class.

    Parameters
    ----------
    X    : (n, p) sample matrix.
    S    : (p, p) covariance matrix of the class.
    mu_j : class mean, broadcastable against the rows of X.
    pi_j : prior probability of the class.

    Returns
    -------
    (delta_j, r_j2) where ``r_j2`` holds the squared Mahalanobis distance
    (x - mu_j)^T S^-1 (x - mu_j) of each row and
    ``delta_j = -1/2 log|S| - 1/2 r_j2 + log(pi_j)``.
    """
    S_inv = np.linalg.inv(S)

    # Centre once and reuse for both factors of the quadratic form.
    centered = X - mu_j
    r_j2 = np.sum((centered @ S_inv) * centered, axis=1)

    # log|S| via slogdet: the plain determinant is a product of p terms and
    # over-/underflows in float64 well before its logarithm does.
    sign, log_det = np.linalg.slogdet(S)
    if sign < 0:
        # Keep the previous result for a negative determinant (log of a
        # negative number), which signals an invalid covariance matrix.
        log_det = np.nan

    return -1/2*log_det - 1/2*r_j2 + np.log(pi_j), r_j2

def discrinimant_analysis(X: np.ndarray, Y: np.ndarray, atype: str='LDA'):
    """Fit a two-class Gaussian discriminant rule on (X, Y) and classify X.

    Parameters
    ----------
    X     : (n, p) feature matrix.
    Y     : n binary labels (0 or 1); an (n, 1) column is flattened.
    atype : ``'LDA'`` uses one pooled covariance matrix for both classes;
            any other value selects QDA with one covariance per class.

    Returns
    -------
    1-D integer array with the predicted label (0 or 1) of every row of X.

    Raises
    ------
    ValueError
        If one of the two classes has no samples, since its mean and
        covariance would be undefined.

    Notes
    -----
    Priors are the class frequencies; covariances use the divisor n_k.
    """
    # Accept column-vector labels; boolean masks below need a 1-D array.
    Y = np.asarray(Y).ravel()

    n = len(Y)
    n_0 = (1-Y).sum()
    n_1 = Y.sum()
    if n_0 == 0 or n_1 == 0:
        raise ValueError("Y must contain samples of both class 0 and class 1")

    pi_0 = n_0/n
    pi_1 = n_1/n

    X_0 = X[Y == 0, :]
    X_1 = X[Y == 1, :]

    mu_0 = X_0.mean(axis=0, keepdims=True)
    mu_1 = X_1.mean(axis=0, keepdims=True)

    S_0 = ((X_0 - mu_0).T @ (X_0 - mu_0))/n_0
    S_1 = ((X_1 - mu_1).T @ (X_1 - mu_1))/n_1

    if atype == 'LDA':
        # Pooled covariance: class covariances weighted by class size.
        S = (n_0*S_0 + n_1*S_1)/n
        S_inv = np.linalg.inv(S)

        delta_0 = discriminant_func(X, S_inv, mu_0.T, pi_0)
        delta_1 = discriminant_func(X, S_inv, mu_1.T, pi_1)

        return np.squeeze(delta_1 > delta_0)*1
    else:
        _, r_02 = qda_func(X, S_0, mu_0, pi_0)
        _, r_12 = qda_func(X, S_1, mu_1, pi_1)

        # log(|S_0| / |S_1|) as a difference of log-determinants. Forming the
        # two determinants first and dividing them over-/underflows in
        # float64 for moderately many features and turns the rule into NaN.
        _, logdet_0 = np.linalg.slogdet(S_0)
        _, logdet_1 = np.linalg.slogdet(S_1)

        # Class 1 wins when delta_1 > delta_0, rearranged in terms of the
        # squared Mahalanobis distances.
        return (r_12 < r_02 + 2*np.log(pi_1/pi_0) + (logdet_0 - logdet_1))*1

In [209]:
# Y_lda = discrinimant_analysis(X, Y)
# lda_mat = confusion_matrix(Y, Y_lda)
# lda_mat

In [210]:
# Y_qda = discrinimant_analysis(X, Y, atype='QDA')
# qda_mat = confusion_matrix(Y, Y_qda)
# qda_mat

In [211]:
def logistic_func(X: np.ndarray, beta_j: np.ndarray):
    """Predicted probabilities of a logistic model with an intercept.

    A column of ones is inserted in front of ``X`` so that the first entry
    of ``beta_j`` acts as the intercept; the logistic function ``expit`` is
    then applied to the resulting linear predictor.
    """
    design = np.insert(X, 0, 1, axis=1)
    linear_predictor = design @ beta_j
    return expit(linear_predictor)

def logistic_regression(X: np.ndarray, Y: np.ndarray, beta_j: np.ndarray, p_j: np.ndarray):
    """Perform one Newton (IRLS) update of the logistic-regression coefficients.

    Parameters
    ----------
    X      : (n, d) design matrix (including any intercept column).
    Y      : binary responses, shaped like ``p_j``.
    beta_j : current coefficients, (d, 1) or (d,).
    p_j    : current fitted probabilities, (n, 1) or (n,).

    Returns
    -------
    (beta_new, L) where ``beta_new`` solves
    (X^T W X) beta_new = X^T (W X beta_j + (Y - p_j)),  W = diag(p_j (1 - p_j)),
    and ``L`` is the negative log-likelihood evaluated at ``beta_j``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If X^T W X is singular.
    """
    diag_W = p_j*(1-p_j)
    logit_j = X @ beta_j

    # Negative log-likelihood; logaddexp(0, t) = log(1 + exp(t)) without
    # overflowing for large t.
    L = -(Y*logit_j - np.logaddexp(0, logit_j)).sum()

    # Scale the columns of X^T by the weights instead of materialising the
    # dense n-by-n diagonal matrix W.
    weighted_Xt = X.T * diag_W.reshape(-1)
    hessian = weighted_Xt @ X

    # W Z with Z = logit + (Y - p)/w simplifies to w*logit + (Y - p), which
    # needs no division and therefore stays finite when a probability
    # saturates at 0 or 1 (w == 0).
    rhs = X.T @ (diag_W*logit_j + (Y - p_j))

    # Solve the normal equations directly rather than forming the inverse.
    return np.linalg.solve(hessian, rhs), L

In [212]:
# Prepend the intercept column and make sure the labels are a column vector.
X1 = np.insert(X_scale, 0, 1, axis=1)
if len(Y.shape) == 1:
    Y1 = np.expand_dims(Y, axis=1)
else:
    Y1 = Y

# Start from beta = 0, i.e. every fitted probability equal to 0.5.
beta_hat = np.zeros((X1.shape[1], 1))
p_hat = expit(X1 @ beta_hat)

L_old = L_new = L_diff = np.inf
tol = 1e-9
max_iter = 100  # safety cap so a non-converging fit cannot loop forever

# Iterate Newton/IRLS updates until the negative log-likelihood stops changing.
n_iter = 0
while L_diff > tol and n_iter < max_iter:
    L_old = L_new
    beta_hat, L_new = logistic_regression(X1, Y1, beta_hat, p_hat)
    p_hat: np.ndarray = expit(X1 @ beta_hat)
    # Keep saturated probabilities strictly inside (0, 1) on BOTH sides so
    # the weights p*(1-p) used by the next update never become exactly zero.
    p_hat[p_hat == 1.0] = 1-tol
    p_hat[p_hat == 0.0] = tol
    L_diff = np.abs(L_old - L_new)
    print(L_diff)
    n_iter += 1

# `not (<=)` also catches a NaN difference, which would otherwise end the
# loop silently.
if not (L_diff <= tol):
    print(f'IRLS stopped after {n_iter} iterations without converging (last change: {L_diff})')
    

inf
1547.9353754372644
369.1347890639936
176.23590434739685
97.33682202400041
46.83403577376623
21.548708087765363
12.186396835217693
6.764403190691951
2.6159881829055394
0.5898500300313572
0.10119832043233146
0.003966714305192909
9.990584430852323e-07
2.2737367544323206e-13


In [213]:
# Predict class 1 wherever the fitted probability exceeds 0.5, then
# tabulate the predictions against the observed labels.
Y_pred = (p_hat > 0.5).squeeze() * 1
print(confusion_matrix(Y.squeeze(), Y_pred))

[[2666  122]
 [ 194 1619]]


In [214]:
# Clear the interactive namespace; -f skips the confirmation prompt.
%reset -f