In [3]:
import numpy as np
import time

In [4]:
def check_data(X,y):
  """
  Examines whether a data table and its answer vector are fit for modelling.

  :param X: An indexable collection of rows; every row has to support len().
  :param y: An indexable collection of answers, one per row of X. Every
    answer has to be a number from the [0,1] interval.

  :return: A pair (status, message). status is True when all the checks pass
    and message is then an empty string. Otherwise status is False and the
    message describes the first problem that was found.
  """
  if len(X) != len(y):
    return False, "Length of the answer vector doesn't fit the number of data points.\n"

  # Every row has to be exactly as long as the first one.
  if len(X) > 1:
    expected_width = len(X[0])
    for i in range(1, len(X)):
      if len(X[i]) != expected_width:
        return False, "The row at the index "+str(i)+" seems to be missing an observation.\n"

  for i in range(len(y)):
    value = y[i]
    # The positive form of the range test also rejects NaN, for which both
    # "value < 0" and "value > 1" are False.
    if not (0 <= value <= 1):
      return False, "The answer at the index "+str(i)+" doesn't indicate a binary class nor does it indicate a probability of belonging to one.\n"
  return True, ""

In [16]:
def _validate_fit_inputs(data, answers):
  """
  Checks the format of the inputs of fit_with_IWLS.

  The check lives in a private helper because the `check_data` flag of
  fit_with_IWLS shadows the module-level function of the same name, which
  therefore cannot be called from inside fit_with_IWLS.

  :param data: An indexable collection of rows; every row has to support len().
  :param answers: An indexable collection of numbers from the [0,1] interval.

  :return: A pair (status, message); message is empty when status is True.
  """
  if len(data) != len(answers):
    return False, "Length of the answer vector doesn't fit the number of data points.\n"
  for i in range(1, len(data)):
    if len(data[i]) != len(data[0]):
      return False, "The row at the index "+str(i)+" seems to be missing an observation.\n"
  for i in range(len(answers)):
    # Written as a positive range test so that NaN is rejected as well.
    if not (0 <= answers[i] <= 1):
      return False, "The answer at the index "+str(i)+" doesn't indicate a binary class nor does it indicate a probability of belonging to one.\n"
  return True, ""


def fit_with_IWLS(data, answers, intercept: bool = False,
                  relevant_variables = None, additional_interactions = None,
                  l2_reg: float = 1.0, beta0_gen = None,
                  max_iterations: int = 100, min_step_norm: float = 1e-12,
                  max_time: float = 3600.0, check_data: bool = False):
  """
  Calculates the coefficients of a Logistic Regression model using the
  Iterative Weighted Least Squares method (Newton steps on the penalized
  negative log-likelihood).
  :param data: The data on which the model will be fit: a two-dimensional
    table (numpy array or nested sequences) with one row per observation.
  :param answers: The vector with answers (numbers belonging to the
    [0,1] interval).
  :param intercept: If True, the model will fit an intercept (meaning beta' @ x
    shall be replaced with beta' @ x + beta0 in all calculations).
  :param relevant_variables: A collection of indices, indicating on which
    columns of the data the model should be built.
    If None, all columns will be used.
  :param additional_interactions: A collection of pairs of indices, indicating
    which element-wise column products should be used for building the model.
    If None, no such variables will be considered.
  :param l2_reg: The strength of ridge regularization. The penalty added to
    the negative log-likelihood is l2_reg * sum(beta ** 2) and it covers every
    coefficient, the intercept included. 0 means no regularization.
    1 is the default.
  :param beta0_gen: A generator used to determine the starting values of
    coefficients. Should include .generate(n: int) method, returning a numpy
    array of length n filled with floats.
    If None, all coefficients will be initialized to zeros.
  :param max_iterations: The maximum number of iterations the algorithm will
    perform before stopping and proposing a solution.
    By default, 100 in accordance to scikit-learn implementation.
  :param min_step_norm: The minimum value for the euclidean norm of the change
    of a parameter vector in a single step. If the difference between
    iterations falls below that number, the algorithm will stop and propose
    a solution.
  :param max_time: The maximum time the procedure can run in seconds. Once
    exceeded, the iterating will stop and the solution will be proposed.
  :param check_data: If True, the format of data and answers will be examined
    prior to running the algorithm.

  :return: A numpy array containing the proposed coefficients and a dictionary,
    labeling said coefficients. The order is: relevant variables, interactions,
    intercept.
  :raises AssertionError: If the number of answers differs from the number of
    data rows or, with check_data=True, if the format check fails.
  :raises ValueError: If data is not a two-dimensional table.
  """
  # Ensuring the correct dimensionality of the data. The errors are raised
  # explicitly so that the validation is not stripped when running with -O.
  if check_data:
    status, message = _validate_fit_inputs(data, answers)
    if not status:
      raise AssertionError(message)
  if len(data) != len(answers):
    raise AssertionError("For every data point, there has to be a correct class specified.\n")

  # Accept nested sequences as well as numpy arrays. The dtype is converted
  # column by column below, so that unused columns do not have to be numeric.
  data = np.asarray(data)
  if data.ndim != 2:
    raise ValueError("The data has to be a two-dimensional table of observations.\n")
  n = data.shape[0]

  # Filling up the default values of parameters.
  if additional_interactions is None:
    additional_interactions = []
  if relevant_variables is None:
    relevant_variables = range(data.shape[1])

  ### Constructing the experiment matrix and labels for it.
  Y = np.asarray(answers, dtype=float).reshape(-1)
  columns = []
  labels = []
  for index in relevant_variables:
    columns.append(data[:, index].astype(float))
    labels.append("X"+str(index))

  for index1, index2 in additional_interactions:
    columns.append(data[:, index1].astype(float) * data[:, index2].astype(float))
    labels.append("X"+str(index1)+"X"+str(index2))

  if intercept:
    columns.append(np.ones(n))
    labels.append("intercept")

  X = np.column_stack(columns)
  p = len(labels)
  # If the penalty is the l2_reg times the sum of squares of coefficients, this
  # is the matrix of the second order derivatives with respect to the coefs.
  penalty_hessian = 2 * l2_reg * np.eye(p)

  ### Initializing coefficients.
  if beta0_gen is None:
    beta = np.zeros(p)
  else:
    beta = np.asarray(beta0_gen.generate(p), dtype=float)

  # A monotonic clock is immune to system clock adjustments.
  start = time.monotonic()

  ### Iterating the main algorithm.
  for _ in range(max_iterations):
    linear_predictor = X @ beta
    # Logistic function written with tanh: identical to exp(t) / (exp(t) + 1)
    # but it never overflows, so extreme predictors give 0 or 1 instead of NaN.
    P = 0.5 * (1.0 + np.tanh(0.5 * linear_predictor))
    weights = P * (1.0 - P)

    # deriv is the derivative of the minus log-like + penalty with respect to beta
    deriv = X.T @ (P - Y) + 2 * l2_reg * beta
    # hessian is the matrix of second order derivatives of the previously
    # mentioned function. Scaling the columns of X' by the weights equals
    # X' @ diag(weights) @ X without materializing the n x n diagonal matrix.
    hessian = (X.T * weights) @ X + penalty_hessian

    # In order to avoid numerical complexity of the matrix inversion,
    # the Newton step is defined as a solution to a linear equation.
    step = np.linalg.solve(hessian, deriv)
    beta = beta - step

    if np.sqrt(step @ step) < min_step_norm:
      break
    if time.monotonic() - start > max_time:
      break

  ### Creating the coefficient dictionary.
  beta_dict = dict(zip(labels, beta))

  return beta, beta_dict

**Example**

In [9]:
def generate_fake_data(n: int, rng = None):
  """
  Draws a synthetic binary classification sample.

  The features are three independent standard normal columns. The class is 1
  when X0 + X1 * X2 exceeds 1 and 0 otherwise, so the signal sits in the first
  column and in the interaction of the remaining two.

  :param n: The number of observations to generate.
  :param rng: An optional source of randomness exposing
    .normal(loc, scale, size), e.g. np.random.default_rng(seed). Passing one
    makes the sample reproducible. If None, the global np.random state is used.

  :return: A pair (X, Y): X is a float array of shape (n, 3) and Y is an
    integer array of length n holding zeros and ones.
  """
  source = np.random if rng is None else rng
  X = source.normal(loc=0, scale=1, size=(n, 3))
  score = X[:, 0] + X[:, 1] * X[:, 2]
  Y = (score > 1).astype(int)
  return X, Y

In [10]:
# Draw a tiny sample and show the features followed by the class labels.
X, Y = generate_fake_data(10)
print(X, Y, sep="\n")

[[-1.2769084   0.97461493 -1.70939791]
 [ 1.32057104  0.48228883  1.33455102]
 [ 1.44572932 -0.81520141  0.3026459 ]
 [ 0.62406698 -1.05531948 -0.65286175]
 [ 1.5757052   1.43480957 -0.59260877]
 [-1.31432842  2.00401493  0.11509419]
 [ 0.11245183  0.77642164  1.08314294]
 [-0.1703819  -2.39797365  1.11380643]
 [ 0.43700367  0.16743747 -1.67064208]
 [ 2.30200017 -0.30759345 -1.63520366]]
[0 1 1 1 0 0 0 0 0 1]


In [18]:
# Column pairs whose element-wise products are offered to the second model.
interactions = [
  (0, 1),
  (1, 2),
  (2, 0),
]
# A larger sample, used for the fits below.
X, Y = generate_fake_data(1000)

In [19]:
# First model: the raw columns only, without an intercept.
beta1, beta1_dict = fit_with_IWLS(
  X, Y,
  intercept=False,
  max_iterations=500,
)
# Second model: an intercept and the pairwise interaction terms are added.
beta2, beta2_dict = fit_with_IWLS(
  X, Y,
  intercept=True,
  additional_interactions=interactions,
  max_iterations=500,
)

In [20]:
# Show the labelled coefficients of both fits, one dictionary per line.
print(beta1_dict, beta2_dict, sep="\n")

{'X0': 1.111542112280021, 'X1': -0.03760221407116636, 'X2': 0.01815495209641154}
{'X0': 3.785955804511253, 'X1': 0.042327736624419, 'X2': -0.01698865779874892, 'X0X1': 0.044835245048462556, 'X1X2': 3.793928962655196, 'X2X0': 0.013792500116502111, 'intercept': -3.9445740505194786}
