In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
eps = np.finfo(float).eps

In [2]:
def weight_init(nd, init_mode='random'):
	if init_mode == 'random':
		theta = np.random.randn(nd + 1, 1) # plus the bias/const term
	elif init_mode=='xavier':
		"""
		Glorot, Xavier, and Yoshua Bengio. 
		"Understanding the difficulty of training deep feedforward neural networks." 
		Proceedings of the thirteenth international conference on artificial 
		intelligence and statistics. 2010.
		"""
		pass
	else:
		pass
	return theta

def cross_entropy_loss(pred, y):
	"""
	https://en.wikipedia.org/wiki/Cross_entropy#Cross-entropy_loss_function_and_logistic_regression
	"""
	# n = y.shape[0]
	# loss = -(y*np.log(pred+eps) + (1-y)*np.log(1-pred+eps))/n
	# loss = loss.sum(axis=0)
	# return loss
	"""
	for numerical stability use log sum:
		??max_val = (-input).clamp(min=0)
		??loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log()
		https://discuss.pytorch.org/t/numerical-stability-of-bcewithlogitsloss/8246
		http://tagkopouloslab.ucdavis.edu/?p=2197
		https://www.xarg.org/2016/06/the-log-sum-exp-trick-in-machine-learning/
	"""
	n = y.shape[0]
	max_value = np.clip(pred, eps, 1-eps)
	loss = -(y*np.log(max_value) + (1-y)*np.log(1-max_value))/n
	loss = loss.sum(axis=0)
	return loss

def _pos_sigmoid(x):
	z = np.exp(-x)
	return 1 / (1 + z)

def _neg_sigmoid(x):
	z = np.exp(x)
	return z / (1 + z)

def sigmoid(x):
	#return 1.0 / (1.0 + np.exp(-x))
	"""
	Numerically stable sigmoid function.
		see: https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
	"""
	sx = x.copy()
	pos_ind = sx >= 0
	neg_ind = ~pos_ind

	sx[pos_ind] = _pos_sigmoid(sx[pos_ind])
	sx[neg_ind] = _neg_sigmoid(sx[neg_ind])
	return sx

def compute_accuracy(X, yin, theta):
	y = yin.copy()
	n = y.shape[0]
	gz = sigmoid(X @ theta)
	acc = (gz == y).sum()/n
	return acc*100.

In [3]:
from sklearn import datasets
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

In [4]:
ni, nd = X.shape

In [5]:
lr = 1e-3
bsize = 569
max_epochs = 200

In [6]:
X = np.append(X, np.ones((ni, 1)), axis=1) # append the bias/const term
theta = weight_init(nd, init_mode='random')
y = np.expand_dims(y, axis=1)

In [18]:
ind = 0
loss = 0
while ind < ni:
    end = ind + bsize if ind + bsize <= ni else ni
    bX = X[ind:end]
    by = y[ind:end]
    gz = sigmoid(np.matmul(bX, theta))
    pred = gz
    loss += cross_entropy_loss(pred, by)
    grad = (np.matmul(bX.T, (pred - by))).sum(axis=1)
    grad = np.expand_dims(grad, 1)
    print(grad.sum())
    theta -= lr * grad  # sgd
    ind += bsize

599785.303706


In [8]:
theta.shape

(31, 1)

In [9]:
bX.shape

(569, 31)

In [9]:
acc = compute_accuracy(X, y, theta)

In [10]:
acc

62.741652021089635

In [11]:
yy = np.expand_dims(y, axis=1)
gz = sigmoid(X @ theta)

In [12]:
sum(yy)/ni

array([[0.62741652]])

In [13]:
from sklearn.linear_model import LogisticRegression

In [14]:
classifier = LogisticRegression(C=1.0, penalty='none',
                        solver='sag',
                        multi_class='multinomial',
                        max_iter=1000)

In [15]:
classifier.fit(X, y)
y_pred = classifier.predict(X)

  return f(**kwargs)


In [16]:
y_pred

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,

In [17]:
(y == y_pred).mean()*100.

54.05314414027631