# Assignment 1a


In [2]:
import numpy as np

## Exercise 1.1
For $dJ/db$,
$$
\begin{aligned}
\frac{dJ}{db} = \frac{1}{n}\sum_{i = 1}^n \frac{\partial J}{\partial z_i}\frac{dz_i}{db},
\end{aligned}
$$
and for $dJ/dw_j$,
$$
\begin{aligned}
\frac{dJ}{dw_j} = \frac{1}{n}\sum_{i = 1}^n \frac{\partial J}{\partial z_i}\frac{d z_i}{d w_j}
\end{aligned}
$$

## Exercise 1.2

$$
\begin{aligned}
\frac{\partial J}{\partial z_i} & = \frac{1}{n}\left( -y_i \frac{\partial}{\partial z_i}\left( \ln \frac{1}{1 + e^{-z_i}}\right) - (1 - y_i)\frac{\partial}{\partial z_i}\left( \ln \frac{e^{-z_i}}{1 + e^{-z_i}}\right)\right) \nonumber \\
&= \frac{1}{n}\left( y_i \frac{1}{1+e^{-z_i}} \frac{\partial}{\partial z_i}e^{-z_i} - (1-y_i)\left( \frac{\partial}{\partial z_i} \ln e^{-z_i} - \frac{\partial}{\partial z_i} \ln \big(1 + e^{-z_i}\big)\right)\right) \nonumber \\
&=\frac{1}{n}\left(-y_i \frac{e^{-z_i}}{1+e^{-z_i}} + (1-y_i)\left(1 - \frac{e^{-z_i}}{1 + e^{-z_i}} \right)\right) \nonumber \\
&=\frac{1}{n}\left(\frac{e^{-z_i}}{1+e^{-z_i}} \big(-y_i - (1-y_i) \big) + (1-y_i) \big)\right) = \frac{1}{n} \left( - \frac{e^{-z_i}}{1 + e^{-z_i}} + 1 - y_i \right) \nonumber \\
&= \frac{1}{n}\left( - \left( 1 - \frac{1}{1+e^{-z_i}} \right) + 1 -y_i \right) = \frac{1}{n}\left( \frac{1}{1+e^{-z_i}} - y_i\right).
\end{aligned}
$$

Next, 

$$
\begin{aligned}
\frac{d z_i}{d b} = \left[ \frac{\partial}{\partial b} \left(\mathbf{w}^T\mathbf{x_1} + b\right),  \ldots, \frac{\partial}{\partial b} \left( \mathbf{w}^T\mathbf{x_n} + b \right) \right] = [1, \ldots , 1].
\end{aligned}
$$

And finally 
$$
\begin{aligned}
\frac{dz_i}{dw_j} = \left[ x_{1j}, x_{2j}, \ldots, x_{nj} \right]
\end{aligned}
$$

## Exercise 1.3

In [101]:
def init(p, n, random = True):
    # p: numb of vars
    # returns: w, b
    if random:
        return(np.random.rand(p, 1), np.repeat(np.random.rand(1), n))
    else:
        return(np.zeros([p, 1]), np.zeros(n))

The update steps of the gradient descent will be
$$
\begin{aligned}
w_j := w_j - \eta \frac{1}{n}\sum_{i=1}^n\left( \frac{1}{1+e^{-z_i}} - y_i\right)x_{ij}  \\
b := b - \eta \frac{1}{n}\sum_{i=1}^n\left( \frac{1}{1+e^{-z_i}} - y_i\right)
\end{aligned}
$$

In [222]:
def cost(y, x, w, b):
    # eq (1a) in matrix form
    z = np.matmul(w.T, x) + b
    # eq (1b)
    p = (1 + np.exp(-z))**(-1)
    # eq (1c)
    L = -y * np.log(p) - (1-y) * np.log(1-p)
    # eq (1d)
    return(1/y.size * np.sum(L))

def update_w(w, step_size, x, y, b):
    update_mat = (1 + np.exp(-(np.matmul(w.T, x) + b)))**(-1) - y
    w = w- step_size * 1/y.size * np.sum(np.matmul(update_mat, x.T))
    return(w)

def update_b(b, step_size, x, y, w):
    update_mat = (1 + np.exp(-(np.matmul(w.T, x) + b)))**(-1) - y
    b = b - step_size * 1/y.size * np.sum(update_mat)
    return(b)

def optimize(y, x, epochs = 2000, stop = .0001, 
             step_size = 0.001, random_init = True):
    w_new, b_new = init(x.shape[0], x.shape[1], random = random_init)
    cost_new = cost(y, x, w_new, b_new)
    for i in range(epochs):
        w_old, b_old, cost_old = w_new, b_new, cost_new
        w_new = update_w(w_old, step_size, x, y, b_old)
        b_new = update_b(b_old, step_size, x, y, w_old)
        cost_new = cost(y, x, w_new, b_new)
        if abs(cost_new - cost_old) <= stop:
            print("Stopping criterion met, iteration {}".format(i))
            break
    if abs(cost_new - cost_old) >= stop:
        print("Max epochs reached")
    return(w_new, b_new)    

To apply the model, I will simulate data for $p = 3$.

In [210]:
np.random.seed(12)
mean_vec = [1, 5, 3]
cov_mat = np.diag([2.2, 15.3, 82.5])
n = 100
# as defined in the task, the complete datamatrix is p x n rather than n x p
x = np.random.multivariate_normal(mean_vec, cov_mat, n).T
coefs = np.array([2, -1.2, 0.5]).reshape(3,1)

z = 1 + np.matmul(x.T ,coefs)
p = 1/(1 + np.exp(-z))
y = np.random.binomial(1, p.flatten(), n)

In [223]:
w, b = optimize(y, x, epochs = 5000, stop = .00000001, random_init = False)

Max epochs reached


In [224]:
w, b[1]

(array([0.03541994, 0.03541994, 0.03541994]), -0.47327617552528284)

In [221]:
np.zeros([2,3])

array([[0., 0., 0.],
       [0., 0., 0.]])