# Assignment 1a


In [2]:
import numpy as np

## Exercise 1.1
For $dJ/db$,
$$
\begin{aligned}
\frac{dJ}{db} = \sum_{i = 1}^n \frac{\partial J}{\partial z_i}\frac{dz_i}{db},
\end{aligned}
$$
and for $dJ/dw_j$ (the factor $1/n$ of the mean loss is already contained in $\partial J/\partial z_i$),
$$
\begin{aligned}
\frac{dJ}{dw_j} = \sum_{i = 1}^n \frac{\partial J}{\partial z_i}\frac{d z_i}{d w_j}.
\end{aligned}
$$

## Exercise 1.2

$$
\begin{aligned}
\frac{\partial J}{\partial z_i} & = \frac{1}{n}\left( -y_i \frac{\partial}{\partial z_i}\left( \ln \frac{1}{1 + e^{-z_i}}\right) - (1 - y_i)\frac{\partial}{\partial z_i}\left( \ln \frac{e^{-z_i}}{1 + e^{-z_i}}\right)\right) \nonumber \\
&= \frac{1}{n}\left( y_i \frac{1}{1+e^{-z_i}} \frac{\partial}{\partial z_i}e^{-z_i} - (1-y_i)\left( \frac{\partial}{\partial z_i} \ln e^{-z_i} - \frac{\partial}{\partial z_i} \ln \big(1 + e^{-z_i}\big)\right)\right) \nonumber \\
&=\frac{1}{n}\left(-y_i \frac{e^{-z_i}}{1+e^{-z_i}} + (1-y_i)\left(1 - \frac{e^{-z_i}}{1 + e^{-z_i}} \right)\right) \nonumber \\
&=\frac{1}{n}\left(\frac{e^{-z_i}}{1+e^{-z_i}} \big(-y_i - (1-y_i) \big) + (1-y_i) \right) = \frac{1}{n} \left( - \frac{e^{-z_i}}{1 + e^{-z_i}} + 1 - y_i \right) \nonumber \\
&= \frac{1}{n}\left( - \left( 1 - \frac{1}{1+e^{-z_i}} \right) + 1 -y_i \right) = \frac{1}{n}\left( \frac{1}{1+e^{-z_i}} - y_i\right).
\end{aligned}
$$

Next, 

$$
\begin{aligned}
\left[ \frac{d z_1}{d b}, \ldots, \frac{d z_n}{d b} \right] = \left[ \frac{\partial}{\partial b} \left(\mathbf{w}^T\mathbf{x}_1 + b\right),  \ldots, \frac{\partial}{\partial b} \left( \mathbf{w}^T\mathbf{x}_n + b \right) \right] = [1, \ldots , 1].
\end{aligned}
$$

And finally 
$$
\begin{aligned}
\left[ \frac{d z_1}{d w_j}, \ldots, \frac{d z_n}{d w_j} \right] = \left[ x_{1j}, x_{2j}, \ldots, x_{nj} \right].
\end{aligned}
$$

## Exercise 1.3

In [231]:
def init(p, n, random = True):
    """Create starting values for the weights and the bias.

    Parameters
    ----------
    p : int
        Number of variables (one weight per variable).
    n : int
        Length of the returned bias vector; the same bias value is
        repeated n times.
    random : bool
        If True, draw the starting values from ``np.random.rand``
        (uniform on [0, 1)); otherwise start everything at zero.

    Returns
    -------
    (w, b) : tuple of ndarray
        ``w`` has shape (p, 1) and ``b`` has shape (n,).
    """
    if random:
        # weights first, then the single bias draw, so the order of RNG
        # calls is fixed
        w = np.random.rand(p, 1)
        b = np.repeat(np.random.rand(1), n)
    else:
        # shape follows p; a fixed (3, 1) reshape would fail for p != 3
        w = np.zeros((p, 1))
        b = np.zeros(n)
    return (w, b)

The update steps of the gradient descent will be
$$
\begin{aligned}
w_j := w_j - \eta \frac{1}{n}\sum_{i=1}^n\left( \frac{1}{1+e^{-z_i}} - y_i\right)x_{ij}  \\
b := b - \eta \frac{1}{n}\sum_{i=1}^n\left( \frac{1}{1+e^{-z_i}} - y_i\right)
\end{aligned}
$$

To apply the model, I first use two features of the iris data and then simulate data with $p = 3$.

In [272]:
from sklearn import datasets

# `from sklearn import datasets` binds only the name `datasets`, so the
# loader has to be reached through it; `sklearn.datasets...` would raise a
# NameError on a fresh kernel.
iris = datasets.load_iris()
# keep only the first two feature columns
X = iris.data[:, :2]
# binary target: 0 for class 0, 1 for every other class
y = (iris.target != 0)*1

In [276]:
# new try
def z(X, w):
    """Linear predictor: the matrix product of the data X and the weights w.

    NOTE(review): a later statement in this cell rebinds the name `z` to an
    array, which shadows this helper from that point on; rename one of the
    two if both are needed.
    """
    return np.dot(X, w)

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# linear predictor of X for the fixed example weights (1, 2)^T, no intercept
z = X @ np.array([[1], [2]])

def loss(h, y):
    """Mean binary cross-entropy between probabilities h and labels y.

    Parameters
    ----------
    h : ndarray
        Predicted probabilities (sigmoid applied to z).
    y : ndarray
        0/1 labels with the same shape as h.

    Returns
    -------
    float
        Average of -y*log(h) - (1-y)*log(1-h) over all entries.
    """
    # keep h strictly inside (0, 1): log(0) would give -inf and 0 * -inf = nan
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

def gradient_w(X, h, y):
    """Gradient of the mean cross-entropy loss with respect to the weights.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Data matrix, one observation per row.
    h : ndarray, shape (n, 1)
        Predicted probabilities (sigmoid applied to z).
    y : ndarray, shape (n, 1)
        0/1 labels.

    Returns
    -------
    ndarray, shape (p, 1)
        X^T (h - y) / n, i.e. entry j is (1/n) * sum_i (h_i - y_i) * x_ij.
    """
    return np.dot(X.T, (h - y)) / y.size

In [552]:
class LogisticReg:
    """Binary logistic regression fitted by full-batch gradient descent.

    The bias term is handled by prepending a column of ones to the data
    matrix, so after ``fit`` the first row of ``self.w`` is the intercept
    and the remaining rows are the feature weights.

    Parameters
    ----------
    step_size : float
        Learning rate used in every gradient step.
    epochs : int
        Number of gradient steps performed by ``fit``.
    random_init : bool
        If True the weights start at draws from ``np.random.rand``
        (uniform on [0, 1)); otherwise they start at zero.

    Attributes (set by ``fit``)
    ---------------------------
    w : ndarray, shape (p + 1, 1)
        Fitted intercept and weights.
    final_loss : float
        Mean cross-entropy on the training data at the fitted weights.
    """

    def __init__(self, step_size = 0.01, epochs = 10000, random_init = False):
        self.step_size = step_size
        self.epochs = epochs
        self.random_init = random_init

    # equation 1a in matrix form
    def z(self, X, w):
        """Return the linear predictor X·w."""
        return np.dot(X, w)

    # equation 1b
    def sigmoid(self, z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing.

        log(1 + exp(-z)) is evaluated with ``np.logaddexp``, which stays
        finite for large |z|, and is then exponentiated back.
        """
        return np.exp(-np.logaddexp(0, -z))

    # equation 1d
    def loss(self, h, y):
        """Return the mean binary cross-entropy of probabilities h against labels y.

        h is the sigmoid applied to z. It is clipped away from exactly 0 and 1
        because log(0) = -inf and 0 * -inf would turn the mean into nan.
        """
        eps = np.finfo(float).eps
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def add_intercept(self, X):
        """Return X with a leading column of ones (the b term)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis = 1)

    def fit(self, X, y):
        """Estimate the weights by gradient descent and store them in ``self.w``.

        Parameters
        ----------
        X : ndarray, shape (n, p)
            Data matrix without intercept column.
        y : array-like, shape (n,) or (n, 1)
            0/1 labels.
        """
        # add an intercept for the b term
        X = self.add_intercept(X)
        # force a column vector: with a 1-D y, (n, 1) - (n,) would broadcast
        # to an (n, n) matrix instead of the residual vector
        y = np.asarray(y).reshape(-1, 1)

        # initialize weights
        n_coef = X.shape[1]
        if self.random_init:
            self.w = np.random.rand(n_coef).reshape(n_coef, 1)
        else:
            self.w = np.zeros((n_coef, 1))

        # time for model fitting
        for _ in range(self.epochs):
            h = self.sigmoid(self.z(X, self.w))
            # as calculated in the exercises but not including
            # the gradient wrt b as it is accounted for in
            # the data matrix by the column of 1s
            gradient = np.dot(X.T, (h - y)) / y.size
            # update
            self.w -= self.step_size * gradient

        # the loss is evaluated once at the fitted weights rather than being
        # recomputed and thrown away in every epoch
        self.final_loss = self.loss(self.sigmoid(self.z(X, self.w)), y)


In [556]:
# binary problem on the first two iris features: class 0 against the rest
X = iris.data[:, :2]
# labels as an integer column vector, as expected by fit()
y = ((iris.target != 0) * 1)[:, np.newaxis]
model_test = LogisticReg(epochs=300000, step_size=0.1)
model_test.fit(X, y)

In [573]:
# simulate a logistic-regression data set with known coefficients
np.random.seed(12)
n = 100
mean_vec = [1, 5, 3]
cov_mat = np.diag([2.2, 15.3, 82.5])
# one observation per row: x is n x p, which is the layout matmul with the
# (3, 1) coefficient vector below requires
x = np.random.multivariate_normal(mean_vec, cov_mat, size=n)
coefs = np.array([[2], [-1.2], [0.5]])

# linear predictor with intercept 5, success probabilities, Bernoulli draws
z = x @ coefs + 5
p = 1 / (1 + np.exp(-z))
y = np.random.binomial(1, p.ravel(), size=n)

In [574]:
# fresh, untrained model for the simulated data
model_test = LogisticReg(epochs=300000, step_size=0.1)

In [575]:
# labels are passed as a column vector
model_test.fit(x, y[:, np.newaxis])

In [576]:
# fitted coefficients: row 0 is the intercept (fit() prepends the column of
# ones), rows 1-3 are the slopes; the simulation used 5 and (2, -1.2, 0.5)
model_test.w

array([[ 4.80892653],
       [ 1.85489525],
       [-1.1601345 ],
       [ 0.47527861]])