In [None]:
import numpy as np
from bokeh.plotting import figure, output_notebook, show
output_notebook()

# Prepare Data
Here, we have a step function with some outliers. Basically, it can be written as $f(x) = \begin{cases}
    1       & \quad \text{if } x\geq100 \\
    0  & \quad \text{if } x<100
  \end{cases}$. 

We want to estimate the parameters of a logistic regression that can roughly describe this function.

In [None]:
def generate_dataset(n, seed=7):
    """Generate a noisy step-function dataset for binary classification.

    The feature is the integer index ``0 .. n-1``. A label is 1 with
    probability 0.15 for indices in the first half (``i < n/2``) and with
    probability 0.85 for the rest, i.e. a step at ``n/2`` with roughly 15%
    of the labels flipped.

    Parameters
    ----------
    n : int
        Number of samples to generate.
    seed : int, optional
        Seed of the random generator used for the label noise (default 7).

    Returns
    -------
    x : numpy.ndarray of shape (n, 2)
        Design matrix; column 0 is the constant 1 (intercept term) and
        column 1 is the sample index.
    y : numpy.ndarray of shape (n,)
        Binary labels (0 or 1).
    """
    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.rand(), but leaves the global generator untouched.
    rng = np.random.RandomState(seed)
    index = np.arange(n)
    # Probability of drawing label 1 for each sample.
    prob_one = np.where(index < n / 2, 0.15, 0.85)
    y = (rng.rand(n) < prob_one).astype(int)
    x = np.column_stack((np.ones(n, dtype=int), index))
    return x, y

In [None]:
x, y = generate_dataset(200)
print("Shape of x:", x.shape)
print("Shape of y:", y.shape)

# Column 0 of x is the constant intercept term, so the feature to plot is column 1.
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
p = figure(title="Noisy step-function dataset", x_axis_label="x", y_axis_label="y")
p.scatter(x[:,1], y)
show(p)

Shape of x: (200, 2)
Shape of y: (200,)


# Logistic Regression
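In the first exercise, we only consider the simplest linear model, without a separate bias term: the constant 1 in the first column of $x$ lets the first component of $\omega$ act as the intercept. $$z=\omega x^T$$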
## Sigmoid function
Here, you need to implement the sigmoid function. $$sigmoid(z) =  \frac{1}{1 + e^{-z} }$$ We recommend using numpy functions so that the operations work on whole vectors and matrices.

In [None]:
def sigmoid(coef, x):
    """Return the logistic function of the linear score ``coef . x^T``.

    Parameters
    ----------
    coef : numpy.ndarray
        Coefficient vector.
    x : numpy.ndarray
        Samples, one per row; the number of columns must match ``coef``.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-z))`` evaluated element-wise for ``z = coef.dot(x.T)``.
    """
    z = coef.dot(x.T)
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))). logaddexp(0, -z) evaluates
    # log(1 + exp(-z)) without overflowing when z is very negative.
    return np.exp(-np.logaddexp(0, -z))

## Cross entropy
Here, you need to implement the cross entropy function $L(\hat{y},y)=-(y\log{\hat{y}}+(1-y)\log{(1-\hat{y})})$

In [None]:
def cross_entropy(coef, x, y):
    """Return the mean binary cross entropy of the logistic model on (x, y).

    Computes ``-mean(y*log(p) + (1-y)*log(1-p))`` with
    ``p = 1 / (1 + exp(-z))`` and ``z = coef.dot(x.T)``.

    Parameters
    ----------
    coef : numpy.ndarray
        Coefficient vector.
    x : numpy.ndarray
        Samples, one per row.
    y : numpy.ndarray
        Binary labels (0 or 1), one per sample.

    Returns
    -------
    float
        Average loss over all samples.
    """
    z = coef.dot(x.T)
    # Work on the logits instead of the probabilities:
    #   -log(p)     = log(1 + exp(-z)) = logaddexp(0, -z)
    #   -log(1 - p) = log(1 + exp(z))  = logaddexp(0,  z)
    # This stays finite when p would round to exactly 0 or 1, where
    # log(p) / log(1 - p) would give -inf and the loss would become nan.
    return np.mean(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z))

## Gradient
Here, you need to implement the gradient which will be used in the back propagation.$$\frac{\partial L(\hat{y},y)}{\partial \omega} =\frac{\partial L(\hat{y},y)}{\partial \hat{y}}  \frac{\partial \hat{y}}{\partial z} \frac{\partial z}{\partial \omega} = x^{T}(\hat{y}-y)$$
Note: use `np.mean(..., axis=1)` to average over the second dimension (the samples), so that the gradient has one entry per coefficient.

In [None]:
def gradient(coef, x, y):
    """Return the gradient of the mean cross entropy with respect to ``coef``.

    Parameters
    ----------
    coef : numpy.ndarray
        Coefficient vector.
    x : numpy.ndarray
        Samples, one per row.
    y : numpy.ndarray
        Binary labels, one per sample.

    Returns
    -------
    numpy.ndarray
        One entry per coefficient: ``x^T * (y_hat - y)`` averaged over the samples.
    """
    # Prediction error of every sample.
    residual = sigmoid(coef, x) - y
    # Each row of x.T holds one feature across all samples; scale it by the residuals.
    weighted = x.T * residual
    return weighted.mean(axis=1)

In [None]:
def logistic_regression(coef, x, y, lr, epsilon, max_iter=100000, decay_at=50000, decay_factor=0.1):
    """Fit logistic-regression coefficients with plain gradient descent.

    Parameters
    ----------
    coef : numpy.ndarray
        Initial coefficient vector (not modified; a new array is returned).
    x : numpy.ndarray
        Samples, one per row.
    y : numpy.ndarray
        Binary labels, one per sample.
    lr : float
        Learning rate.
    epsilon : float
        Early-stopping tolerance: iteration stops once the loss changes by
        at most ``epsilon`` between two consecutive iterations.
    max_iter : int, optional
        Maximum number of gradient steps (default 100000).
    decay_at : int, optional
        Iteration at which the learning rate is decayed once (default 50000).
    decay_factor : float, optional
        Factor the learning rate is multiplied by at ``decay_at`` (default 0.1).

    Returns
    -------
    numpy.ndarray
        The estimated coefficients.
    """
    prev_loss = 0
    t = 0
    while t < max_iter:
        # Learning rate decay. The check has to live inside the loop:
        # evaluated once before the loop, t is still 0 and it never fires.
        if t == decay_at:
            lr = decay_factor * lr
        loss = cross_entropy(coef, x, y)
        if t % 1000 == 0:  # Visualize loss every 1000 iterations
            print(loss)
        if abs(loss - prev_loss) <= epsilon:  # Early stop when the loss stabilizes
            break
        prev_loss = loss
        coef = coef - lr * gradient(coef, x, y)
        t += 1
        # Non-finite coefficients can never recover; stop instead of
        # spinning through the remaining iterations.
        if not np.all(np.isfinite(coef)):
            print("Stopping: coefficients became non-finite at iteration", t)
            break
    return coef

In [None]:
def logistic_regression_v2(coef, x, y, lr, epsilon=1.e-8, b1=0.9, b2=0.999,
                           max_iter=100000, decay_at=50000, decay_factor=0.1, denom_eps=1e-8):
    """Fit logistic-regression coefficients with a Nesterov-accelerated Adam (Nadam) update.

    Parameters
    ----------
    coef : numpy.ndarray
        Initial coefficient vector (not modified; a new array is returned).
    x : numpy.ndarray
        Samples, one per row.
    y : numpy.ndarray
        Binary labels, one per sample.
    lr : float
        Learning rate.
    epsilon : float, optional
        Early-stopping tolerance on the change of the loss between two
        consecutive iterations (default 1e-8).
    b1 : float, optional
        Exponential decay rate of the first-moment (mean) estimate.
    b2 : float, optional
        Exponential decay rate of the second-moment (squared gradient) estimate.
    max_iter : int, optional
        Maximum number of update steps (default 100000).
    decay_at : int, optional
        Iteration at which the learning rate is decayed once (default 50000).
    decay_factor : float, optional
        Factor the learning rate is multiplied by at ``decay_at`` (default 0.1).
    denom_eps : float, optional
        Small constant added to the square root of the second moment to
        avoid division by zero (default 1e-8).

    Returns
    -------
    numpy.ndarray
        The estimated coefficients.
    """
    prev_loss = 0
    m_coef = np.zeros(coef.shape)  # running mean of the gradients
    v_coef = np.zeros(coef.shape)  # running mean of the squared gradients
    t = 0
    while t < max_iter:
        # Learning rate decay. The check has to live inside the loop:
        # evaluated once before the loop, t is still 0 and it never fires.
        if t == decay_at:
            lr = decay_factor * lr
        loss = cross_entropy(coef, x, y)
        if t % 1000 == 0:  # Visualize loss every 1000 iterations
            print(loss)
        if abs(loss - prev_loss) <= epsilon:  # Early stop when the loss stabilizes
            break
        prev_loss = loss
        grad = gradient(coef, x, y)
        t += 1
        m_coef = b1 * m_coef + (1 - b1) * grad
        v_coef = b2 * v_coef + (1 - b2) * grad ** 2
        # Bias-corrected moment estimates (both moments start at zero).
        moment_m_coef = m_coef / (1 - b1 ** t)
        moment_v_coef = v_coef / (1 - b2 ** t)
        # Nesterov look-ahead: blend the corrected momentum with the current gradient.
        lookahead = b1 * moment_m_coef + (1 - b1) * grad / (1 - b1 ** t)
        delta = lr / (np.sqrt(moment_v_coef) + denom_eps) * lookahead
        coef = coef - delta
        # Non-finite coefficients can never recover; stop instead of
        # spinning through the remaining iterations.
        if not np.all(np.isfinite(coef)):
            print("Stopping: coefficients became non-finite at iteration", t)
            break
    return coef

## Validation
You can check whether your logistic regression estimates our target function by running the following code. You should get a curve that is close to the step function defined in the first section.

In [None]:
# Initial guess for the two coefficients (one per column of x: the constant
# column and the feature column), then fit with plain gradient descent.
coef = np.array([1.0, 0.0])
coef_estimation = logistic_regression(coef, x, y, lr=1e-3, epsilon=1e-10)

In [None]:
# Overlay the fitted sigmoid (red) on the raw samples to compare it with the step function.
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
p = figure(title="Logistic regression fit", x_axis_label="x", y_axis_label="y / predicted probability")
p.scatter(x[:,1], y)
p.line(x[:,1], sigmoid(coef_estimation, x), color="red")
show(p)
# Last expression of the cell: display the estimated coefficients.
coef_estimation

array([-2.97413021,  0.0298362 ])

In [None]:
# Same initial guess as for the plain gradient-descent run, fitted with the
# moment-based optimizer instead; epsilon is the early-stopping tolerance.
coef = np.array([1.0, 0.0])
coef_estimation = logistic_regression_v2(coef, x, y, lr=1e-3, epsilon=1e-10)

0.8182616875182228
nan


  
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


KeyboardInterrupt: ignored