# Homework 3: Logistics Regression
Implement everywhere that has an ellipsis (`...`).

In [1]:
import numpy as np

### Load dataset

In [2]:
from sklearn.datasets import load_iris
data = load_iris()
X, Y = data.data, data.target

In [3]:
# only classify class 1 vs. class 2

X = X[:, :]
idx = (Y!=0)
X = X[idx, :]
Y = Y[idx]

### Data normalization
Remember: normalization is done per feature, not all at once. Also, remember that Y is [1, 2] instead of [0, 1].

In [4]:
# Write your code here. Don't forget the bias!
X = (X - np.min(X)) / (np.max(X) - np.min(X))
Y = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))

In [5]:
X.shape[1]

4

In [6]:
Y

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

### Define classification model
Create parameters here. Initialize with zeros. In case you forgot: $Y = \sigma(X\Theta)$.

**Forward Propagation:**
$$Z = Xw + b$$
$$\hat{y} = \sigma(Z) =\sigma(Xw + b) $$
$$J(w, b) = -\frac{1}{m}\sum_{i=1}^m{ \Big( y^{(i)} log( \hat{y}^{(i)}) + (1-y^{(i)}) log(1 - \hat{y}^{(i)}) \Big)} \tag{5}$$

**and Backward**

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X^T(\hat{y}-y)\tag{6}$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (\hat{y}^{(i)}-y^{(i)})\tag{7}$$

In [7]:
# Initialize params
def initialize_params(X):
    '''Initialize w, b with zeros and return'''
    w = np.zeros((X.shape[1], 1))
    b = np.zeros((1, 1))
    return w, b
# Implement sigmoid
def sigmoid(Z):
    return 1/(1 + np.exp(-Z))
# Forward propagation
def forward(w, b, X):
    '''Return y_hat'''
    Z = np.dot(X, w) + b
    y_hat = sigmoid(Z)
    return y_hat
# Binary cross entropy loss
def binany_cross_entropy(y, y_hat):
    '''Calculate loss function J and return'''
    J = -np.mean(y*np.log(y_hat) + (1-y)*np.log(1-y_hat))
    return J


In [8]:
# Backward propagation
def backward(X, y, y_hat, w, b):
    '''Calculate dw, db and return'''
    m = X.shape[0]
    dw = (1/m) * np.dot(X.T, (y_hat - y))
    db = (1/m) * np.sum(y_hat - y, keepdims=True)
    return dw, db

# Update parameters
def update_params(w, b, dw, db, learning_rate):
    '''Update w, b and return'''
    w = w - learning_rate * dw
    b = b - learning_rate * db
    return w, b

In [9]:
def predict(w, b, X):
    '''Return predicted y of X'''
    y_hat = forward(w, b, X)
    return y_hat > 0.5

In [10]:
weight = ...

### Define evaluation metrics

In [11]:
def accuracy(X, Y, w,b):
    '''
    Evaluate the model, represented by `weight`, with data (X, Y).
    
    Input:
        X:      data features
        Y:      data labels
        weight: model weights
    Ouput:
        Model accuracy on input data.
    '''
    # implement your code here
    y_hat = predict(w, b, X)
    count_ = 0
    for i in range(X.shape[1]):
        if Y[i] == y_hat[i]:
            count += 1
    return count_ / X.shape[1]

### Hyperparameters

In [12]:
# learning rate
alpha = 1e-2
# epochs
epoch = 10000

In [13]:
a = np.array([1,2,3])
b = np.array([1,2,3])
np.linalg.norm(a-b)

0.0

In [17]:
# Training process
def train(X, y, iterations, learning_rate):
    '''Train w, b and return'''
    w, b = initialize_params(X)
    for i in range(iterations):
        y_hat = forward(w, b, X)
        J = binany_cross_entropy(y, y_hat)
        dw, db = backward(X, y, y_hat, w, b)
        w, b = update_params(w, b, dw, db, learning_rate)
        
    return w, b
w, b = train(X,Y,1000,1e-4)

In [19]:
w

array([[-0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
        -0.04878128, -0.04878128, -0.04878128, -0.04878128, -0.04878128,
         0.04878128,  0.04878128,  0.04878128,  0.04878128,  0.04878128,
         0.04878128,  0.04878128,  0.04878128,  0.04878128,  0.04878128,
         0.04878128,  0.04878128,  0.04878128,  0.04878128,  0.04878128,
         0.04878128,  0.04878128,  0.04878128,  0.0

In [28]:
weight = np.append(b,[0.04878128,0.01323187,0.02760426,0.00477494])



### Train the model.

In [29]:
# just to double check
assert np.allclose(weight, np.array([0.12047504, -0.44156746, -0.89309501, 2.965364, 3.3427994]))

AssertionError: 

In [30]:
# model accuracy
accuracy(X, Y, weight)

ValueError: shapes (5,) and (100,5) not aligned: 5 (dim 0) != 100 (dim 0)

### (Optional) How low can you go?
Do anything you want to get the best performance out of the training set. For once, let's overfit to your heart's content.

In [None]:
# do some fun code here and try to match this :)
...

In [None]:
print('Weights:', weight)
y = 1 / (1 + np.exp(-X @ weight))
loss = -np.sum(Y * np.log(y) + (1 - Y) * np.log(1 - y)) / y.size
print('Loss:', loss)
print('Accuracy:', accuracy(X, Y, weight))