<a href="https://colab.research.google.com/github/kcavatar/pml_workshops/blob/main/1_simple_logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://githubtocolab.com/PML-UCF/pml_workshops/blob/main/1_simple_logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/></a>

# Simple Logistic Regression

Loading dataset

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import pandas as pd
import ipywidgets as widgets
from sklearn.preprocessing import StandardScaler
import time
from IPython import display
%matplotlib inline

In [None]:
# Download the social_network_ads CSV from the workshop repository into a DataFrame.
dataset = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/PML-UCF/pml_workshops/main/data/social_network_ads.csv'
)
# Leaving the frame as the last expression makes the notebook render it.
dataset

In [None]:
# Features: the columns at positions 2 and 3, rescaled by StandardScaler.
# The fitted scaler stays available as `sc`.
sc = StandardScaler()
X = sc.fit_transform(dataset.iloc[:, [2, 3]].to_numpy())

# Targets: the column at position 4, reshaped to an (n_samples, 1) column vector.
Y = dataset.iloc[:, 4].to_numpy().reshape(-1, 1)

## Binary Logistic Regression

Let's consider:

$\hat{y}=p(y=1 \mid x)$

$\hat{y}$ is the probability that $y=1$, given $x$

$1-\hat{y}=p(y=0 \mid x)$

$\hat{y}=f(u)$, $u=x w^{T}+b$


## Loss function
$\operatorname{cost}\left(\hat{y}, y\right)= \begin{cases}-\log \left(\hat{y}\right) & \text { if } y=1 \\ -\log \left(1-\hat{y}\right) & \text { if } y=0\end{cases}$


## Simplified Loss Function
$\operatorname{Cost}\left(\hat{y}, y\right)=-y \log \left(\hat{y}\right)-(1-y) \log \left(1-\hat{y}\right)$



## Deriving Gradient

$z=w_{1} x_{1}+w_{2} x_{2}+b$

$\hat{y}=a=\sigma(z)$

$Loss \rightarrow L(\hat{y}, y)$

**For $w_{1}$**:

$\frac{\partial(L)}{\partial w_{1}}=\frac{\partial L}{\partial a} \cdot \frac{\partial a}{\partial z} \cdot \frac{\partial(z)}{\partial w_{1}}$

$\frac{\partial L}{\partial a}=\frac{\partial}{\partial a}(-y \log a-(1-y) \log (1-a))$

$=-y\left(\frac{1}{a}\right)-(-1) \frac{(1-y)}{(1-a)}$

$\frac{\partial L}{\partial a}=\left(\frac{-y}{a}\right)+\left(\frac{1-y}{1-a}\right)$

$\frac{\partial a}{\partial z}=a(1-a)$

$\frac{\partial z}{\partial w_{1}}=x_{1}$

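Multiplying the three factors, the denominators $a$ and $(1-a)$ cancel:

$\left(\frac{-y}{a}+\frac{1-y}{1-a}\right) \cdot a(1-a)=-y(1-a)+a(1-y)=a-y$

Then: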

$\frac{\partial(L)}{\partial w_{1}}=(a-y) \cdot x_{1}$,

and

$\frac{\partial(L)}{\partial b}=(a-y)$

In [None]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    The direct formula computes ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here the same quantity is
    written as ``exp(-log(1 + exp(-z)))`` and the inner logarithm is taken
    with ``np.logaddexp(0, -z)``, which never forms the huge intermediate.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        The element-wise sigmoid of ``z``, with values in [0, 1].
    """
    # log(1 + exp(-z)) == logaddexp(0, -z)
    return np.exp(-np.logaddexp(0, -z))

def d_sigmoid(a):
    """Sigmoid derivative expressed through the sigmoid's own output ``a``.

    Args:
        a: Scalar or array holding sigmoid outputs.

    Returns:
        ``a * (1 - a)``, element-wise.
    """
    complement = 1 - a
    return a * complement

def loss(weights, bias, x, y):
    """Binary cross-entropy of the model on ``(x, y)``, averaged over the rows of ``x``.

    Predictions are clipped away from exactly 0 and 1 before taking logs.
    Without the clip a saturated prediction gives ``log(0) = -inf``, and the
    matching ``0 * -inf`` term turns the whole loss into ``nan``.

    Args:
        weights: Weight array passed through to ``forward``.
        bias: Bias passed through to ``forward``.
        x: Input samples, one per row.
        y: Targets (0 or 1) aligned with the rows of ``x``.

    Returns:
        The summed cross-entropy divided by ``x.shape[0]``.
    """
    eps = 1e-12  # keeps both log arguments strictly positive
    a = np.clip(forward(weights, bias, x), eps, 1 - eps)
    log_likelihood = np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))
    return (-1 / x.shape[0]) * log_likelihood

def d_loss(a, y):
    """Derivative of the binary cross-entropy with respect to the prediction ``a``.

    Args:
        a: Predicted probability (scalar or array).
        y: Target value(s) matching ``a``.

    Returns:
        ``(1 - y) / (1 - a) - y / a``, element-wise.
    """
    negative_class_term = (1 - y) / (1 - a)
    positive_class_term = y / a
    return negative_class_term - positive_class_term

def forward(weights, bias, x):
    """Model prediction: sigmoid of the affine map ``x . weights^T + bias``.

    Args:
        weights: Weight array; its transpose is right-multiplied onto ``x``.
        bias: Offset added to the linear term.
        x: Input samples, one per row.

    Returns:
        ``sigmoid(x.dot(weights.T) + bias)``.
    """
    linear_term = x.dot(weights.T)
    return sigmoid(linear_term + bias)

def backward(a, y):
    """Gradient of the loss with respect to the pre-activation ``z``.

    This is the closed form of ``d_loss(a, y) * d_sigmoid(a)``: the chain-rule
    product collapses to the prediction error.

    Args:
        a: Predictions from the forward pass.
        y: Targets with the same shape as ``a``.

    Returns:
        ``a - y``.
    """
    return a - y

In [None]:
# Evaluation grid for the decision surface: for each of the two feature columns,
# span [min - 1, max + 1) in steps of 0.1, then take the 2-D mesh of both axes.
X1, X2 = np.meshgrid(*(
    np.arange(start=X[:, col].min() - 1, stop=X[:, col].max() + 1, step=0.1)
    for col in (0, 1)
))
def plot(loss, pred, fig, ax1, ax2):
    """Redraw the loss curve and the decision surface, then refresh the shown figure.

    Reads the notebook-level names ``X``, ``Y``, ``X1``, ``X2`` and ``epochs``.

    Args:
        loss: Sequence of loss values, drawn on ``ax1``.
        pred: Model output on the ``X1``/``X2`` grid, with the grid's shape.
        fig: Figure owning both axes; pushed to the display with id 1.
        ax1: Axes for the loss curve.
        ax2: Axes for the filled contour and the data points.
    """
    class_colors = ListedColormap(('red', 'green'))

    # Start from blank axes so repeated calls do not stack artists.
    for axis in (ax1, ax2):
        axis.clear()

    # Left panel: loss history, x-axis fixed to the full epoch range.
    ax1.grid(True)
    ax1.plot(loss)
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Loss')
    ax1.set_xlim(0, epochs)

    # Right panel: predicted surface over the grid...
    ax2.contourf(X1, X2, pred, alpha=0.50, cmap=class_colors)
    ax2.set_xlim(X1.min(), X1.max())
    ax2.set_ylim(X2.min(), X2.max())

    # ...with the samples on top, one scatter call (and legend entry) per class.
    targets = Y[:, 0]
    for idx, cls in enumerate(np.unique(Y)):
        members = targets == cls
        ax2.scatter(X[members, 0], X[members, 1],
                    color=class_colors(idx), label=cls)

    ax2.set_xlabel('Age')
    ax2.set_ylabel('Estimated Salary')
    ax2.legend()

    display.update_display(fig, display_id=1)

## Training loop

**Update model parameters in batches:**

$w = w - \alpha \cdot \frac{\partial(L)}{\partial w}$

and

$b = b - \alpha \cdot \frac{\partial(L)}{\partial b}$

where $\alpha$ is the learning rate.

In [None]:
# Parameters: random initial weights of shape (1, n_features) and a zero bias.
weights = np.random.randn(1, X.shape[1])
bias = np.zeros((1, 1))

history = {'loss':[]}

# Hyper-parameters
lr=0.01
batch_size=4
epochs=20

# One figure, registered under display id 1 so plot() can refresh it in place.
fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(10, 4))
display.display(fig,display_id=1)

for ep in range(epochs):
    # Visit the samples in a fresh random order every epoch.
    shuffled_indices = np.random.permutation(X.shape[0])
    x_shuffled = X[shuffled_indices]
    y_shuffled = Y[shuffled_indices]

    # SGD with mini batches
    for i in range(0, X.shape[0], batch_size):
        xi = x_shuffled[i:i+batch_size]
        yi = y_shuffled[i:i+batch_size]

        a = forward(weights, bias, xi)
        gradient = backward(a, yi)

        # Average over the rows actually in this batch. The last slice is
        # shorter than batch_size whenever the sample count is not a multiple
        # of it, and the bias update below already averages with .mean().
        weights = weights - lr * (gradient.T @ xi) / xi.shape[0]
        bias = bias - lr * gradient.mean()

    # Record the full-dataset loss once per epoch.
    history['loss'].append(loss(weights, bias, x_shuffled, y_shuffled))

    # Evaluate the model on the plotting grid and refresh the figure.
    pred = forward(weights, bias, np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape)
    plot(history['loss'], pred, fig, ax1, ax2)
    time.sleep(1)
display.clear_output(wait=True)

 _______ 

## Using Tensorflow Automatic Differentiation and Gradients

In [None]:
import tensorflow as tf

def tf_sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))`` on a tensor.

    Delegates to TensorFlow's built-in sigmoid instead of spelling the formula
    out, because the literal form computes ``exp(-z)``, which overflows for
    large negative ``z``.

    Args:
        z: Tensor (or array-like) of pre-activations.

    Returns:
        Tensor of the same shape with the sigmoid applied.
    """
    return tf.math.sigmoid(z)

def tf_forward(w, b, x):
    """Model prediction in TensorFlow: ``tf_sigmoid(x @ w + b)``.

    Args:
        w: Weights, right-multiplied onto ``x``.
        b: Bias added to the product.
        x: Input samples, one per row.

    Returns:
        The sigmoid of the affine term.
    """
    logits = x @ w + b
    return tf_sigmoid(logits)

def tf_loss(a, y):
    """Binary cross-entropy between predictions ``a`` and targets ``y``.

    Predictions are clipped away from exactly 0 and 1 before the logs are
    taken; otherwise a saturated prediction gives ``log(0) = -inf`` and the
    loss (and every gradient derived from it) becomes NaN.

    Args:
        a: Predicted probabilities, one row per sample.
        y: Targets (0 or 1) with the same shape as ``a``.

    Returns:
        Scalar tensor: the summed cross-entropy divided by ``a.shape[0]``.
    """
    eps = 1e-7  # keeps both log arguments strictly positive
    a = tf.clip_by_value(a, eps, 1 - eps)
    log_likelihood = tf.reduce_sum((y*tf.math.log(a)) + ((1-y)*(tf.math.log(1-a))))
    return (-1/a.shape[0])*log_likelihood

In [None]:
# Trainable parameters: zero weights of shape (n_features, 1) and a random bias.
w = tf.Variable(tf.zeros((X.shape[1], 1)), name='w')
b = tf.Variable(tf.random.uniform((1, 1)), name='b')

history = {'loss':[]}
lr=0.01
batch_size=4
epochs=20

for ep in range(epochs):
    # Visit the samples in a fresh random order every epoch.
    shuffled_indices = np.random.permutation(X.shape[0])
    x_shuffled = X[shuffled_indices]
    y_shuffled = Y[shuffled_indices]

    # SGD with mini batches
    for i in range(0, X.shape[0], batch_size):
        xi = x_shuffled[i:i+batch_size]
        yi = y_shuffled[i:i+batch_size]

        # Computing gradients with tf.GradientTape. w and b stay tf.Variables
        # (see the in-place updates below), so the tape tracks them
        # automatically, and a single gradient() call needs no persistent tape.
        with tf.GradientTape() as tape:
            a = tf_forward(w, b, xi)
            l = tf_loss(a, yi)

        dl_dw, dl_db = tape.gradient(l, [w, b])

        # Update in place. `w = w - ...` would rebind the names to plain
        # tensors and silently discard the Variables created above.
        w.assign_sub(lr * dl_dw)
        b.assign_sub(lr * dl_db)

    # Record the full-dataset loss once per epoch.
    history['loss'].append(tf_loss(tf_forward(w, b, x_shuffled), y_shuffled).numpy())


In [None]:
# Score every grid point (one row per point, columns = the two features),
# then fold the predictions back into the grid's shape for contour plotting.
pred = tf_forward(
    w,
    b,
    np.column_stack((X1.ravel(), X2.ravel())),
).numpy().reshape(X1.shape)

# Fresh two-panel figure: loss history on the left, decision surface on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
plot(history['loss'], pred, fig, ax1, ax2)

_________

## Using Tensorflow Model and Layers API

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Logistic regression as a one-unit dense layer with a sigmoid activation.
# Sequential expects an iterable of layers; passing a bare layer object is not
# part of the documented API and fails on implementations that iterate `layers`.
model = Sequential([
    Dense(1, activation='sigmoid')
])

# Same optimiser family and loss as the hand-written versions above.
model.compile(optimizer='SGD', loss='binary_crossentropy')
history = model.fit(X,Y, batch_size=4, epochs=20)

In [None]:
# Score every grid point (one row per point, columns = the two features),
# then fold the predictions back into the grid's shape for contour plotting.
pred = model.predict(
    np.column_stack((X1.ravel(), X2.ravel()))
).reshape(X1.shape)

# Fresh two-panel figure: per-epoch loss recorded by fit() on the left,
# decision surface on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
plot(history.history['loss'], pred, fig, ax1, ax2)