### Logistic Regression

AI Bootcamp 2022 Winter

Dates: 2022-2-7

Author: Yung-Kyun Noh

Department of Computer Science, Hanyang University


In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def draw_state(wval, bval, title_str='Data'):
    """Scatter the two training classes and overlay a linear boundary.

    The boundary drawn is the line wval[0]*x1 + wval[1]*x2 - bval = 0.
    The points come from the module-level arrays ``data1`` and ``data2``.

    Args:
        wval: indexable with two entries, the weights for x1 and x2.
        bval: offset of the boundary (the ``b`` in ``w.x - b``).
        title_str: title placed above the plot.
    """
    _, axes = plt.subplots()

    # One scatter call per class, each with its own marker style.
    per_class = (
        (data1, dict(label='class 1', c='red', alpha=.3)),
        (data2, dict(label='class 2', marker='^', c='blue', alpha=.3)),
    )
    for points, style in per_class:
        axes.scatter(points[:, 0], points[:, 1], **style)
    axes.set_title(title_str)
    axes.legend()

    # Remember the limits chosen for the scatter so the boundary line
    # cannot stretch the view.
    x_lo, x_hi, y_lo, y_hi = axes.axis()
    grid_x1 = np.arange(x_lo, x_hi, 0.1)
    # Solve w0*x1 + w1*x2 - b = 0 for x2.
    boundary_x2 = (-wval[0]*grid_x1 + bval)/wval[1]
    axes.plot(grid_x1, boundary_x2, 'k')
    axes.axis([x_lo, x_hi, y_lo, y_hi])
    axes.grid()


In [None]:
def logistic_function(t):
    """Apply the logistic (sigmoid) function 1 / (1 + exp(-t)) element-wise.

    Example: logistic_function(np.array([0, 1, 2]))
    """
    denominator = 1 + np.exp(-t)
    return 1/denominator

In [None]:
def get_loss(labels, fs):
    """Return the Bernoulli log-likelihood of ``labels`` under predictions ``fs``.

    Computes sum(labels*log(fs) + (1 - labels)*log(1 - fs)). Despite the
    name this is a quantity to be *maximised* (it is <= 0 and equals 0 for
    perfect predictions); no sign flip is applied.

    Args:
        labels: array of 0/1 targets.
        fs: array of predicted probabilities, same shape as ``labels``.

    Returns:
        The summed log-likelihood as a scalar.
    """
    # Keep predictions strictly inside (0, 1): a saturated sigmoid output of
    # exactly 0 or 1 would otherwise produce log(0) = -inf and 0*(-inf) = nan.
    eps = 1e-12
    fs = np.clip(fs, eps, 1 - eps)
    loss_val = np.sum(labels*np.log(fs) + (1 - labels)*np.log(1 - fs))
    return loss_val

### Data generation

In [None]:
# generate two Gaussians (class 1 & class 2)
# Both classes use the same covariance values and differ only in their mean.
dim = 2

# class means and covariances
mean1 = np.array([0, 0])
mean2 = np.array([1, -.5])
cov1 = np.array([[.1, .02],
                 [.02, .1]])
cov2 = np.array([[.1, .02],
                 [.02, .1]])

# training set: 50 points per class
datanum1 = 50
datanum2 = 50
data1 = np.random.multivariate_normal(mean1, cov1, size=datanum1)
data2 = np.random.multivariate_normal(mean2, cov2, size=datanum2)

# test set: 100 points per class, drawn from the same distributions
tstdatanum1 = 100
tstdatanum2 = 100
tstdata1 = np.random.multivariate_normal(mean1, cov1, size=tstdatanum1)
tstdata2 = np.random.multivariate_normal(mean2, cov2, size=tstdatanum2)


### Optimal Bayes classifier

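When both classes are generated by Gaussians with the same covariance matrix $\Sigma$ (and equal class priors), the Bayes-optimal decision boundary is linear and has a closed form: $\mathbf{w} = \Sigma^{-1}(\boldsymbol{\mu}_1 - \boldsymbol{\mu}_2)$ and $b = \mathbf{w}^\top(\boldsymbol{\mu}_1 + \boldsymbol{\mu}_2)/2$, with class 1 predicted when $\mathbf{w}^\top\mathbf{x} - b > 0$.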

In [None]:
# optimal linear classifier
# Direction: mean difference times the inverse covariance.
# Offset: chosen so the boundary optw.x - optb = 0 passes through the
# midpoint of the two means.
optw = (mean1 - mean2) @ np.linalg.inv(cov1)
optb = optw @ ((mean1 + mean2)/2)
print(optw, optb)

draw_state(optw, optb, 'Data and optimal boundary')

### Learning

In [None]:
# Initialize parameters with standard-normal draws
w_init = np.random.normal(0, 1, dim)
b_init = np.random.normal(0, 1, 1)

# extended w: [w, -b], stored as a single row so that extX @ extw.T = w.x - b
extw = np.concatenate((w_init, -b_init))[np.newaxis, :]

# data with '1' is appended: [X, 1]
extX = np.concatenate(
    (
        np.concatenate((data1, data2), axis=0),
        np.ones([datanum1 + datanum2, 1]),
    ),
    axis=1,
)

# label of class 1: 1, label of class 2: 0
labels = np.concatenate((np.ones(datanum1), np.zeros(datanum2)))


In [None]:
def update_w(extw, extX, labels, num_update=100, step_size=0.001, reg_const=1):
    """Run gradient-ascent updates of the logistic-regression weights.

    Each step moves ``extw`` along the gradient of the log-likelihood minus
    ``reg_const * extw`` (the penalty acts on every entry of ``extw``,
    including the bias slot). The boundary is plotted before and after the
    updates, followed by a plot of the per-step ``get_loss`` values. Those
    values are computed from the predictions made before each step and do
    not include the penalty term.

    Args:
        extw: extended weights [w, -b] as a single row.
        extX: extended data [X, 1], one sample per row.
        labels: 0/1 target per sample.
        num_update: number of update steps.
        step_size: step length of each update.
        reg_const: weight of the shrinkage term.

    Returns:
        The updated extended weights. The array passed in is not modified,
        because every step builds a new array.
    """
    draw_state(extw[0, 0:2], -extw[0, 2], 'Before update')

    label_row = np.array([labels])  # row vector, lines up with probs.T
    objective_history = []
    for _ in range(num_update):
        scores = np.matmul(extX, extw.T)  # w^TX
        probs = logistic_function(scores)
        objective_history.append(get_loss(labels, probs.T[0]))
        ascent_direction = np.matmul(label_row - probs.T, extX) - reg_const*extw
        extw = extw + step_size*ascent_direction

    draw_state(extw[0, 0:2], -extw[0, 2], 'Updated boundary')

    _, history_ax = plt.subplots()
    history_ax.plot(objective_history)
    history_ax.set_title("objective function")

    return extw


In [None]:
# First training run: 500 update steps with the shrinkage term switched off (reg_const=0)
extw = update_w(extw, extX, labels, num_update=500, step_size=0.001, reg_const=0)

In [None]:
# Additional 500 updates, continuing from the current extw and now using
# update_w's default step_size (0.001) and reg_const (1)
extw = update_w(extw, extX, labels, num_update=500)

#### Evaluation

In [None]:
# data with '1' is appended: [X, 1]
extTstX = np.concatenate(
    (
        np.concatenate((tstdata1, tstdata2), axis=0),
        np.ones([tstdatanum1 + tstdatanum2, 1]),
    ),
    axis=1,
)
# label of class 1: 1, label of class 2: 0
TstLabels = np.concatenate((np.ones(tstdatanum1), np.zeros(tstdatanum2)))

ts = extTstX @ extw.T  # w^TX
# A positive score predicts class 1; |prediction - label| is 1 exactly
# where the prediction is wrong, so the sum counts the mistakes.
err_rate = (
    np.sum(np.abs((ts.T > 0) - np.array([TstLabels])))
    / (tstdatanum1 + tstdatanum2)
)
print(err_rate)

### Learning using Keras

In [None]:
import tensorflow as tf

from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras as keras

# Stack the two classes row-wise; label of class 1: 1, label of class 2: 0
x_train = np.concatenate([data1, data2])
y_train = np.concatenate([np.ones(datanum1), np.zeros(datanum2)])
x_test = np.concatenate([tstdata1, tstdata2])
y_test = np.concatenate([np.ones(tstdatanum1), np.zeros(tstdatanum2)])


## Keras implementation

In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras as keras

In [None]:
# Stack the two classes row-wise; label of class 1: 1, label of class 2: 0
x_train = np.concatenate([data1, data2])
y_train = np.concatenate([np.ones(datanum1), np.zeros(datanum2)])
x_test = np.concatenate([tstdata1, tstdata2])
y_test = np.concatenate([np.ones(tstdatanum1), np.zeros(tstdatanum2)])


In [None]:
# Logistic regression as a one-unit network: a single sigmoid Dense unit on
# the 2-D input, with an L2 penalty on the kernel.
# caution: the activation must be 'sigmoid', not 'softmax' or 'relu'
model = Sequential([
    Dense(
        units=1,
        activation='sigmoid',
        input_shape=(2,),
        kernel_regularizer=keras.regularizers.l2(0.1),
    ),
])
model.summary()

# No optimizer is passed, so the Keras default is used.
model.compile(loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train for 500 epochs. The test split is passed as validation data so that
# the returned history also carries a 'val_loss' curve.
history = model.fit(x_train, y_train,
                    epochs=500,
                    verbose=1,
                    validation_data=(x_test, y_test))


In [None]:
# Training and test loss per epoch, drawn on shared axes
fig, ax = plt.subplots()
for curve_key, curve_fmt, curve_label in (
    ('loss', 'b', 'train loss'),
    ('val_loss', 'r', 'test loss'),
):
    ax.plot(history.history[curve_key], curve_fmt, label=curve_label)
ax.legend()

In [None]:
# Show the learned kernel (w) and bias (b) variables of the Dense layer.
print('w param: \n', model.weights[0], '\n\nb param: \n', model.weights[1])

# draw_state plots the line w.x - b = 0, while the Dense layer uses w.x + b,
# so the bias is negated before it is passed in.
draw_state(model.weights[0].numpy(), -model.weights[1].numpy(), 'keras boundary')


### Watch derivatives

Example: calculate the derivative of a function with respect to a vector
\begin{eqnarray}
f(\mathbf{w}) &=& \mathbf{w}^\top \mathbf{x} \in\mathbb{R} \quad \text{for} \quad \mathbf{w}, \mathbf{x} \in \mathbb{R}^2 \\
&& \text{then} \quad \frac{df}{d\mathbf{w}} = ?
\end{eqnarray}

In [None]:
import tensorflow as tf

In [None]:
# Column vectors x = (2, 3)^T and w = (0, 0)^T
x = tf.Variable(tf.constant([[2], [3]], dtype='float'))
w = tf.Variable(tf.constant([[0], [0]], dtype='float'))

# Record f = w^T x on the tape so it can be differentiated afterwards.
with tf.GradientTape() as tape:
    f = tf.transpose(w) @ x

# d(w^T x)/dw should equal x; print both side by side for comparison.
df_dw = tape.gradient(f, w)
print('derivative=', df_dw.numpy().T, '\nx=', x.numpy().T)

### Taking gradients of your parameters

In [None]:
# gradient of outputs

# A single 2-D input point.
x = np.array([[1,0]])
with tf.GradientTape() as tape:
    # The Dense layer creates its weights on the first call, which happens
    # inside the tape context.
    model = Sequential([
        Dense(units=1, activation='sigmoid')
    ])
    fx = model(x)

df_dw = tape.gradient(fx, model.weights)   # Derivative of (f at x) w.r.t. weights at the current weights
# Print the output computed in this cell (fx); the name `f` belongs to the
# earlier w^T x example and would show a stale, unrelated value.
print('f=', fx)
print('\ndf/dw=\n', df_dw[0].numpy(), ' for w,\nand\n', df_dw[1].numpy(), 'for b')

In [None]:
# gradient of losses
# gradient for trainable_variables

layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
x = x_train

with tf.GradientTape() as tape:
    # Forward pass (the layer's weights are initialized on this first call)
    y = layer(x)
    # Mean squared difference between the outputs and the 0/1 labels
    residual = tf.transpose(y) - y_train
    loss = tf.reduce_mean(residual**2)

# Gradients with respect to every trainable variable of the layer
grad = tape.gradient(loss, layer.trainable_variables)

print('loss:', loss)
print('\ngrad for w:\n', grad[0]) # first entry: kernel (w)
print('\ngrad for b:\n', grad[1]) # second entry: bias (b)


In [None]:
# Logistic regression learning

def tf_get_loss(labels, fs):
    """Return the summed binary cross-entropy of predictions ``fs``.

    Computes -sum(labels*log(fs) + (1 - labels)*log(1 - fs)), i.e. the
    negative log-likelihood, which is to be minimised.

    Args:
        labels: 0/1 targets.
        fs: predicted probabilities (a TensorFlow tensor), one per label.

    Returns:
        A scalar tensor holding the loss.
    """
    # Keep predictions strictly inside (0, 1) so that a saturated sigmoid
    # output cannot produce log(0) and turn the loss into inf/nan.
    eps = 1e-7
    fs = tf.clip_by_value(fs, eps, 1 - eps)
    loss_val = -1*tf.math.reduce_sum(labels*tf.math.log(fs) + (1 - labels)*tf.math.log(1 - fs))
    return loss_val

# Hand-written gradient descent on the L2-penalised cross-entropy of a
# single sigmoid unit.
layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
n_iteration = 500
step_size = 0.001
reg_const = 0.05
for iiter in range(n_iteration):
    with tf.GradientTape() as tape:
        ys = layer(x_train)   # weights are initialized at first call
        loss = tf_get_loss(y_train, tf.transpose(ys)[0])

    # Calculate gradients with respect to every trainable variable
    grad = tape.gradient(loss, layer.trainable_variables)
    if iiter == 0:
        print('weights at start:\nw:\n', layer.weights[0].numpy(), '\nb:\n', layer.weights[1].numpy(), '\n')

    # Update the weights in our linear layer.
    # The penalty gradient (reg_const * w) is part of the descent direction and
    # is therefore scaled by step_size together with the data gradient, which
    # is the same convention update_w uses. The bias is left unpenalised.
    layer.weights[0].assign(layer.weights[0] - step_size*(grad[0] + reg_const*layer.weights[0]))
    layer.weights[1].assign(layer.weights[1] - step_size*grad[1])
    if iiter % 100 == 0:
        print('loss:', loss.numpy())

print('\nweights at finish:\nw:\n', layer.weights[0].numpy(),'\nb:\n', layer.weights[1].numpy())


In [None]:
# Show the kernel (w) and bias (b) of the hand-trained Dense layer.
print('w param: \n', layer.weights[0].numpy(), '\n\nb param: \n', layer.weights[1].numpy())

# draw_state plots the line w.x - b = 0, while the Dense layer uses w.x + b,
# so the bias is negated before it is passed in.
draw_state(layer.weights[0].numpy(), -layer.weights[1].numpy(), 'keras boundary')


### Multilayer Neural Networks

In [None]:
# Two hidden ReLU layers of 512 units each on the 2-D input, followed by a
# single sigmoid output unit.
model = Sequential([
    Dense(units=512, activation='relu', input_shape=(2,)),
    Dense(units=512, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

model.summary()

In [None]:
# Binary cross-entropy loss with accuracy reported as a metric; no optimizer
# is passed, so the Keras default is used.
model.compile(loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 500 epochs. The test split is passed as validation data so that
# the returned history also carries a 'val_loss' curve.
history = model.fit(x_train, y_train,
                    epochs=500,
                    verbose=1,
                    validation_data=(x_test, y_test))


In [None]:
# Training and test loss per epoch, drawn on shared axes
fig, ax = plt.subplots()
for curve_key, curve_fmt, curve_label in (
    ('loss', 'b', 'train loss'),
    ('val_loss', 'r', 'test loss'),
):
    ax.plot(history.history[curve_key], curve_fmt, label=curve_label)
ax.legend()