# Assignment: Logistic Regression

Katarina Chiam (1004908996)

In [1]:
#google drive mount to colab statement
import os
from google.colab import drive

# Mount Google Drive into the Colab VM, then make the assignment folder the
# working directory so relative paths (e.g. 'notMNIST.npz' in loadData) resolve there.
drive.mount('/content/drive')
# NOTE(review): absolute, account-specific path — adjust when running elsewhere.
root = '/content/drive/My Drive/_Y3/ECE421_W/A1'
os.chdir(root)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
%tensorflow_version 1.x
#starter code given below

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def loadData():
    """Load notMNIST.npz and build the binary train/validation/test split.

    Only samples labelled 2 (relabelled 1) or 9 (relabelled 0) are kept.
    Pixel values are divided by 255, the samples are shuffled with the global
    NumPy seed fixed to 421, and the result is cut at indices 3500 and 3600.

    Returns:
        (trainData, validData, testData, trainTarget, validTarget, testTarget);
        each target array is a column vector of shape (n, 1).
    """
    positive_label, negative_label = 2, 9
    train_end, valid_end = 3500, 3600

    with np.load('notMNIST.npz') as archive:
        images = archive['images']
        labels = archive['labels']

        # Keep only the two classes of interest.
        keep = (labels == positive_label) | (labels == negative_label)
        images = images[keep] / 255.
        labels = labels[keep].reshape(-1, 1)

        # Relabel in place: positive class -> 1, negative class -> 0.
        labels[labels == positive_label] = 1
        labels[labels == negative_label] = 0

        # Deterministic shuffle (this reseeds the global NumPy RNG).
        np.random.seed(421)
        order = np.arange(len(images))
        np.random.shuffle(order)
        images = images[order]
        labels = labels[order]

        trainData, validData, testData = (
            images[:train_end], images[train_end:valid_end], images[valid_end:])
        trainTarget, validTarget, testTarget = (
            labels[:train_end], labels[train_end:valid_end], labels[valid_end:])
    return trainData, validData, testData, trainTarget, validTarget, testTarget

In [64]:
def loss(W, b, x, y, reg):
    """Mean binary cross-entropy of a logistic model plus an L2 weight penalty.

    Args:
        W: weights with one entry per input feature (any shape; flattened here).
        b: scalar bias.
        x: inputs of shape (N, ...); everything after the first axis is
            flattened, so (N, 28, 28) images and (N, 784) rows both work.
        y: 0/1 targets with N entries (typically shape (N, 1)).
        reg: L2 regularisation strength (lambda).

    Returns:
        Scalar: (1/N) * sum(cross-entropy) + reg/2 * ||W||^2.
    """
    N = x.shape[0]
    W_re = W.reshape(-1, 1)
    x_re = x.reshape(N, W_re.shape[0])
    y_re = np.reshape(y, (N, 1))
    z = np.matmul(x_re, W_re) + b
    # -y*log(s(z)) - (1-y)*log(1-s(z)) equals log(1 + e^z) - y*z.
    # logaddexp evaluates that without overflowing or taking log(0), which the
    # explicit sigmoid form does as soon as the sigmoid saturates.
    CE = np.sum(np.logaddexp(0, z) - y_re * z)
    reg_loss = reg / 2 * np.sum(W_re * W_re)
    return CE / N + reg_loss

def sigmoid(z):
  """Numerically stable logistic function 1 / (1 + exp(-z)).

  Args:
      z: scalar or array-like of logits.

  Returns:
      Element-wise sigmoid; a NumPy scalar for scalar input, otherwise an
      array with the same shape as ``z``.
  """
  z_arr = np.asarray(z)
  if not np.issubdtype(z_arr.dtype, np.floating):
    z_arr = z_arr.astype(float)
  # exp(-|z|) lies in (0, 1], so neither branch can overflow; the naive
  # np.e**(-z) overflows (and raises OverflowError for Python floats) when z
  # is very negative.
  decay = np.exp(-np.abs(z_arr))
  out = np.where(z_arr >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  return out[()] if out.ndim == 0 else out

In [65]:
def grad_loss(W, b, x, y, reg):
    """Gradient of the regularised cross-entropy loss w.r.t. weights and bias.

    Args:
        W: weights with one entry per input feature (any shape; flattened here).
        b: scalar bias.
        x: inputs of shape (N, ...); trailing axes are flattened.
        y: 0/1 targets with N entries (typically shape (N, 1)).
        reg: L2 regularisation strength (lambda).

    Returns:
        (grad_W, grad_b): grad_W has the same shape as ``W`` so that
        ``W - alpha * grad_W`` never broadcasts into a matrix; grad_b is a scalar.
    """
    N = x.shape[0]
    W_re = W.reshape(-1, 1)
    x_re = x.reshape(N, W_re.shape[0])
    y_re = np.reshape(y, (N, 1))
    residual = sigmoid(np.matmul(x_re, W_re) + b) - y_re      # shape (N, 1)
    # x^T (y_hat - y) / N is the batch-mean gradient; computing it as a matmul
    # avoids materialising an N x D per-sample gradient matrix.
    grad_W = np.matmul(x_re.T, residual) / N + reg * W_re     # shape (D, 1)
    grad_b = np.mean(residual)
    return grad_W.reshape(W.shape), grad_b

In [66]:
def acc(W, b, x, y):
  """Classification accuracy of the logistic model on image-shaped inputs.

  ``x`` is indexed as (N, H, W) and flattened; a sample is predicted positive
  when its sigmoid activation exceeds 0.5. Returns the fraction of
  predictions equal to ``y``.
  """
  n_features = x.shape[1] * x.shape[2]
  weights_col = W.reshape((n_features, 1))
  flat_inputs = x.reshape((x.shape[0], n_features))
  activations = sigmoid(np.matmul(flat_inputs, weights_col) + b)
  predicted = np.where(activations > 0.5, 1, 0)
  n_correct = (predicted == y).sum()
  return n_correct / y.shape[0]

In [68]:
def grad_descent(W, b, x, y, alpha, epochs, reg, error_tol):
    """Batch gradient descent for regularised logistic regression.

    Trains on the ``x``/``y`` passed in (previously these were ignored in
    favour of a freshly loaded training split). The validation split from
    loadData() is used only for the monitoring curves.

    Args:
        W: initial weights.
        b: initial scalar bias.
        x: training inputs.
        y: training targets.
        alpha: learning rate.
        epochs: maximum number of full-batch updates.
        reg: L2 regularisation strength (lambda).
        error_tol: stop once the norm of the weight update ||W_new - W_old||
            falls below this value.

    Returns:
        (W, b) after the final update. As a side effect, plots the
        training/validation loss and accuracy curves.
    """
    _, validData, _, _, validTarget, _ = loadData()
    trainLoss, validLoss = [], []
    trainAcc, validAcc = [], []
    W_new, b_new = W, b
    for _ in range(epochs):
        # Metrics are logged for the parameters *before* this epoch's update.
        trainLoss.append(loss(W_new, b_new, x, y, reg))
        validLoss.append(loss(W_new, b_new, validData, validTarget, reg))
        trainAcc.append(acc(W_new, b_new, x, y))
        validAcc.append(acc(W_new, b_new, validData, validTarget))

        grad_W, grad_b = grad_loss(W_new, b_new, x, y, reg)
        W_old = W_new
        # Align the gradient with W's shape so the subtraction cannot broadcast.
        W_new = W_old - alpha * np.reshape(grad_W, np.shape(W_old))
        b_new = b_new - alpha * grad_b

        # Norm of the difference, not difference of the norms: the weights can
        # move a long way while their norm stays constant.
        if np.linalg.norm(W_new - W_old) < error_tol:
            break
    plot(trainLoss, validLoss, 1, reg) #note: just for plotting purposes
    plot(trainAcc, validAcc, 2, reg) #note: see plot function Appendix A1
    return (W_new, b_new)

def plot(train, valid, fignum, reg, alpha=0.005):
  """Plot a training curve and a validation curve against epoch index.

  Args:
      train: per-epoch values for the training set.
      valid: per-epoch values for the validation set.
      fignum: matplotlib figure number; 1 labels the figure as a loss plot,
          2 as an accuracy plot. Any other value leaves the y-label and title unset.
      reg: regularisation strength shown in the title.
      alpha: learning rate shown in the title. Defaults to 0.005, the value
          that used to be hard-coded in the title text.

  Returns:
      True, after the figure has been shown.
  """
  plt.figure(fignum)
  plot1, = plt.plot(train)
  plot2, = plt.plot(valid)
  plt.xlabel('Epochs')
  if fignum == 1:
    plt.ylabel('Loss')
    plt.title('Loss vs. Epochs with Learning Rate = {} and λ = {:1.3f}'.format(alpha, reg))
  elif fignum == 2:
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Epochs with Learning Rate = {} and λ = {:1.3f}'.format(alpha, reg))
  plt.legend([plot1,plot2], ['Training', 'Validation'])
  plt.show()
  return True

In [None]:
# Part 1 driver: train the NumPy logistic-regression model with batch gradient descent.
np.random.seed(421)

# One weight per pixel of a flattened 28x28 image.
W = np.random.normal(loc = 0, scale = 0.5, size = 784)
b = 0

trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()

x = trainData
y = trainTarget
alpha = 0.005
epochs = 5000
reg = 0
# Was `10^-7`: in Python `^` is bitwise XOR, so that expression evaluates to -13
# and the convergence check could never succeed.
error_tol = 1e-7

grad_descent(W, b, x, y, alpha, epochs, reg, error_tol)

# Part 2: Logistic Regression in TensorFlow

In [57]:
def buildGraph(alpha = 0.001, beta_1 = None, beta_2 = None, e = None, batch_size = 500, epochs = 700, reg = 0):
  """Build the TF1 logistic-regression graph, train it via SGD(), and return its handles.

  Args:
      alpha: Adam learning rate.
      beta_1, beta_2, e: Adam ``beta1`` / ``beta2`` / ``epsilon``. ``None``
          leaves the optimizer's own default in place. These were previously
          accepted but never passed to the optimizer, so sweeping them had
          no effect.
      batch_size: mini-batch size; also fixes the training placeholder shape.
      epochs: number of passes over the training data.
      reg: L2 regularisation strength.

  Returns:
      (W, bias, y_hat, y, loss, optimizer) graph nodes for the training branch.
  """
  #weights and bias
  W = tf.Variable(tf.truncated_normal(shape=(784, 1), mean=0.0, stddev=0.5))
  bias = tf.Variable(0.00)

  #variables for input - data, labels and reg
  x = tf.placeholder(tf.float32, shape = (batch_size, 784), name = 'x')
  y = tf.placeholder(tf.float32, shape = (batch_size, 1), name = 'y')
  reg_val = tf.placeholder(tf.float32, name = 'reg_val')

  # Placeholder shapes are fixed to the sizes SGD() feeds for the
  # validation (100 rows) and test (145 rows) splits.
  valid_x = tf.placeholder(tf.float32, shape=(100, 784), name = 'valid_x')
  valid_y = tf.placeholder(tf.float32, shape=(100, 1), name = 'valid_y')

  test_x = tf.placeholder(tf.float32, shape=(145, 784), name = 'test_x')
  test_y = tf.placeholder(tf.float32, shape=(145, 1), name = 'test_y')

  samples = {"tr": x, "v":valid_x, "te":test_x}
  labels = {"tr":y, "v":valid_y, "te":test_y}

  params = {"b1":beta_1, "b2":beta_2, "epsilon":e, "bs":batch_size, "epoch":epochs, "regular":reg}

  #loss
  y_hat, loss = tf_loss(W, bias, x, y, reg)
  v_y_hat, v_loss = tf_loss(W, bias, valid_x, valid_y, reg_val)
  t_y_hat, t_loss = tf_loss(W, bias, test_x, test_y, reg_val)

  #optimizer: forward only the hyperparameters the caller actually set
  adam_kwargs = {"learning_rate": alpha}
  for kwarg_name, value in (("beta1", beta_1), ("beta2", beta_2), ("epsilon", e)):
    if value is not None:
      adam_kwargs[kwarg_name] = value
  optimizer = tf.train.AdamOptimizer(**adam_kwargs).minimize(loss)

  #implement SGD
  SGD(optimizer, W, bias, loss, v_loss, t_loss, samples, labels, reg, reg_val, params)
  return W, bias, y_hat, y, loss, optimizer

In [59]:
def SGD(optimizer, W, bias, loss, v_loss, t_loss, samples, labels, reg, reg_val, params):
  """Run mini-batch training in a fresh session and print the final accuracies.

  Args:
      optimizer: training op to run once per mini-batch.
      W, bias: model variables, read back after each epoch to compute accuracy.
      loss, v_loss, t_loss: training / validation / test loss tensors.
      samples, labels: dicts keyed "tr" / "v" / "te" holding the input and
          target placeholders for each split.
      reg: regularisation strength fed into ``reg_val``.
      reg_val: placeholder receiving ``reg``.
      params: dict; "bs" (batch size) and "epoch" (number of epochs) are read here.

  Returns:
      True once training has finished.

  Raises:
      ValueError: if the batch size exceeds the number of training samples.
  """
  trainLoss, validLoss, testLoss = [], [], []
  trainAcc, validAcc, testAcc = [], [], []

  trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
  trainData = trainData.reshape(trainData.shape[0], -1)
  validData = validData.reshape(validData.shape[0], -1)
  testData = testData.reshape(testData.shape[0], -1)

  batch_size = params["bs"]
  n_train = trainData.shape[0]
  # Loop-invariant; a trailing partial batch is dropped because the training
  # placeholder has a fixed batch dimension.
  batch_num = n_train // batch_size
  if batch_num == 0:
    raise ValueError(
        'batch size {} exceeds the {} available training samples'.format(batch_size, n_train))

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for _ in range(params["epoch"]):
      # Reshuffle every epoch.
      sample_index = np.random.choice(n_train, size = n_train, replace=False)
      trainData_sample = trainData[sample_index]
      trainTarget_sample = trainTarget[sample_index]
      trLossaccum = 0
      vLossaccum = 0
      teLossaccum = 0
      for b in range(batch_num):
        lo, hi = b * batch_size, (b + 1) * batch_size
        feed = {samples["tr"]: trainData_sample[lo:hi],
                labels["tr"]: trainTarget_sample[lo:hi],
                samples["v"]: validData,
                labels['v']: validTarget,
                samples['te']: testData,
                labels['te']: testTarget,
                reg_val: reg}
        _, trLoss, vLoss, teLoss = sess.run([optimizer, loss, v_loss, t_loss], feed_dict = feed)
        trLossaccum += np.mean(trLoss)
        vLossaccum += np.mean(vLoss)
        teLossaccum += np.mean(teLoss)

      # Read the variables in their own run call: fetching them alongside the
      # update op leaves it unspecified whether the pre- or post-update value
      # comes back.
      update_W, update_b = sess.run([W, bias])

      trainLoss.append(trLossaccum/batch_num)
      validLoss.append(vLossaccum/batch_num)
      testLoss.append(teLossaccum/batch_num)
      trainAcc.append(tf_acc(update_W, update_b, trainData, trainTarget))
      validAcc.append(tf_acc(update_W, update_b, validData, validTarget))
      testAcc.append(tf_acc(update_W, update_b, testData, testTarget))
  print('train acc:', trainAcc[-1])
  print('valid acc:', validAcc[-1])
  print('test acc:', testAcc[-1])
  #tf_plot(trainLoss, validLoss, 1, params["b1"], params["b2"], params["epsilon"]) #see Appendix A2 for plotting code
  #tf_plot(trainAcc, validAcc, 2, params["b1"], params["b2"], params["epsilon"])
  return True

In [None]:
# Grid search over the Adam hyperparameters: every (β1, β2, ϵ) combination is
# announced and then trained with buildGraph's remaining defaults.
beta_one = [0.95, 0.99]
beta_two = [0.99, 0.999]
e_list = [1e-9, 1e-4]
grid = [(o, t, ep) for o in beta_one for t in beta_two for ep in e_list]
for o, t, ep in grid:
  print('β1 = {0}, β2 = {1}, ϵ = {2:.1E}'.format(o, t, ep))
  buildGraph(beta_1 = o, beta_2 = t, e = ep)

In [4]:
def tf_plot(train, valid, fignum, beta_1, beta_2, e):
  """Plot training vs. validation curves for one Adam hyperparameter setting.

  ``fignum`` selects the matplotlib figure and its labelling: 1 is a loss
  plot, 2 an accuracy plot, and any other value leaves the y-label and title
  unset. ``beta_1``, ``beta_2`` and ``e`` appear only in the title. Returns True.
  """
  headings = {
      1: ('Loss', r'Loss vs. Epochs with Learning Rate = 0.001, λ = 0, Batch Size = 500,'),
      2: ('Accuracy', r'Accuracy vs. Epochs with Learning Rate = 0.001, λ = 0, Batch Size = 500,'),
  }

  plt.figure(fignum)
  train_line, = plt.plot(train)
  valid_line, = plt.plot(valid)
  plt.xlabel('Epochs')

  if fignum in headings:
    axis_label, headline = headings[fignum]
    plt.ylabel(axis_label)
    plt.title(headline + '\n' +  'β1 = {0}, β2 = {1}, ϵ = {2:.1E}'.format(beta_1, beta_2, e), wrap = True)

  plt.legend([train_line, valid_line], ['Training', 'Validation'])
  plt.show()
  return True

In [35]:
def tf_loss(W, b, x, y, reg):
  """Build prediction and loss tensors for logistic regression.

  Args:
      W: weight tensor, multiplied as ``tf.matmul(x, W)``.
      b: bias, added to the logits.
      x: input tensor.
      y: 0/1 label tensor with the same shape as the logits.
      reg: L2 regularisation strength (Python number or tensor).

  Returns:
      (y_hat, loss): sigmoid predictions, and the element-wise sigmoid
      cross-entropy with ``0.5 * reg * W^T W`` added to every element.
      The loss is not reduced over the batch here.
  """
  # Build the logits once and share them between the prediction and the loss
  # instead of adding the same matmul to the graph twice.
  logits = tf.matmul(x, W) + b
  y_hat = tf.sigmoid(logits)
  CE_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
  reg_loss = tf.matmul(tf.transpose(W), W) * 0.5 * reg
  loss = CE_loss + reg_loss
  return y_hat, loss

In [6]:
def tf_acc(W, b, x, y):
  """Accuracy of the logistic model on already-flattened inputs.

  ``x`` is multiplied directly by ``W`` (no reshaping); a sample counts as
  positive when its sigmoid activation exceeds 0.5. Returns the fraction of
  predictions equal to ``y``.
  """
  probabilities = sigmoid(np.matmul(x, W) + b)
  predicted = np.where(probabilities > 0.5, 1, 0)
  n_hits = (predicted == y).sum()
  return n_hits / y.shape[0]