<a href="https://colab.research.google.com/github/shirleyzz/cap5610_machineleanrning/blob/master/HW3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from keras.datasets import mnist
import numpy as np
from keras import backend as K
import keras
from keras.utils import to_categorical

Using TensorFlow backend.


In [0]:
#load the training and test data using keras
(x_train_origin, y_train_origin),(x_test_origin, y_test_origin) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
x_train = x_train_origin.reshape((60000, 28 * 28))
x_train = x_train_origin.astype('float32') / 255

x_test = x_test_origin.reshape((10000, 28 * 28))
x_test = x_test_origin.astype('float32') / 255

In [0]:
#split into 10 classes with categorial labels
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
y_train = to_categorical(y_train_origin, num_classes=10) 
y_test = to_categorical(y_test_origin, num_classes=10)

So each of the ten classifiers has an input layer consisting of 28 x 28 input neurons and an output layer consisting of a single output neuron.


In [0]:
#calculate cross entropy
def compute_scores(w,b,X):
  return np.dot(X,w.T) + b
def softmax(scores):
  exp = np.exp(scores)
  sum_exp = np.sum(np.exp(scores), axis=1, keepdims=True)
  softmax = exp / sum_exp
  return softmax
def cross_entropy(X, Y, scores):
  m = X.shape[0]
  loss = - (1 / m) * np.sum(Y * np.log(scores))
  return loss

In [0]:
#tranforms vector Y of labels to one-hot encoded matrix
def one_hot(X, Y, n_classes=10):
  m = X.shape[0]
  one_hot = np.zeros((m, n_classes))
  one_hot[np.arange(m), Y.T] = 1
  return one_hot

In [0]:
#initialize w and b
def initialize_params_with_zeros(n_classes, n_features):
  w = np.random.rand(n_classes, n_features)
  b = np.zeros((1, n_classes))
  return w,b

In [0]:
# predict
def predict(w,b,X):
  scores = compute_scores(w,b,X)
  probs = softmax(scores)
  preds = np.argmax(probs,axis=1)
  print(preds)
  #return np.argmax(probs, axis=1)[:, np.newaxis]
  return preds

In [0]:
#training process
def train(X_train, Y_train, n_iters=10, learning_rate=0.01,n_classes = 10):
  n_samples,n_features = X_train.shape
  w, b = initialize_params_with_zeros(n_classes,n_features)
  #print(w.shape)
  all_losses = []
  #stochastic gradient regression
  for item in range(n_iters):
    cost = 0.0
    for i in range(n_samples):
      rand_ind = np.random.randint(0,n_samples)
      X_i = X_train[rand_ind,:].reshape(1,n_features)
      Y_i = Y_train[rand_ind].reshape(1,1)
      y_one_hot = one_hot(X_i,Y_i)
      scores = compute_scores(w,b,X_i)
      probs = softmax(scores)
      loss = cross_entropy(X_i,y_one_hot, probs)
      #print(probs.shape)
      dw = (1. / n_samples) * np.dot((probs - y_one_hot).T,X_i)
      db = (1. / n_samples) * np.sum(probs - y_one_hot, axis=0)
      w = w - learning_rate * dw
      b = b - learning_rate * db
    all_losses.append(loss)
    print(f'Iteration number: {item}, loss: {np.round(loss, 4)}')
  return w, b, all_losses

In [0]:
def model(X_train, Y_train, X_test, Y_test, num_iters, learning_rate,n_classes):
  w, b, loss = train(X_train, Y_train, num_iters, learning_rate, n_classes)
  #predict test/train set examples 
  Y_pred_test = predict(w, b, X_test)
  Y_pred_train = predict(w, b, X_train)
  #use argmax to return indices of the maximum values along an axis
  Y_true_test = np.argmax(Y_test, axis=0)
  Y_true_train = np.argmax(Y_train,axis=0)
                         

  #print train/test Errors
  print("")
  print("train accuracy: {} %".format(100*sum(Y_pred_train==Y_true_train)/(float(len(Y_train)))))
  print("test accuracy: {} %".format(100*sum(Y_pred_test==Y_true_test)/(float(len(Y_test)))))
 
  d = {"costs": loss, "Y_pred_test": Y_pred_test, 
        "Y_pred_train" : Y_pred_train, 
        "w" : w, 
        "b" : b,
        "learning_rate" : learning_rate,
        "num_iters": num_iters}
    
  return d

In [0]:
d = model(x_train, y_train_origin, x_test, y_test_origin, num_iters=20, learning_rate=0.01,n_classes=10)

Iteration number: 0, loss: 7.6328
Iteration number: 1, loss: 7.7259
Iteration number: 2, loss: 3.9758
Iteration number: 3, loss: 6.9337
Iteration number: 4, loss: 4.6261
Iteration number: 5, loss: 9.403
Iteration number: 6, loss: 2.0145
Iteration number: 7, loss: 3.4868
Iteration number: 8, loss: 6.3907
Iteration number: 9, loss: 3.2737
Iteration number: 10, loss: 4.947
Iteration number: 11, loss: 9.3881
Iteration number: 12, loss: 5.831
Iteration number: 13, loss: 2.7629
Iteration number: 14, loss: 3.9463
Iteration number: 15, loss: 3.1821
Iteration number: 16, loss: 7.4483
Iteration number: 17, loss: 1.6395
Iteration number: 18, loss: 3.1758
Iteration number: 19, loss: 4.185
[3 5 3 ... 3 5 9]
[7 7 3 ... 6 9 7]

train accuracy: 4.016666666666667 %
test accuracy: 18.16 %
