<a href="https://colab.research.google.com/github/mariomeissner/nlp_class/blob/master/simple_nn_two_layers_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import necessary packages
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
# Set seed for reproducibility: fixes NumPy's global random generator so the
# np.random.normal draws used for the weights give the same values every run
np.random.seed(1)

In [0]:
# Fetch the Iris dataset and pull out the feature matrix and label vector
data = datasets.load_iris()
X = data.data
y = data.target
# Shuffle samples and labels together; the fixed random_state keeps the
# resulting order the same on every run
X, y = shuffle(X, y, random_state=1)
print(X.shape, y.shape)

In [0]:
# Normalize input data in place: centre every feature column on zero,
# then divide each column by its (post-centering) maximum
np.subtract(X, X.mean(axis=0), out=X)
np.divide(X, X.max(axis=0), out=X)
# Sanity check: per-column maximum and (rounded) per-column mean
print(X.max(axis=0), X.mean(axis=0).round(decimals=2))

In [0]:
# One-hot encode labels: row c of the identity matrix is the one-hot vector
# for class c, so indexing the identity with y builds one row per sample
yh = np.eye(y.max() + 1)[y]
y = yh

In [0]:
# Explore the data: first ten samples, feature columns followed by the
# label columns, rounded to two decimals
print(np.concatenate((X, y), axis=1)[:10].round(decimals=2))

In [0]:
# Our activation function
def sigmoid(x, deriv = False):
  """Logistic sigmoid activation, or its derivative.

  Args:
    x: Scalar or NumPy array. With ``deriv`` false this is the raw input;
      with ``deriv`` true it must already be the *output* of an earlier
      ``sigmoid`` call.
    deriv: When true, return the derivative expressed through that
      earlier output instead of the activation itself.

  Returns:
    ``1 / (1 + exp(-x))`` by default, or ``x * (1 - x)`` when ``deriv``
    is set.
  """
  if not deriv:
    return 1 / (1 + np.exp(-x))
  # s'(z) = s(z) * (1 - s(z)); the caller passes s(z) directly as x.
  return x * (1 - x)

In [0]:
def softmax(x):
  """Numerically stable softmax over the last axis (rows) of ``x``.

  Each row is shifted by its own maximum before exponentiating. Softmax is
  invariant to adding a constant per row, so the result is mathematically
  unchanged, but the largest exponent becomes ``exp(0) = 1`` and large
  logits no longer overflow to ``inf`` (which previously produced NaN).

  Args:
    x: Array-like of scores; normalisation runs along the last axis.

  Returns:
    NumPy array of the same shape whose entries along the last axis are
    non-negative and sum to 1.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalise; also avoids taking max() over an empty axis.
    return np.exp(x)
  shifted = x - x.max(axis=-1, keepdims=True)
  e_x = np.exp(shifted)
  return e_x / e_x.sum(axis=-1, keepdims=True)

In [0]:
def cross_entropy(p, y, eps=1e-12):
  """Mean cross-entropy loss between predictions and one-hot labels.

  Probabilities are floored at ``eps`` before taking the logarithm.
  Without the floor, an exact 0 in ``p`` gives ``log(0) = -inf``; where
  the matching label is 0 this turns into ``0 * inf = NaN`` and poisons
  the whole sum, and where the label is 1 the loss becomes infinite.

  Args:
    p: Array of predicted probabilities, one row per sample.
    y: Array of one-hot true labels with the same shape as ``p``.
    eps: Smallest probability passed to ``log``. Defaults to ``1e-12``.

  Returns:
    Sum of ``-y * log(p)`` over all entries, divided by the number of
    rows in ``y``.
  """
  n = y.shape[0]
  log_likelihood = -np.log(np.maximum(p, eps))
  return np.sum(y * log_likelihood) / n

In [0]:
def accuracy(y, pred):
  """Fraction of rows whose predicted class equals the true class.

  Args:
    y: Sequence of per-sample label rows (e.g. one-hot rows); every row
      must provide ``argmax()``.
    pred: Sequence of per-sample score rows, paired with ``y`` by position.

  Returns:
    Number of rows where both argmax indices agree, divided by ``len(y)``.
  """
  hits = sum(1 for truth, guess in zip(y, pred)
             if truth.argmax() == guess.argmax())
  return hits / len(y)

In [0]:
# Testing softmax: each printed row is a probability distribution, so it
# should sum to 1 (up to the two-decimal rounding)
print(
  softmax(
    np.array([
      [3, 4, 5],
      [0, 15, 20],
      [0.1, 0.2, 0.3],
    ])
  ).round(2)
)

In [0]:
# Testing cross-entropy
# Case 1: predictions put most mass on the labelled class
p1 = np.array([
  [0.8, 0.1, 0.1],
  [0.1, 0.7, 0.2],
])
y1 = np.array([
  [1, 0, 0],
  [0, 1, 0],
])
# Case 2: the labelled class only receives 0.2 / 0.3 probability
p2 = np.array([
  [0.5, 0.3, 0.2],
  [0.1, 0.6, 0.3],
])
y2 = np.array([
  [0, 0, 1],
  [0, 0, 1],
])
# First one should be rather low (about 0.29), second one higher (about 1.41)
print(cross_entropy(p1, y1))
print(cross_entropy(p2, y2))

In [0]:
# Layer sizes picked by hand
hidden = 2 # Number of hidden neurons
output = 3 # Number of output neurons
# Sample count and feature count are read off the data matrix
batch, features = X.shape
print(f"Shapes: \nbatch: {batch}, features: {features}, output: {output}.")

In [0]:
# Initialize weights
# TODO: Give the weights the correct shapes
# NOTE: with size=None every np.random.normal call below returns a single
# scalar sample, so all four parameters are placeholders until real shapes
# are supplied.
# Presumably (confirm against the forward pass you implement):
#   w1: (features, hidden)   b1: (hidden,)
#   w2: (hidden, output)     b2: (output,)
w1 = np.random.normal(size=None) 
b1 = np.random.normal(size=None)
w2 = np.random.normal(size=None)
b2 = np.random.normal(size=None)

In [0]:
# Learning rate: factor applied to every gradient in the
# `param -= lr * grad` updates of the training loop
lr = 1e-4

In [0]:
# TODO: IMPLEMENT THE FORWARD AND BACKWARD PASS
# NOTE: this cell is an unfinished template. Every name assigned None below
# must be replaced by a real expression; until then cross_entropy receives
# pred=None and raises, so the loop cannot finish its first iteration.
for i in range(10000):

  # forward pass
  # Names suggest: zh = hidden pre-activation, h = hidden activation,
  # zpred = output pre-activation, pred = class probabilities — TODO confirm
  zh    = None
  h     = None
  zpred = None
  pred  = None

  # loss
  loss = cross_entropy(pred, y)

  # Print loss every now and then (every 500th iteration)
  if i % 500 == 0:
      print(f"Iteration {i:0>4}, loss: {loss:6.3f}")

  # backpropagation
  # NOTE(review): pred - y is the cross-entropy gradient w.r.t. zpred only if
  # pred = softmax(zpred) — confirm once the forward pass is written. Also
  # note cross_entropy divides by the batch size while this gradient does not.
  grad_zpred = pred - y
  # Remaining gradients are placeholders to be derived from grad_zpred
  grad_h     = None
  grad_w2    = None
  grad_b2    = None
  grad_zh    = None
  grad_w1    = None
  grad_b1    = None

  # update weights: in-place gradient-descent step on each parameter
  w1 -= lr * grad_w1
  b1 -= lr * grad_b1 
  w2 -= lr * grad_w2     
  b2 -= lr * grad_b2 
      
# After training: show the first ten rows of the last predictions next to
# the label columns, then the accuracy over all of y
print("Predictions:")
print(np.concatenate((pred, y), axis=1)[:10].round(2))
print(f"Accuracy: {accuracy(y, pred)}")