<a href="https://colab.research.google.com/github/mariomeissner/nlp_class/blob/master/simple_nn_one_layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import necessary packages
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
# Fix the seed of NumPy's global random generator so every run draws the same numbers
SEED = 1
np.random.seed(SEED)

In [0]:
# Load the iris dataset, then shuffle samples and labels with the same permutation
data = datasets.load_iris()
X, y = shuffle(
    data.data,
    data.target[:, np.newaxis],  # insert an axis at position 1 so labels form a column
    random_state=1,
)
print(X.shape, y.shape)

In [0]:
# Normalize input data in place, one column at a time:
# first centre each column on zero, then divide by the centred column's maximum.
np.subtract(X, X.mean(axis=0), out=X)
np.divide(X, X.max(axis=0), out=X)
# Sanity check: per-column maxima and (rounded) per-column means after scaling
print(X.max(axis=0), X.mean(axis=0).round(2))

In [0]:
# Explore the data: first ten rows, input columns followed by the label column
head_rows = 10
print(np.concatenate([X[:head_rows], y[:head_rows]], axis=1).round(2))

In [0]:
# Merge classes 1 and 2: every label equal to 2 is rewritten as 1
n_labels = len(y)
y = np.reshape(y, (n_labels, 1))  # make sure labels are a single column
is_class_two = (y == 2)
y = np.where(is_class_two, 1, y)

In [0]:
# Record the dimensions used to size the model
output = 1                   # number of output columns
(batch, features) = X.shape  # rows of X, columns of X
print(f"Shapes: \nbatch: {batch}, features: {features}, output: {output}.")

In [0]:
# Our activation function
def sigmoid(x, deriv = False):
  ''' Logistic sigmoid activation, or its derivative.

  With deriv = False, x is the raw input and 1 / (1 + exp(-x)) is returned.
  With deriv = True, x must already be the output of an earlier sigmoid
  call; the derivative is then evaluated as x * (1 - x). '''
  if not deriv:
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)
  # x is already sigmoid(z) here, so no exponential is needed
  return x * (1 - x)

In [0]:
def accuracy(y, pred, threshold=0.5):
  ''' Computes the accuracy of the predictions pred with respect to y.

  y and pred are expected to be 1-D, or 2-D with one row per sample.

  - Several columns (one-hot labels / per-class scores): a sample is correct
    when the argmax of its prediction row equals the argmax of its label row.
  - A single column or a 1-D array (binary labels / one probability per
    sample): a prediction counts as the positive class when it is
    >= threshold, and is then compared with the label.

  Taking argmax over a length-1 row always yields 0, so the single-column
  case must be thresholded; otherwise every sample would count as correct.

  Returns the fraction of correct samples as a float.
  Raises ZeroDivisionError if y is empty. '''
  y = np.asarray(y)
  pred = np.asarray(pred)

  if y.ndim == 2 and y.shape[1] > 1:
    # Multi-class: compare the index of the largest entry in each row
    hits = y.argmax(axis=1) == pred.argmax(axis=1)
  else:
    # Binary: turn probabilities into hard 0/1 decisions before comparing
    labels = y.reshape(-1)
    decisions = (pred.reshape(-1) >= threshold).astype(labels.dtype)
    hits = decisions == labels

  return int(np.count_nonzero(hits)) / len(y)

In [0]:
# Initialize parameters with draws from a normal distribution (mean 0, std 1).
# The weight matrix is drawn before the bias, so the random stream is consumed in that order.
w1 = np.random.normal(loc=0.0, scale=1.0, size=(features, output))  # shape (features, output)
b1 = np.random.normal(loc=0.0, scale=1.0, size=(1, output))  # shape (1, output)

In [0]:
# Training hyperparameters, named so they can be tuned in one place
N_ITERATIONS = 1000   # number of gradient-descent steps over the whole of X
LEARNING_RATE = 1e-3  # step size applied to both w1 and b1
LOG_EVERY = 50        # print the loss once every LOG_EVERY iterations

# Training loop: gradient descent on one sigmoid layer with a squared-error loss.
# w1 and b1 are updated in place, so re-running this cell continues training
# from the current weights rather than starting over.
for i in range(N_ITERATIONS):

    # forward pass
    zpred = X.dot(w1) + b1 # (batch,features) @ (features,output) + (1,output)
    pred = sigmoid(zpred)

    # loss (squared error, summed over all samples)
    loss = np.sum(np.square(pred - y))

    # Print loss every now and then
    if i % LOG_EVERY == 0:
        print(f"Iteration {i:0>3}, loss: {loss:6.3f}")

    # backpropagation (chain rule, from the loss back to the parameters)
    # shapes of grad variables should match originals!
    grad_pred = 2.0 * (pred - y)                        # d loss / d pred
    grad_zpred = grad_pred * sigmoid(pred, deriv=True)  # pred is already a sigmoid output
    grad_w1 = X.T.dot(grad_zpred)                       # same shape as w1
    grad_b1 = grad_zpred.sum(axis=0, keepdims=True)     # keepdims -> (1,output), same shape as b1

    # update weights
    w1 -= LEARNING_RATE * grad_w1
    b1 -= LEARNING_RATE * grad_b1

# The last pred computed inside the loop predates the final weight update,
# so run one more forward pass to report what the trained weights produce.
pred = sigmoid(X.dot(w1) + b1)

# View a slice of the predictions
print("\nPredictions:")
print(np.concatenate((pred, y), axis=1).round(2)[:10])
print(f"\nAccuracy: {accuracy(y, pred)}.")