<a href="https://colab.research.google.com/github/simon-clematide/colab-notebooks-for-teaching/blob/main/xor_Logistic_Regression_With_Autograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# XOR by Logistic Regression with autograd
- Autograd was one of the first implementations of autodifferentiation in Python
- Example adapted from https://github.com/HIPS/autograd


In [1]:
import autograd.numpy as np
from autograd import grad

# monkey patch for nicer output https://github.com/HIPS/autograd/issues/355
np.numpy_boxes.ArrayBox.__str__ = lambda self: str(self._value)

# avoid numerical environment pollution ;-)
np.set_printoptions(precision=3)

def sigmoid(x):
    return 0.5*(np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    # updates the predictions and label probabilities as globals to reflect the model changes
    global preds, label_probabilities
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
# x1, x2, bias
inputs = np.array([[0.,0.,1.],
                   [0.,1.,1.],
                   [1.,0.,1.],
                   [1.,1.,1.]])
targets = np.array([False, True, True, False])

# Build a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)

# initialize the weights randomly
weights = np.random.rand(3)
# or by zeroes
# weights = np.array([0.,0.,0.])

# learning rate
lrate = 0.2

# Optimize weights using gradient descent.
print(f"Initial loss:        {training_loss(weights):.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")
print(f"Targets:             {targets.astype(float)}")
print()
for i in range(10):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")
    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")
    weights -= lrate * gradients
    print(f"New weights:         {weights}")
    print(f"Trained loss:        {training_loss(weights):.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
    print(f"Targets:             {targets.astype(float)}")


Initial loss:        3.3234
Initial weights:     [0.167 0.356 0.244]
Raw predictions:     [0.62  0.768 0.695 0.822]
Label probabilities: [0.38  0.768 0.695 0.178]
Targets:             [0. 1. 1. 0.]


Iteration 0:
Current weights:     [0.167 0.356 0.244]
Gradients:           [1.034 1.182 1.811]
New weights:         [-0.04   0.119 -0.118]
Trained loss:        2.7927
Raw predictions:     [0.441 0.501 0.422 0.481]
Label probabilities: [0.559 0.501 0.422 0.519]
Targets:             [0. 1. 1. 0.]

Iteration 1:
Current weights:     [-0.04   0.119 -0.118]
Gradients:           [-0.195 -0.037 -0.311]
New weights:         [-0.001  0.127 -0.056]
Trained loss:        2.7807
Raw predictions:     [0.472 0.535 0.472 0.535]
Label probabilities: [0.528 0.535 0.472 0.465]
Targets:             [0. 1. 1. 0.]

Iteration 2:
Current weights:     [-0.001  0.127 -0.056]
Gradients:           [0.013 0.141 0.029]
New weights:         [-0.004  0.098 -0.061]
Trained loss:        2.7778
Raw predictions:     [0.469 0.

## Feature engineering
Let's construct an additional feature by hand: x1 * x2

Can we now learn the xor function?

In [2]:
# Build a toy dataset.
# x1, x2, bias, x1*x2
inputs = np.array([[0.,0.,1.,0.],
                   [0.,1.,1.,0.],
                   [1.,0.,1.,0.],
                   [1.,1.,1.,1.]])
targets = np.array([False, True, True, False])

# initialize the weights
weights = np.array([0.,0.,0.,0])

# Optimize weights using gradient descent.
print(f"Initial loss:        {training_loss(weights):.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")

print()
for i in range(1000):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")
    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")
    weights -= gradients * 0.1
    print(f"New weights:         {weights}")
    print(f"Trained loss:        {training_loss(weights):.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
    print(f"Targets:             {targets.astype(float)}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
New weights:         [ 3.095  3.095 -1.441 -6.614]
Trained loss:        0.1501
Raw predictions:     [0.053 0.965 0.965 0.023]
Label probabilities: [0.947 0.965 0.965 0.977]
Targets:             [0. 1. 1. 0.]

Iteration 445:
Current weights:     [ 3.095  3.095 -1.441 -6.614]
Gradients:           [-0.024 -0.024  0.012  0.047]
New weights:         [ 3.097  3.097 -1.442 -6.619]
Trained loss:        0.1498
Raw predictions:     [0.053 0.965 0.965 0.023]
Label probabilities: [0.947 0.965 0.965 0.977]
Targets:             [0. 1. 1. 0.]

Iteration 446:
Current weights:     [ 3.097  3.097 -1.442 -6.619]
Gradients:           [-0.024 -0.024  0.012  0.047]
New weights:         [ 3.1    3.1   -1.443 -6.623]
Trained loss:        0.1494
Raw predictions:     [0.053 0.965 0.965 0.023]
Label probabilities: [0.947 0.965 0.965 0.977]
Targets:             [0. 1. 1. 0.]

Iteration 447:
Current weights:     [ 3.1    3.1   -1.443 -6.623]
Gradient