<a href="https://colab.research.google.com/github/simon-clematide/colab-notebooks-for-teaching/blob/main/xor_Logistic_Regression_With_Autograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# XOR by Logistic Regression with autograd
- Autograd was one of the first implementations of automatic differentiation (autodiff) in Python
- Example adapted from https://github.com/HIPS/autograd


In [None]:
import autograd.numpy as np
from autograd import grad

# Monkey patch for nicer output: make autograd's ArrayBox wrappers print as
# str() of the value they wrap (their _value attribute).
# See https://github.com/HIPS/autograd/issues/355
np.numpy_boxes.ArrayBox.__str__ = lambda self: str(self._value)

# Print arrays with a precision of 3 digits so the training log stays compact.
np.set_printoptions(precision=3)

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied elementwise.

    Computed through tanh using the identity
    sigmoid(x) = 0.5 * (tanh(x / 2) + 1).

    Args:
        x: Scalar or array of real-valued inputs (logits).

    Returns:
        Values between 0 and 1, with the same shape as x.
    """
    # The x / 2 is essential: 0.5 * (tanh(x) + 1) equals sigmoid(2 * x),
    # a curve twice as steep as the logistic function.
    # NOTE: cell outputs recorded with the unscaled form will differ after a re-run.
    return 0.5 * (np.tanh(x / 2.) + 1)

def logistic_predictions(weights, inputs):
    """Return the logistic model's probability that each label is true.

    Args:
        weights: Weight vector of the model.
        inputs: Input data; it is combined with weights via np.dot(inputs, weights).

    Returns:
        The sigmoid of the dot product of inputs and weights.
    """
    logits = np.dot(inputs, weights)
    return sigmoid(logits)

def training_loss(weights):
    """Return the negative log-likelihood of the training labels.

    Reads the module-level ``inputs`` and ``targets``. As a side effect it
    rebinds the globals ``preds`` and ``label_probabilities`` so that the
    calling code can print the model's current state.

    Args:
        weights: Weight vector passed on to logistic_predictions.

    Returns:
        The negated sum of the log label probabilities.
    """
    global preds, label_probabilities
    preds = logistic_predictions(weights, inputs)
    # Probability the model assigns to the label each example actually has:
    # preds where the target is true, 1 - preds where it is false.
    prob_where_true = preds * targets
    prob_where_false = (1 - preds) * (1 - targets)
    label_probabilities = prob_where_true + prob_where_false
    log_likelihood = np.sum(np.log(label_probabilities))
    return -log_likelihood

# Toy XOR dataset: one example per row.
# Columns: x1, x2, bias
inputs = np.array([
    [0., 0., 1.],
    [0., 1., 1.],
    [1., 0., 1.],
    [1., 1., 1.],
])
targets = np.array([False, True, True, False])

# Let autograd derive the gradient of training_loss with respect to the weights.
training_gradient_fun = grad(training_loss)

# Random starting weights; to start from zeros instead, use:
# weights = np.array([0.,0.,0.])
weights = np.random.rand(3)

# Step size of each gradient-descent update.
lrate = 0.2

# Report the starting point. training_loss() refreshes the globals preds and
# label_probabilities, so it has to run before those two are printed.
initial_loss = training_loss(weights)
print(f"Initial loss:        {initial_loss:.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")
print(f"Targets:             {targets.astype(float)}")
print()

# Plain gradient descent: ten updates, logging the model state after each one.
for i in range(10):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")

    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")

    step = lrate * gradients
    weights -= step
    print(f"New weights:         {weights}")

    # Re-evaluate the loss so preds / label_probabilities reflect the new weights.
    current_loss = training_loss(weights)
    print(f"Trained loss:        {current_loss:.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
    print(f"Targets:             {targets.astype(float)}")


Initial loss:        3.0605
Initial weights:     [0.167 0.314 0.103]
Raw predictions:     [0.551 0.697 0.632 0.763]
Label probabilities: [0.449 0.697 0.632 0.237]
Targets:             [0. 1. 1. 0.]


Iteration 0:
Current weights:     [0.167 0.314 0.103]
Gradients:           [0.79  0.92  1.288]
New weights:         [ 0.009  0.13  -0.154]
Trained loss:        2.7953
Raw predictions:     [0.423 0.488 0.428 0.492]
Label probabilities: [0.577 0.488 0.428 0.508]
Targets:             [0. 1. 1. 0.]

Iteration 1:
Current weights:     [ 0.009  0.13  -0.154]
Gradients:           [-0.159 -0.04  -0.337]
New weights:         [ 0.041  0.138 -0.087]
Trained loss:        2.7829
Raw predictions:     [0.457 0.525 0.477 0.546]
Label probabilities: [0.543 0.525 0.477 0.454]
Targets:             [0. 1. 1. 0.]

Iteration 2:
Current weights:     [ 0.041  0.138 -0.087]
Gradients:           [0.046 0.142 0.01 ]
New weights:         [ 0.032  0.109 -0.089]
Trained loss:        2.7797
Raw predictions:     [0.456 0.

## Feature engineering
Let's construct an additional feature by hand: the product x1 * x2, which is 1 only when both inputs are 1.

Can we now learn the XOR function?

In [None]:
# Build a toy dataset with one hand-crafted interaction feature.
# Columns: x1, x2, bias, x1*x2
inputs = np.array([[0.,0.,1.,0.],
                   [0.,1.,1.,0.],
                   [1.,0.,1.,0.],
                   [1.,1.,1.,1.]])
targets = np.array([False, True, True, False])

# Initialize all four weights to zero (float literals throughout).
weights = np.array([0., 0., 0., 0.])

# Learning rate for this run, named rather than hard-coded in the update step.
lrate = 0.1

# Optimize weights using gradient descent.
# training_loss() refreshes the globals preds and label_probabilities, so it
# is evaluated before those two are printed.
print(f"Initial loss:        {training_loss(weights):.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")

print()
for i in range(10):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")
    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")
    # Gradient-descent step: move against the gradient, scaled by lrate.
    weights -= lrate * gradients
    print(f"New weights:         {weights}")
    print(f"Trained loss:        {training_loss(weights):.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
    print(f"Targets:             {targets.astype(float)}")


Initial loss:        2.7726
Initial weights:     [0. 0. 0. 0.]
Raw predictions:     [0.5 0.5 0.5 0.5]
Label probabilities: [0.5 0.5 0.5 0.5]


Iteration 0:
Current weights:     [0. 0. 0. 0.]
Gradients:           [0. 0. 0. 1.]
New weights:         [ 0.   0.   0.  -0.1]
Trained loss:        2.6776
Raw predictions:     [0.5  0.5  0.5  0.45]
Label probabilities: [0.5  0.5  0.5  0.55]
Targets:             [0. 1. 1. 0.]

Iteration 1:
Current weights:     [ 0.   0.   0.  -0.1]
Gradients:           [-0.1 -0.1 -0.1  0.9]
New weights:         [ 0.01  0.01  0.01 -0.19]
Trained loss:        2.5958
Raw predictions:     [0.505 0.51  0.51  0.421]
Label probabilities: [0.495 0.51  0.51  0.579]
Targets:             [0. 1. 1. 0.]

Iteration 2:
Current weights:     [ 0.01  0.01  0.01 -0.19]
Gradients:           [-0.139 -0.139 -0.109  0.841]
New weights:         [ 0.024  0.024  0.021 -0.274]
Trained loss:        2.5216
Raw predictions:     [0.51  0.522 0.522 0.399]
Label probabilities: [0.49  0.522 0.522 