# XOR by Logistic Regression with autograd
Example adapted from https://github.com/HIPS/autograd


In [None]:
import autograd.numpy as np
from autograd import grad

# Monkey patch for nicer output (https://github.com/HIPS/autograd/issues/355):
# replace ArrayBox.__str__ so that converting a box to a string shows the
# string form of its wrapped `_value`.
# NOTE(review): `_value` is a private autograd attribute and this patch applies
# process-wide — confirm it still works after an autograd upgrade.
np.numpy_boxes.ArrayBox.__str__ = lambda self: str(self._value)

def sigmoid(x):
    """Map ``x`` elementwise onto the range 0..1 using a rescaled tanh.

    Evaluates ``(1 + tanh(x)) / 2``, which is algebraically the same as
    ``1 / (1 + exp(-2 * x))``; an input of 0 therefore maps to 0.5.
    """
    shifted = 1 + np.tanh(x)
    return shifted / 2

def logistic_predictions(weights, inputs):
    """Return the model's probability that each example's label is true.

    Takes the dot product of ``inputs`` with ``weights`` and passes the
    resulting scores through ``sigmoid``.
    """
    scores = np.dot(inputs, weights)
    return sigmoid(scores)

def training_loss(weights):
    """Return the negative log-likelihood of the training labels.

    Reads the module-level ``inputs`` and ``targets``.  As a side effect it
    reassigns the globals ``preds`` and ``label_probabilities`` on every call
    so the surrounding script can print the model's current outputs.
    """
    global preds, label_probabilities
    preds = logistic_predictions(weights, inputs)
    # Probability assigned to the label each example actually has:
    # ``preds`` where the target is true, ``1 - preds`` where it is false.
    prob_where_true = preds * targets
    prob_where_false = (1 - preds) * (1 - targets)
    label_probabilities = prob_where_true + prob_where_false
    log_likelihood = np.sum(np.log(label_probabilities))
    return -log_likelihood

# Toy XOR dataset, one example per row.
# Columns: x1, x2, bias (constant 1)
inputs = np.array([
    [0., 0., 1.],
    [0., 1., 1.],
    [1., 0., 1.],
    [1., 1., 1.],
])
# Label for each row above: true exactly when x1 and x2 differ.
targets = np.array([False, True, True, False])

# Build a function that returns gradients of training loss using autograd.
# training_loss takes the weight vector as its only argument, so the returned
# function yields the gradient of the loss with respect to the weights.
training_gradient_fun = grad(training_loss)

# Initialize the three weights (one per input column: x1, x2, bias) randomly.
# NOTE(review): no random seed is set, so every run starts from different weights.
weights = np.random.rand(3)
# Alternative: start from all zeros instead.
# weights = np.array([0.,0.,0.])

# Optimize the weights with plain gradient descent: report the starting point,
# then take 100 steps of size 0.1 against the gradient, printing after each.
# Calling training_loss also reassigns `preds` and `label_probabilities`,
# which is why it is evaluated before those globals are printed.
initial_loss = training_loss(weights)
print(f"Initial loss:        {initial_loss:.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")

print()
for i in range(100):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")
    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")
    # In-place step: `weights` remains the same array object across iterations.
    weights -= 0.1 * gradients
    print(f"New weights:         {weights}")
    # Direct call so the loss and the printed globals reflect the new weights.
    current_loss = training_loss(weights)
    print(f"Trained loss:        {current_loss:.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
    

## Feature engineering
Let's construct an additional feature by hand: x1 * x2

Can we now learn the XOR function?

In [None]:
# Toy XOR dataset with one hand-built extra feature, one example per row.
# Columns: x1, x2, bias (constant 1), x1*x2
inputs = np.array([
    [0., 0., 1., 0.],
    [0., 1., 1., 0.],
    [1., 0., 1., 0.],
    [1., 1., 1., 1.],
])
# Label for each row above: true exactly when x1 and x2 differ.
targets = np.array([False, True, True, False])

# Start all four weights (x1, x2, bias, x1*x2) at zero.
weights = np.zeros(4)

# Optimize the weights with plain gradient descent: report the starting point,
# then take 1000 steps of size 0.1 against the gradient, printing after each.
# Calling training_loss also reassigns `preds` and `label_probabilities`,
# which is why it is evaluated before those globals are printed.
initial_loss = training_loss(weights)
print(f"Initial loss:        {initial_loss:.4f}")
print(f"Initial weights:     {weights}")
print(f"Raw predictions:     {preds}")
print(f"Label probabilities: {label_probabilities}")

print()
for i in range(1000):
    print(f"\nIteration {i}:")
    print(f"Current weights:     {weights}")
    gradients = training_gradient_fun(weights)
    print(f"Gradients:           {gradients}")
    # In-place step: `weights` remains the same array object across iterations.
    weights -= 0.1 * gradients
    print(f"New weights:         {weights}")
    # Direct call so the loss and the printed globals reflect the new weights.
    current_loss = training_loss(weights)
    print(f"Trained loss:        {current_loss:.4f}")
    print(f"Raw predictions:     {preds}")
    print(f"Label probabilities: {label_probabilities}")
