# XOR

Can we make `xor` with a net?

In [None]:
%matplotlib widget
#%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
es = np.einsum

`xor(x,y)` is x != y, viz:

In [None]:
True ^ True, False ^ False, True ^ False, False ^ True

Adopt "CMOS logic levels", where a value less than 1/2 is False and a value greater than 1/2 is True. Then:

In [None]:
def flexor(a:float, b:float) -> float:
    """Analog XOR: 1.0 when exactly one input is below the 0.5 threshold, else 0.0."""
    a_is_low = a < 0.5
    b_is_low = b < 0.5
    # Two booleans differ exactly when their exclusive-or is true.
    if a_is_low != b_is_low:
        return 1.0
    return 0.0

In [None]:
flexor(0, 0), flexor(0,1), flexor(1,0), flexor(1,1)

## Supporting functions
We need some functions implementing nonlinear operations:

In [None]:
def relu(x):
    """Rectified linear unit: element-wise ``max(0.0, x)``.

    Uses ``np.maximum`` instead of ``np.vectorize`` over a Python lambda, so
    the work is done in a single native call, empty inputs are accepted, and
    the result dtype does not depend on which element happens to come first.
    """
    return np.maximum(0.0, x)


# Small demo grid containing both negative and positive values.
t = (np.arange(10).reshape(5,2) - 4.5)/2
t, relu(t)

In [None]:
def positive(x):
    """Unit step: 1 where ``x`` is strictly greater than zero, 0 elsewhere.

    Always returns integers; the earlier ``np.vectorize`` version picked its
    output dtype from the first element, so the result could be int or float
    depending on the sign of that element, and it rejected empty input.
    """
    return (np.asarray(x) > 0).astype(int)
positive(t)

In [None]:
def tanh_gradient(x):
    """Derivative of ``tanh``, evaluated element-wise as ``1 - tanh(x)**2``.

    ``np.tanh`` already operates element-wise, so no ``np.vectorize``
    wrapper is needed; this form also accepts empty input.
    """
    return 1.0 - np.tanh(x) ** 2
tanh_gradient(t)

## Network implementations

The inputs to xor are an array of vectors, e.g. :

In [None]:
X = np.array([0,0, 0,1, 1,0, 1,1]).reshape(4,2); X

Our first parameter is an input offset vector. We'll use "perfect" values for the parameters for this example:

In [None]:
v0 = np.array([-0.5, -0.5])

In [None]:
h1 = X + v0; h1

Then a matrix multiply, offset, and nonlinear activation function:

In [None]:
m1 = np.array([[ 1.0, -1.0],
               [-1.0,  1.0]])
h2 = h1 @ m1; h2

In [None]:
v1 = np.array([0.0, 0.0])
h3 = h2 + v1; h3

In [None]:
h4 = relu(h3); h4

followed by an output matrix, which in this single-output case is a dot-product and offset:

In [None]:
v2 = np.array([1.0, 1.0])
h5 = h4.dot(v2); h5

In [None]:
v3 = np.array([-0.5])
h6 = h5 + v3; h6

As a final step we make the output discrete, effectively boolean:

In [None]:
h7 = positive(h6); h7

### Class time

Put it in a class:

In [None]:
class Xnet_1:
    """Calculate exclusive-or using a network.

    Forward pass for an array ``X`` holding one input pair per row:

        h1 = X + v0          input offset
        h2 = h1 @ m1         matrix multiply
        h3 = h2 + v1         offset
        h4 = relu(h3)        nonlinearity
        h5 = h4 . v2         output dot product
        h6 = h5 + v3         output offset (the "analog" result)
        h7 = positive(h6)    discrete 0/1 result

    Every intermediate value is stored on the instance for inspection.
    """
    # The four input pairs of the XOR truth table, one per row.
    domain = np.array([0,0, 0,1, 1,0, 1,1]).reshape(4,2)

    def __init__(self):
        self.randomize()

    @staticmethod
    def ideal(X):
        "Calculates the ideal return value (0 or 1) directly, to provide a reference"
        X = np.asarray(X)
        return np.where((X[..., 0] > 0.5) ^ (X[..., 1] > 0.5), 1, 0)

    @staticmethod
    def analog_ideal(X):
        "Reference target for the analog output: 0.5 where xor is true, -0.5 where false"
        X = np.asarray(X)
        return np.where((X[..., 0] > 0.5) ^ (X[..., 1] > 0.5), 0.5, -0.5)

    def randomize(self):
        "Set new, random parameters for the network"
        self.v0 = np.random.randn(2)
        self.m1 = np.random.randn(2,2)
        self.v1 = np.random.randn(2)
        # m2 is stored but the forward pass below reads v2, not m2.
        self.m2 = np.random.randn(2)
        self.v2 = np.random.randn(2)
        self.v3 = np.random.randn(1)
        self.current = False
        return self

    def make_perfect(self):
        "Set the matrices to a handmade value that gives perfect behavior"
        self.v0 = np.array([-0.5, -0.5])
        self.m1 = np.array([[ 1.0, -1.0],
                            [-1.0,  1.0]])
        self.v1 = np.array([0.0, 0.0])
        self.m2 = np.array([1.0, 1.0])
        self.v2 = np.array([1.0, 1.0])
        self.v3 = np.array([-0.5])
        self.current = False
        return self

    def _forward(self, X):
        """Run the forward pass unless a valid cached result already exists.

        The cached values are reused only when ``current`` is set AND ``X``
        is the same array object they were computed from.  Checking the flag
        alone returned stale results when called with a different ``X``.
        Assigning a parameter attribute directly does not clear ``current``;
        ``randomize``, ``make_perfect``, ``analog_error_and_loss`` and
        ``__call__`` do.
        """
        if not self.current or X is not self.X:
            self._calculate_forward(X)
            self.current = True

    def _calculate_forward(self, X):
        "Compute and store every stage of the forward pass for X"
        self.X = X
        self.h1 = X + self.v0
        self.h2 = self.h1 @ self.m1
        self.h3 = self.h2 + self.v1
        self.h4 = relu(self.h3)
        self.h5 = self.h4.dot(self.v2)
        self.h6 = self.h5 + self.v3
        self.h7 = positive(self.h6)

    def _propagate_backward(self):
        """Store the error, the loss and per-stage local derivative terms.

        Reads the values left behind by the forward pass, so that must have
        run first.  Only local terms are stored; they are not yet combined
        into parameter gradients.
        """
        self.e = self.h6 - self.analog_ideal(self.X)
        self.loss = self.e.dot(self.e)/(2.0 * len(self.X))
        self.dlossde = self.e
        self.dh6ds6_v3 = np.ones(self.v3.shape)
        self.dh6dh5 = np.ones(self.v3.shape)
        self.dh5ds5_v2 = self.h4
        self.dh5dh4 = self.v2
        # NOTE(review): relu has no parameter, so the meaning of this term is unclear - confirm.
        self.dh4ds4_relu = relu(self.h3)
        self.dh4dh3 = positive(self.h3)
        self.dh3ds3_v1 = np.ones(self.v1.shape)
        self.dh3dh2 = np.ones(self.v1.shape)
        # This assignment had no right-hand side.  Since h2 = h1 @ m1, the
        # term that multiplies m1 is h1, following the dh5ds5_v2 = h4 pattern
        # above - TODO confirm this is the intended value.
        self.dh2ds2_m1 = self.h1

    def analog(self, X):
        "Return the analog (pre-threshold) output h6 for X"
        self._forward(X)
        return self.h6

    def analog_error_and_loss(self):
        "Recompute over the whole domain; return (error vector, mean half squared error)"
        self.current = False
        self.e = self.analog(self.domain) - self.analog_ideal(self.domain)
        self.loss = self.e.dot(self.e)/(2.0 * len(self.domain))
        return self.e, self.loss

    # Disabled accessor, kept for reference:
    # def analog_loss(self):
    #     assert self.current
    #     return self.loss

    def __call__(self, X):
        "Return the discrete 0/1 output h7 for X, always recomputing"
        self.current = False
        self._forward(X)
        return self.h7


In [None]:
net = Xnet_1()

In [None]:
net.domain

In [None]:
net.analog(net.domain)

In [None]:
net.make_perfect()
net.analog(net.domain), net(net.domain), net.ideal(net.domain)

In [None]:
net.analog(net.domain) - net.analog_ideal(net.domain)

In [None]:
net.randomize().analog(net.domain)

In [None]:
net.analog_error_and_loss()

In [None]:
net.make_perfect(); net.analog_error_and_loss()

What happens if we randomize only the last vector (v3)?

In [None]:
net.v3 = np.random.randn(1); net.v3

In [None]:
e, loss = net.analog_error_and_loss(); e, loss

In [None]:
np.mean(e)

In [None]:
net.v3 -= np.mean(e); net.v3

In [None]:
net.analog_error_and_loss()

In [None]:
net.make_perfect()
net.v2 = np.random.randn(2)
e, loss = net.analog_error_and_loss()
print(f"v2={net.v2}\ne={e}\nloss={loss}")

In [None]:
net.h4, net.h5, net.e

In [None]:
net.e @ net.h4

In [None]:
# With loss = e.e/(2N) and h6 = h4.v2 + v3, the gradient of the loss with
# respect to v2 is (e @ h4)/N.  Use the network's own h4 and e rather than
# the global h4 left over from the step-by-step walkthrough.
net.v2 -= (net.e @ net.h4) / len(net.e); net.v2

---

In [None]:
class Netxor:
    """Calculate exclusive-or using a network.

    For one input pair ``(a, b)`` the pipeline is: add the offset ``m0``,
    multiply by the 2x2 matrix ``m1``, apply ``relu``, then take the dot
    product with ``m2``.  Intermediate values are kept on the instance as
    ``p0`` .. ``p5`` so they can be inspected after a call.
    """
    def __init__(self):
        self.m0 = np.array([-0.5, -0.5])
        self.randomize()

    def randomize(self):
        "Randomize the matrices m1 and m2 (m0 is left alone); returns self"
        self.m1 = np.random.randn(2,2)
        self.m2 = np.random.randn(2)
        return self

    def make_perfect(self):
        "Set the matrices to a handmade value that gives perfect behavior; returns self"
        self.m0 = np.array([-0.5, -0.5])
        self.m1 = np.array([[ 1.0, -1.0],
                            [-1.0,  1.0]])
        self.m2 = np.array([1.0, 1.0])
        return self

    def ideal(self, a:float, b:float) -> int:
        "Calculates the ideal return value directly, to provide a reference"
        return 1 if (a > 0.5) ^ (b > 0.5) else 0

    def netwise(self, a:float, b:float) -> int:
        "Calculate a single 0/1 result using network primitives"
        v = self.net_lin(a, b)
        # Threshold the linear output at 0.5 to get a discrete result.
        v = self.p5 = 0 if v < 0.5 else 1
        return v

    def net_ana(self, a:float, b:float):
        "Calculate a single analog result: tanh of the linear output"
        v = self.p4 = np.tanh(self.net_lin(a, b))
        return v

    def net_lin(self, a:float, b:float):
        "The network output up to the last linear stage"
        pair = np.array([a, b])
        v = self.p0 = self.m0 + pair
        v = self.p1 = self.m1 @ v
        v = self.p2 = relu(v)
        v = self.p3 = np.dot(self.m2, v)
        return v

    def __call__(self, a, b):
        "Vectorized calculation of result using network"
        return np.vectorize(self.netwise)(a, b)

    def loss(self):
        "L2 loss function of the network implementation"
        # NOTE(review): evaluated on inputs of -1/+1 although the demo cells
        # feed 0/1; ideal() thresholds at 0.5 so both give the same truth
        # table - confirm that this input set is intended.
        return sum((self(x,y) - self.ideal(x,y))**2 for x in (-1, 1) for y in (-1, 1))

    def aloss(self):
        "L2 loss function of the network analog implementation"
        return sum((self.net_ana(x,y) - self.ideal(x,y))**2 for x in (-1, 1) for y in (-1, 1))

    def __repr__(self):
        # Take the name from the class so it cannot go stale, and balance the parentheses.
        return f"{type(self).__name__}(m0={self.m0}, m1={self.m1}, m2={self.m2})"
            

### Try out the implementation

In [None]:
# The class defined above is named Netxor; "Exactor" is not defined in this notebook.
exor = Netxor()
print(exor)
exor.net_lin(1.0,1.0)

In [None]:
exor.net_ana(1.0,1.0)

In [None]:
exor.netwise(1,1)

In [None]:
for x in (0, 1):
    for y in (0, 1):
        print(exor.ideal(x, y), exor(x,y), exor.net_ana(x,y))

In [None]:
exor.loss(), exor.aloss()

In [None]:
print(exor.make_perfect().loss())
print(exor)

In [None]:
for x in (0, 1):
    for y in (0, 1):
        print(exor.ideal(x, y), exor(x,y), exor.p0, exor.p1, exor.p2, exor.p3, exor.p5)

In [None]:
exor.loss(), exor.aloss()

In [None]:
exor.randomize()
exor.loss(), exor.aloss()

### Can we find working matrices by trying random matrices?

In [None]:
# Random search: draw up to 10000 random nets and keep the best seen under
# both the discrete loss and the analog loss.
best_loss = 1e300
best_aloss = 1e300
best_repr = ""
best_arepr = ""  # initialised like best_repr so the final print can never hit an unbound name
for n in range(10000):
    exor.randomize()
    loss = exor.loss()
    if best_loss > loss:
        best_loss = loss
        best_repr = repr(exor)
    aloss = exor.aloss()
    if best_aloss > aloss:
        best_aloss = aloss
        best_arepr = repr(exor)
    if loss == 0:
        print(F"Success after {n+1} tries: {exor}")
        break
else:
    # Reached only when the loop never hit `break`, i.e. no net had zero loss.
    print(F"Failure, none of {n+1} random tries worked")
print(f"best net loss:{best_loss}, Best net:{best_repr}")
print(f"best net aloss:{best_aloss}, Best anet:{best_arepr}")

### Density of successful random nets

In [None]:
sum((1 if exor.randomize().loss() == 0.0 else 0) for i in range(1000))

In [None]:
t = [exor.randomize().loss() for i in range(100000)]

In [None]:
np.bincount(t)

In [None]:
alosses = np.array([exor.randomize().aloss() for i in range(100000)])

In [None]:
%matplotlib widget

In [None]:
plt.hist(alosses, bins=250);

### A less delicate network implementation

In [None]:
class was_Flexor:
    """Earlier draft of the more robust XOR network, with four hidden units."""
    def __init__(self):
        self.m0 = np.array([-0.5, -0.5])
        self.randomize()

    def randomize(self):
        "Draw fresh random values for the weight matrix m1 and the output vector m2"
        self.m1, self.m2 = np.random.randn(4,2), np.random.randn(4)

    def make_perfect(self):
        "Install the handmade weights intended to give perfect behavior"
        hidden_rows = [[ 1.0, -1.0],
                       [-1.0,  1.0],
                       [ 1.0,  1.0],
                       [-1.0, -1.0]]
        self.m1 = np.array(hidden_rows)
        self.m2 = np.array([1.0, 1.0, -1.0, -1.0])

    def ideal(self, a:float, b:float) -> bool:
        "Reference value computed directly: 1 when exactly one input exceeds 0.5, else 0"
        a_high = a > 0.5
        b_high = b > 0.5
        if a_high ^ b_high:
            return 1
        return 0

    def netwise(self, a:float, b:float) -> bool:
        "Evaluate one input pair through the network, recording each stage as p1..p5"
        column = np.array([[a],
                           [b]])
        self.p1 = self.m1 @ column
        self.p2 = relu(self.p1)
        self.p3 = np.dot(self.m2, self.p2)
        self.p4 = relu(self.p3)
        # sign() of the rectified value is 0 or 1; use it to pick -1 or +1.
        choice = int(np.sign(self.p4)[0])
        self.p5 = (-1,1)[choice]
        return self.p5

    def __call__(self, a, b):
        "Element-wise evaluation of netwise over array-like a and b"
        vectorized = np.vectorize(self.netwise)
        return vectorized(a, b)

    def loss(self):
        "Sum of squared differences between network output and ideal over the four +/-1 input pairs"
        total = 0
        for x in (-1, 1):
            for y in (-1, 1):
                total = total + (self.__call__(x,y) - self.ideal(x,y))**2
        return total

    def goodness(self):
        "Analog score: sum of the rectified output p4 weighted by the ideal value"
        score = 0.0
        for x, y in ((-1, -1), (-1, 1), (1, -1), (1, 1)):
            self.netwise(x, y)  # called for its side effect of refreshing self.p4
            score = score + self.p4 * self.ideal(x,y)
        return score[0]

    def __repr__(self):
        return F"Flexor(m1={self.m1}, m2={self.m2})"

In [None]:
class Flexor:
    """A more robust XOR using a network.

    Same pipeline as the two-unit version but with four hidden units: add
    the offset ``m0``, multiply by the 4x2 matrix ``m1``, apply ``relu``,
    then take the dot product with the length-4 vector ``m2``.  Intermediate
    values are kept on the instance as ``p0`` .. ``p5``.
    """
    def __init__(self):
        self.m0 = np.array([-0.5, -0.5])
        self.randomize()

    def randomize(self):
        "Randomize the matrices m1 and m2 (m0 is left alone); returns self"
        self.m1 = np.random.randn(4,2)
        self.m2 = np.random.randn(4)
        return self

    def make_perfect(self):
        "Set the matrices to a handmade value that gives perfect behavior; returns self"
        self.m0 = np.array([-0.5, -0.5])
        self.m1 = np.array([[ 1.0, -1.0],
                            [-1.0,  1.0],
                            [ 1.0,  1.0],
                            [-1.0, -1.0]])
        self.m2 = np.array([1.0, 1.0, -1.0, -1.0])
        return self

    def ideal(self, a:float, b:float) -> int:
        "Calculates the ideal return value directly, to provide a reference"
        return 1 if (a > 0.5) ^ (b > 0.5) else 0

    def netwise(self, a:float, b:float) -> int:
        "Calculate a single 0/1 result using network primitives"
        v = self.net_lin(a, b)
        # Threshold the linear output at 0.5 to get a discrete result.
        v = self.p5 = 0 if v < 0.5 else 1
        return v

    def net_ana(self, a:float, b:float):
        "Calculate a single analog result: tanh of the linear output"
        v = self.p4 = np.tanh(self.net_lin(a, b))
        return v

    def net_lin(self, a:float, b:float):
        "The network output up to the last linear stage"
        pair = np.array([a, b])
        v = self.p0 = self.m0 + pair
        # Feed the offset value forward.  Multiplying m1 by the raw input
        # here discarded m0 entirely, so the handmade weights failed on 0/1
        # inputs.
        v = self.p1 = self.m1 @ v
        v = self.p2 = relu(v)
        v = self.p3 = np.dot(self.m2, v)
        return v

    def __call__(self, a, b):
        "Vectorized calculation of result using network"
        return np.vectorize(self.netwise)(a, b)

    def loss(self):
        "L2 loss function of the network implementation"
        # NOTE(review): evaluated on inputs of -1/+1 although the demo cells
        # also feed 0/1; ideal() thresholds at 0.5 so both give the same
        # truth table - confirm that this input set is intended.
        return sum((self(x,y) - self.ideal(x,y))**2 for x in (-1, 1) for y in (-1, 1))

    def aloss(self):
        "L2 loss function of the network analog implementation"
        return sum((self.net_ana(x,y) - self.ideal(x,y))**2 for x in (-1, 1) for y in (-1, 1))

    def __repr__(self):
        # Take the name from the class so it cannot go stale, and balance the parentheses.
        return f"{type(self).__name__}(m0={self.m0}, m1={self.m1}, m2={self.m2})"
            

In [None]:
flor = Flexor()
flor.m1, flor.m2

In [None]:
flor

In [None]:
for x in (0, 1):
    for y in (0, 1):
        print(flor.ideal(x, y), '\tres:', flor(x,y))

In [None]:
flor.loss(), flor.aloss()

In [None]:
flor.make_perfect()

In [None]:
for x in (-1, 1):
    for y in (-1, 1):
        print(flor.ideal(x, y), flor(x,y))

In [None]:
flor.loss(), flor.aloss()

In [None]:
flor.randomize()
flor.loss(), flor.aloss()

### Can we find working matrices by trying random matrices?

In [None]:
# Random search: stop at the first random net whose discrete loss is zero.
for n in range(10000):
    flor.randomize()
    if flor.loss() == 0:
        print(F"Success after {n+1} tries:\n{flor}")
        break
else:
    # Reached only when the loop never hit `break`, so the loss need not be recomputed.
    print(F"Failure, none of {n+1} random tries worked")

In [None]:
flor.loss(), flor.aloss()

E.g. [Neural Network from scratch in Python](https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65)