# Try to solve the XOR problem with a neural network

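Input layer has 3 nodes <br>
Hidden layer has 2 nodes : w1 is a 3x2 matrix <br>
Output layer has 1 node  : w2 is a 2x1 matrix <br>
No explicit bias term (the third input is the constant 1, so the third row of w1 acts as a bias for the hidden layer)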

In [5]:
import numpy as np
import torch
# Seed torch as well as NumPy: the network weights are drawn with torch.rand,
# which ignores NumPy's seed, so torch needs its own seed for repeatable runs.
# np.random.seed stays last so the cell does not echo a return value.
torch.manual_seed(1)
np.random.seed(1)

In [167]:
# Input dataset: one row per sample, one column per input node.
# Columns 1 and 2 carry the XOR signal; column 3 is the constant 1 in every row.
# (torch.tensor(..., dtype=torch.double) would build the tensor directly.)
x = np.array(
    [
        [0, 0, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ]
)

In [170]:
# Convert the NumPy array into a torch tensor.
# The cast to float64 is done as a separate step afterwards.
x = torch.from_numpy(x)

In [172]:
# Cast the tensor to float64 (double precision), then display it
x = x.to(torch.float64)
x

tensor([[ 0.,  0.,  1.],
        [ 0.,  1.,  1.],
        [ 1.,  0.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)

In [173]:
# Target values as a 4x1 float64 column tensor: the XOR of the first two inputs
y = torch.from_numpy(np.array([[0, 1, 1, 0]]).T).double()
y

tensor([[ 0.],
        [ 1.],
        [ 1.],
        [ 0.]], dtype=torch.float64)

In [175]:
x

tensor([[ 0.,  0.,  1.],
        [ 0.,  1.,  1.],
        [ 1.,  0.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)

In [176]:
y

tensor([[ 0.],
        [ 1.],
        [ 1.],
        [ 0.]], dtype=torch.float64)

In [177]:
#sigmoid function
def sigmoid(x):
    """Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Delegates to ``torch.sigmoid`` instead of spelling the formula out.
    The hand-written form evaluates ``exp(-x)``, which overflows to ``inf``
    for large negative inputs; the forward value is still 0, but autograd
    then multiplies 0 by ``inf`` and the gradient becomes ``nan``.

    Args:
        x: torch tensor of any shape.

    Returns:
        Tensor with the same shape as ``x`` holding values in [0, 1].
    """
    return torch.sigmoid(x)

In [186]:
# Step size applied to each gradient-descent weight update
learning_rate = 0.01

In [187]:
# Initialise both weight matrices with random float64 values from torch.rand.
# Because requires_grad=True, calling backward() on the loss stores the
# gradient of the loss with respect to each weight matrix in its .grad.
w1 = torch.rand(3, 2, requires_grad=True, dtype=torch.double)
w2 = torch.rand(2, 1, requires_grad=True, dtype=torch.double)

# Plain gradient descent over the whole dataset
for i in range(50001):
    # Forward pass: sigmoid hidden layer followed by a linear output layer
    y_pred = sigmoid(x @ w1) @ w2

    # Sum-of-squares error (a scalar); report it every 10000 iterations
    loss = ((y_pred - y) ** 2).sum()
    if i % 10000 == 0:
        print(i, loss.item())

    # Backward pass: autograd fills w1.grad and w2.grad
    loss.backward()

    # Update the weights without tracking the update in autograd, then
    # clear the gradients so they do not accumulate into the next iteration
    with torch.no_grad():
        for param in (w1, w2):
            param.sub_(learning_rate * param.grad)
            param.grad.zero_()
        

0 1.0035441257999647
10000 3.4478243700479526e-06
20000 4.389199724663059e-17
30000 4.477389608759802e-27
40000 4.477389608759802e-27
50000 4.477389608759802e-27


In [188]:
y_pred

tensor([[ 3.5194e-14],
        [ 1.0000e+00],
        [ 1.0000e+00],
        [ 3.9968e-14]], dtype=torch.float64)

In [189]:
w1

tensor([[ 3.6963,  1.3268],
        [ 3.7856,  1.3463],
        [-1.2793, -1.6572]], dtype=torch.float64)

In [190]:
w2

tensor([[ 2.8593],
        [-3.8868]], dtype=torch.float64)