In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Seed every RNG the notebook draws from: NumPy picks the training samples
# (np.random.randint in the training cell) and PyTorch initializes the weights.
np.random.seed(2)
torch.manual_seed(2)

<torch._C.Generator at 0x7f82e225b8d0>

In [2]:
# Training set: the four XOR input pairs, one row per sample
X = torch.tensor([[0., 0.],
                  [0., 1.],
                  [1., 0.],
                  [1., 1.]])
# Matching XOR targets, written directly as a (4, 1) column
Y = torch.tensor([[0.], [1.], [1.], [0.]])

In [3]:
# A class defining the architecture - overall 44 parameters
class XOR(nn.Module):
    """Small fully connected network for the XOR problem.

    Architecture: ``input_dim -> 5 -> 4 -> output_dim`` with tanh after each of
    the two hidden layers and no activation on the output layer. With the
    default dimensions it has 15 + 24 + 5 = 44 trainable parameters.

    Args:
        input_dim: number of features per input sample (default 2).
        output_dim: number of output values per sample (default 1).
    """

    def __init__(self, input_dim = 2, output_dim=1):
        super(XOR, self).__init__()

        self.lin1 = nn.Linear(input_dim, 5)   # Layer 1: input_dim inputs -> 5 neurons
        self.lin2 = nn.Linear(5, 4)           # Layer 2: 5 inputs -> 4 neurons
        self.lin3 = nn.Linear(4, output_dim)  # Output layer: 4 inputs -> output_dim neurons

    def forward(self, x):
        """Run the network on ``x`` and return the raw (un-activated) output.

        Args:
            x: tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``output_dim``.
        """
        # torch.tanh replaces the deprecated F.tanh; the values are identical.
        x = torch.tanh(self.lin1(x))  # Layer 1 + tanh activation
        x = torch.tanh(self.lin2(x))  # Layer 2 + tanh activation
        # The output layer is deliberately left linear (no final tanh).
        return self.lin3(x)

In [4]:
# Instantiate the network with its default dimensions and display its layers
model = XOR()
print(model)
from torchsummary import summary
# Print a per-layer table of output shapes and parameter counts.
# The (2,2) input size is why the reported output shapes carry an extra
# dimension of 2 (e.g. [-1, 2, 5]).
# NOTE(review): a single training sample has shape (2,) - confirm whether
# (2,) was the intended input size.
summary(model, (2,2))

XOR(
  (lin1): Linear(in_features=2, out_features=5, bias=True)
  (lin2): Linear(in_features=5, out_features=4, bias=True)
  (lin3): Linear(in_features=4, out_features=1, bias=True)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 2, 5]              15
            Linear-2                 [-1, 2, 4]              24
            Linear-3                 [-1, 2, 1]               5
Total params: 44
Trainable params: 44
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------




In [5]:
# weight initialization for the neurons
def weights_init(model):
    """Re-draw the weights of every ``nn.Linear`` layer in ``model`` from N(0, 1).

    Walks ``model.modules()`` and overwrites each linear layer's weight tensor
    in place. Biases and all non-linear modules are left untouched.

    Args:
        model: module whose ``nn.Linear`` sub-modules are re-initialized.

    Returns:
        None. ``model`` is modified in place.
    """
    for m in model.modules():
        if isinstance(m, nn.Linear):
            # nn.init fills the parameter in place without recording the op in
            # autograd, so the legacy `.data` attribute is not needed.
            nn.init.normal_(m.weight, mean=0.0, std=1.0)

# Overwrite the weights of the Linear layers in `model` in place (biases are not touched)
weights_init(model)

In [6]:
# Defining the loss function: L1 (absolute error) between prediction and target
loss_func = nn.L1Loss()

In [7]:
# Defining the optimizer: SGD with momentum over all of the model's parameters
optimizer = optim.SGD(model.parameters(), lr=0.02, momentum=0.9)


In [8]:
# Training of the model
epochs = 2001  # number of epochs
steps = X.size(0)  # updates per epoch = number of training points
# Stochastic gradient descent: every step trains on one randomly drawn data point
for i in range(epochs):
    for j in range(steps):
        # Sampled with replacement, so an epoch need not visit every point
        data_point = np.random.randint(X.size(0))
        # Plain tensors are passed directly; the deprecated Variable wrapper
        # is a no-op since tensors and Variables were merged.
        x_var = X[data_point]
        y_var = Y[data_point]

        optimizer.zero_grad()  # clear gradients left over from the previous step
        y_hat = model(x_var)
        # Call the module itself rather than .forward() so registered hooks run
        loss = loss_func(y_hat, y_var)
        loss.backward()
        optimizer.step()

    if i % 50 == 0:
        # Reports the loss of the last sampled point only, so the value is noisy
        print( "Epoch: {0}, Loss: {1}, ".format(i, loss.item()))



Epoch: 0, Loss: 0.5741396546363831, 
Epoch: 50, Loss: 0.8225635290145874, 
Epoch: 100, Loss: 0.5582839250564575, 
Epoch: 150, Loss: 0.011883020401000977, 
Epoch: 200, Loss: 0.10531638562679291, 
Epoch: 250, Loss: 0.23555490374565125, 
Epoch: 300, Loss: 0.2543698847293854, 
Epoch: 350, Loss: 0.149377703666687, 
Epoch: 400, Loss: 0.32247769832611084, 
Epoch: 450, Loss: 0.041568875312805176, 
Epoch: 500, Loss: 0.02942488342523575, 
Epoch: 550, Loss: 0.05058705806732178, 
Epoch: 600, Loss: 0.05725304037332535, 
Epoch: 650, Loss: 0.17187544703483582, 
Epoch: 700, Loss: 0.3789202570915222, 
Epoch: 750, Loss: 0.06832718849182129, 
Epoch: 800, Loss: 0.04583917558193207, 
Epoch: 850, Loss: 0.17462311685085297, 
Epoch: 900, Loss: 0.005715608596801758, 
Epoch: 950, Loss: 0.5409003496170044, 
Epoch: 1000, Loss: 0.2673335671424866, 
Epoch: 1050, Loss: 0.48354804515838623, 
Epoch: 1100, Loss: 0.23841071128845215, 
Epoch: 1150, Loss: 0.2794840335845947, 
Epoch: 1200, Loss: 0.1054583340883255, 
Epoch: