### Simple neural network implementation in NumPy

In [1]:
import numpy as np

N = 64 # batch_size
input_dimensions = 500
hidden_neurons = 100
output_dimensions = 10

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same data,
# the same initial weights and therefore the same loss curve. A local
# RandomState is used so the global NumPy RNG is left untouched.
SEED = 42
rng = np.random.RandomState(SEED)

# Random input data and random regression targets
x = rng.randn(N, input_dimensions)
y = rng.randn(N, output_dimensions)

# Randomly initialised weights of the two fully connected layers (no biases)
w1 = rng.randn(input_dimensions, hidden_neurons)
w2 = rng.randn(hidden_neurons, output_dimensions)

learning_rate = 0.000001

for epoch in range(1000):
    # Forward pass: linear -> ReLU -> linear
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_predict = h_relu.dot(w2)

    # Loss: sum of squared errors over the whole batch (summed, not averaged)
    loss = np.square(y_predict - y).sum()
    #print("epoch: ",epoch, " loss: ",loss)

    # Backward pass: gradients of the loss w.r.t. w1 and w2, derived by hand
    # with the chain rule. This is feasible for a tiny network like this one
    # but quickly becomes impractical for deeper or more complex networks.
    grad_y_predict = 2.0 * (y_predict - y)
    grad_w2 = h_relu.T.dot(grad_y_predict)
    grad_h_relu = grad_y_predict.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0  # ReLU passes no gradient where its input was negative
    grad_w1 = x.T.dot(grad_h)

    # Gradient-descent step
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

### Simple neural network in PyTorch using only Tensors, without Variable or autograd

In [2]:
import torch


N = 64 # batch_size
input_dimensions = 500
hidden_neurons = 100
output_dimensions = 10

# Seed PyTorch's global RNG so that re-running this cell reproduces the same
# data, the same initial weights and therefore the same loss curve.
SEED = 42
torch.manual_seed(SEED)

# Random input data and random regression targets
x = torch.randn(N, input_dimensions).type(torch.FloatTensor)
y = torch.randn(N, output_dimensions).type(torch.FloatTensor)

# Randomly initialised weights of the two fully connected layers (no biases)
w1 = torch.randn(input_dimensions, hidden_neurons).type(torch.FloatTensor)
w2 = torch.randn(hidden_neurons, output_dimensions).type(torch.FloatTensor)

learning_rate = 0.000001

for epoch in range(1000):
    # Forward pass: linear -> ReLU -> linear
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_predict = h_relu.mm(w2)

    # Loss: sum of squared errors over the whole batch (summed, not averaged)
    loss = (y_predict - y).pow(2).sum()
    #print("epoch: ",epoch, " loss: ",loss)

    # Backward pass: gradients of the loss w.r.t. w1 and w2, derived by hand
    # with the chain rule. This is feasible for a tiny network like this one
    # but quickly becomes impractical for deeper or more complex networks.
    grad_y_predict = 2.0 * (y_predict - y)
    grad_w2 = h_relu.t().mm(grad_y_predict)
    grad_h_relu = grad_y_predict.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0  # ReLU passes no gradient where its input was negative
    grad_w1 = x.t().mm(grad_h)

    # Gradient-descent step
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

### Simple neural network with Variable and autograd

In [13]:
import torch
from torch.autograd import Variable

N = 64 # batch_size
input_dimensions = 500
hidden_neurons = 100
output_dimensions = 10

# Seed PyTorch's global RNG so that re-running this cell reproduces the same
# data, the same initial weights and therefore the same loss curve.
SEED = 42
torch.manual_seed(SEED)

# Random input data and targets; no gradients are needed with respect to them
x = Variable(torch.randn(N, input_dimensions).type(torch.FloatTensor), requires_grad=False)
y = Variable(torch.randn(N, output_dimensions).type(torch.FloatTensor), requires_grad=False)

# Randomly initialised weights; requires_grad=True asks autograd to compute
# d(loss)/d(w1) and d(loss)/d(w2) when loss.backward() is called
w1 = Variable(torch.randn(input_dimensions, hidden_neurons).type(torch.FloatTensor), requires_grad=True)
w2 = Variable(torch.randn(hidden_neurons, output_dimensions).type(torch.FloatTensor), requires_grad=True)

learning_rate = 0.000001

for epoch in range(1000):
    # Forward pass (linear -> ReLU -> linear) as one chained expression,
    # equivalent to:
    #     h = x.mm(w1); h_relu = h.clamp(min=0); y_predict = h_relu.mm(w2)
    # The intermediate results need no names of their own: autograd records
    # the operations applied to w1 and w2 so it can backpropagate through them.
    y_predict = x.mm(w1).clamp(min=0).mm(w2)

    # Loss: sum of squared errors over the whole batch (summed, not averaged)
    loss = (y_predict - y).pow(2).sum()
    #print("epoch: ",epoch, " loss: ",loss.data[0])
    # (on PyTorch >= 0.4 use loss.item() instead of loss.data[0])

    # Backward pass: autograd fills w1.grad and w2.grad, replacing the
    # hand-derived chain-rule gradients of a manual implementation
    loss.backward()

    # Gradient-descent step, applied to .data so that the update itself is
    # not tracked by autograd
    w1.data = w1.data - learning_rate * w1.grad.data
    w2.data = w2.data - learning_rate * w2.grad.data

    # backward() accumulates into .grad, so reset the gradients to zero
    # before the next iteration
    w1.grad.data.zero_()
    w2.grad.data.zero_()

### Implementing the network using the PyTorch nn module

In [27]:
import torch
from torch.autograd import Variable

N = 64 # batch_size
input_dimensions = 500
hidden_neurons = 100
output_dimensions = 10

# Seed PyTorch's global RNG so that the random data AND the random initial
# parameters of the Linear layers are reproducible when the cell is re-run.
SEED = 42
torch.manual_seed(SEED)

# Random input data and random regression targets
x = Variable(torch.randn(N, input_dimensions).type(torch.FloatTensor))
y = Variable(torch.randn(N, output_dimensions).type(torch.FloatTensor), requires_grad=False)


# Two-layer network: linear -> ReLU -> linear. Each Linear layer owns its
# weight and bias, so no parameters are created by hand.
model = torch.nn.Sequential(torch.nn.Linear(input_dimensions, hidden_neurons),
                           torch.nn.ReLU(),
                           torch.nn.Linear(hidden_neurons, output_dimensions),)

# size_average=False makes the loss the SUM (not the mean) of the squared
# errors. NOTE: newer PyTorch releases spell this reduction='sum' and emit a
# deprecation warning for size_average.
loss_fn = torch.nn.MSELoss(size_average=False)

learning_rate = 0.0001

for epoch in range(300):
    # Forward pass and loss
    y_predict = model(x)
    loss = loss_fn(y_predict, y)
    #print("epoch: ",epoch, " loss: ",loss.data[0])
    # (on PyTorch >= 0.4 use loss.item() instead of loss.data[0])

    # Clear the gradients left over from the previous iteration (backward()
    # accumulates), then backpropagate to fill param.grad for every parameter
    model.zero_grad()
    loss.backward()

    # Manual gradient-descent step, applied to .data so that the update
    # itself is not tracked by autograd
    for param in model.parameters():
        param.data -= learning_rate * param.grad.data

### Simple neural network with an optimizer

In [31]:
import torch
from torch.autograd import Variable

N = 64 # batch_size
input_dimensions = 500
hidden_neurons = 100
output_dimensions = 10

# Seed PyTorch's global RNG so that the random data AND the random initial
# parameters of the Linear layers are reproducible when the cell is re-run.
SEED = 42
torch.manual_seed(SEED)

# Random input data and random regression targets
x = Variable(torch.randn(N, input_dimensions).type(torch.FloatTensor))
y = Variable(torch.randn(N, output_dimensions).type(torch.FloatTensor), requires_grad=False)

# Two-layer network: linear -> ReLU -> linear
model = torch.nn.Sequential(torch.nn.Linear(input_dimensions, hidden_neurons),
                           torch.nn.ReLU(),
                           torch.nn.Linear(hidden_neurons, output_dimensions),)

# size_average=False makes the loss the SUM (not the mean) of the squared
# errors. NOTE: newer PyTorch releases spell this reduction='sum' and emit a
# deprecation warning for size_average.
loss_func = torch.nn.MSELoss(size_average=False)
learning_rate = 0.0001
# Adam takes over the parameter update that would otherwise be written by hand
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(500):
    # Forward pass and loss
    y_predict = model(x)
    loss = loss_func(y_predict, y)
    #print("epoch: ",epoch, " loss: ",loss.data[0])
    # (on PyTorch >= 0.4 use loss.item() instead of loss.data[0])

    # Clear the gradients left over from the previous iteration (backward()
    # accumulates), then backpropagate
    optimizer.zero_grad()
    loss.backward()

    # Let the optimizer update every model parameter from its .grad
    optimizer.step()