In [4]:
import torch
import torch.nn as nn

#### With PyTorch, a typical training procedure for a neural network consists of the following steps:
    1. Define the neural network that has some learnable parameters (or weights).
    2. Iterate over a dataset of inputs.
    3. Process input through the network.
    4. Compute the loss (how far is the output from being correct).
    5. Propagate gradients back into the network’s parameters.
    6. Update the weights of the network, typically using a simple update rule: `weight = weight - learning_rate * gradient`.
    

In [2]:
# Seed PyTorch's global random number generator first, so that the random
# input tensor and the randomly initialised layer weights created in the
# following cells come out the same on every "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

# Network / data dimensions, in order:
#   n_in       - size of each input example
#   n_h        - number of units in the hidden layer
#   n_out      - size of the network output
#   batch_size - number of training examples in the batch
n_in, n_h, n_out, batch_size = 10, 5, 1, 10

In [5]:
# Dummy training data.
# x: batch_size rows with n_in random values each (10x10 with the sizes used
#    in this notebook), sampled by torch.randn.
x = torch.randn((batch_size, n_in))
# y: one 0/1 target per training example, written as a flat list and then
#    turned into a single column (10x1).
y = torch.tensor([1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).unsqueeze(1)

In [6]:
# Build the model as an ordered stack of layers. nn.Sequential is a container
# that runs its modules one after another, in the order they are passed in.
#   * nn.Linear applies a linear transformation to its input: y = x A^T + b.
#   * nn.ReLU (Rectified Linear Unit) computes max(0, x) element-wise:
#     negative inputs become 0, positive inputs pass through unchanged.
#   * nn.Sigmoid computes 1 / (1 + e^(-x)) element-wise, squashing any real
#     input into the range 0 to 1.
layers = [
    nn.Linear(n_in, n_h),   # input  -> hidden
    nn.ReLU(),
    nn.Linear(n_h, n_out),  # hidden -> output
    nn.Sigmoid(),
]
model = nn.Sequential(*layers)

In [7]:
# Loss (cost) function: Mean Squared Error, i.e. the mean of (y_pred - y)^2
# taken over all training examples.
criterion = nn.MSELoss()

# Optimizer: Stochastic Gradient Descent over every parameter of the model.
# The optimizer links the loss to the model: once gradients of the loss have
# been computed, each call to step() nudges the weights in the direction that
# reduces the loss.
# The learning rate is the small factor the gradients are multiplied by before
# they are applied to the weights.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [8]:
# Gradient descent: repeat the forward / backward / update cycle on the same
# batch for a fixed number of epochs.
n_epochs = 50
for epoch in range(n_epochs):
    # Clear the gradients accumulated during the previous iteration so that
    # this step's backward pass starts from a clean slate.
    optimizer.zero_grad()

    # Forward pass: run x through the model and measure the error against y.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report progress; loss.item() extracts a plain Python number from the
    # one-element loss tensor.
    print(f'epoch:  {epoch}  loss:  {loss.item()}')

    # Backward pass (backpropagation): compute gradients of the loss with
    # respect to the model parameters.
    loss.backward()

    # Apply the update to the parameters.
    optimizer.step()

epoch:  0  loss:  0.2940043807029724
epoch:  1  loss:  0.29363393783569336
epoch:  2  loss:  0.2932642102241516
epoch:  3  loss:  0.29289519786834717
epoch:  4  loss:  0.29252687096595764
epoch:  5  loss:  0.2921592891216278
epoch:  6  loss:  0.2917923927307129
epoch:  7  loss:  0.29142624139785767
epoch:  8  loss:  0.29106080532073975
epoch:  9  loss:  0.29069608449935913
epoch:  10  loss:  0.2903321087360382
epoch:  11  loss:  0.2899688482284546
epoch:  12  loss:  0.28960633277893066
epoch:  13  loss:  0.28924453258514404
epoch:  14  loss:  0.2888834774494171
epoch:  15  loss:  0.28853222727775574
epoch:  16  loss:  0.28818389773368835
epoch:  17  loss:  0.2878362536430359
epoch:  18  loss:  0.28748929500579834
epoch:  19  loss:  0.2871430516242981
epoch:  20  loss:  0.28679752349853516
epoch:  21  loss:  0.28645265102386475
epoch:  22  loss:  0.28610849380493164
epoch:  23  loss:  0.28576505184173584
epoch:  24  loss:  0.28542232513427734
epoch:  25  loss:  0.28508028388023376
epoch