In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [11]:
class NN(nn.Module):
    def __init__(self,D_in,H_in,D_out): #The __init__ function initialises the two linear layers of the model.
        super(NN,self).__init__()
        self.Linear1 = nn.Linear(D_in,H_in)
        self.Linear2 = nn.Linear(H_in,D_out)
        
    def forward(self,x): #In the forward function, we first apply the first linear layer, apply ReLU activation and then apply the second linear layer.
        in_lin = self.Linear1(x) # Here the x is assumed to be the batch size (no.of training examples) by the model
        # If the network takes in the input vector of dim 100 and a batch size of 50 is passed in the model above, then the dim of x would be (50 x 100).
        h_relu = F.relu(in_lin)
        out_lin = self.Linear2(h_relu)
        y_pred = F.relu(out_lin)
        return y_pred
        

In [6]:
# Training the model
D_in,H_in,D_out,batch_size = 100,50,25,50
# Create nodes in pytorch using Variable present in torch.autograd package
# Create random Tensors to hold inputs and outputs, and wrap them in Variables
x = torch.autograd.Variable(torch.randn(batch_size,D_in))

In [12]:
model = NN(D_in,H_in,D_out)
y_pred = model(x)

In [54]:
target = torch.randn(batch_size,D_out).float()

In [55]:
#Loss Function
criterion = nn.MSELoss()
loss = criterion(y_pred,target)

In [58]:
theta = model.parameters()

In [60]:
for each in theta:
    print(each)

Parameter containing:
tensor([[ 0.0861, -0.0635,  0.0343,  ...,  0.0829, -0.0528, -0.0113],
        [ 0.0040, -0.0725,  0.0304,  ...,  0.0191, -0.0957,  0.0291],
        [-0.0359,  0.0433,  0.0952,  ..., -0.0227, -0.0311, -0.0826],
        ...,
        [ 0.0220,  0.0456, -0.0435,  ...,  0.0858, -0.0920, -0.0151],
        [ 0.0739,  0.0794,  0.0547,  ..., -0.0221, -0.0510, -0.0922],
        [ 0.0499,  0.0184, -0.0918,  ...,  0.0203,  0.0734, -0.0595]],
       dtype=torch.float32, requires_grad=True)
Parameter containing:
tensor([ 0.0485, -0.0937, -0.0077, -0.0073, -0.0168,  0.0526,  0.0825, -0.0883,
         0.0519,  0.0624,  0.0429,  0.0649, -0.0329, -0.0161,  0.0643,  0.0492,
         0.0950, -0.0428, -0.0046,  0.0239, -0.0096,  0.0077,  0.0953, -0.0314,
         0.0523,  0.0700, -0.0022, -0.0425, -0.0134, -0.0769, -0.0520,  0.0407,
        -0.0046,  0.0687,  0.0868,  0.0993,  0.0983, -0.0805,  0.0266, -0.0181,
         0.0901, -0.0626,  0.0842,  0.0357, -0.0784, -0.0172,  0.0207, -0.

In [62]:
optimizer = torch.optim.SGD(model.parameters(),lr=0.1)

In [64]:
#Once gradients have been computed using loss.backward()
loss.backward()
#Calling optimizer.step() updates the parameters according to the optimization algorithm.
optimizer.step()

In [65]:
# For Step by Step we can print the iterations
for epoch in range(50):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  1.001589059829712
epoch:  1  loss:  0.9987631440162659
epoch:  2  loss:  0.9960681796073914
epoch:  3  loss:  0.993518054485321
epoch:  4  loss:  0.9910887479782104
epoch:  5  loss:  0.9887332916259766
epoch:  6  loss:  0.986463189125061
epoch:  7  loss:  0.9842555522918701
epoch:  8  loss:  0.9820830225944519
epoch:  9  loss:  0.9799185991287231
epoch:  10  loss:  0.9778361916542053
epoch:  11  loss:  0.9758167862892151
epoch:  12  loss:  0.9738246202468872
epoch:  13  loss:  0.9718044996261597
epoch:  14  loss:  0.9698389172554016
epoch:  15  loss:  0.9679654836654663
epoch:  16  loss:  0.9661278128623962
epoch:  17  loss:  0.9643331170082092
epoch:  18  loss:  0.9625755548477173
epoch:  19  loss:  0.9608418345451355
epoch:  20  loss:  0.9591400623321533
epoch:  21  loss:  0.9574832320213318
epoch:  22  loss:  0.9558798670768738
epoch:  23  loss:  0.9543021321296692
epoch:  24  loss:  0.9527469277381897
epoch:  25  loss:  0.9511935114860535
epoch:  26  loss:  0.9496

In [66]:
#After above iterations we see that the weights have still pretty large loss...
# this is because we have to either chose a little larger learning rate or else we have iterate moder no. of times

In [69]:
# Let us update the Learning Rate to 1 and check
optimizer = torch.optim.SGD(model.parameters(),lr=1)

In [70]:
for epoch in range(50):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.8631477355957031
epoch:  1  loss:  0.8563557267189026
epoch:  2  loss:  0.8464503884315491
epoch:  3  loss:  0.836352527141571
epoch:  4  loss:  0.8277811408042908
epoch:  5  loss:  0.8199793100357056
epoch:  6  loss:  0.8126081824302673
epoch:  7  loss:  0.8046957850456238
epoch:  8  loss:  0.7967583537101746
epoch:  9  loss:  0.7894161939620972
epoch:  10  loss:  0.783511221408844
epoch:  11  loss:  0.7777265906333923
epoch:  12  loss:  0.7726659774780273
epoch:  13  loss:  0.7679680585861206
epoch:  14  loss:  0.763272762298584
epoch:  15  loss:  0.7595945000648499
epoch:  16  loss:  0.7561229467391968
epoch:  17  loss:  0.7527402639389038
epoch:  18  loss:  0.749127984046936
epoch:  19  loss:  0.7462190389633179
epoch:  20  loss:  0.7437117695808411
epoch:  21  loss:  0.7411103248596191
epoch:  22  loss:  0.7386329770088196
epoch:  23  loss:  0.7362147569656372
epoch:  24  loss:  0.7339076399803162
epoch:  25  loss:  0.7320958971977234
epoch:  26  loss:  0.73040

In [74]:
# Still it has not reached the minima 
optimizer = torch.optim.SGD(model.parameters(),lr=6)
# So now we again change the LR and increse the no. of iterations
for epoch in range(300):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.673101007938385
epoch:  1  loss:  0.673101007938385
epoch:  2  loss:  0.673101007938385
epoch:  3  loss:  0.673101007938385
epoch:  4  loss:  0.673101007938385
epoch:  5  loss:  0.673101007938385
epoch:  6  loss:  0.673101007938385
epoch:  7  loss:  0.673101007938385
epoch:  8  loss:  0.673101007938385
epoch:  9  loss:  0.673101007938385
epoch:  10  loss:  0.673101007938385
epoch:  11  loss:  0.673101007938385
epoch:  12  loss:  0.673101007938385
epoch:  13  loss:  0.6731010675430298
epoch:  14  loss:  0.6731011867523193
epoch:  15  loss:  0.6731019020080566
epoch:  16  loss:  0.6731064915657043
epoch:  17  loss:  0.6733314394950867
epoch:  18  loss:  0.6745344400405884
epoch:  19  loss:  0.679814338684082
epoch:  20  loss:  0.7111214399337769
epoch:  21  loss:  0.7719753384590149
epoch:  22  loss:  0.7891640663146973
epoch:  23  loss:  0.7902747988700867
epoch:  24  loss:  0.7505916953086853
epoch:  25  loss:  0.7076110243797302
epoch:  26  loss:  0.685482621192932

In [75]:
#Since Here even after several no. of iterations we were not able to decrease the loss this is because of the dummy data we have chosen which is totally inapproriate and therei is no meaningful relationship between the input data and the target values