In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class NN(nn.Module):
    def __init__(self,D_in,H_in,D_out): #The __init__ function initialises the two linear layers of the model.
        super(NN,self).__init__()
        self.Linear1 = nn.Linear(D_in,H_in)
        self.Linear2 = nn.Linear(H_in,D_out)
        
    def forward(self,x): #In the forward function, we first apply the first linear layer, apply ReLU activation and then apply the second linear layer.
        in_lin = self.Linear1(x) # Here the x is assumed to be the batch size (no.of training examples) by the model
        # If the network takes in the input vector of dim 100 and a batch size of 50 is passed in the model above, then the dim of x would be (50 x 100).
        h_relu = F.relu(in_lin)
        out_lin = self.Linear2(h_relu)
        y_pred = F.relu(out_lin)
        return y_pred
        

In [3]:
# Training the model
D_in,H_in,D_out,batch_size = 100,50,25,50
# Create nodes in pytorch using Variable present in torch.autograd package
# Create random Tensors to hold inputs and outputs, and wrap them in Variables
x = torch.autograd.Variable(torch.randn(batch_size,D_in))

In [4]:
model = NN(D_in,H_in,D_out)
y_pred = model(x)

In [6]:
target = torch.randn(batch_size,D_out).float()

In [7]:
#Loss Function
criterion = nn.MSELoss()
loss = criterion(y_pred,target)

In [14]:
print(loss.item())

0.9971992373466492


In [58]:
theta = model.parameters()

In [60]:
for each in theta:
    print(each)

Parameter containing:
tensor([[ 0.0861, -0.0635,  0.0343,  ...,  0.0829, -0.0528, -0.0113],
        [ 0.0040, -0.0725,  0.0304,  ...,  0.0191, -0.0957,  0.0291],
        [-0.0359,  0.0433,  0.0952,  ..., -0.0227, -0.0311, -0.0826],
        ...,
        [ 0.0220,  0.0456, -0.0435,  ...,  0.0858, -0.0920, -0.0151],
        [ 0.0739,  0.0794,  0.0547,  ..., -0.0221, -0.0510, -0.0922],
        [ 0.0499,  0.0184, -0.0918,  ...,  0.0203,  0.0734, -0.0595]],
       dtype=torch.float32, requires_grad=True)
Parameter containing:
tensor([ 0.0485, -0.0937, -0.0077, -0.0073, -0.0168,  0.0526,  0.0825, -0.0883,
         0.0519,  0.0624,  0.0429,  0.0649, -0.0329, -0.0161,  0.0643,  0.0492,
         0.0950, -0.0428, -0.0046,  0.0239, -0.0096,  0.0077,  0.0953, -0.0314,
         0.0523,  0.0700, -0.0022, -0.0425, -0.0134, -0.0769, -0.0520,  0.0407,
        -0.0046,  0.0687,  0.0868,  0.0993,  0.0983, -0.0805,  0.0266, -0.0181,
         0.0901, -0.0626,  0.0842,  0.0357, -0.0784, -0.0172,  0.0207, -0.

In [15]:
optimizer = torch.optim.SGD(model.parameters(),lr=0.1)

In [16]:
#Once gradients have been computed using loss.backward()
loss.backward()
#Calling optimizer.step() updates the parameters according to the optimization algorithm.
optimizer.step()

In [17]:
# For Step by Step we can print the iterations
for epoch in range(50):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.9925305247306824
epoch:  1  loss:  0.9880533218383789
epoch:  2  loss:  0.9837154150009155
epoch:  3  loss:  0.9795017838478088
epoch:  4  loss:  0.9754605889320374
epoch:  5  loss:  0.9716665148735046
epoch:  6  loss:  0.9680467247962952
epoch:  7  loss:  0.9645963311195374
epoch:  8  loss:  0.9613767862319946
epoch:  9  loss:  0.9582943916320801
epoch:  10  loss:  0.9553154706954956
epoch:  11  loss:  0.9525396227836609
epoch:  12  loss:  0.9499474763870239
epoch:  13  loss:  0.9474191069602966
epoch:  14  loss:  0.9449619650840759
epoch:  15  loss:  0.942597508430481
epoch:  16  loss:  0.9403333067893982
epoch:  17  loss:  0.9381005167961121
epoch:  18  loss:  0.9359002113342285
epoch:  19  loss:  0.9337812662124634
epoch:  20  loss:  0.9316959381103516
epoch:  21  loss:  0.9296768307685852
epoch:  22  loss:  0.9277480244636536
epoch:  23  loss:  0.9258564114570618
epoch:  24  loss:  0.9239720106124878
epoch:  25  loss:  0.9221180081367493
epoch:  26  loss:  0.92

In [66]:
#After above iterations we see that the weights have still pretty large loss...
# this is because we have to either chose a little larger learning rate or else we have iterate moder no. of times

In [18]:
# Let us update the Learning Rate to 1 and check
optimizer = torch.optim.SGD(model.parameters(),lr=1)

In [19]:
for epoch in range(50):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.8800033926963806
epoch:  1  loss:  0.8667050004005432
epoch:  2  loss:  0.8524036407470703
epoch:  3  loss:  0.8395223617553711
epoch:  4  loss:  0.8281170129776001
epoch:  5  loss:  0.816463053226471
epoch:  6  loss:  0.8059546947479248
epoch:  7  loss:  0.7958323955535889
epoch:  8  loss:  0.7862088680267334
epoch:  9  loss:  0.7763833999633789
epoch:  10  loss:  0.7675084471702576
epoch:  11  loss:  0.7592566013336182
epoch:  12  loss:  0.7522581219673157
epoch:  13  loss:  0.7446271181106567
epoch:  14  loss:  0.7375498414039612
epoch:  15  loss:  0.731005847454071
epoch:  16  loss:  0.7258388996124268
epoch:  17  loss:  0.7205127477645874
epoch:  18  loss:  0.7148871421813965
epoch:  19  loss:  0.7096412181854248
epoch:  20  loss:  0.7046909332275391
epoch:  21  loss:  0.7004160284996033
epoch:  22  loss:  0.6962743997573853
epoch:  23  loss:  0.6926296353340149
epoch:  24  loss:  0.689002275466919
epoch:  25  loss:  0.685852587223053
epoch:  26  loss:  0.68253

In [21]:
# Still it has not reached the minima 
optimizer = torch.optim.SGD(model.parameters(),lr=6)
# So now we again change the LR and increse the no. of iterations
for epoch in range(300):
   # Forward pass: Compute predicted y by passing x to the model
   y_pred = model(x)

   # Compute and print loss
   loss = criterion(y_pred, target)
   print('epoch: ', epoch,' loss: ', loss.item())#loss.item gives us the real number instead of the tensor value.

   # Zero gradients, perform a backward pass, and update the weights.
   optimizer.zero_grad()

   # perform a backward pass (backpropagation)
   loss.backward()

   # Update the parameters
   optimizer.step()

epoch:  0  loss:  0.6091945767402649
epoch:  1  loss:  0.6091945171356201
epoch:  2  loss:  0.6091945171356201
epoch:  3  loss:  0.6091944575309753
epoch:  4  loss:  0.6091943979263306
epoch:  5  loss:  0.6091943383216858
epoch:  6  loss:  0.6091943383216858
epoch:  7  loss:  0.609194278717041
epoch:  8  loss:  0.6091942191123962
epoch:  9  loss:  0.6091942191123962
epoch:  10  loss:  0.6091941595077515
epoch:  11  loss:  0.6091940999031067
epoch:  12  loss:  0.6091940999031067
epoch:  13  loss:  0.6091940402984619
epoch:  14  loss:  0.6091940402984619
epoch:  15  loss:  0.6091939806938171
epoch:  16  loss:  0.6091939806938171
epoch:  17  loss:  0.6091939210891724
epoch:  18  loss:  0.6091939210891724
epoch:  19  loss:  0.6091938614845276
epoch:  20  loss:  0.6091938614845276
epoch:  21  loss:  0.6091938018798828
epoch:  22  loss:  0.6091938018798828
epoch:  23  loss:  0.6091938018798828
epoch:  24  loss:  0.609193742275238
epoch:  25  loss:  0.609193742275238
epoch:  26  loss:  0.6091

epoch:  215  loss:  0.609192967414856
epoch:  216  loss:  0.609192967414856
epoch:  217  loss:  0.609192967414856
epoch:  218  loss:  0.609192967414856
epoch:  219  loss:  0.609192967414856
epoch:  220  loss:  0.609192967414856
epoch:  221  loss:  0.609192967414856
epoch:  222  loss:  0.609192967414856
epoch:  223  loss:  0.609192967414856
epoch:  224  loss:  0.609192967414856
epoch:  225  loss:  0.609192967414856
epoch:  226  loss:  0.609192967414856
epoch:  227  loss:  0.609192967414856
epoch:  228  loss:  0.609192967414856
epoch:  229  loss:  0.609192967414856
epoch:  230  loss:  0.609192967414856
epoch:  231  loss:  0.609192967414856
epoch:  232  loss:  0.609192967414856
epoch:  233  loss:  0.609192967414856
epoch:  234  loss:  0.609192967414856
epoch:  235  loss:  0.609192967414856
epoch:  236  loss:  0.609192967414856
epoch:  237  loss:  0.609192967414856
epoch:  238  loss:  0.609192967414856
epoch:  239  loss:  0.609192967414856
epoch:  240  loss:  0.609192967414856
epoch:  241 

In [75]:
#Since Here even after several no. of iterations we were not able to decrease the loss this is because of the dummy data we have chosen which is totally inapproriate and therei is no meaningful relationship between the input data and the target values