In [0]:
import torch

# **Models in PyTorch**


*   Supervised Learning Example
*   3 classes
*   Batch Size 32





In [0]:
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
  def __init__(self, D_in, H, D_out):
    """
    In the constructor we instantiate two nn.Linear modules and assign them as
    member variables.
    
    D_in: input dimension
    H: dimension of hidden layer
    D_out: output dimension
    """
    super(TwoLayerNet, self).__init__()
    self.linear1 = nn.Linear(D_in, H) 
    self.linear2 = nn.Linear(H, D_out)
  
  def forward(self, x):
    """
    In the forward function we accept a Variable of input data and we must 
    return a Variable of output data. We can use Modules defined in the 
    constructor as well as arbitrary operators on Variables.
    """
    h_relu = F.relu(self.linear1(x))
    y_pred = self.linear2(h_relu)
    y_pred = F.relu(y_pred)
    return y_pred

In [0]:
# N is batch size; D_in is input dimension;
# H is the dimension of the hidden layer; D_out is output dimension.
N, D_in, H, D_out = 32, 100, 50, 3

# Create random Tensors to hold inputs and outputs, and wrap them in Variables
x = torch.rand(N, D_in)  # dim: 32 x 100
# print(x)

y = torch.rand(N, D_out)  # dim: 32 x 3

y = torch.FloatTensor(N).uniform_(0, 3).long()
    
# print(y)

# print(y.size())

# print(y)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Forward pass: Compute predicted y by passing x to the model
# print(x)
y_pred = model(x)   # dim: 32 x 2

# print(y_pred.size())

# print(y_pred)

# **Loss Function**

In [6]:
criterion = nn.CrossEntropyLoss()

output = (torch.randn(3,32).float())
target = (torch.FloatTensor(3).uniform_(0, 32).long())
print("Output: ", output)
print("Target: " ,target)
loss = criterion(output, target)
print("Loss:", loss)

Output:  tensor([[ 0.3976, -0.1241, -1.1557,  1.2158,  0.3010,  0.5733,  1.5274,
          0.6171,  0.0192, -0.6024, -1.9612, -0.9871, -0.3225, -0.7100,
         -1.4539, -0.0773, -1.6230,  0.9777, -0.0581, -1.2614, -2.0150,
         -0.8536, -0.7466, -1.2676,  1.1775,  0.8409, -1.2445,  0.6884,
         -0.9267, -0.5604, -0.2831, -0.9563],
        [-1.6946,  0.1189, -0.5623, -0.6538, -0.3278,  0.5723,  1.1066,
         -0.7878, -0.1231, -1.8793, -2.7301, -1.3512,  0.9074, -0.2071,
         -1.3011,  1.3194, -1.2997, -0.9457,  0.4361,  0.1044, -1.0863,
         -0.3599,  0.0248, -2.0370, -0.8718,  1.2631,  0.1607,  0.3247,
         -0.0504, -1.0646, -0.9786,  0.0231],
        [-1.8096, -0.7159, -0.1512,  1.3091,  1.1906, -2.5256,  0.3281,
          1.6342, -2.2397, -2.8589, -0.2430, -1.3686,  1.1070,  0.2212,
         -0.2696,  0.0692,  0.3481,  1.8937, -0.1204,  2.0366,  0.9666,
          0.8021, -0.3474,  0.4185,  0.4533, -0.6399, -0.8451, -0.5691,
          0.3095,  0.4965, -0.5726,

In [8]:
print(y_pred)
print(y)

loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(y_pred, y.long())

print("Torch Loss:",loss)

def userDefinedCrossEntropyLoss(outputs, labels):
  # print("labels:",labels)
  batch_size = outputs.size()[0]               # batch_size
  # print("outputs:",outputs)
  outputs = F.log_softmax(outputs,dim=1)       # compute the log of softmax values
  # print("out log soft:",outputs)
  outputs = outputs[range(batch_size), labels] # pick the values corresponding to the labels
  # print(outputs)
  return -torch.sum(outputs)/32

# print("y_pred:",y_pred.size())
# print("y:",y.transpose(0,1).size())
loss = userDefinedCrossEntropyLoss(y_pred, y.long())

print("User Defined Loss:",loss)

tensor([[ 0.1854,  0.1120,  0.2549],
        [ 0.2387,  0.1056,  0.1274],
        [ 0.2683,  0.1179,  0.1758],
        [ 0.2713,  0.2277,  0.2346],
        [ 0.2350,  0.1633,  0.2247],
        [ 0.1813,  0.1541,  0.2876],
        [ 0.3718,  0.1811,  0.3602],
        [ 0.2109,  0.0810,  0.2994],
        [ 0.2384,  0.1889,  0.3350],
        [ 0.2563,  0.1073,  0.1847],
        [ 0.2669,  0.0180,  0.1705],
        [ 0.2854,  0.1335,  0.2481],
        [ 0.2529,  0.1296,  0.2761],
        [ 0.2567,  0.0962,  0.1453],
        [ 0.2781,  0.1755,  0.2639],
        [ 0.2648,  0.1215,  0.3435],
        [ 0.1928,  0.0000,  0.2761],
        [ 0.1133,  0.1907,  0.2261],
        [ 0.1249,  0.1229,  0.2809],
        [ 0.3003,  0.0637,  0.2137],
        [ 0.3414,  0.0000,  0.3098],
        [ 0.3668,  0.1756,  0.2765],
        [ 0.3543,  0.1161,  0.2686],
        [ 0.2542,  0.0709,  0.0332],
        [ 0.3362,  0.0498,  0.2985],
        [ 0.2972,  0.1284,  0.2641],
        [ 0.2859,  0.0524,  0.1965],
 

# **Optimizer**

In [0]:
# SGD optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)

# ADAM
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

# **Training vs Evaluation**

Before training the model, it is imperative to call

***model.train()***

Likewise, you must call

***model.eval() ***before testing the model.

This corrects for the differences in dropout, batch normalization during training and testing.




# **Core Training Step**

In [10]:
output_batch = model(x)           # compute model output
loss = loss_fn(output_batch, y)   # calculate loss

print(loss)

for _ in range(5):
    
    optimizer.zero_grad()  # clear previous gradients
    
    loss.backward()        # compute gradients of all variables wrt loss

    optimizer.step()       # perform updates using calculated gradients
    
    output_batch = model(x)     
    
    loss = loss_fn(output_batch, y)

    print(loss)

tensor(1.1181)
tensor(1.1159)
tensor(1.1137)
tensor(1.1116)
tensor(1.1095)
tensor(1.1075)


# **Computing Metrics**

In [11]:
import numpy as np
def accuracy(out, labels):
  outputs = np.argmax(out, axis=1)
  return np.sum(outputs==labels)/float(labels.size)

print(accuracy(output_batch.long().numpy(), y.numpy()))

0.25
