In [1]:
import torch


### Models in PyTorch

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
  def __init__(self, D_in, H, D_out):
    """
    In the constructor we instantiate two nn.Linear modules and assign them as
    member variables.
    
    D_in: input dimension
    H: dimension of hidden layer
    D_out: output dimension
    """
    super(TwoLayerNet, self).__init__()
    self.linear1 = nn.Linear(D_in, H) 
    self.linear2 = nn.Linear(H, D_out)
  
  def forward(self, x):
    """
    In the forward function we accept a Variable of input data and we must 
    return a Variable of output data. We can use Modules defined in the 
    constructor as well as arbitrary operators on Variables.
    """
    h_relu = F.relu(self.linear1(x))
    y_pred = self.linear2(h_relu)
    y_pred = F.relu(y_pred)
    return y_pred

In [48]:
# N is batch size; D_in is input dimension;
# H is the dimension of the hidden layer; D_out is output dimension.
N, D_in, H, D_out = 32, 100, 50, 3

# Create random Tensors to hold inputs and outputs, and wrap them in Variables
x = torch.rand(N, D_in)  # dim: 32 x 100
# print(x)

y = torch.rand(N, D_out)  # dim: 32 x 3

y = torch.FloatTensor(N).uniform_(0, 3).long()
    
#print(y)

# print(y.size())

# print(y)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Forward pass: Compute predicted y by passing x to the model
# print(x)
y_pred = model(x)   # dim: 32 x 2

# print(y_pred.size())

# print(y_pred)

### Loss Function

In [47]:
criterion = nn.CrossEntropyLoss()

output = (torch.randn(3,32).float())
target = (torch.FloatTensor(3).uniform_(0, 32).long())
print(output)
print(target)
loss = criterion(output, target)

tensor([[-0.0265, -0.0310, -0.5137,  0.1935, -2.4646, -0.5742, -1.5485,  0.7430,
         -0.9231,  0.2128,  1.3878, -0.7392,  0.1442, -0.0018,  0.9393,  1.2987,
         -0.5604,  0.6821,  0.4307,  0.1910,  2.1175, -0.2789, -1.5653,  0.7893,
          0.2644, -0.4956, -1.6536,  2.8626, -1.8352,  1.7630,  0.6107],
        [ 0.4388, -1.2152,  1.2271, -0.2915, -0.3719,  0.0523, -0.0716,  2.0456,
         -0.6136, -0.2922,  0.4311,  1.4703, -1.1593,  0.7987,  0.0518, -0.1968,
          2.3238,  0.7141, -1.2400, -0.5959, -1.3883, -1.5304, -0.7080, -1.8377,
          0.1892,  0.6140,  1.2384,  0.1379,  0.2010, -0.9397,  0.7367],
        [ 1.0436,  0.1653, -0.0220,  1.3089,  0.3749, -0.8718,  0.2459,  1.1522,
         -0.9381, -1.2553, -0.3964,  0.9405,  0.0998, -0.5594, -0.0527,  0.7225,
         -0.5757,  0.4124,  0.1184,  0.0529,  0.1075,  0.1305,  0.7485,  1.1113,
         -1.2988,  0.6225,  0.6330,  1.2118,  1.3249, -1.5428, -1.2823]])
tensor([18, 15, 23])


In [53]:
print(y_pred)
print(y)

loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(y_pred, y.long())

print("Loss:",loss)

def myCrossEntropyLoss(outputs, labels):
  # print("labels:",labels)
  batch_size = outputs.size()[0]            # batch_size
  # print("outputs:",outputs)
  outputs = F.log_softmax(outputs,dim=1)   # compute the log of softmax values
  # print("out log soft:",outputs)
  outputs = outputs[range(batch_size), labels] # pick the values corresponding to the labels
  # print(outputs)
  return -torch.sum(outputs)/32

# print("y_pred:",y_pred.size())
# print("y:",y.transpose(0,1).size())
loss = myCrossEntropyLoss(y_pred, y.long())

print("Loss:",loss)

tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.0469, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0124, 0.0000],
        [0.0000, 0.0410, 0.0000],
        [0.0000, 0.0495, 0.0000],
        [0.0000, 0.0242, 0.0000],
        [0.0000, 0.0297, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0065, 0.0000],
        [0.0000, 0.0412, 0.0000],
        [0.0000, 0.0754, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0184, 0.0000],
        [0.0000, 0.0082, 0.0000],
        [0.0000, 0.0406, 0.0000],
        [0.0000, 0.0817, 0.0000],
        [0.0000, 0.0220, 0.0000],
        [0.0000, 0.0212, 0.0000],
        [0.0000, 0.0687, 0.0000],
        [0.0000, 0.0421, 0.0000],
        [0.0358, 0.1771, 0.0000],
        [0.0000, 0.1196, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.1033, 0.0000],
        [0.0000, 0.0722, 0.0000],
        [0.0000, 0.0376, 0.0000],
        [0.000

### Optimizer

In [54]:
# SGD optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum=0.9)

# ADAM
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)



### Training vs Evaluation
`Before training the model, it is imperative to call`

model.train()

`Likewise, you must call` 

model.eval() 
before testing the model. 

`This corrects for the differences in dropout, batch normalization during training and testing.`

### Core Training Step

In [59]:
output_batch = model(x)           # compute model output
loss = loss_fn(output_batch, y)  # calculate loss

print(loss)

for _ in range(5):
    
    optimizer.zero_grad()  # clear previous gradients
    
    loss.backward()        # compute gradients of all variables wrt loss

    optimizer.step()       # perform updates using calculated gradients
    
    output_batch = model(x)     
    
    loss = loss_fn(output_batch, y)

    print(loss)

tensor(1.0949, grad_fn=<NllLossBackward>)
tensor(1.0944, grad_fn=<NllLossBackward>)
tensor(1.0939, grad_fn=<NllLossBackward>)
tensor(1.0936, grad_fn=<NllLossBackward>)
tensor(1.0932, grad_fn=<NllLossBackward>)
tensor(1.0929, grad_fn=<NllLossBackward>)


### Computing Metrics

In [63]:
import numpy as np
def accuracy(out, labels):
  outputs = np.argmax(out, axis=1)
  return np.sum(outputs==labels)/float(labels.size)

print(accuracy(output_batch.long().numpy(), y.numpy()))

0.34375
