In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
import os

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [4]:
 ##parameters definition
 
 input_size = 784 # 28x28
 hidden_size = 500 
 num_classes = 10
 num_epochs = 3
 batch_size = 100
 learning_rate = 0.01 

In [5]:
# Loading the MNIST dataset 
train_dataset = datasets.MNIST(root='./data', 
                                            train=True, 
                                       transform=transforms.ToTensor(),  
                                           download=True)
test_dataset = datasets.MNIST(root='./data', 
                                           train=False, 
                                           transform=transforms.ToTensor()) 

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
# Importing Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                            batch_size=batch_size, 
                                            shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=False) 

In [7]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        
        ## cnn layers 
        self.conv1 = nn.Sequential(         
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
       
        self.conv2 = nn.Sequential(         
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        
        x = x.reshape(x.shape[0], -1) 

        output = self.out(x)
        return output, x    # return x for visualization

In [8]:
#Defining our model 

model = LeNet().to(device)
model

LeNet(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

In [9]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [10]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(num_epochs + 1)]

In [11]:
  def train(epoch):

    model.train()
    #n_total_steps = len(train_loader)
  
    
    for i, (images, labels) in enumerate(train_loader):  
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs[0], labels)

        # Backward and optimize
        
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
          print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, i * len(images), len(train_loader.dataset),100. * i / len(train_loader), loss.item()))
          train_losses.append(loss.item())
          train_counter.append((i*64) + ((epoch-1)*len(train_loader.dataset)))
          torch.save(model.state_dict(), '/content/results/model.pth')
    


In [12]:
def test():

  model.eval()
  
  n_correct = 0
  test_loss = 0

  with torch.no_grad():
     
     for images, labels in test_loader:
         images = images.to(device)
         labels = labels.to(device)

         outputs = model(images)
         test_loss += F.nll_loss(outputs[0], labels, size_average=False).item()
         # max returns (value ,index)

         predicted = outputs[0].data.max(1, keepdim=True)[1]

         n_correct += predicted.eq(labels.data.view_as(predicted)).sum()

     test_loss /= len(test_loader.dataset)
     test_losses.append(test_loss)
     print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, n_correct, len(test_loader.dataset), 100. * n_correct / len(test_loader.dataset)))

In [None]:
for x, y in train_loader:
  # print(x.size())
  out = model(x.to(device))
  #print(out[1].shape)
  print (out[0].data.max(1, keepdim=True)[1].shape)
  break

torch.Size([100, 1])


In [13]:
!mkdir results

In [14]:
for epoch in range(1, num_epochs+1):
  train(epoch)
  test()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)







Test set: Avg. loss: -9.2386, Accuracy: 9831/10000 (98%)


Test set: Avg. loss: -11.8680, Accuracy: 9816/10000 (98%)


Test set: Avg. loss: -13.7654, Accuracy: 9831/10000 (98%)



In [15]:
loaded_model = LeNet().to(device)
loaded_model.load_state_dict(torch.load('/content/results/model.pth'))
loaded_model.eval()

LeNet(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

In [18]:
def my_loss(outputs, labels):
    loss = criterion(outputs, labels)
    return loss

for i, (images, labels) in enumerate(train_loader):  
      
      images = images.to(device)
      labels = labels.to(device)
      # Forward pass
      outputs = loaded_model(images)

Hess_params = torch.autograd.functional.hessian(my_loss, (outputs[0], labels))

RuntimeError: ignored

In [19]:
 def compute_hessian():

  for i, (images, labels) in enumerate(train_loader):  
      
      images = images.to(device)
      labels = labels.to(device)
      # Forward pass
      outputs = loaded_model(images)
     
      for name, param in loaded_model.named_parameters():
        
        p = param
        loss = criterion(outputs[0], labels)
        print(loss)
       
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        grad_params = torch.autograd.grad(loss, p, create_graph=True,allow_unused=True)  # p is the weight matrix for a particular layer 
        hess_params = torch.zeros_like(grad_params[0])
        

        for i in range(grad_params[0].size(0)):
            for j in range(grad_params[0].size(1)):
              
              hess_params[i, j] = torch.autograd.grad(grad_params[0][i][j], p, retain_graph=True)[0][i, j]
              
        optimizer.step()

        return hess_params


In [20]:
h=compute_hessian()

tensor(0.0379, device='cuda:0', grad_fn=<NllLossBackward>)


RuntimeError: ignored

In [None]:
def train_OBD(epoch):

  model.train()
  n_total_steps = len(train_loader)
  
    
  for i, (images, labels) in enumerate(train_loader):  
      # origin shape: [100, 1, 28, 28]
      # resized: [100, 784]
      images = images.reshape(-1, 28*28).to(device)
      labels = labels.to(device)
      # Forward pass
      outputs = model(images)

      for name, param in model.named_parameters():
        
        p = param
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
      
        loss.backward(retain_graph=True)
        grad_params = torch.autograd.grad(loss, p, create_graph=True)  # p is the weight matrix for a particular layer 
        hess_params = torch.zeros_like(grad_params[0])

        for i in range(grad_params[0].size(0)):
            for j in range(grad_params[0].size(1)):
                hess_params[i, j] = torch.autograd.grad(grad_params[0][i][j], p, retain_graph=True)[0][i, j]
        optimizer.step()

        if i % 100 == 0:
          print('Train_OBD Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, i * len(images), len(train_loader.dataset),
            100. * i / len(train_loader), loss.item()))
          train_losses.append(loss.item())
          train_counter.append((i*64) + ((epoch-1)*len(train_loader.dataset)))


  return grad_params, hess_params