In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
from torch.profiler import profile, record_function, ProfilerActivity

In [22]:
def build_data_loader(data_dir, batch_size, random_seed, valid_size, shuffle = True, test = False):
  """Build MNIST train / validation / test data loaders.

  The MNIST training set is split into a training part and a validation part;
  the first ``floor(valid_size * N)`` (optionally shuffled) indices form the
  validation split and the remaining indices form the training split.

  Args:
    data_dir: Root directory the dataset is downloaded to / read from.
    batch_size: Batch size used by all three loaders.
    random_seed: Seed passed to ``np.random.seed`` before shuffling the
      split indices (only used when ``shuffle`` is true).
    valid_size: Fraction of the training set held out for validation,
      in the range [0, 1].
    shuffle: Whether to shuffle the indices before splitting; also forwarded
      as ``shuffle`` to the test loader.
    test: Unused; accepted only so existing call sites keep working.

  Returns:
    A ``(train_loader, valid_loader, test_loader)`` tuple.

  Raises:
    ValueError: If ``valid_size`` is outside [0, 1].
  """
  if not 0 <= valid_size <= 1:
    raise ValueError('valid_size must be in the range [0, 1], got {}'.format(valid_size))

  # The local must NOT be called `transforms`: assigning to that name inside the
  # function would shadow the torchvision module and raise UnboundLocalError.
  transform = transforms.Compose([transforms.ToTensor()])

  train_dataset = datasets.MNIST(root=data_dir, train=True, download=True, transform=transform)
  valid_dataset = datasets.MNIST(root=data_dir, train=True, download=True, transform=transform)
  test_dataset = datasets.MNIST(root=data_dir, train=False, download=True, transform=transform)

  num_train = len(train_dataset)
  indices = list(range(num_train))
  split = int(np.floor(valid_size * num_train))

  if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

  # Train and validation loaders read the same underlying data but draw from
  # disjoint index sets.
  train_idx, valid_idx = indices[split:], indices[:split]

  train_sampler = SubsetRandomSampler(train_idx)
  valid_sampler = SubsetRandomSampler(valid_idx)

  train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
  valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, sampler=valid_sampler)
  test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle)

  return train_loader, valid_loader, test_loader


In [13]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv stages followed by two linear layers.

    Each conv stage is ``Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2, stride 2)``,
    so the spatial size is halved per stage. ``fc1`` expects ``64 * 7 * 7``
    flattened features, which matches single-channel 28x28 inputs.

    Args:
        num_classes: Size of the output (logit) vector.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Sub-modules are created in the same order and under the same attribute
        # names as before, so state_dict keys and seeded initialisation are unchanged.
        self.layer1 = self._conv_stage(1, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        features = self.layer2(self.layer1(x))
        # Collapse everything except the batch dimension before the linear layers.
        flat = features.reshape(features.size(0), -1)
        # NOTE(review): there is no activation between fc1 and fc2, so the two
        # linear layers compose into a single affine map - confirm this is intended.
        return self.fc2(self.fc1(flat))

In [17]:
def count_parameters(model):
    """Return the total number of elements over all parameters that require gradients."""
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [23]:
def train(model, train_loader, num_epochs, criterion, optimizer, device):
  """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

  After every epoch, prints the loss of the last processed batch (``nan`` when
  the loader yielded no batches).

  Args:
    model: Module to optimise; put into training mode by this function.
    train_loader: Iterable yielding ``(images, labels)`` batches.
    num_epochs: Number of passes over ``train_loader``.
    criterion: Callable ``criterion(outputs, labels)`` returning the loss tensor.
    optimizer: Optimizer already bound to the model's parameters.
    device: Device the batches are moved to before the forward pass. The model
      itself is not moved here.
  """
  # Make sure train-time behaviour is active even if the model was left in
  # eval mode by an earlier evaluation call.
  model.train()

  for epoch in range(num_epochs):
    loss = None
    for images, labels in train_loader:

      # move images and labels to device
      images = images.to(device)
      labels = labels.to(device)

      # forward pass
      outputs = model(images)
      loss = criterion(outputs, labels)

      # backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    # An empty loader leaves `loss` unset; report nan instead of raising.
    last_loss = loss.item() if loss is not None else float('nan')
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, last_loss))


In [25]:
def validate(mode, valid_loader, device):
  """Print the classification accuracy of a model on ``valid_loader``.

  Args:
    mode: The model to evaluate. The parameter name is kept as-is so existing
      keyword callers keep working.
    valid_loader: Iterable yielding ``(images, labels)`` batches.
    device: Device the batches are moved to before the forward pass.
  """
  # Evaluate the model that was passed in rather than a global named `model`.
  model = mode

  # Switch to eval mode for the measurement and restore the previous mode after.
  was_training = model.training
  model.eval()
  correct = 0
  total = 0
  try:
    with torch.no_grad():
      for images, labels in valid_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
  finally:
    model.train(was_training)

  # Report the number of images actually seen; an empty loader gives nan
  # instead of a ZeroDivisionError.
  accuracy = 100 * correct / total if total else float('nan')
  print('Accuracy of the network on the {} validation images: {:.2f} %'.format(total, accuracy))

In [26]:
def test(model, test_loader, device):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))

In [27]:
# Data / runtime configuration
valid_size = 0.1
shuffle = True
random_seed = 412
batch_size = 64
data_dir = './data'
device = 'cpu'

# Hyperparameters
max_lr = 0.00001
weight_decay = 0.005
learning_rate = 0.0001
num_epochs = 5
criterion = nn.CrossEntropyLoss()

# Seed torch as well: weight initialisation and SubsetRandomSampler draw from
# torch's RNG, which the numpy seed used for the split does not control.
torch.manual_seed(random_seed)

# dataset definition
train_loader, valid_loader, test_loader = build_data_loader(data_dir, batch_size, random_seed, valid_size, shuffle = shuffle, test = False)

# Model definition (placed on the device the batches are moved to during training)
model = CNN().to(device)

# Optimizer
# NOTE(review): the step size passed here is `max_lr` (1e-5); `learning_rate`
# (1e-4) is defined above but not used in this cell - confirm which is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=max_lr, weight_decay=weight_decay)

print(count_parameters(model))

1630090


In [28]:
%%time
# Train the model
train(model, train_loader, num_epochs, criterion, optimizer, device)
test(model, test_loader, device)

Epoch [1/5], Loss: 0.6461
Epoch [2/5], Loss: 0.4391
Epoch [3/5], Loss: 0.3048
Epoch [4/5], Loss: 0.2326
Epoch [5/5], Loss: 0.2475
Accuracy of the network on the 10000 test images: 93.65 %
CPU times: user 6min 30s, sys: 45.2 s, total: 7min 16s
Wall time: 7min 18s


In [29]:
# Use one training batch as the profiling input, placed on the model's device
input_sample, _ = next(iter(train_loader))
input_sample = input_sample.to(next(model.parameters()).device)

# The context manager stops the profiler even if the forward pass raises
with profile(activities=[ProfilerActivity.CPU]) as prof:
  model(input_sample)

print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=100))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
    aten::max_pool2d_with_indices        46.36%      25.634ms        46.36%      25.634ms      12.817ms             2  
         aten::mkldnn_convolution        37.24%      20.590ms        37.39%      20.670ms      10.335ms             2  
                      aten::addmm        12.27%       6.782ms        12.38%       6.847ms       3.424ms             2  
                  aten::clamp_min         2.68%       1.484ms         2.68%       1.484ms     742.000us             2  
                       aten::relu         0.42%     234.000us         3.11%       1.718ms     859.000us             2  
                     aten::conv2d       

In [31]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.16.1


In [32]:
input_names = ['input']
output_names = ['output']
# Mark the batch dimension as dynamic so the exported graph is not tied to the
# batch size of `input_sample`.
dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
torch.onnx.export(model, input_sample, "cnn.onnx", input_names=input_names, output_names=output_names, export_params=True, dynamic_axes=dynamic_axes)