# Setup and Imports

In [None]:
import torch
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
from time import perf_counter
from copy import deepcopy

def timer(f, *args):
    """Call ``f(*args)`` once and return the elapsed wall-clock time in milliseconds.

    Args:
        f: Any callable (e.g. a model).
        *args: Positional arguments forwarded unchanged to ``f``.

    Returns:
        float: Elapsed time of the single call, in milliseconds. The value
        returned by ``f`` itself is discarded.
    """
    # CUDA kernels are launched asynchronously: without a synchronize() the
    # clock would stop as soon as the work is queued, not when it has finished.
    # On machines without CUDA this is skipped entirely.
    use_cuda_sync = torch.cuda.is_available()

    if use_cuda_sync:
        torch.cuda.synchronize()   # drain pending work so it is not billed to f
    start = perf_counter()
    f(*args)
    if use_cuda_sync:
        torch.cuda.synchronize()   # wait for f's kernels to actually complete
    return 1000 * (perf_counter() - start)





# Model and Data Load

In [None]:
# Build a torchvision ResNet-18 and request its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# a `weights=` argument -- confirm the installed version before switching.
model = models.resnet18(pretrained=True)       # We now have an instance of the pretrained model



In [None]:
# Switch the model to evaluation mode before any inference or scripting.
# As the last expression of the cell, the returned module is also displayed.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# Convert the Model to TorchScript

In [None]:
# Compile the eager model into a TorchScript module.
model_scripted = torch.jit.script(model)         # *** This is the TorchScript export

# Seed the RNG so the random probe input -- and therefore the top-5 indices
# printed further down -- are the same after every Restart & Run All.
torch.manual_seed(0)
# One random tensor of shape (1, 3, 224, 224); 3 matches conv1's input channels.
dummy_input = torch.rand(1, 3, 224, 224)

# Compare the results of the PyTorch and TorchScript models

In [None]:
# Inference only: no_grad() stops autograd from recording a graph for these
# forward passes, which saves memory and time and changes no numerical result.
with torch.no_grad():
    unscripted_output = model(dummy_input)         # Get the unscripted model's prediction...
    scripted_output = model_scripted(dummy_input)  # ...and do the same for the scripted version

# Indices of the five highest-probability classes for each model.
unscripted_top5 = F.softmax(unscripted_output, dim=1).topk(5).indices
scripted_top5 = F.softmax(scripted_output, dim=1).topk(5).indices

print('Python model top 5 results:\n  {}'.format(unscripted_top5))
print('TorchScript model top 5 results:\n  {}'.format(scripted_top5))

Python model top 5 results:
  tensor([[418, 845, 111, 892, 644]])
TorchScript model top 5 results:
  tensor([[418, 845, 111, 892, 644]])


In [None]:
# Serialize the scripted module to 'model_scripted.pt' (a relative path, so it
# lands in the notebook's current working directory).
model_scripted.save('model_scripted.pt')

# Compare the runtime performance of the PyTorch and TorchScript models on CPU

---



In [None]:
# Benchmark both models on CPU with autograd disabled (pure inference).
with torch.no_grad():
    # Untimed warm-up: the first TorchScript calls include one-off JIT
    # profiling/optimization work that would otherwise skew a 10-run mean.
    for _warmup in range(3):
        model(dummy_input)
        model_scripted(dummy_input)

    eager_cpu_ms = np.mean([timer(model, dummy_input) for _ in range(10)])
    scripted_cpu_ms = np.mean([timer(model_scripted, dummy_input) for _ in range(10)])

# timer() reports milliseconds per call.
print("Average runtime of Pytorch Model in CPU: " + str(eager_cpu_ms))
print("Average runtime of TorchScript Model in CPU: " + str(scripted_cpu_ms))

# Compare the runtime performance of the PyTorch and TorchScript models on GPU

In [None]:
model_gpu = model.cuda()
dummy_input_gpu = deepcopy(dummy_input).cuda()


In [None]:
model_gpu.eval()

In [None]:
model_scripted_gpu = torch.jit.script(model_gpu)

In [None]:
print("Average runtime of Pytorch Model in GPU: " + str(np.mean([timer(model_gpu,dummy_input_gpu) for _ in range(10)])))
print("Average runtime of TorchScript Model in GPU: " + str(np.mean([timer(model_scripted_gpu,dummy_input_gpu) for _ in range(100)])))

# Tracing the entire forward pass of the Scripted Model

In [None]:
model_trace_scripted = torch.jit.trace(model, (dummy_input_gpu))  
print(model_trace_scripted.code)



# Further optimization of the TorchScript model with `optimize_for_inference`

In [None]:
# Produce an inference-optimized version of the scripted CPU model.
# NOTE(review): per the PyTorch docs this call freezes the module first if it
# is not already frozen, which requires eval mode -- `model` was put in eval
# mode before it was scripted above.
frozen_mod = torch.jit.optimize_for_inference(model_scripted)

In [None]:
# Three-way CPU comparison: eager, scripted, and optimized/frozen modules.
with torch.no_grad():
    # Untimed warm-up: the first calls of the scripted and freshly optimized
    # modules include one-off JIT work that would otherwise skew a 10-run mean.
    for _warmup in range(3):
        model(dummy_input)
        model_scripted(dummy_input)
        frozen_mod(dummy_input)

    eager_cpu_ms = np.mean([timer(model, dummy_input) for _ in range(10)])
    scripted_cpu_ms = np.mean([timer(model_scripted, dummy_input) for _ in range(10)])
    frozen_cpu_ms = np.mean([timer(frozen_mod, dummy_input) for _ in range(10)])

# timer() reports milliseconds per call.
print("Average runtime of Pytorch Model in CPU: " + str(eager_cpu_ms))
print("Average runtime of TorchScript Model in CPU: " + str(scripted_cpu_ms))
print("Average runtime of Optimized Frozen Model in CPU: " + str(frozen_cpu_ms))

Average runtime of Pytorch Model in CPU: 132.05410810000444
Average runtime of TorchScript Model in CPU: 101.92786839999712
Average runtime of Optimized Frozen Model in CPU: 57.239240899986044
