<a href="https://colab.research.google.com/github/seyedalicheraghi/Model-Optimization/blob/master/Overview_of_Colaboratory_Features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.14.0


In [2]:
# Super Resolution model definition in PyTorch
import torch.nn as nn
import torch.nn.init as init
# Some standard imports
import io
import numpy as np

from torch import nn
import torch.utils.model_zoo as model_zoo
import torch.onnx

In [3]:
class SuperResolutionNet(nn.Module):
    """Convolutional super-resolution network with a pixel-shuffle output stage.

    Three ReLU-activated convolutions operate on a single-channel input, a
    fourth convolution produces ``upscale_factor ** 2`` channels, and
    ``nn.PixelShuffle`` rearranges those channels into an output whose spatial
    size is ``upscale_factor`` times larger.

    Args:
        upscale_factor: Integer factor passed to ``nn.PixelShuffle``.
        inplace: Forwarded to ``nn.ReLU`` (whether the activation runs in place).
    """

    def __init__(self, upscale_factor, inplace=False):
        super().__init__()

        self.relu = nn.ReLU(inplace=inplace)
        # Every convolution uses stride 1 and "same"-style padding, so the
        # spatial size is preserved until the pixel shuffle.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(32, upscale_factor ** 2, kernel_size=3, stride=1, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)

        self._initialize_weights()

    def forward(self, x):
        """Run the network on ``x`` and return the upscaled tensor."""
        for hidden_conv in (self.conv1, self.conv2, self.conv3):
            x = self.relu(hidden_conv(x))
        return self.pixel_shuffle(self.conv4(x))

    def _initialize_weights(self):
        """Orthogonally initialise all conv weights (ReLU gain for the hidden layers)."""
        relu_gain = init.calculate_gain('relu')
        for hidden_conv in (self.conv1, self.conv2, self.conv3):
            init.orthogonal_(hidden_conv.weight, relu_gain)
        # The last layer feeds the pixel shuffle directly (no ReLU), so it
        # keeps the default gain.
        init.orthogonal_(self.conv4.weight)

In [4]:
# Create the super-resolution model by using the above model definition.
torch_model = SuperResolutionNet(upscale_factor=3)

# Load the pretrained weights. On machines without CUDA, remap every storage
# to the CPU so a checkpoint that references GPU tensors can still be loaded;
# with CUDA available, keep the checkpoint's own device placement.
model_url = 'https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth'
if torch.cuda.is_available():
    map_location = None
else:
    map_location = lambda storage, loc: storage
torch_model.load_state_dict(model_zoo.load_url(model_url, map_location=map_location))

# Put the model in inference mode before tracing it for export.
torch_model.eval()

# Input to the model
x = torch.randn(1, 1, 224, 224, requires_grad=True)
# torch_out = torch_model(x)

# Export the model
torch.onnx.export(torch_model,               # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "super_resolution.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})

Downloading: "https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth" to /root/.cache/torch/hub/checkpoints/superres_epoch100-44c6958e.pth
100%|██████████| 234k/234k [00:00<00:00, 955kB/s] 


verbose: False, log level: Level.ERROR



# Install Required Libraries in Google Colab

In [5]:
!pip install pycuda # install cuda
!pip install tensorrt

Collecting pycuda
  Downloading pycuda-2022.2.2.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2023.1.1-py2.py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2022.2.2-cp310-cp310-linux_x86_64.whl size=661405 sha256=d5dbb

In [6]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import os
import argparse
import tensorrt as trt
import random
import argparse
import os

import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
import numpy as np
from PIL import Image

### Define Constants

In [75]:
# Set to True to create the TensorRT logger at VERBOSE severity.
verbose = False
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()

# Batch size written into the network input by set_net_batch().
MAX_BATCH_SIZE = 1

# Base name of the model: '<model_name>.onnx' is read and '<model_name>.trt'
# is written.
model_name = "ssd-10_backbone"
# model_name = "yolox_s"

# Build options handed to build_engine().
fp16 = False
int8 = False
dla_core = -1

engine_path = '%s.trt' % model_name

# Input shape used as the min/opt/max shape of the optimization profile.
dim  = (1, 3, 1200, 1200)
# dim  = (1, 3, 640, 640)

### Load the created ONNX model

In [76]:
def load_onnx(model_name):
    """Return the raw bytes of ``<model_name>.onnx``.

    Args:
        model_name: Path of the model file without the ``.onnx`` extension.

    Returns:
        The file contents as ``bytes``, or ``None`` (after printing an error
        message) when the file does not exist.
    """
    onnx_path = '%s.onnx' % model_name
    if os.path.isfile(onnx_path):
        with open(onnx_path, 'rb') as onnx_file:
            return onnx_file.read()

    print('ERROR: file (%s) not found!  You might want to run yolo_to_onnx.py first to generate it.' % onnx_path)
    return None

In [77]:
# Read the raw bytes of '<model_name>.onnx'; load_onnx() returns None when the file is missing.
# NOTE(review): build_engine() calls load_onnx() itself, so this value looks
# unused by the later cells shown here — confirm before relying on it.
onnx_data = load_onnx(model_name)

### Set network input batch size

In [78]:
def set_net_batch(network, batch_size):
    """Set network input batch size.

    The ONNX file might have been generated with a different batch size,
    say, 64.

    Args:
        network: TensorRT network definition whose first input is updated.
        batch_size: Value written into dimension 0 of that input's shape.

    Returns:
        The same ``network`` object. For TensorRT major versions below 7 the
        network is returned unmodified.
    """
    # Compare the major version numerically. Comparing only the first
    # character as a string would read "10.x" as "1" and wrongly treat it as
    # older than 7.
    trt_major = int(trt.__version__.split('.')[0])
    if trt_major >= 7:
        input_tensor = network.get_input(0)
        shape = list(input_tensor.shape)
        shape[0] = batch_size
        input_tensor.shape = shape
    return network

### Build a TensorRT engine from ONNX


In [79]:
def build_engine(model_name,do_fp16, do_int8, dla_core, verbose=False):
    """Build a TensorRT engine from ``<model_name>.onnx``.

    Args:
        model_name: Model path without the ``.onnx`` extension.
        do_fp16: When true, enable the FP16 builder flag.
        do_int8: When true, require a platform with fast INT8 support.
            NOTE: no INT8 calibrator is configured here, so this flag only
            triggers the capability check.
        dla_core: DLA core index to build for; negative values keep the
            default device.
        verbose: When true, use a VERBOSE TensorRT logger.

    Returns:
        The built engine, or ``None`` when the ONNX file is missing or cannot
        be parsed (``builder.build_engine`` may also return ``None``).

    Raises:
        RuntimeError: If ``do_int8`` is set but the platform lacks fast INT8.

    Relies on the module-level ``MAX_BATCH_SIZE`` and ``dim`` constants;
    ``dim`` must be a valid shape for the network's first input.
    """
    print('Loading the ONNX file...')
    onnx_data = load_onnx(model_name)
    if onnx_data is None:
        return None

    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    # Compare the major version numerically; a first-character string
    # comparison would misclassify "10.x" as older than 7.
    trt_major = int(trt.__version__.split('.')[0])
    EXPLICIT_BATCH = [] if trt_major < 7 else \
        [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        if do_int8 and not builder.platform_has_fast_int8:
            raise RuntimeError('INT8 not supported on this platform')
        if not parser.parse(onnx_data):
            print('ERROR: Failed to parse the ONNX file.')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
        network = set_net_batch(network, MAX_BATCH_SIZE)

        print('Building the TensorRT engine.  This would take a while...')
        print('(Use "--verbose" or "-v" to enable verbose logging.)')
        # new API: build_engine() with builder config
        builder.max_batch_size = MAX_BATCH_SIZE
        config = builder.create_builder_config()

        # Give the builder a real scratch-memory budget (1 GiB). The previous
        # expression shifted by the EXPLICIT_BATCH flag value, a copy-paste of
        # the network-flag line rather than a size in bytes.
        workspace_bytes = 1 << 30
        if hasattr(config, 'set_memory_pool_limit'):
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
        else:
            # Older TensorRT releases only expose the attribute form.
            config.max_workspace_size = workspace_bytes

        config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
        if do_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        if dla_core >= 0:
            # Target the requested DLA core; GPU_FALLBACK above lets layers
            # that DLA cannot run fall back to the GPU.
            config.default_device_type = trt.DeviceType.DLA
            config.DLA_core = dla_core
            print('Using DLA core %d.' % dla_core)

        profile = builder.create_optimization_profile()
        # Bind the profile to the network's actual first input instead of a
        # hard-coded tensor name that may not exist in the parsed model.
        input_name = network.get_input(0).name
        profile.set_shape(input_name, dim, dim, dim)
        config.add_optimization_profile(profile)
        engine = builder.build_engine(network, config)
        if engine is not None:
            print('Completed creating engine.')
        return engine

In [80]:
# Build the engine from the configured model, abort the run if that failed,
# then write the serialized engine next to the notebook.
engine = build_engine(model_name, fp16, int8, dla_core, verbose)
if engine is None:
    raise SystemExit('ERROR: failed to build the TensorRT engine!')

with open(engine_path, 'wb') as engine_file:
    engine_file.write(engine.serialize())

print('Serialized the TensorRT engine to file: %s' % engine_path)

Loading the ONNX file...
Building the TensorRT engine.  This would take a while...
(Use "--verbose" or "-v" to enable verbose logging.)


  builder.max_batch_size = MAX_BATCH_SIZE
  config.max_workspace_size = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
  engine = builder.build_engine(network, config)


Completed creating engine.
Serialized the TensorRT engine to file: yolox_s.trt


In [19]:
# Fetch the "resnet50" model through the ONNX model hub; the captured output
# below shows it being downloaded into the local ONNX hub cache directory.
# NOTE(review): `model` is reassigned by the later backbone-extraction cell,
# so this object appears to be used only for its download side effect — confirm.
from onnx import hub
model = hub.load("resnet50")

Downloading resnet50 to local path /root/.cache/onnx/hub/vision/classification/resnet/model/af16a04a6ec48ac494065d4439fe9dea590d337b9ca6dc328160ccf04a217b9c_resnet50-v1-7.onnx


In [20]:
!mv /root/.cache/onnx/hub/vision/classification/resnet/model/af16a04a6ec48ac494065d4439fe9dea590d337b9ca6dc328160ccf04a217b9c_resnet50-v1-7.onnx .


In [67]:
import onnx

# Base name of the source model: '<onnx_path>.onnx' is read and
# '<onnx_path>_backbone.onnx' is written.
onnx_path = "ssd-10"
# Names compared against each node's first output; the walk over the graph
# stops at the first node whose output is listed here.
NodesToIgnore = ['Relu_317']
# Name of the value that becomes the output of the extracted sub-graph.
NewOutputNodes = ['Add_316']

FullGraph_Values = {}

Backbone_NewNodes = []
Backbone_NewValues = []
model = onnx.load(onnx_path + '.onnx')

# Index every initializer (weights/constants) by name for quick lookup.
for n in model.graph.initializer:
    FullGraph_Values[n.name] = n

# Separate the backbone from the output head: keep nodes in graph order until
# the first ignored node, collecting the initializers they consume. Each
# initializer is added once even when several nodes share it, so the new graph
# does not carry duplicate initializer entries.
_collected_initializers = set()
for n in model.graph.node:
    if len(n.output) == 0 or n.output[0] in NodesToIgnore:
        break
    for v in n.input:
        w = FullGraph_Values.get(v)
        if w is not None and v not in _collected_initializers:
            _collected_initializers.add(v)
            Backbone_NewValues.append(w)
    Backbone_NewNodes.append(n)

# Run shape inference so the new output can be declared with a typed
# value_info entry.
shape_info = onnx.shape_inference.infer_shapes(model)

opset_import = shape_info.opset_import
# Get shapes of each layer
shapes = {value.name: value for value in shape_info.graph.value_info}

backbone_output = shapes.get(NewOutputNodes[0])
if backbone_output is None:
    # Fail with a clear message instead of passing None into make_graph.
    raise ValueError(
        'No inferred value_info found for output %r in %s.onnx'
        % (NewOutputNodes[0], onnx_path))

graph1_input = model.graph.input[0]
graph1_def = onnx.helper.make_graph(
    Backbone_NewNodes, 'flex_logix',
    [graph1_input],
    [backbone_output],
    initializer=Backbone_NewValues)
model = onnx.helper.make_model(graph1_def, producer_name='flex_spatial_conv', opset_imports=opset_import)
model.ir_version = 7
onnx.save(model, onnx_path + '_backbone.onnx')