In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch_tensorrt 
!pip install torchinfo
from torchinfo import summary

import os

  from .autonotebook import tqdm as notebook_tqdm


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting torchinfo
  Downloading torchinfo-1.7.0-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.0


In [2]:
# Output locations, all relative to the notebook's working directory.
FLASK_MODEL_DIR = './Flask/Models'    # TorchScript archives written by save_flask_model
TRITON_MODEL_DIR = './Triton/Models'  # root of the per-model Triton directories
TRANSFORM_DIR = './Transforms'        # pickled preprocessing pipelines

# File name of the Triton model configuration inside each model directory.
TRITON_CONFIG_FILE = 'config.pbtxt'
#TRITON_MODEL_FILE = 'model.pt'

# Create every output directory up front; an existing directory is not an error.
for _out_dir in (FLASK_MODEL_DIR, TRITON_MODEL_DIR, TRANSFORM_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:
def save_flask_model(model, transform, model_name):
    """Persist a preprocessing transform and a TorchScript model for the Flask app.

    Two files are written, both named ``<model_name>.plan``:

    * the transform, serialized with ``torch.save`` into ``TRANSFORM_DIR``;
    * the model, serialized with ``torch.jit.save`` into ``FLASK_MODEL_DIR``.

    Each destination path is printed right after its file is written.

    Args:
        model: Module accepted by ``torch.jit.save``.
        transform: Preprocessing object to serialize with ``torch.save``.
        model_name: Base file name (without extension) used for both files.

    Returns:
        None.
    """
    artifact_name = model_name + '.plan'

    # Preprocessing pipeline first, so it sits next to the model it belongs to.
    transform_path = os.path.join(TRANSFORM_DIR, artifact_name)
    torch.save(transform, transform_path)
    print(transform_path)

    model_path = os.path.join(FLASK_MODEL_DIR, artifact_name)
    torch.jit.save(model, model_path)
    print(model_path)

In [4]:
def save_triton_model(model, config, model_name):
    """Write a Triton model directory for ``model`` under ``TRITON_MODEL_DIR``.

    Layout produced::

        <TRITON_MODEL_DIR>/<model_name>/<TRITON_CONFIG_FILE>
        <TRITON_MODEL_DIR>/<model_name>/1/<model_name>.plan

    Missing directories are created. Each path is printed after its file is
    written.

    NOTE(review): the model file is a ``torch.jit.save`` archive named
    ``<model_name>.plan``; confirm that the ``platform`` (and, if needed,
    ``default_model_filename``) in ``config`` match what Triton expects for it.

    Args:
        model: Module accepted by ``torch.jit.save``.
        config: Text of the Triton model configuration; surrounding whitespace
            is stripped before writing.
        model_name: Name of the model directory and base name of the model file.

    Returns:
        None.
    """
    model_root = os.path.join(TRITON_MODEL_DIR, model_name)

    # Model configuration lives at the top of the model directory.
    config_path = os.path.join(model_root, TRITON_CONFIG_FILE)
    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    with open(config_path, 'w') as config_file:
        config_file.write(config.strip())
    print(config_path)

    # The serialized model goes into the version sub-directory "1".
    model_path = os.path.join(model_root, '1', model_name + '.plan')
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    torch.jit.save(model, model_path)
    print(model_path)

In [5]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [6]:
model_name = 'resnet34-ttrt_fp32'

# Preprocessing saved alongside the model: resize, 224x224 center crop,
# tensor conversion and per-channel normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Pretrained ResNet-34, scripted to TorchScript, moved to `device`, eval mode.
model = torchvision.models.resnet34(pretrained=True)
model = torch.jit.script(model)
model = model.to(device)
model = model.eval()

# Triton model configuration written verbatim (stripped) to config.pbtxt.
# NOTE(review): the artifact saved below comes from torch.jit.save; confirm that
# platform "tensorrt_plan" is what the Triton deployment expects for it.
triton_config = """
platform: "tensorrt_plan"
max_batch_size: 32
input [
 {
    name: "input_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }
]
output {
    name: "output_0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
"""


# Torch-TensorRT compilation with a dynamic batch dimension (1..32, tuned for
# 16), matching max_batch_size in the Triton config above.
model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input(
        min_shape=[1, 3, 224, 224],
        opt_shape=[16, 3, 224, 224],
        max_shape=[32, 3, 224, 224],
    )],
    # Passed as a set, consistent with the other model cells in this notebook.
    enabled_precisions={torch.float32},  # Run with FP32
)

save_flask_model(model, transform, model_name)
save_triton_model(model, triton_config, model_name)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████████████████████████████████| 83.3M/83.3M [00:09<00:00, 9.29MB/s]


./Transforms/resnet34-ttrt_fp32.plan
./Flask/Models/resnet34-ttrt_fp32.plan
./Triton/Models/resnet34-ttrt_fp32/config.pbtxt
./Triton/Models/resnet34-ttrt_fp32/1/resnet34-ttrt_fp32.plan




In [None]:
model_name = 'resnet34-ttrt_fp16'

# Preprocessing saved alongside the model: resize, 224x224 center crop,
# tensor conversion and per-channel normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Pretrained ResNet-34, scripted to TorchScript, moved to `device`, eval mode.
model = torchvision.models.resnet34(pretrained=True)
model = torch.jit.script(model)
model = model.to(device)
model = model.eval()

# Triton model configuration written verbatim (stripped) to config.pbtxt.
# The output is declared TYPE_FP32: the Input spec below sets no half dtype, and
# enabled_precisions only selects the kernel precisions TensorRT may use, so the
# module's inputs/outputs are expected to stay FP32.
# NOTE(review): confirm against the dtype actually returned by the compiled module.
# NOTE(review): the artifact saved below comes from torch.jit.save; confirm that
# platform "tensorrt_plan" is what the Triton deployment expects for it.
triton_config = """
platform: "tensorrt_plan"
max_batch_size: 32
input [
 {
    name: "input_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }
]
output {
    name: "output_0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
"""


# Torch-TensorRT compilation with a dynamic batch dimension (1..32, tuned for
# 16), matching max_batch_size in the Triton config above.
model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input(
        min_shape=[1, 3, 224, 224],
        opt_shape=[16, 3, 224, 224],
        max_shape=[32, 3, 224, 224],
    )],
    # Passed as a set, consistent with the other model cells in this notebook.
    enabled_precisions={torch.float16},  # Allow FP16 kernels
)

save_flask_model(model, transform, model_name)
save_triton_model(model, triton_config, model_name)

In [None]:
model_name = 'mobilenet_v2-ttrt_fp32'

# Preprocessing pipeline that is stored next to the exported model.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Pretrained MobileNetV2 -> TorchScript -> target device -> eval mode.
model = (
    torch.jit.script(torchvision.models.mobilenet_v2(pretrained=True))
    .to(device)
    .eval()
)

# Text of the Triton config.pbtxt for this model.
triton_config = """
platform: "tensorrt_plan"
max_batch_size: 32
input [
 {
    name: "input_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }
]
output {
    name: "output_0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
"""

# Compile with Torch-TensorRT for batch sizes 1 to 32 (optimized for 16), FP32.
model = torch_tensorrt.compile(
    model,
    inputs=[
        torch_tensorrt.Input(
            min_shape=[1, 3, 224, 224],
            opt_shape=[16, 3, 224, 224],
            max_shape=[32, 3, 224, 224],
        )
    ],
    enabled_precisions={torch.float32},
)

# Export for both serving stacks.
save_flask_model(model=model, transform=transform, model_name=model_name)
save_triton_model(model=model, config=triton_config, model_name=model_name)

In [None]:
model_name = 'efficientnet_b0-ttrt_fp32'

# Preprocessing saved alongside the model: resize, 224x224 center crop,
# tensor conversion and per-channel normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Pretrained EfficientNet-B0, scripted to TorchScript, moved to `device`, eval mode.
model = torchvision.models.efficientnet_b0(pretrained=True)
model = torch.jit.script(model)
model = model.to(device)
model = model.eval()

# Triton model configuration written verbatim (stripped) to config.pbtxt.
# The input dims must agree with the 224x224 crop produced by `transform` and
# with the shapes the engine is compiled for below.
# NOTE(review): the artifact saved below comes from torch.jit.save; confirm that
# platform "tensorrt_plan" is what the Triton deployment expects for it.
triton_config = """
platform: "tensorrt_plan"
max_batch_size: 32
input [
 {
    name: "input_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 224, 224 ]
  }
]
output {
    name: "output_0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
"""

# Torch-TensorRT compilation with a dynamic batch dimension (1..32, tuned for
# 16), matching max_batch_size in the Triton config above.
model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input(
        min_shape=[1, 3, 224, 224],
        opt_shape=[16, 3, 224, 224],
        max_shape=[32, 3, 224, 224],
    )],
    enabled_precisions={torch.float32},
)


save_flask_model(model, transform, model_name)
save_triton_model(model, triton_config, model_name)

In [None]:
model_name = 'efficientnet_b7-ttrt_fp32'

# Preprocessing saved alongside the model: resize, 600x600 center crop,
# tensor conversion and per-channel normalization.
transform = transforms.Compose([
    transforms.Resize(633),
    transforms.CenterCrop(600),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Pretrained EfficientNet-B7, scripted to TorchScript, moved to `device`, eval mode.
model = torchvision.models.efficientnet_b7(pretrained=True)
model = torch.jit.script(model)
model = model.to(device)
model = model.eval()

# Triton model configuration written verbatim (stripped) to config.pbtxt.
# NOTE(review): the artifact saved below comes from torch.jit.save; confirm that
# platform "tensorrt_plan" is what the Triton deployment expects for it.
triton_config = """
platform: "tensorrt_plan"
max_batch_size: 32
input [
 {
    name: "input_0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ 3, 600, 600 ]
  }
]
output {
    name: "output_0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
"""

# Torch-TensorRT compilation with a dynamic batch dimension (1..32, tuned for
# 16). The spatial size must be 600x600 so the engine accepts the crop produced
# by `transform` and the dims declared in `triton_config`.
model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input(
        min_shape=[1, 3, 600, 600],
        opt_shape=[16, 3, 600, 600],
        max_shape=[32, 3, 600, 600],
    )],
    enabled_precisions={torch.float32},
)



save_flask_model(model, transform, model_name)
save_triton_model(model, triton_config, model_name)