In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

import os

* ONNX 파일의 Input size 변경하여 모델 및 config 수정 필요
* .onnx 파일을 이용해 trt 모델 생성 (64, 128, 256, 512, 1024)

In [2]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [9]:
# Directory the TorchScript (*.pt) models are loaded from by the export cells.
SCRIPT_MODEL_DIR = './Flask/Models'

# Output roots: flat <model_name>.onnx files for Flask, and one
# <model_name>/ sub-directory per model for Triton.
FLASK_MODEL_DIR = './Flask/Models'
TRITON_MODEL_DIR = './Triton/Models'

# File names written inside each Triton model directory: the config sits at
# <model_name>/ and the ONNX model under <model_name>/1/.
TRITON_CONFIG_FILE = 'config.pbtxt'
TRITON_MODEL_FILE = 'model.onnx'

# Make sure both output roots exist before anything is exported.
for _output_dir in (FLASK_MODEL_DIR, TRITON_MODEL_DIR):
    os.makedirs(_output_dir, exist_ok=True)

In [10]:
def save_flask_model(model, model_name, input_size):
    """Export `model` to ONNX for the Flask server and print the output path.

    The file is written to ``FLASK_MODEL_DIR/<model_name>.onnx``.

    Args:
        model: Model to export; passed to ``torch.onnx.export`` unchanged.
        model_name: File stem of the exported model (``.onnx`` is appended).
        input_size: Shape of the random dummy input used for the export,
            unpacked into ``torch.randn`` (callers in this notebook pass
            ``(1, 3, H, W)``). Axis 0 is exported as a dynamic ``batch_size``
            axis; the remaining axes keep the sizes given here.

    Returns:
        None.
    """
    path = os.path.join(FLASK_MODEL_DIR, model_name + '.onnx')
    # Create the target directory here, like save_triton_model does, so the
    # function does not depend on the configuration cell having been run.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    torch.onnx.export(model,
                      torch.randn(*input_size).to(device),
                      path,                                          # where the model is saved
                      export_params=True,                            # store the weights in the file
                      input_names=['input__0'],                       # name of the model input
                      output_names=['output__0'],                     # name of the model output
                      dynamic_axes={'input__0': {0: 'batch_size'},    # variable-length axes
                                    'output__0': {0: 'batch_size'}})
    print(path)

In [11]:
def save_triton_model(model, config, model_name, input_size):
    """Write the Triton files for `model` and print each written path.

    Two files are produced under ``TRITON_MODEL_DIR/<model_name>/``:
    ``TRITON_CONFIG_FILE`` holding `config` with surrounding whitespace
    stripped, and ``1/TRITON_MODEL_FILE`` holding the ONNX export of `model`.

    Args:
        model: Model to export; passed to ``torch.onnx.export`` unchanged.
        config: Text of the Triton model configuration.
        model_name: Name of the model's sub-directory.
        input_size: Shape of the random dummy input used for the export,
            unpacked into ``torch.randn``. Axis 0 is exported as a dynamic
            ``batch_size`` axis.

    Returns:
        None.
    """
    # --- configuration file ---
    config_path = os.path.join(TRITON_MODEL_DIR, model_name, TRITON_CONFIG_FILE)
    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    with open(config_path, 'w') as config_file:
        config_file.write(config.strip())
    print(config_path)

    # --- ONNX model, placed in the "1" sub-directory ---
    onnx_path = os.path.join(TRITON_MODEL_DIR, model_name, '1', TRITON_MODEL_FILE)
    os.makedirs(os.path.dirname(onnx_path), exist_ok=True)

    dummy_input = torch.randn(*input_size).to(device)
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        export_params=True,               # store the weights in the file
        input_names=['input__0'],
        output_names=['output__0'],
        dynamic_axes={                    # only the batch axis is variable
            'input__0': {0: 'batch_size'},
            'output__0': {0: 'batch_size'},
        },
    )
    print(onnx_path)

In [6]:
model_name = 'resnet34-onnx-64'
script_model_name = 'resnet34-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 64, 64)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)



./Flask/Models/resnet34-onnx-64.onnx
./Triton/Models/resnet34-onnx-64/config.pbtxt
./Triton/Models/resnet34-onnx-64/1/model.onnx


In [7]:
model_name = 'resnet34-onnx-128'
script_model_name = 'resnet34-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 128, 128)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

./Flask/Models/resnet34-onnx-128.onnx
./Triton/Models/resnet34-onnx-128/config.pbtxt
./Triton/Models/resnet34-onnx-128/1/model.onnx


In [8]:
model_name = 'resnet34-onnx-256'
script_model_name = 'resnet34-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 256, 256)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

./Flask/Models/resnet34-onnx-256.onnx
./Triton/Models/resnet34-onnx-256/config.pbtxt
./Triton/Models/resnet34-onnx-256/1/model.onnx


In [None]:
model_name = 'resnet34-onnx-512'
script_model_name = 'resnet34-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 512, 512)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

In [None]:
model_name = 'resnet34-onnx-1024'
script_model_name = 'resnet34-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 1024, 1024)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

In [7]:
model_name = 'mobilenet_v2-onnx'
script_model_name = 'mobilenet_v2-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 224, 224)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

./Flask/Models/mobilenet_v2-onnx.onnx
./Triton/Models/mobilenet_v2-onnx/config.pbtxt
./Triton/Models/mobilenet_v2-onnx/1/model.onnx


In [8]:
model_name = 'efficientnet_b0-onnx'
script_model_name = 'efficientnet_b0-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 224, 224)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)

./Flask/Models/efficientnet_b0-onnx.onnx
./Triton/Models/efficientnet_b0-onnx/config.pbtxt
./Triton/Models/efficientnet_b0-onnx/1/model.onnx


In [None]:
model_name = 'efficientnet_b7-onnx'
script_model_name = 'efficientnet_b7-script.pt'

model_file = os.path.join(SCRIPT_MODEL_DIR, script_model_name)
# map_location places the weights on `device` while loading, so a script that
# was saved from a GPU still loads when `device` has fallen back to the CPU.
model = torch.jit.load(model_file, map_location=device)
model = model.to(device)
model = model.eval()

input_size = (1, 3, 600, 600)
# The config dims are filled in from input_size[1:] (input_size without its
# leading batch entry) so the exported model and its config cannot drift apart.
triton_config = """
platform: "onnxruntime_onnx"
max_batch_size: 32
input [
 {
    name: "input__0"
    data_type: TYPE_FP32
    format: FORMAT_NCHW
    dims: [ %d, %d, %d ]
  }
]
output {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
""" % input_size[1:]

save_flask_model(model, model_name, input_size)
save_triton_model(model, triton_config, model_name, input_size)