In [1]:
import torch
import torchvision
import torch.utils.data as data
import torchvision.transforms as transforms

from torchinfo import summary

import numpy as np
import time

In [2]:
IMAGENET_DIR = '/home/data/ImageNet'
# IMAGENET_DIR = 'D:/ImageNet'
BATCH_SIZE = 128
LOADER_WORKERS = 4

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [4]:
model = torchvision.models.resnet152(pretrained=True)
model = torch.jit.script(model) # ONNX, Framework 에서 진행할 때 모두 Torch Script 모델을 이용해야 함
model = model.to(device)
model = model.eval()

### 참고
TrochScript Trace vs Script

1. torch.jit.trace
--> Provide example inputs, The Tracer runs the function, recording the tensor operations performed.(*Warning : control-flow and data structyres are ignered : ex - If Else 문 시 하나만 인식)
2. torch.jit.script
--> Translate model directly to TrochScript. Control-flow is preserved (보편적인 방법)

## Make ONNX File

In [None]:
input_names=["actual_input_1"]
output_names=["output_1"]
torch.onnx.export(model,torch.randn(BATCH_SIZE,3,224,224).cuda(), 'model.onnx',
                input_names=input_names,output_names=output_names,
                dynamic_axex={'actual_input_1':{0,'batch_size'},'output_1':{0:'batch_size'}}) #생략가능 (여러가지 배치사이즈를 커버하고 싶을 때))

### ONNX에서 TensorRT로 바꾸는 과정
- TensorRT에서 지원되지 않는 과정에서 에러가 날 수 있음 
1. onnx simpplified_model 이용 (python3 -m onnxsim model.onnx simplified_model.onnx)
2. custom Plugin, Onnx-GraphSurgeon
3. Use Framework integration vesion(TF-TRT, Torch-TRT)

In [None]:
!trtexec\
    --onnx=model.onnx\
    --explicitBatch\
    --optShapes=actual_input_1:16x3x224x224\
    --maxShapes=actual_input_1:32x3x224x224\
    --minShapes=actual_input_1:1x3x224x224\
    --best \
    --saveEngine=model.plan

In [None]:
!trtexec --loadEngine=model.plan --dumpOutput


In [None]:
!trtexec --loadEngine=model.plan --dumpProfile


## Build TensorRT (Torch-TRTb)


In [None]:
import torch_tensorrt as torchtrt

#model =torch.jit.Load('')

trt_module=torchtrt.comile(model,inputs=[torchtrt.Input(
                        min_shape=[1,3,224,224],
                        opt_shape=[16,3,224,224],
                        max_shape=[32,3,224,224],)], enabled_precisions={torch.half})
trt_module.save('test.ts')

In [None]:
!rm -rf models
!mkdir -p models/torch_model
!mkdir -p models/torch_model/1
!mkdir -p models/onnx_model
!mkdir -p models/onnx_model/1
!mkdir -p models/trt_model
!mkdir -p models/trt_model/1

!mv model.pt models/torch_model/1
!mv model.onnx models/onnx_model/1
!mv model.plan models/trt_model/1

!cp src/onnx_config.pbtxt models/onnx_model/config.pbtxt
!cp scr/torch_config.pbtxt models/torch_model/config.pbtxt
!cp src/trt_config.pbtxt models/trt_model/config.pbtxt

In [None]:
trt_module=torchtrt.compile(model, input =example_input,enabled_precisions={torch.half})

trt_module(x)

In [5]:
summary(model, (BATCH_SIZE, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   --                        --
├─Conv2d: 1-1                            --                        9,408
├─BatchNorm2d: 1-2                       --                        128
├─ReLU: 1-3                              --                        --
├─MaxPool2d: 1-4                         --                        --
├─Sequential: 1-5                        --                        --
│    └─Bottleneck: 2-1                   --                        --
│    │    └─Conv2d: 3-1                  --                        4,096
│    │    └─BatchNorm2d: 3-2             --                        128
│    │    └─Conv2d: 3-3                  --                        36,864
│    │    └─BatchNorm2d: 3-4             --                        128
│    │    └─Conv2d: 3-5                  --                        16,384
│    │    └─BatchNorm2d: 3-6             --                        5

In [6]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
dataset = torchvision.datasets.ImageNet(root=IMAGENET_DIR, transform=transform, split='val')
loader = data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=LOADER_WORKERS)

In [8]:
n_top1 = 0
n_top5 = 0
cnt = 0

s = time.time()
pred_tms = []
with torch.no_grad():
    for images, labels in loader:

        x = images.to(device)
        y = labels.to(device)
        
        ss = time.time()
        output = model(x)
        pred_tms.append(time.time()-ss)
        
        cnt += output.size(0)
        
        _, pred_top1 = output.max(1)
        _, pred_top5 = output.topk(5, 1, True, True)
        
        n_top1 += torch.eq(pred_top1, y).sum().item()
        n_top5 += torch.isin(pred_top5, y).max(1)[0].sum().item()

        print(f"\rstep: {cnt}/{len(dataset)}", end='')
total_tm = time.time() - s
print()
print(f"top-1:  {n_top1/cnt:0.4f}")
print(f"top-5:  {n_top5/cnt:0.4f}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Total Time: {total_tm:0.4f} ({total_tm/len(dataset):0.4f})")
print(f"Average Prediction Time: {np.mean(pred_tms):0.4f}")

step: 50000/50000
top-1:  0.7833
top-5:  0.9463
Batch Size: 128
Total Time: 178.6659 (0.0036)
Average Prediction Time: 0.0128
