In [1]:
import torch
import torchvision.models as models
from Torch2Tensor.t2t import T2TParser

In [2]:
# Names of torchvision classification model constructors; each entry can be
# resolved with getattr(models, name).
cls_model_name = [
    'alexnet',
    'googlenet',
    'vgg11',
    'resnet50',
    'inception_v3',
    'densenet121',
    'mobilenet_v2',
    'shufflenet_v2_x1_0',
    'regnet_y_400mf',
    'mnasnet0_5',
    'squeezenet1_0',
    'efficientnet_b0',
    'mobilenet_v3_small',
]

In [3]:
# CUDA tuning options, unpacked into the parser call as keyword arguments.
# Note: if you only have a CPU platform, ignore this dict.
mlc_dict = {
    'target': 'cuda --max_threads_per_block=1024 --max_shared_memory_per_block=49152',
    'work_dir': "./demo",
    'task_name': 'main',
    'max_trials_global': 32,
    'num_trials_per_iter': 32,
    'compile_tir_target': 'cuda',
}

In [4]:
# test demo
class Demo(torch.nn.Module):
    """Minimal test network: 3x3 convolution -> adaptive average pool -> linear layer."""

    def __init__(self) -> None:
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys, the traced node names and the order random init runs.
        self.conv = torch.nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)
        self.ada = torch.nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.linear = torch.nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Run `x` through conv, pooling, a flatten to one row, and the linear layer."""
        pooled = self.ada(self.conv(x))
        # view((1, -1)) collapses everything into a single row, so the linear
        # layer (3 input features) only lines up when the batch size is 1.
        flattened = pooled.view((1, -1))
        return self.linear(flattened)

In [5]:
# To try a torchvision classifier instead of the toy network, use e.g.:
# model = getattr(models, 'alexnet')()
# The model is built before the input so random-number consumption order
# (weights first, then the sample tensor) stays the same.
model = Demo()
# A single input of shape (1, 3, 224, 224); the sample tensor is drawn with that same shape.
input_shapes = [(1, 3, 224, 224)]
x = torch.randn(input_shapes[0])

In [6]:
# CPU-only alternative (per the original note, the default target is llvm):
# PR = T2TParser(model, x, input_shapes)
# GPU run: device 0, with the CUDA tuning options unpacked from mlc_dict.
PR = T2TParser(
    model,
    x,
    input_shapes,
    device_id=0,
    **mlc_dict
)

2023-03-07 20:12:44.585 | INFO     | Torch2Tensor.t2t_optimizer.tir_tune.mlc_tune:__init__:22 - target: cuda --max_threads_per_block=1024 --max_shared_memory_per_block=49152; compile_tir_target: cuda


In [7]:
# Run the conversion, then show both graphs held by the parser:
# the PyTorch graph as an opcode/name/target/args table, and the Relax graph.
# NOTE(review): print_ir logs "None" in the saved output, so the IR text is
# presumably printed by a call that returns None -- confirm in t2t.print_ir.
PR.convert()
PR.print_tabular(PR.pytorch_graph)
PR.print_ir(PR.relax_graph)

2023-03-07 20:12:44.662 | DEBUG    | Torch2Tensor.t2t:convert:84 - <class 'torch.Tensor'>
2023-03-07 20:12:44.725 | DEBUG    | Torch2Tensor.t2t:print_tabular:48 - 
opcode       name    target    args            kwargs
-----------  ------  --------  --------------  --------
placeholder  x       x         ()              {}
call_module  conv    conv      (x,)            {}
call_module  ada     ada       (conv,)         {}
call_method  view    view      (ada, (1, -1))  {}
call_module  linear  linear    (view,)         {}
output       output  output    (linear,)       {}
2023-03-07 20:12:44.736 | INFO     | Torch2Tensor.t2t_relax.input_layer:_generate_input:15 - input_layer: x created
2023-03-07 20:12:44.738 | DEBUG    | Torch2Tensor.t2t_relax.module.conv_layer:generate_node:32 - {'strides': (1, 1), 'padding': (0, 0), 'dilation': (1, 1), 'groups': 1, 'data_layout': 'NCHW', 'kernel_layout': 'OIHW', 'out_layout': None, 'out_dtype': None}
2023-03-07 20:12:44.742 | INFO     | Torch2Tensor.t2t_

2023-03-07 20:12:44.922 | INFO     | Torch2Tensor.t2t:print_ir:59 - None


In [8]:
# Generate the TensorIR, print it, then log the list of op names it contains
# (the saved output lists e.g. 'conv2d', 'adaptive_pool', 'dense_te').
PR.gen_TensorIR()
PR.print_ir(PR.TensorIR)
PR.print_op(PR.TensorIR)

2023-03-07 20:12:45.854 | INFO     | Torch2Tensor.t2t:print_ir:59 - None
2023-03-07 20:12:45.856 | INFO     | Torch2Tensor.t2t:print_op:68 - ['reshape1', 'add', 'dense_te', 'adaptive_pool', 'conv2d', 'reshape']


In [9]:
# tune for gpu platform
# Per the saved log output, tuning proceeds one op at a time and writes its
# logs under the work directory (e.g. ./demo/op_dense_te/logs); this is the
# slow step of the notebook. Afterwards the tuned TensorIR is printed.
PR.tune_tir()
PR.print_ir(PR.tuned_TensorIR)

2023-03-07 20:13:20 [INFO] [task_scheduler.cc:260] Task #0 has finished. Remaining task(s): 0


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,147852,1,57.8955,2.5538,2.5538,6,Y


2023-03-07 20:13:20 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |   FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------
  0 | main | 147852 |      1 |        57.8955 |       2.5538 |                2.5538 |      6 |    Y 
-----------------------------------------------------------------------------------------------------
Total trials: 6
Total latency (us): 2.55377



2023-03-07 20:13:20.964 | INFO     | Torch2Tensor.t2t_optimizer.tir_tune.mlc_tune:mlc_tune_tir:38 - read to tune no.2 op: dense_te
2023-03-07 20:13:20.966 | INFO     | Torch2Tensor.t2t_optimizer.tir_tune.mlc_tune:mlc_tune_op:47 - op: dense_te


2023-03-07 20:13:20 [INFO] Logging directory: ./demo/op_dense_te/logs
2023-03-07 20:13:20 [INFO] LocalBuilder: max_workers = 36
2023-03-07 20:13:23 [INFO] LocalRunner: max_workers = 1
2023-03-07 20:13:25 [INFO] [task_scheduler.cc:159] Initializing Task #0: "main"
2023-03-07 20:13:25 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2023-03-07 20:13:26 [INFO] [task_scheduler.cc:193] Sending 31 sample(s) to builder
2023-03-07 20:13:32 [INFO] [task_scheduler.cc:195] Sending 31 sample(s) to runner


In [None]:
# Run the accuracy check, then the inference benchmark; per the saved log
# output these report "accuracy test passed" and the inference time of the
# tensor program and of the torch model respectively.
# NOTE(review): the saved output of this cell is timestamped earlier than the
# tuning run and the cell has no execution count -- re-run it after tuning
# finishes so the reported numbers match the tuned program.
PR.check_result()
PR.infer_benchmark()

2023-03-07 20:10:56.979 | INFO     | Torch2Tensor.benchmark:check_result:73 - accuracy test passed
2023-03-07 20:10:57.613 | INFO     | Torch2Tensor.benchmark:inf:98 - tensor program inf time: 0.629478(ms)
2023-03-07 20:10:57.934 | INFO     | Torch2Tensor.benchmark:inf:119 - torch model inf time : 0.256214(ms)
