In [1]:
import MLC.mlc as mlc
import torch
import torch.nn as nn

class Demo(nn.Module):
    """Tiny conv -> dense -> classifier network used to demo the lowering flow.

    The layer sizes fit a single ``(1, 1, 32, 32)`` input: the unpadded 3x3,
    stride-1 convolution yields a 30x30 map, which flattens to the 900
    features that ``w`` expects.
    """

    def __init__(self):
        super().__init__()
        # Creation order is intentional: each of these draws from the global
        # torch RNG, so reordering would change the initial values for a seed.
        self.w = nn.Parameter(torch.rand(900, 128))
        self.b = nn.Parameter(torch.rand(128))
        self.conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map one single-channel image ``x`` to a ``(1, 10)`` output tensor."""
        # Functional ReLU here, module ReLU further down: both call styles
        # are exercised on purpose and must stay distinct.
        conv_out = self.conv2d(x)
        activated = torch.relu(conv_out)
        # Collapse everything into one row; this fixes the batch size at 1.
        flat = activated.view([1, -1])
        # Hand-written dense layer: matmul with ``w`` followed by a bias add.
        projected = torch.matmul(flat, self.w)
        biased = torch.add(projected, self.b)
        hidden = self.relu(biased)
        return self.linear(hidden)

In [2]:
# Instantiate the PyTorch demo model and hand it to MLC's `from_fx` importer.
# NOTE(review): the second argument is presumably the list of input shapes
# (a single 1x1x32x32 tensor here) — confirm against mlc.from_fx.
demo_input_shapes = [(1, 1, 32, 32)]
DemoRelax = mlc.from_fx(Demo(), demo_input_shapes)

In [3]:

# Build the pass object, then apply it to the imported module.
# NOTE(review): judging by its name, FuseDenseAddPass presumably fuses the
# matmul + add pair from Demo.forward — confirm against MLC.mlc.
fuse_dense_add = mlc.FuseDenseAddPass()
DemoFused = fuse_dense_add(DemoRelax)
# Uncomment to inspect the module after this pass:
# DemoFused.show()

In [4]:

# Build the pass object, then apply it to the fused module.
# NOTE(review): judging by its name, LowerToTensorIRPass presumably lowers
# the remaining high-level ops to TensorIR functions — confirm against MLC.mlc.
lower_to_tensor_ir = mlc.LowerToTensorIRPass()
DemoModelTIR = lower_to_tensor_ir(DemoFused)
# Uncomment to inspect the module after this pass:
# DemoModelTIR.show()

In [5]:
from tvm import relax

# Apply TVM's built-in FuseTIR pass to the lowered module.
# NOTE(review): presumably this merges the TIR functions of each fused group
# into a single function — confirm against the TVM Relax documentation.
fuse_tir = relax.transform.FuseTIR()
DemoModelFinal = fuse_tir(DemoModelTIR)
# Uncomment to inspect the module after this pass:
# DemoModelFinal.show()

In [6]:
# Tune the fused module for CUDA via MLC's tuning helper.
# The target string sets 1024 threads per block and 49152 bytes (48 KiB) of
# shared memory per block.
# NOTE(review): both trial counts are 1, so this is presumably a smoke run
# rather than a real search — raise them for meaningful tuning.
DemoModelTuned = mlc.mlc_tune_tir(
    DemoModelFinal,
    target="cuda --max_threads_per_block=1024 --max_shared_memory_per_block=49152",
    max_trials_global=1,
    num_trials_per_iter=1,
    compile_tir_target="cuda",
)

2023-02-13 22:48:13 [INFO] Logging directory: ./tune_tmp/logs
2023-02-13 22:48:22 [INFO] LocalBuilder: max_workers = 36
2023-02-13 22:48:24 [INFO] LocalRunner: max_workers = 1
2023-02-13 22:48:29 [INFO] [task_scheduler.cc:159] Initializing Task #0: "main"
2023-02-13 22:48:29 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
