In [34]:
import torch
import gc

from torch import nn
from torch.nn.utils import weight_norm
from torch.profiler import profile, record_function, ProfilerActivity

import timeit

torch.cuda.empty_cache()
gc.collect()

def WNConv1d(*args, **kwargs):
    return weight_norm(nn.Conv1d(*args, **kwargs))


conv1d = WNConv1d(128, 128, 7, dilation=1, padding=3).to('mps')

torch.manual_seed(0)
data = torch.randn(1, 128, 220500).to('mps')

with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with record_function("model_inference"):
        conv1d(data)

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print("\n\n")
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10))

# # time
# execution_time = timeit.timeit(lambda: conv1d(data), number=1000)

# print(f"Average execution time: {execution_time / 1000:.6f} seconds per call")

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 model_inference        18.51%      81.697us       100.00%     441.363us     441.363us             1  
              aten::_weight_norm         0.49%       2.166us        55.58%     245.293us     245.293us             1  
    aten::_weight_norm_interface        53.29%     235.207us        55.09%     243.127us     243.127us             1  
                    aten::conv1d         0.66%       2.917us        25.91%     114.373us     114.373us             1  
               aten::convolution         1.43%       6.294us        25.25%     111.456us     111.456us             1  
              aten::_convolution         1.98%  

In [1]:
import tvm

from tvm import relax
from tvm.relax.frontend import nn

from typing import Optional
from tvm import te
from tvm import dlight as dl
from tvm.target import Target
import numpy as np
import tempfile

from mlc_dac.layers import WNConv1d

# conv1d = WNConv1d(128, 128, 7, dilation=1, padding=3)

conv1d = nn.Conv1D(128, 128, 7, dilation=1, padding=3)
mod, params = conv1d.export_tvm(
    {"forward": {"x": nn.spec.Tensor((1, 128, 220500), "float32")}}
)

trials = 2000
target = Target.from_device("metal")

with target, tempfile.TemporaryDirectory() as tmp_dir:
    seq = tvm.transform.Sequential(
        [
            # tvm.relax.transform.LegalizeOps(),
            # tvm.relax.transform.AnnotateTIROpPattern(),
            # tvm.relax.transform.FoldConstant(),
            # tvm.relax.transform.FuseOps(),
            # tvm.relax.transform.FuseTIR(),
            # dl.ApplyDefaultSchedule(
            #     # dl.gpu.Matmul(),
            #     dl.gpu.GEMV(),
            #     dl.gpu.Reduction(),
            #     dl.gpu.GeneralReduction(),
            #     dl.gpu.Fallback(),
            # ),

            relax.get_pipeline("zero"),
            relax.transform.MetaScheduleTuneTIR(work_dir=tmp_dir, max_trials_global=trials),
            relax.transform.MetaScheduleApplyDatabase(work_dir=tmp_dir),
        ]
    )

    mod = seq(mod)

mod.show()

2024-09-17 12:39:44 [INFO] Logging directory: /var/folders/50/mzqbqxqj5fddcby2mg3h334c0000gp/T/tmpsoeq5wyn/logs
2024-09-17 12:39:50 [INFO] LocalBuilder: max_workers = 24
2024-09-17 12:39:50 [INFO] LocalRunner: max_workers = 1
2024-09-17 12:39:51 [INFO] [task_scheduler.cc:159] Initializing Task #0: "main"
2024-09-17 12:39:51 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:51 [INFO] [task_scheduler.cc:193] Sending 3 sample(s) to builder
2024-09-17 12:39:53 [INFO] [task_scheduler.cc:195] Sending 3 sample(s) to runner
2024-09-17 12:39:54 [DEBUG] XGB iter   0: tr-p-rmse: 0.422960	tr-a-peak@32: 1.000000	tr-rmse: 0.422972	tr-rmse: 0.422972
2024-09-17 12:39:54 [DEBUG] XGB iter  25: tr-p-rmse: 0.008021	tr-a-peak@32: 1.000000	tr-rmse: 0.008030	tr-rmse: 0.008030
2024-09-17 12:39:54 [DEBUG] XGB iter  50: tr-p-rmse: 0.003364	tr-a-peak@32: 1.000000	tr-rmse: 0.003361	tr-rmse: 0.003361
2024-09-17 12:39:54 [DEBUG] XGB iter  75: tr-p-rmse: 0.003362	tr-a-peak@32: 1.000

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,



Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |      
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:193] Sending 0 sample(s) to builder
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:195] Sending 0 sample(s) to runner
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,



Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |      
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:193] Sending 0 sample(s) to builder
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:195] Sending 0 sample(s) to runner
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,



Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |      
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:193] Sending 0 sample(s) to builder
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:195] Sending 0 sample(s) to runner
2024-09-17 12:39:55 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,


2024-09-17 12:39:55 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |      
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717


Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:55 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:56 [INFO] [task_scheduler.cc:193] Sending 0 sample(s) to builder
2024-09-17 12:39:56 [INFO] [task_scheduler.cc:195] Sending 0 sample(s) to runner
2024-09-17 12:39:56 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,


2024-09-17 12:39:56 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |      
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717


Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:56 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:39:56 [INFO] [task_scheduler.cc:260] Task #0 has finished. Remaining task(s): 0


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,1,1,0.0001,16.3717,16.3717,3,Y



Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:56 [DEBUG] [task_scheduler.cc:318] 
 ID | Name | FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
---------------------------------------------------------------------------------------------------
  0 | main |    1 |      1 |         0.0001 |      16.3717 |               16.3717 |      3 |    Y 
---------------------------------------------------------------------------------------------------
Total trials: 3
Total latency (us): 16.3717

2024-09-17 12:39:56 [INFO] Logging directory: /var/folders/50/mzqbqxqj5fddcby2mg3h334c0000gp/T/tmpsoeq5wyn/logs
2024-09-17 12:39:56 [INFO] LocalBuilder: max_workers = 24
2024-09-17 12:39:57 [INFO] LocalRunner: max_workers = 1
2024-09-17 12:39:57 [INFO] [task_scheduler.cc:159] Initializing Task #0: "main"
2024-09-17 12:39:58 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:40:38 [INFO] [task_scheduler.cc:193] Sending 64 s

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,6356.1504,7961.6795,7961.6795,64,



Total trials: 64
Total latency (us): 7961.68

2024-09-17 12:42:11 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      6356.1504 |    7961.6795 |             7961.6795 |     64 |      
----------------------------------------------------------------------------------------------------------
Total trials: 64
Total latency (us): 7961.68

2024-09-17 12:42:11 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:42:50 [INFO] [task_scheduler.cc:193] Sending 63 sample(s) to builder
2024-09-17 12:42:58 [INFO] [task_scheduler.cc:195] Sending 63 sample(s) to runner
2024-09-17 12:44:21 [DEBUG] XGB validation: p-rmse: 0.288602	a-peak@32: 1.000000
2024-09-17 12:44:21 [DEBUG] XGB iter   0: tr-p-rmse: 0.682074	tr-a-peak@32: 0.696663	tr-rmse:

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,6356.1504,7961.6795,7961.6795,127,



Total trials: 127
Total latency (us): 7961.68

2024-09-17 12:44:21 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      6356.1504 |    7961.6795 |             7961.6795 |    127 |      
----------------------------------------------------------------------------------------------------------
Total trials: 127
Total latency (us): 7961.68

2024-09-17 12:44:21 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:45:17 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:45:22 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:46:06 [DEBUG] XGB validation: p-rmse: 0.369822	a-peak@32: 0.907448
2024-09-17 12:46:06 [DEBUG] XGB iter   0: tr-p-rmse: 0.655411	tr-a-peak@32: 0.601278	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,7995.745,6329.0703,6329.0703,191,


2024-09-17 12:46:07 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      7995.7450 |    6329.0703 |             6329.0703 |    191 |      
----------------------------------------------------------------------------------------------------------
Total trials: 191
Total latency (us): 6329.07


Total trials: 191
Total latency (us): 6329.07

2024-09-17 12:46:07 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:46:59 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:47:03 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:47:40 [DEBUG] XGB validation: p-rmse: 0.203988	a-peak@32: 0.989474
2024-09-17 12:47:40 [DEBUG] XGB iter   0: tr-p-rmse: 0.607313	tr-a-peak@32: 0.848671	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,8278.4147,6112.9617,6112.9617,255,


2024-09-17 12:47:40 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      8278.4147 |    6112.9617 |             6112.9617 |    255 |      
----------------------------------------------------------------------------------------------------------
Total trials: 255
Total latency (us): 6112.96


Total trials: 255
Total latency (us): 6112.96

2024-09-17 12:47:40 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:48:35 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:48:44 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:49:15 [DEBUG] XGB validation: p-rmse: 0.104389	a-peak@32: 0.997167
2024-09-17 12:49:15 [DEBUG] XGB iter   0: tr-p-rmse: 0.566753	tr-a-peak@32: 0.994487	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,8282.5857,6109.8833,6109.8833,319,


2024-09-17 12:49:15 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      8282.5857 |    6109.8833 |             6109.8833 |    319 |      
----------------------------------------------------------------------------------------------------------
Total trials: 319
Total latency (us): 6109.88


Total trials: 319
Total latency (us): 6109.88

2024-09-17 12:49:15 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:50:04 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:50:09 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:50:42 [DEBUG] XGB validation: p-rmse: 0.038489	a-peak@32: 0.983011
2024-09-17 12:50:42 [DEBUG] XGB iter   0: tr-p-rmse: 0.523226	tr-a-peak@32: 0.912345	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9018.9111,5611.0579,5611.0579,383,


2024-09-17 12:50:43 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9018.9111 |    5611.0579 |             5611.0579 |    383 |      
----------------------------------------------------------------------------------------------------------
Total trials: 383
Total latency (us): 5611.06


Total trials: 383
Total latency (us): 5611.06

2024-09-17 12:50:43 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:51:32 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:51:37 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:52:09 [DEBUG] XGB validation: p-rmse: 0.063029	a-peak@32: 1.000000
2024-09-17 12:52:09 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9018.9111,5611.0579,5611.0579,447,


2024-09-17 12:52:09 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9018.9111 |    5611.0579 |             5611.0579 |    447 |      
----------------------------------------------------------------------------------------------------------
Total trials: 447
Total latency (us): 5611.06


Total trials: 447
Total latency (us): 5611.06

2024-09-17 12:52:09 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:53:06 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:53:10 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:53:44 [DEBUG] XGB validation: p-rmse: 0.045195	a-peak@32: 0.996464
2024-09-17 12:53:44 [DEBUG] XGB iter   0: tr-p-rmse: 0.474810	tr-a-peak@32: 0.934953	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9018.9111,5611.0579,5611.0579,511,



Total trials: 511
Total latency (us): 5611.06

2024-09-17 12:53:45 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9018.9111 |    5611.0579 |             5611.0579 |    511 |      
----------------------------------------------------------------------------------------------------------
Total trials: 511
Total latency (us): 5611.06

2024-09-17 12:53:45 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:54:37 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:55:01 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:55:38 [DEBUG] XGB validation: p-rmse: 0.170047	a-peak@32: 0.928372
2024-09-17 12:55:38 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9181.3087,5511.8103,5511.8103,575,


2024-09-17 12:55:38 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9181.3087 |    5511.8103 |             5511.8103 |    575 |      
----------------------------------------------------------------------------------------------------------
Total trials: 575
Total latency (us): 5511.81


Total trials: 575
Total latency (us): 5511.81

2024-09-17 12:55:38 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:56:28 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:56:33 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:57:03 [DEBUG] XGB validation: p-rmse: 0.109875	a-peak@32: 0.986750
2024-09-17 12:57:03 [DEBUG] XGB iter   0: tr-p-rmse: 0.452287	tr-a-peak@32: 0.881147	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9187.744,5507.9497,5507.9497,639,



Total trials: 639
Total latency (us): 5507.95

2024-09-17 12:57:05 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9187.7440 |    5507.9497 |             5507.9497 |    639 |      
----------------------------------------------------------------------------------------------------------
Total trials: 639
Total latency (us): 5507.95

2024-09-17 12:57:05 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:58:02 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:58:06 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 12:58:36 [DEBUG] XGB validation: p-rmse: 0.085712	a-peak@32: 0.968669
2024-09-17 12:58:36 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9529.8336,5310.2325,5310.2325,703,



Total trials: 703
Total latency (us): 5310.23

2024-09-17 12:58:36 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9529.8336 |    5310.2325 |             5310.2325 |    703 |      
----------------------------------------------------------------------------------------------------------
Total trials: 703
Total latency (us): 5310.23

2024-09-17 12:58:36 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 12:59:29 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 12:59:34 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:00:05 [DEBUG] XGB validation: p-rmse: 0.103093	a-peak@32: 0.947188
2024-09-17 13:00:05 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9682.3312,5226.5958,5226.5958,767,



Total trials: 767
Total latency (us): 5226.6

2024-09-17 13:00:05 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9682.3312 |    5226.5958 |             5226.5958 |    767 |      
----------------------------------------------------------------------------------------------------------
Total trials: 767
Total latency (us): 5226.6

2024-09-17 13:00:05 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:01:04 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:01:08 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:01:40 [DEBUG] XGB validation: p-rmse: 0.108505	a-peak@32: 0.995060
2024-09-17 13:01:40 [DEBUG] XGB iter   0: tr-p-rmse: 0.423035	tr-a-peak@32: 0.861206	tr-rmse:

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9682.3312,5226.5958,5226.5958,831,


2024-09-17 13:01:40 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9682.3312 |    5226.5958 |             5226.5958 |    831 |      
----------------------------------------------------------------------------------------------------------
Total trials: 831
Total latency (us): 5226.6


Total trials: 831
Total latency (us): 5226.6

2024-09-17 13:01:40 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:02:38 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:02:43 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:03:17 [DEBUG] XGB validation: p-rmse: 0.079081	a-peak@32: 0.990206
2024-09-17 13:03:17 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9978.2962,5071.5704,5071.5704,895,


2024-09-17 13:03:17 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9978.2962 |    5071.5704 |             5071.5704 |    895 |      
----------------------------------------------------------------------------------------------------------
Total trials: 895
Total latency (us): 5071.57


Total trials: 895
Total latency (us): 5071.57

2024-09-17 13:03:17 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:04:12 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:04:17 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:04:56 [DEBUG] XGB validation: p-rmse: 0.050574	a-peak@32: 0.990926
2024-09-17 13:04:56 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9978.2962,5071.5704,5071.5704,959,


2024-09-17 13:04:56 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9978.2962 |    5071.5704 |             5071.5704 |    959 |      
----------------------------------------------------------------------------------------------------------
Total trials: 959
Total latency (us): 5071.57


Total trials: 959
Total latency (us): 5071.57

2024-09-17 13:04:56 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:05:53 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:05:57 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:06:33 [DEBUG] XGB validation: p-rmse: 0.052605	a-peak@32: 0.976728
2024-09-17 13:06:33 [DEBUG] XGB iter   0: tr-p-rmse: 0.390247	tr-a-peak@32: 0.997658	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,9978.2962,5071.5704,5071.5704,1023,


2024-09-17 13:06:33 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |      9978.2962 |    5071.5704 |             5071.5704 |   1023 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1023
Total latency (us): 5071.57


Total trials: 1023
Total latency (us): 5071.57

2024-09-17 13:06:33 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:07:29 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:07:34 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:08:09 [DEBUG] XGB validation: p-rmse: 0.061129	a-peak@32: 0.999049
2024-09-17 13:08:09 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1087,


2024-09-17 13:08:09 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1087 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1087
Total latency (us): 5007.6


Total trials: 1087
Total latency (us): 5007.6

2024-09-17 13:08:09 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:09:04 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:09:09 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:09:39 [DEBUG] XGB validation: p-rmse: 0.030385	a-peak@32: 0.999954
2024-09-17 13:09:39 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1151,



Total trials: 1151
Total latency (us): 5007.6

2024-09-17 13:09:39 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1151 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1151
Total latency (us): 5007.6

2024-09-17 13:09:39 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:10:28 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:10:33 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:11:08 [DEBUG] XGB validation: p-rmse: 0.053096	a-peak@32: 0.999949
2024-09-17 13:11:08 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1215,


2024-09-17 13:11:08 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1215 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1215
Total latency (us): 5007.6


Total trials: 1215
Total latency (us): 5007.6

2024-09-17 13:11:08 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:12:07 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:12:11 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:12:40 [DEBUG] XGB validation: p-rmse: 0.038776	a-peak@32: 0.998973
2024-09-17 13:12:40 [DEBUG] XGB iter   0: tr-p-rmse: 0.369281	tr-a-peak@32: 0.988505	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1279,


2024-09-17 13:12:41 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1279 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1279
Total latency (us): 5007.6


Total trials: 1279
Total latency (us): 5007.6

2024-09-17 13:12:41 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:13:35 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:13:40 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:14:15 [DEBUG] XGB validation: p-rmse: 0.075647	a-peak@32: 0.999162
2024-09-17 13:14:15 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1343,



Total trials: 1343
Total latency (us): 5007.6

2024-09-17 13:14:15 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1343 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1343
Total latency (us): 5007.6

2024-09-17 13:14:15 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:15:06 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:15:11 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:15:46 [DEBUG] XGB validation: p-rmse: 0.043876	a-peak@32: 0.999914
2024-09-17 13:15:46 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1407,



Total trials: 1407
Total latency (us): 5007.6

2024-09-17 13:15:46 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1407 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1407
Total latency (us): 5007.6

2024-09-17 13:15:46 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:16:44 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:16:48 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:17:18 [DEBUG] XGB validation: p-rmse: 0.075743	a-peak@32: 0.999132
2024-09-17 13:17:18 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1471,


2024-09-17 13:17:18 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1471 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1471
Total latency (us): 5007.6


Total trials: 1471
Total latency (us): 5007.6

2024-09-17 13:17:18 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:18:12 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:18:18 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:19:01 [DEBUG] XGB validation: p-rmse: 0.083406	a-peak@32: 0.999609
2024-09-17 13:19:01 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1535,


2024-09-17 13:19:01 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1535 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1535
Total latency (us): 5007.6


Total trials: 1535
Total latency (us): 5007.6

2024-09-17 13:19:01 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:19:58 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:20:02 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:20:36 [DEBUG] XGB validation: p-rmse: 0.073546	a-peak@32: 0.999327
2024-09-17 13:20:36 [DEBUG] XGB iter   0: tr-p-rmse: 0.361295	tr-a-peak@32: 0.957664	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1599,


2024-09-17 13:20:36 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1599 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1599
Total latency (us): 5007.6


Total trials: 1599
Total latency (us): 5007.6

2024-09-17 13:20:36 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:21:30 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:21:35 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:22:03 [DEBUG] XGB validation: p-rmse: 0.076736	a-peak@32: 0.998053
2024-09-17 13:22:03 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1663,



Total trials: 1663
Total latency (us): 5007.6

2024-09-17 13:22:03 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1663 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1663
Total latency (us): 5007.6

2024-09-17 13:22:03 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:22:54 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:22:58 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:23:30 [DEBUG] XGB validation: p-rmse: 0.085997	a-peak@32: 0.999309
2024-09-17 13:23:30 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1727,


2024-09-17 13:23:30 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1727 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1727
Total latency (us): 5007.6


Total trials: 1727
Total latency (us): 5007.6

2024-09-17 13:23:30 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:24:26 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:24:32 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:25:05 [DEBUG] XGB validation: p-rmse: 0.057900	a-peak@32: 0.999381
2024-09-17 13:25:05 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1791,


2024-09-17 13:25:05 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1791 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1791
Total latency (us): 5007.6


Total trials: 1791
Total latency (us): 5007.6

2024-09-17 13:25:05 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:26:01 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:26:06 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:26:37 [DEBUG] XGB validation: p-rmse: 0.076619	a-peak@32: 0.993284
2024-09-17 13:26:37 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1855,


2024-09-17 13:26:37 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1855 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1855
Total latency (us): 5007.6


Total trials: 1855
Total latency (us): 5007.6

2024-09-17 13:26:37 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:27:32 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:27:37 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:28:08 [DEBUG] XGB validation: p-rmse: 0.091250	a-peak@32: 0.998187
2024-09-17 13:28:08 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1919,


2024-09-17 13:28:08 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1919 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1919
Total latency (us): 5007.6


Total trials: 1919
Total latency (us): 5007.6

2024-09-17 13:28:08 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:29:04 [INFO] [task_scheduler.cc:193] Sending 64 sample(s) to builder
2024-09-17 13:29:09 [INFO] [task_scheduler.cc:195] Sending 64 sample(s) to runner
2024-09-17 13:29:37 [DEBUG] XGB validation: p-rmse: 0.096306	a-peak@32: 0.994753
2024-09-17 13:29:37 [DEBUG] XGB iter   0: tr-p-rmse: 0.351105	tr-a-peak@32: 0.971046	tr-rms

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,1983,


2024-09-17 13:29:39 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   1983 |      
----------------------------------------------------------------------------------------------------------
Total trials: 1983
Total latency (us): 5007.6


Total trials: 1983
Total latency (us): 5007.6

2024-09-17 13:29:39 [INFO] [task_scheduler.cc:180] TaskScheduler picks Task #0: "main"
2024-09-17 13:30:33 [INFO] [task_scheduler.cc:193] Sending 17 sample(s) to builder
2024-09-17 13:30:36 [INFO] [task_scheduler.cc:195] Sending 17 sample(s) to runner
2024-09-17 13:30:41 [DEBUG] XGB validation: p-rmse: 0.032264	a-peak@32: 0.999390
2024-09-17 13:30:41 [INFO] [task_scheduler.cc:237] [Updated] Task #0: "main"


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,2000,


2024-09-17 13:30:41 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   2000 |      
----------------------------------------------------------------------------------------------------------
Total trials: 2000
Total latency (us): 5007.6


Total trials: 2000
Total latency (us): 5007.6

2024-09-17 13:30:41 [INFO] [task_scheduler.cc:260] Task #0 has finished. Remaining task(s): 0


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,main,50605632000,1,10105.7572,5007.6042,5007.6042,2000,Y


2024-09-17 13:30:41 [DEBUG] [task_scheduler.cc:318] 
 ID | Name |        FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
----------------------------------------------------------------------------------------------------------
  0 | main | 50605632000 |      1 |     10105.7572 |    5007.6042 |             5007.6042 |   2000 |    Y 
----------------------------------------------------------------------------------------------------------
Total trials: 2000
Total latency (us): 5007.6


Total trials: 2000
Total latency (us): 5007.6





In [24]:
ex = relax.build(mod, target)
device = tvm.metal()

np.random.seed(0)
vm = relax.VirtualMachine(ex, device, profile=True)
tvm_data = tvm.nd.array(data.cpu(), device=device)
tvm_params = [np.random.randn(*param.shape).astype("float32") for _, param in params]
tvm_params = [tvm.nd.array(param, device=device) for param in tvm_params]

# output_tvm = vm["forward"](tvm_data, *tvm_params)
# output_tvm = output_tvm.asnumpy()

# output_tvm

report = vm.profile("forward", tvm_data, *tvm_params)
csv = report.csv()

with open("profile_conv1d.csv", "w", encoding="utf-8") as f:
    f.write(csv)
    print("Profile saved to profile_conv1d.csv")


Profile saved to profile_conv1d.csv


In [33]:
vm_eval = relax.VirtualMachine(ex, device)
timing_res = vm_eval.time_evaluator("forward", device, number=3, repeat=10, min_repeat_ms=100)(tvm_data, *tvm_params)
print(timing_res)

Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
   6.4725       6.4723       6.5175       6.4398       0.0245                  


In [29]:
# with target:
#     seq = tvm.transform.Sequential(
#         [
#             dl.ApplyDefaultSchedule(
#                 dl.gpu.Matmul(),
#                 dl.gpu.GEMV(),
#                 dl.gpu.Reduction(),
#                 dl.gpu.GeneralReduction(),
#                 dl.gpu.Fallback(),
#             ),
#         ]
#     )

#     mod2 = seq(mod)

# ex2 = relax.build(mod2, target)
# device = tvm.metal()

# np.random.seed(0)
# tvm_data = tvm.nd.array(data.cpu(), device=device)
# tvm_params = [np.random.randn(*param.shape).astype("float32") for _, param in params]
# tvm_params = [tvm.nd.array(param, device=device) for param in tvm_params]

# # output_tvm = vm["forward"](tvm_data, *tvm_params)
# # output_tvm = output_tvm.asnumpy()

# # output_tvm

# vm_eval_2 = relax.VirtualMachine(ex2, device)
# timing_res = vm_eval_2.time_evaluator("forward", device, number=3, repeat=10, min_repeat_ms=100)(tvm_data, *tvm_params)
# print(timing_res)

Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
   6.4132       6.4090       6.4972       6.3697       0.0340                  
