This repository has been archived by the owner on Oct 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
Performance
daemyung jang edited this page Aug 28, 2023
·
12 revisions
All benchmarks were run on an NVIDIA A100 GPU.
adaptive avg pool2d forward:
['out_sz'] torch trident
0 [2] 2.407567 0.058167
1 [4] 0.658252 0.037786
2 [8] 0.221917 0.026749
3 [16] 0.105544 0.023464
4 [32] 0.035085 0.022730
5 [64] 0.024257 0.022849
6 [128] 0.023380 0.023016
7 [256] 0.028772 0.022900
8 [512] 0.059232 0.023216
9 [1024] 0.199658 0.023544
argmax forward:
['x_size'] torch trident
0 [256] 0.020706 0.021065
1 [512] 0.020730 0.020744
2 [768] 0.020973 0.020971
3 [1024] 0.021645 0.020988
4 [1280] 0.022271 0.021300
5 [1536] 0.021132 0.020372
6 [1792] 0.021736 0.020762
7 [2048] 0.023057 0.020707
8 [2304] 0.023628 0.021190
9 [2560] 0.025492 0.021214
10 [2816] 0.026092 0.020881
11 [3072] 0.025343 0.021014
12 [3328] 0.025790 0.020111
13 [3584] 0.025830 0.020833
14 [3840] 0.026767 0.021324
15 [4096] 0.027468 0.021382
16 [4352] 0.028422 0.021643
17 [4608] 0.029349 0.022147
18 [4864] 0.030226 0.020670
19 [5120] 0.031315 0.021295
batch norm forward:
['vec_sz'] torch trident
0 [256] 0.023134 0.022583
1 [512] 0.023538 0.022709
2 [768] 0.023290 0.022545
3 [1024] 0.023034 0.022257
4 [1280] 0.023910 0.022607
5 [1536] 0.023139 0.022387
6 [1792] 0.023353 0.022711
7 [2048] 0.022363 0.022029
8 [2304] 0.023016 0.022556
9 [2560] 0.023033 0.023223
10 [2816] 0.022987 0.022104
11 [3072] 0.022793 0.023033
12 [3328] 0.023338 0.022825
13 [3584] 0.022974 0.021979
14 [3840] 0.022476 0.021617
15 [4096] 0.022836 0.022610
16 [4352] 0.022388 0.022188
17 [4608] 0.022896 0.022006
18 [4864] 0.021999 0.022716
19 [5120] 0.023042 0.022825
conv2d forward:
['wgt_sz'] torch trident
0 [3] 0.041103 0.221850
1 [6] 0.095023 0.478966
2 [9] 0.100672 3.406986
3 [12] 0.147708 3.339508
4 [15] 0.211731 3.281305
5 [18] 0.286659 29.456692
6 [21] 0.372216 29.051914
7 [24] 0.423142 28.661348
8 [27] 0.522616 28.291069
9 [30] 0.633512 27.910589
10 [33] 0.757656 104.394592
11 [36] 0.888387 102.908684
12 [39] 0.944410 101.451553
13 [42] 1.081541 96.892746
14 [45] 1.239014 95.464378
15 [48] 1.414840 94.153313
16 [51] 1.556308 89.749115
17 [54] 1.566330 88.348534
18 [57] 1.718753 84.171700
19 [60] 1.929333 82.765495
cosine similarity forward:
['x_size'] torch trident
0 [256] 0.034287 0.021697
1 [512] 0.034731 0.022276
2 [768] 0.037526 0.022272
3 [1024] 0.040925 0.022317
4 [1280] 0.043659 0.022763
5 [1536] 0.046284 0.022247
6 [1792] 0.049074 0.022205
7 [2048] 0.051736 0.022904
8 [2304] 0.054257 0.021946
9 [2560] 0.056950 0.022003
10 [2816] 0.059393 0.022584
11 [3072] 0.062326 0.022532
12 [3328] 0.064925 0.022265
13 [3584] 0.067595 0.022355
14 [3840] 0.070255 0.022456
15 [4096] 0.074240 0.022657
16 [4352] 0.076394 0.022686
17 [4608] 0.081087 0.022750
18 [4864] 0.083882 0.022710
19 [5120] 0.087392 0.023055
dropout forward:
['inp_sz'] torch trident
0 [32] 0.021306 0.332626
1 [64] 0.020500 2.015552
2 [96] 0.020138 0.010680
3 [128] 0.021670 0.010663
4 [160] 0.020802 0.280228
5 [192] 0.021644 0.013820
6 [224] 0.021275 0.016002
7 [256] 0.021676 0.017109
8 [288] 0.021588 0.179013
9 [320] 0.020678 0.010304
10 [352] 0.020714 0.015291
11 [384] 0.020199 0.011216
12 [416] 0.020689 0.010679
13 [448] 0.020804 0.010294
14 [480] 0.020357 0.012001
15 [512] 0.019397 0.013506
16 [544] 0.021464 0.206963
17 [576] 0.021583 0.019843
18 [608] 0.020417 0.012403
19 [640] 0.019847 0.011097
geglu forward:
['m_size', 'n_size', 'k_size'] torch trident
0 [64, 64, 64] 0.021354 0.020451
1 [128, 128, 128] 0.021636 0.020918
2 [192, 192, 192] 0.021905 0.020655
3 [256, 256, 256] 0.023236 0.021336
4 [320, 320, 320] 0.025564 0.021546
5 [384, 384, 384] 0.028031 0.021886
6 [448, 448, 448] 0.029187 0.023142
7 [512, 512, 512] 0.040362 0.024485
8 [576, 576, 576] 0.043997 0.025695
9 [640, 640, 640] 0.053626 0.026415
10 [704, 704, 704] 0.072625 0.030867
11 [768, 768, 768] 0.087340 0.033605
12 [832, 832, 832] 0.104307 0.037652
13 [896, 896, 896] 0.111495 0.036811
14 [960, 960, 960] 0.147818 0.050095
15 [1024, 1024, 1024] 0.170902 0.052252
16 [1088, 1088, 1088] 0.182686 0.055583
17 [1152, 1152, 1152] 0.196736 0.057546
18 [1216, 1216, 1216] 0.266113 0.060088
19 [1280, 1280, 1280] 0.285112 0.064283
gelu forward:
['vec_sz'] torch trident
0 [32] 0.022305 0.024010
1 [64] 0.021480 0.021014
2 [96] 0.021314 0.021411
3 [128] 0.021445 0.020732
4 [160] 0.021137 0.020543
5 [192] 0.020323 0.021777
6 [224] 0.021238 0.020723
7 [256] 0.019334 0.019601
8 [288] 0.020640 0.021133
9 [320] 0.020125 0.020596
10 [352] 0.020577 0.022947
11 [384] 0.022291 0.022203
12 [416] 0.020669 0.021356
13 [448] 0.021701 0.020969
14 [480] 0.021262 0.021352
15 [512] 0.020019 0.020938
16 [544] 0.021020 0.021008
17 [576] 0.022246 0.021729
18 [608] 0.020899 0.021425
19 [640] 0.022112 0.021802
group norm forward:
['x_size'] torch trident
0 [256] 0.025725 0.023199
1 [512] 0.025670 0.023464
2 [768] 0.027073 0.023580
3 [1024] 0.028078 0.023427
4 [1280] 0.027882 0.023534
5 [1536] 0.027437 0.024913
6 [1792] 0.028506 0.025979
7 [2048] 0.030403 0.027012
8 [2304] 0.032977 0.028248
9 [2560] 0.035764 0.027539
10 [2816] 0.039341 0.028204
11 [3072] 0.042680 0.029558
12 [3328] 0.046159 0.032482
13 [3584] 0.049937 0.034912
14 [3840] 0.053295 0.039016
15 [4096] 0.056507 0.043251
16 [4352] 0.060207 0.047990
17 [4608] 0.062592 0.052024
18 [4864] 0.066442 0.056998
19 [5120] 0.069317 0.061311
instance norm forward:
['vec_sz'] torch trident
0 [256] 0.038573 0.035454
1 [512] 0.038105 0.036723
2 [768] 0.040120 0.036120
3 [1024] 0.042322 0.036739
4 [1280] 0.045153 0.038662
5 [1536] 0.048264 0.039444
6 [1792] 0.053254 0.040867
7 [2048] 0.057992 0.042211
8 [2304] 0.064147 0.034931
9 [2560] 0.068993 0.036207
10 [2816] 0.075807 0.038727
11 [3072] 0.080892 0.039948
12 [3328] 0.087498 0.042176
13 [3584] 0.091812 0.044240
14 [3840] 0.097806 0.046167
15 [4096] 0.100631 0.048703
16 [4352] 0.107499 0.058879
17 [4608] 0.109908 0.060808
18 [4864] 0.115795 0.062150
19 [5120] 0.119912 0.064156
layer norm forward:
['vec_sz'] torch trident
0 [256] 0.021118 0.024782
1 [512] 0.021461 0.021686
2 [768] 0.021175 0.020780
3 [1024] 0.021900 0.021802
4 [1280] 0.021330 0.021714
5 [1536] 0.021633 0.021711
6 [1792] 0.020744 0.020696
7 [2048] 0.021839 0.021539
8 [2304] 0.022324 0.021807
9 [2560] 0.021770 0.021088
10 [2816] 0.021014 0.021395
11 [3072] 0.022278 0.021676
12 [3328] 0.022456 0.021309
13 [3584] 0.021751 0.021331
14 [3840] 0.021567 0.021379
15 [4096] 0.022078 0.020263
16 [4352] 0.022002 0.021722
17 [4608] 0.021997 0.021869
18 [4864] 0.022282 0.021982
19 [5120] 0.022056 0.021488
leaky relu forward:
['vec_sz'] torch trident
0 [256] 0.021005 0.020980
1 [512] 0.022511 0.021971
2 [768] 0.021325 0.021440
3 [1024] 0.022451 0.021873
4 [1280] 0.021780 0.021909
5 [1536] 0.021164 0.021024
6 [1792] 0.022643 0.022342
7 [2048] 0.022411 0.022458
8 [2304] 0.021459 0.021601
9 [2560] 0.022024 0.022961
10 [2816] 0.022028 0.021987
11 [3072] 0.021372 0.020945
12 [3328] 0.021936 0.022828
13 [3584] 0.021697 0.021869
14 [3840] 0.021907 0.021844
15 [4096] 0.021795 0.022454
16 [4352] 0.021484 0.021635
17 [4608] 0.021981 0.021546
18 [4864] 0.021757 0.021791
19 [5120] 0.021142 0.021325
linear forward:
['m', 'k', 'n'] torch trident
0 [64, 64, 64] 0.023185 0.025027
1 [128, 128, 128] 0.023896 0.028511
2 [192, 192, 192] 0.023932 0.034961
3 [256, 256, 256] 0.024550 0.042637
4 [320, 320, 320] 0.025517 0.050404
5 [384, 384, 384] 0.027995 0.058362
6 [448, 448, 448] 0.027146 0.066534
7 [512, 512, 512] 0.038081 0.074077
8 [576, 576, 576] 0.041406 0.082127
9 [640, 640, 640] 0.050657 0.090077
10 [704, 704, 704] 0.069390 0.130439
11 [768, 768, 768] 0.084038 0.139850
12 [832, 832, 832] 0.100539 0.149570
13 [896, 896, 896] 0.107377 0.160891
14 [960, 960, 960] 0.143152 0.252760
15 [1024, 1024, 1024] 0.166092 0.271014
16 [1088, 1088, 1088] 0.178058 0.287770
17 [1152, 1152, 1152] 0.189092 0.304582
18 [1216, 1216, 1216] 0.258460 0.360783
19 [1280, 1280, 1280] 0.271525 0.379918
max forward:
['x_size'] torch trident
0 [256] 0.021197 0.021393
1 [512] 0.022050 0.020852
2 [768] 0.021268 0.021698
3 [1024] 0.021583 0.020962
4 [1280] 0.021846 0.021475
5 [1536] 0.022002 0.021123
6 [1792] 0.021992 0.021009
7 [2048] 0.024499 0.021199
8 [2304] 0.024140 0.021715
9 [2560] 0.026047 0.021768
10 [2816] 0.026565 0.020736
11 [3072] 0.026520 0.021667
12 [3328] 0.026602 0.020330
13 [3584] 0.026006 0.020809
14 [3840] 0.026896 0.021281
15 [4096] 0.028040 0.021051
16 [4352] 0.029165 0.021366
17 [4608] 0.030123 0.021743
18 [4864] 0.031170 0.021198
19 [5120] 0.032427 0.021708
max pool2d forward:
['knl_sz'] torch trident
0 [3] 0.024008 0.025759
1 [6] 0.023728 0.024272
2 [9] 0.025005 0.023951
3 [12] 0.027805 0.023877
4 [15] 0.049516 0.023808
5 [18] 0.052908 0.023477
6 [21] 0.086398 0.023982
7 [24] 0.073958 0.024179
8 [27] 0.144771 0.024248
9 [30] 0.138669 0.023742
10 [33] 0.215884 0.023377
11 [36] 0.160102 0.023983
12 [39] 0.301083 0.023471
13 [42] 0.253579 0.023420
14 [45] 0.402863 0.024328
15 [48] 0.293716 0.023880
16 [51] 0.517038 0.023673
17 [54] 0.427520 0.023766
18 [57] 0.632276 0.023664
19 [60] 0.423952 0.023468
mean forward:
['x_size'] torch trident
0 [256] 0.020902 0.021654
1 [512] 0.023592 0.021709
2 [768] 0.020238 0.020768
3 [1024] 0.020614 0.022218
4 [1280] 0.021060 0.020954
5 [1536] 0.021598 0.021121
6 [1792] 0.020809 0.020920
7 [2048] 0.021224 0.020428
8 [2304] 0.021563 0.020826
9 [2560] 0.022141 0.021087
10 [2816] 0.021764 0.021012
11 [3072] 0.021407 0.021504
12 [3328] 0.021320 0.020260
13 [3584] 0.021317 0.021127
14 [3840] 0.021336 0.020548
15 [4096] 0.021722 0.020964
16 [4352] 0.021804 0.021648
17 [4608] 0.020845 0.020209
18 [4864] 0.021264 0.020932
19 [5120] 0.021998 0.020492
prelu forward:
['x_size'] torch trident
0 [256] 0.020241 0.021688
1 [512] 0.021345 0.020901
2 [768] 0.021693 0.020927
3 [1024] 0.019818 0.021414
4 [1280] 0.021550 0.022033
5 [1536] 0.020729 0.021578
6 [1792] 0.021900 0.020802
7 [2048] 0.021480 0.020675
8 [2304] 0.020854 0.021683
9 [2560] 0.020792 0.020890
10 [2816] 0.021684 0.021991
11 [3072] 0.020977 0.021736
12 [3328] 0.021137 0.021433
13 [3584] 0.020437 0.021526
14 [3840] 0.022013 0.021667
15 [4096] 0.020915 0.021468
16 [4352] 0.020180 0.021417
17 [4608] 0.021918 0.020801
18 [4864] 0.021457 0.021540
19 [5120] 0.020618 0.021841
relu forward:
['vec_sz'] torch trident
0 [256] 0.021357 0.022970
1 [512] 0.022362 0.022252
2 [768] 0.022459 0.022476
3 [1024] 0.022130 0.022395
4 [1280] 0.021958 0.021651
5 [1536] 0.021866 0.022229
6 [1792] 0.022733 0.022071
7 [2048] 0.021260 0.021992
8 [2304] 0.021192 0.022202
9 [2560] 0.022313 0.022328
10 [2816] 0.022246 0.021974
11 [3072] 0.022024 0.022031
12 [3328] 0.021932 0.022192
13 [3584] 0.021622 0.021450
14 [3840] 0.022265 0.021693
15 [4096] 0.021439 0.021210
16 [4352] 0.021825 0.022100
17 [4608] 0.021213 0.023997
18 [4864] 0.021741 0.022328
19 [5120] 0.021096 0.021258
rms norm forward:
num_batches torch trident
0 10.0 0.551699 0.299372
1 20.0 1.074440 0.590074
2 30.0 1.599324 0.882735
3 40.0 2.126470 1.172853
4 50.0 2.655717 1.465834
5 60.0 3.184145 1.761751
6 70.0 3.713063 2.057398
7 80.0 4.243685 2.352075
8 90.0 4.769982 2.644195
9 100.0 5.306490 2.932819
10 110.0 5.833865 3.235161
11 120.0 6.363455 3.516276
12 130.0 6.927203 3.825462
13 140.0 7.456067 4.097469
14 150.0 7.987093 4.399677
15 160.0 8.515184 4.707554
16 170.0 9.045790 5.017117
17 180.0 9.574994 5.279696
18 190.0 10.201644 5.571455
19 200.0 10.736262 5.882734
silu forward:
['vec_sz'] torch trident
0 [256] 0.022070 0.022946
1 [512] 0.022447 0.022589
2 [768] 0.022707 0.022861
3 [1024] 0.022414 0.022491
4 [1280] 0.021912 0.021725
5 [1536] 0.022576 0.022236
6 [1792] 0.022307 0.022122
7 [2048] 0.022007 0.022185
8 [2304] 0.022339 0.022225
9 [2560] 0.021912 0.022194
10 [2816] 0.022476 0.022532
11 [3072] 0.022254 0.022373
12 [3328] 0.022488 0.021674
13 [3584] 0.021778 0.022514
14 [3840] 0.021805 0.021632
15 [4096] 0.022245 0.021788
16 [4352] 0.021603 0.022688
17 [4608] 0.022292 0.021977
18 [4864] 0.022381 0.022573
19 [5120] 0.022182 0.021311
softmax forward:
['vec_sz'] torch trident
0 [256] 0.022681 0.023073
1 [512] 0.022888 0.022484
2 [768] 0.021928 0.022804
3 [1024] 0.021966 0.022687
4 [1280] 0.022746 0.022945
5 [1536] 0.022762 0.022786
6 [1792] 0.022607 0.023716
7 [2048] 0.023711 0.022986
8 [2304] 0.022885 0.022920
9 [2560] 0.022680 0.023628
10 [2816] 0.022891 0.022775
11 [3072] 0.022816 0.022311
12 [3328] 0.022528 0.022757
13 [3584] 0.023191 0.022770
14 [3840] 0.023297 0.022166
15 [4096] 0.022537 0.022528
16 [4352] 0.022726 0.022756
17 [4608] 0.022360 0.022488
18 [4864] 0.022706 0.023114
19 [5120] 0.023349 0.023059
sum forward:
['x_size'] torch trident
0 [256] 0.021382 0.021381
1 [512] 0.022589 0.021707
2 [768] 0.022479 0.022966
3 [1024] 0.022760 0.022210
4 [1280] 0.021906 0.021297
5 [1536] 0.022299 0.022381
6 [1792] 0.022270 0.021776
7 [2048] 0.022716 0.021888
8 [2304] 0.021942 0.022108
9 [2560] 0.022631 0.021725
10 [2816] 0.023132 0.021489
11 [3072] 0.022700 0.021934
12 [3328] 0.021583 0.021084
13 [3584] 0.023142 0.022285
14 [3840] 0.022602 0.022164
15 [4096] 0.022776 0.022333
16 [4352] 0.023009 0.021862
17 [4608] 0.022772 0.022096
18 [4864] 0.022954 0.022066
19 [5120] 0.022858 0.021395
var forward:
['x_size'] torch trident
0 [256] 0.022989 0.023587
1 [512] 0.023642 0.023839
2 [768] 0.024049 0.023792
3 [1024] 0.023722 0.023551
4 [1280] 0.025110 0.023203
5 [1536] 0.026105 0.023395
6 [1792] 0.028618 0.023295
7 [2048] 0.028295 0.023508
8 [2304] 0.028742 0.023428
9 [2560] 0.028602 0.023168
10 [2816] 0.030211 0.023656
11 [3072] 0.032040 0.023471
12 [3328] 0.033635 0.023037
13 [3584] 0.035225 0.022689
14 [3840] 0.036975 0.023162
15 [4096] 0.038602 0.023622
16 [4352] 0.040470 0.023286
17 [4608] 0.042191 0.023399
18 [4864] 0.043794 0.023560
19 [5120] 0.045448 0.023358