Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (checkFPAtomicToDenormModeHazard(MI) > 0)
return HazardType;

// Hazards which cannot be mitigated with S_NOPs.
if (!IsHazardRecognizerMode) {
if (checkWMMACoexecutionHazards(MI) > 0)
return Hazard;
}

if (ST.hasNoDataDepHazard())
return NoHazard;

Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/misched-into-wmma-hazard-shadow.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -march=amdgcn -mcpu=gfx1250 -run-pass=postmisched,post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s

# Bring all independent V_LSHL_ADD_U32_e64 instructions into the shadow
# of the WMMA so then hazard recognizer only need to insert 4 V_NOP_e32
# instructions instead of 8.

---
name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
; GCN-LABEL: name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
; GCN-NEXT: {{ $}}
; GCN-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr46 = V_LSHL_ADD_U32_e64 killed $vgpr43, 1, $vgpr43, implicit $exec
; GCN-NEXT: $vgpr47 = V_LSHL_ADD_U32_e64 killed $vgpr42, 1, $vgpr42, implicit $exec
; GCN-NEXT: $vgpr48 = V_LSHL_ADD_U32_e64 killed $vgpr41, 1, $vgpr41, implicit $exec
; GCN-NEXT: $vgpr49 = V_LSHL_ADD_U32_e64 killed $vgpr40, 1, $vgpr40, implicit $exec
; GCN-NEXT: V_NOP_e32 implicit $exec
; GCN-NEXT: V_NOP_e32 implicit $exec
; GCN-NEXT: V_NOP_e32 implicit $exec
; GCN-NEXT: V_NOP_e32 implicit $exec
; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 1065353216, killed $vgpr4, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 1065353216, killed $vgpr5, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr6 = V_ADD_F32_e32 1065353216, killed $vgpr6, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 1065353216, killed $vgpr7, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 1065353216, killed $vgpr8, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr9 = V_ADD_F32_e32 1065353216, killed $vgpr9, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr10 = V_ADD_F32_e32 1065353216, killed $vgpr10, implicit $mode, implicit $exec
; GCN-NEXT: $vgpr11 = V_ADD_F32_e32 1065353216, killed $vgpr11, implicit $mode, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORDX4 killed renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr4 = V_ADD_F32_e32 1065353216, $vgpr4, implicit $mode, implicit $exec
$vgpr5 = V_ADD_F32_e32 1065353216, $vgpr5, implicit $mode, implicit $exec
$vgpr6 = V_ADD_F32_e32 1065353216, $vgpr6, implicit $mode, implicit $exec
$vgpr7 = V_ADD_F32_e32 1065353216, $vgpr7, implicit $mode, implicit $exec
$vgpr8 = V_ADD_F32_e32 1065353216, $vgpr8, implicit $mode, implicit $exec
$vgpr9 = V_ADD_F32_e32 1065353216, $vgpr9, implicit $mode, implicit $exec
$vgpr10 = V_ADD_F32_e32 1065353216, $vgpr10, implicit $mode, implicit $exec
$vgpr11 = V_ADD_F32_e32 1065353216, $vgpr11, implicit $mode, implicit $exec
$vgpr46 = V_LSHL_ADD_U32_e64 $vgpr43, 1, $vgpr43, implicit $exec
$vgpr47 = V_LSHL_ADD_U32_e64 $vgpr42, 1, $vgpr42, implicit $exec
$vgpr48 = V_LSHL_ADD_U32_e64 $vgpr41, 1, $vgpr41, implicit $exec
$vgpr49 = V_LSHL_ADD_U32_e64 $vgpr40, 1, $vgpr40, implicit $exec
GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec
GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec
S_ENDPGM 0
...