Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
case UniV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
case UniV2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
case UniB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
case UniB64:
Expand Down Expand Up @@ -160,6 +162,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
case DivV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
case DivV2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
case DivB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
case DivB64:
Expand Down Expand Up @@ -939,7 +943,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,

bool hasSALUFloat = ST->hasSALUFloatInsts();

addRulesForGOpcs({G_FADD, G_FMUL}, Standard)
addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
Expand Down
220 changes: 51 additions & 169 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s

define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
; GFX9-LABEL: v_constained_fadd_f32_fpexcept_strict:
Expand Down
47 changes: 35 additions & 12 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-GISEL %s

define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
Expand Down Expand Up @@ -96,12 +96,38 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s5
; GCN-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
; GCN-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GCN-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GCN-GISEL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: ; return to shader part epilog
; GFX10-SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX10-SDAG-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT: ; return to shader part epilog
;
; GFX11-SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX11-SDAG-NEXT: ; return to shader part epilog
;
; GFX11-GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
Expand All @@ -113,6 +139,3 @@ declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3

attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
48 changes: 32 additions & 16 deletions llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s

; FIXME: promotion not handled without f16 insts

Expand Down Expand Up @@ -627,25 +627,41 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
;
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0
; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s2
; GFX8-GISEL-NEXT: s_lshl_b32 s0, s0, 16
; GFX8-GISEL-NEXT: s_or_b32 s0, s1, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_pk_mul_f16 v0, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_mul_f16 v0, s2, s3
; GFX12-NEXT: ; return to shader part epilog
; GFX12-SDAG-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, s2, s3
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_lshr_b32 s0, s2, 16
; GFX12-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX12-GISEL-NEXT: s_mul_f16 s2, s2, s3
; GFX12-GISEL-NEXT: s_mul_f16 s0, s0, s1
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_pack_ll_b32_b16 s0, s2, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x half> %val
}
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s

define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
Expand Down Expand Up @@ -339,6 +339,3 @@ declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)

attributes #0 = { strictfp }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
72 changes: 50 additions & 22 deletions llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s

define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fmul_f64_fpexcept_strict:
Expand Down Expand Up @@ -178,22 +178,50 @@ define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3
}

define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
; GCN-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s5
; GCN-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
; GCN-SDAG-NEXT: ; return to shader part epilog
;
; GCN-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s5
; GCN-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
; GCN-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GCN-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GCN-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX10-SDAG-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX10-GISEL-NEXT: ; return to shader part epilog
;
; GFX11-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX11-SDAG-NEXT: ; return to shader part epilog
;
; GFX11-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
Expand Down