Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AMDGPU/GlobalISel: Implement expansion for rsq.clamp
It is unclear why this removed instruction is handled on newer subtargets for this intrinsic and no others, but this maintains compatibility with the SelectionDAG path.
- Loading branch information
Showing 4 changed files with 287 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63 changes: 63 additions & 0 deletions
63
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.rsq.clamp.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s | ||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s | ||
|
||
--- | ||
# Legalizer test for llvm.amdgcn.rsq.clamp on s32 with IEEE mode enabled | ||
# (mode.ieee is true below). Per the checks, the tahiti run leaves the | ||
# intrinsic untouched, while the fiji run rewrites it to llvm.amdgcn.rsq | ||
# followed by G_FMINNUM_IEEE / G_FMAXNUM_IEEE against +/-0x47EFFFFFE0000000 | ||
# (the largest finite f32 magnitude). The nnan ninf nsz flags on the input | ||
# instruction are expected on every instruction of the expansion. | ||
name: test_rsq_clamp_flags_ieee_on_f32 | ||
tracksRegLiveness: true | ||
machineFunctionInfo: | ||
mode: | ||
ieee: true | ||
|
||
body: | | ||
bb.0: | ||
liveins: $vgpr0 | ||
; SI-LABEL: name: test_rsq_clamp_flags_ieee_on_f32 | ||
; SI: liveins: $vgpr0 | ||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 | ||
; SI: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[COPY]](s32) | ||
; SI: $vgpr0 = COPY [[INT]](s32) | ||
; VI-LABEL: name: test_rsq_clamp_flags_ieee_on_f32 | ||
; VI: liveins: $vgpr0 | ||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 | ||
; VI: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) | ||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x47EFFFFFE0000000 | ||
; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMINNUM_IEEE [[INT]], [[C]] | ||
; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC7EFFFFFE0000000 | ||
; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[C1]] | ||
; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) | ||
%0:_(s32) = COPY $vgpr0 | ||
%1:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 | ||
$vgpr0 = COPY %1 | ||
... | ||
|
||
--- | ||
# Same input as the ieee_on test, but with mode.ieee set to false. The | ||
# tahiti checks again keep the rsq.clamp intrinsic; the fiji checks expect | ||
# the expansion to use the non-IEEE G_FMINNUM / G_FMAXNUM opcodes (instead | ||
# of the *_IEEE forms) with the same +/-0x47EFFFFFE0000000 bounds and the | ||
# same nnan ninf nsz flags. | ||
name: test_rsq_clamp_flags_ieee_off_f32 | ||
tracksRegLiveness: true | ||
machineFunctionInfo: | ||
mode: | ||
ieee: false | ||
|
||
body: | | ||
bb.0: | ||
liveins: $vgpr0 | ||
; SI-LABEL: name: test_rsq_clamp_flags_ieee_off_f32 | ||
; SI: liveins: $vgpr0 | ||
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 | ||
; SI: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), [[COPY]](s32) | ||
; SI: $vgpr0 = COPY [[INT]](s32) | ||
; VI-LABEL: name: test_rsq_clamp_flags_ieee_off_f32 | ||
; VI: liveins: $vgpr0 | ||
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 | ||
; VI: [[INT:%[0-9]+]]:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), [[COPY]](s32) | ||
; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x47EFFFFFE0000000 | ||
; VI: [[FMINNUM:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMINNUM [[INT]], [[C]] | ||
; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC7EFFFFFE0000000 | ||
; VI: [[FMAXNUM:%[0-9]+]]:_(s32) = nnan ninf nsz G_FMAXNUM [[FMINNUM]], [[C1]] | ||
; VI: $vgpr0 = COPY [[FMAXNUM]](s32) | ||
%0:_(s32) = COPY $vgpr0 | ||
%1:_(s32) = nnan ninf nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 | ||
$vgpr0 = COPY %1 | ||
... |
170 changes: 170 additions & 0 deletions
170
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | ||
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s | ||
|
||
; f32 rsq.clamp of a function argument. The tahiti checks expect a single | ||
; v_rsq_clamp_f32; the tonga checks expect v_rsq_f32 followed by v_min_f32 | ||
; with 0x7f7fffff and v_max_f32 with 0xff7fffff (the bit patterns of the | ||
; largest-magnitude finite f32 values). | ||
define float @v_rsq_clamp_f32(float %src) #0 { | ||
; SI-LABEL: v_rsq_clamp_f32: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_f32: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f32_e32 v0, v0 | ||
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 | ||
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) | ||
ret float %rsq_clamp | ||
} | ||
|
||
; f32 rsq.clamp applied to llvm.fabs of the argument. In both check sets | ||
; the fabs is expected to appear as a |v0| source operand on the e64 form | ||
; of the rsq instruction rather than as a separate instruction; the tonga | ||
; checks additionally expect the min/max clamp sequence. | ||
define float @v_rsq_clamp_fabs_f32(float %src) #0 { | ||
; SI-LABEL: v_rsq_clamp_fabs_f32: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0| | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_fabs_f32: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f32_e64 v0, |v0| | ||
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 | ||
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%fabs.src = call float @llvm.fabs.f32(float %src) | ||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src) | ||
ret float %rsq_clamp | ||
} | ||
|
||
; f64 rsq.clamp of a function argument. The tahiti checks expect a single | ||
; v_rsq_clamp_f64. The tonga checks expect v_rsq_f64, then the 64-bit bounds | ||
; built in s[4:5] (s4 = -1 shared by both bounds; s5 = 0x7fefffff for the | ||
; v_min_f64, then s5 = 0xffefffff for the v_max_f64). | ||
define double @v_rsq_clamp_f64(double %src) #0 { | ||
; SI-LABEL: v_rsq_clamp_f64: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_f64: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] | ||
; VI-NEXT: s_mov_b32 s4, -1 | ||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff | ||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_mov_b32 s5, 0xffefffff | ||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) | ||
ret double %rsq_clamp | ||
} | ||
|
||
; f64 rsq.clamp applied to llvm.fabs of the argument. Both check sets | ||
; expect the fabs as a |v[0:1]| source operand on the e64 form of the rsq | ||
; instruction; the tonga checks then expect the v_min_f64 / v_max_f64 | ||
; clamp with bounds materialized in s[4:5]. | ||
define double @v_rsq_clamp_fabs_f64(double %src) #0 { | ||
; SI-LABEL: v_rsq_clamp_fabs_f64: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]| | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_fabs_f64: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]| | ||
; VI-NEXT: s_mov_b32 s4, -1 | ||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff | ||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_mov_b32 s5, 0xffefffff | ||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%fabs.src = call double @llvm.fabs.f64(double %src) | ||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src) | ||
ret double %rsq_clamp | ||
} | ||
|
||
; f32 rsq.clamp of an undef operand. The checks show the rsq instruction | ||
; reading s4, a register that no instruction in the function writes; the | ||
; tonga checks still expect the full min/max clamp sequence afterwards. | ||
define float @v_rsq_clamp_undef_f32() #0 { | ||
; SI-LABEL: v_rsq_clamp_undef_f32: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4 | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_undef_f32: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f32_e32 v0, s4 | ||
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 | ||
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef) | ||
ret float %rsq_clamp | ||
} | ||
|
||
; f64 rsq.clamp of an undef operand. The checks show the rsq instruction | ||
; reading s[4:5] before anything in the function has written it; in the | ||
; tonga checks the same register pair is then overwritten with the clamp | ||
; bounds for v_min_f64 / v_max_f64. | ||
define double @v_rsq_clamp_undef_f64() #0 { | ||
; SI-LABEL: v_rsq_clamp_undef_f64: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5] | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_undef_f64: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5] | ||
; VI-NEXT: s_mov_b32 s4, -1 | ||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff | ||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_mov_b32 s5, 0xffefffff | ||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef) | ||
ret double %rsq_clamp | ||
} | ||
|
||
; f32 rsq.clamp in a function using attribute group #2 (the group that sets | ||
; "amdgpu-ieee"="false"). The expected instructions in both check sets are | ||
; the same as for v_rsq_clamp_f32. | ||
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 { | ||
; SI-LABEL: v_rsq_clamp_f32_non_ieee: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0 | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_f32_non_ieee: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f32_e32 v0, v0 | ||
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0 | ||
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0 | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src) | ||
ret float %rsq_clamp | ||
} | ||
|
||
; f64 rsq.clamp in a function using attribute group #2 (the group that sets | ||
; "amdgpu-ieee"="false"). The expected instructions in both check sets are | ||
; the same as for v_rsq_clamp_f64. | ||
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 { | ||
; SI-LABEL: v_rsq_clamp_f64_non_ieee: | ||
; SI: ; %bb.0: | ||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1] | ||
; SI-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; VI-LABEL: v_rsq_clamp_f64_non_ieee: | ||
; VI: ; %bb.0: | ||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1] | ||
; VI-NEXT: s_mov_b32 s4, -1 | ||
; VI-NEXT: s_mov_b32 s5, 0x7fefffff | ||
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_mov_b32 s5, 0xffefffff | ||
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] | ||
; VI-NEXT: s_setpc_b64 s[30:31] | ||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src) | ||
ret double %rsq_clamp | ||
} | ||
|
||
declare float @llvm.fabs.f32(float) #1 | ||
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1 | ||
declare double @llvm.fabs.f64(double) #1 | ||
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1 | ||
|
||
; Attribute groups: #0 is used by the default test functions, #1 by the | ||
; intrinsic declarations, and #2 by the *_non_ieee functions, which run | ||
; with "amdgpu-ieee" set to "false". | ||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readnone } | ||
attributes #2 = { nounwind "amdgpu-ieee"="false" } |