Skip to content

Commit

Permalink
AMDGPU: Correct constants used in fast math log expansion
Browse files Browse the repository at this point in the history
The division between float constants was done with less
precision. Performing the divide in double and truncating to float
provides the same value as used in the library fast math expansion.
  • Loading branch information
arsenm committed Jun 13, 2023
1 parent 1f615b5 commit d0923a7
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 150 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1305,9 +1305,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::FLOG:
return LowerFLOG(Op, DAG, numbers::ln2f);
return LowerFLOG(Op, DAG, numbers::ln2);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f);
return LowerFLOG(Op, DAG, numbers::ln2 / numbers::ln10);
case ISD::FEXP:
return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1987,9 +1987,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_ATOMIC_CMPXCHG:
return legalizeAtomicCmpXChg(MI, MRI, B);
case TargetOpcode::G_FLOG:
return legalizeFlog(MI, B, numbers::ln2f);
return legalizeFlog(MI, B, numbers::ln2);
case TargetOpcode::G_FLOG10:
return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f);
return legalizeFlog(MI, B, numbers::ln2 / numbers::ln10);
case TargetOpcode::G_FEXP:
return legalizeFExp(MI, B);
case TargetOpcode::G_FPOW:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]]
; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
%0:_(s32) = COPY $vgpr0
Expand All @@ -31,7 +31,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FLOG2_]], [[C]]
; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
%0:_(s32) = COPY $vgpr0
Expand All @@ -51,7 +51,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]]
; CHECK-NEXT: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]]
Expand All @@ -74,7 +74,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]]
; CHECK-NEXT: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]]
Expand All @@ -100,7 +100,7 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]]
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
Expand Down Expand Up @@ -130,7 +130,7 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441360000000
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C1]]
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
Expand Down

0 comments on commit d0923a7

Please sign in to comment.