From d599c7a68f20259327573fee7b11192c8a435ef7 Mon Sep 17 00:00:00 2001 From: Adel Ejjeh Date: Thu, 20 Nov 2025 11:21:12 -0600 Subject: [PATCH 1/5] [AMDGPU] Update log lowering to remove contract for AMDGCN backend --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 + llvm/test/CodeGen/AMDGPU/llvm.log.ll | 331 +++++++++++++++++- llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 331 +++++++++++++++++- 4 files changed, 660 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index db890df7c50f9..ffafe06211109 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2766,7 +2766,9 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, EVT VT = Op.getValueType(); SDNodeFlags Flags = Op->getFlags(); SDLoc DL(Op); - + // Our implementation of LOG is not contract safe, so disable instruction + // contraction. + Flags.setAllowContract(false); const bool IsLog10 = Op.getOpcode() == ISD::FLOG10; assert(IsLog10 || Op.getOpcode() == ISD::FLOG); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1a13b2226ecd6..5c8b720c54761 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3508,6 +3508,9 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, MachineRegisterInfo &MRI = *B.getMRI(); Register Dst = MI.getOperand(0).getReg(); Register X = MI.getOperand(1).getReg(); + // Our implementation of LOG is not contract safe, so disable contraction in + // the flags before reading the field. + MI.clearFlags(MachineInstr::FmContract); unsigned Flags = MI.getFlags(); const LLT Ty = MRI.getType(X); MachineFunction &MF = B.getMF(); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index fc6b2d95b2af8..88f202adba07e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -316,6 +316,309 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ret void } +define amdgpu_kernel void @s_log_contract_f32(ptr addrspace(1) %out, float %in) { +; SI-SDAG-LABEL: s_log_contract_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_log_contract_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s0, 
s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v4, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: s_log_contract_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; VI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s0, 32, 0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v0 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_log_contract_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_log_contract_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s6, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3377d1cf +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s3, v4 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_log_contract_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-GISEL-NEXT: s_endpgm +; +; GFX1100-SDAG-LABEL: s_log_contract_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x41b17218, s1 +; GFX1100-SDAG-NEXT: s_and_b32 s1, s1, exec_lo +; GFX1100-SDAG-NEXT: s_cselect_b32 s1, 32, 0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s0, s1 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 
0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: v_fma_f32 v3, 0x3f317217, v1, -v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v3, v1, 0x3377d1cf, v3 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log_contract_f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_log_contract_f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; R600-NEXT: LOG_IEEE * T0.X, PV.W, +; R600-NEXT: AND_INT * T1.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T2.W, T0.X, -PV.W, +; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; R600-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; R600-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|, +; R600-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: 
1102148120(2.218071e+01), 0(0.000000e+00) +; R600-NEXT: ADD T0.X, PV.W, -PS, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: s_log_contract_f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; CM-NEXT: LOG_IEEE T0.X, T1.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: AND_INT * T1.W, PV.X, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T2.W, T0.X, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; CM-NEXT: 939916788(3.194618e-05), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; CM-NEXT: 1060204544(6.931152e-01), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W, +; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|, +; CM-NEXT: 1060204544(6.931152e-01), 2139095040(INF) +; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1102148120(2.218071e+01), 0(0.000000e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call contract float @llvm.log.f32(float %in) + store float %result, ptr addrspace(1) %out + ret void +} + ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. 
define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { @@ -6439,6 +6742,8 @@ define half @v_log_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f16_fast: @@ -7100,6 +7405,10 @@ define <2 x half> @v_log_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v2f16_fast: @@ -7365,6 +7674,12 @@ define <3 x half> @v_log_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v3f16_fast: @@ -7691,20 +8006,28 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v4f16_fast: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index a141bceb3ce86..6dbb5823a2b0f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -316,6 +316,309 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ret void } +define amdgpu_kernel void @s_log10_contract_f32(ptr addrspace(1) %out, float %in) { +; SI-SDAG-LABEL: s_log10_contract_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: 
s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_log10_contract_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v4, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: s_log10_contract_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; VI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s0, 32, 0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v0 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_log10_contract_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword 
s0, s[4:5], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_log10_contract_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s6, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3284fbcf +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s3, v4 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_log10_contract_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v2, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-GISEL-NEXT: s_endpgm +; +; GFX1100-SDAG-LABEL: s_log10_contract_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x411a209b, s1 +; GFX1100-SDAG-NEXT: s_and_b32 s1, s1, exec_lo +; GFX1100-SDAG-NEXT: s_cselect_b32 s1, 32, 0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s0, s1 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: v_fma_f32 v3, 0x3e9a209a, v1, -v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v3, v1, 0x3284fbcf, v3 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_contract_f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_log10_contract_f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; 
R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; R600-NEXT: LOG_IEEE * T0.X, PV.W, +; R600-NEXT: AND_INT * T1.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T2.W, T0.X, -PV.W, +; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T0.X, PV.W, -PS, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: s_log10_contract_f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; CM-NEXT: LOG_IEEE T0.X, T1.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: AND_INT * T1.W, PV.X, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T2.W, T0.X, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; CM-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W, +; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|, +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call contract float @llvm.log10.f32(float %in) + store float %result, ptr addrspace(1) %out + ret void +} + ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. 
define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) { @@ -6439,6 +6742,8 @@ define half @v_log10_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f16_fast: @@ -7100,6 +7405,10 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v2f16_fast: @@ -7365,6 +7674,12 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v3f16_fast: @@ -7691,20 +8006,28 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v4f16_fast: From 04c43c9998f4267009a9383ec5a6aca93da72f0a Mon Sep 17 00:00:00 2001 From: Adel Ejjeh Date: Thu, 20 Nov 2025 12:57:52 -0600 Subject: [PATCH 2/5] Move flag modification placement --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ffafe06211109..3705b8e12c6b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2766,9 +2766,6 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, EVT VT = Op.getValueType(); SDNodeFlags Flags = Op->getFlags(); SDLoc DL(Op); - // 
Our implementation of LOG is not contract safe, so disable instruction - // contraction. - Flags.setAllowContract(false); const bool IsLog10 = Op.getOpcode() == ISD::FLOG10; assert(IsLog10 || Op.getOpcode() == ISD::FLOG); @@ -2807,7 +2804,9 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); - + // Our implementation of LOG is not contract safe, so disable instruction + // contraction. + Flags.setAllowContract(false); R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags); SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags); @@ -2830,7 +2829,9 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst); SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); - + // Our implementation of LOG is not contract safe, so disable instruction + // contraction. + Flags.setAllowContract(false); SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags); SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags); From e61c7b87499d04e6c79143bdf9e9dbaf940be855 Mon Sep 17 00:00:00 2001 From: Adel Ejjeh Date: Thu, 20 Nov 2025 13:51:52 -0600 Subject: [PATCH 3/5] Update comment, apply same fix to global isel, and fix lit tests --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 14 +++++--- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 34 ++++++++++++------- llvm/test/CodeGen/AMDGPU/llvm.log.ll | 28 +++------------ llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 28 +++------------ 4 files changed, 39 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3705b8e12c6b7..2288d3196d2de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2804,8 +2804,11 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); - // Our implementation of LOG is not contract safe, so disable instruction - // contraction. + // Our implementation of LOG is not contract safe because we generate + // error-correcting summations based on the rounding error of the first + // multiplication below, so contracting the multiply with the final add will + // lead to inaccurate final results. Disable contraction for the expanded + // instructions. Flags.setAllowContract(false); R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags); @@ -2829,8 +2832,11 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst); SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); - // Our implementation of LOG is not contract safe, so disable instruction - // contraction. + // Our implementation of LOG is not contract safe because we generate + // error-correcting summations based on the rounding error of the first + // multiplication below, so contracting the multiply with the final add will + // lead to inaccurate final results. Disable contraction for the expanded + // instructions. 
Flags.setAllowContract(false); SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5c8b720c54761..77f0e6448458e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3508,9 +3508,6 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, MachineRegisterInfo &MRI = *B.getMRI(); Register Dst = MI.getOperand(0).getReg(); Register X = MI.getOperand(1).getReg(); - // Our implementation of LOG is not contract safe, so disable contraction in - // the flags before reading the field. - MI.clearFlags(MachineInstr::FmContract); unsigned Flags = MI.getFlags(); const LLT Ty = MRI.getType(X); MachineFunction &MF = B.getMF(); @@ -3554,12 +3551,17 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log); auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log); - - R = B.buildFMul(Ty, Y, C, Flags).getReg(0); - auto NegR = B.buildFNeg(Ty, R, Flags); - auto FMA0 = B.buildFMA(Ty, Y, C, NegR, Flags); - auto FMA1 = B.buildFMA(Ty, Y, CC, FMA0, Flags); - R = B.buildFAdd(Ty, R, FMA1, Flags).getReg(0); + // Our implementation of LOG is not contract safe because we generate + // error-correcting summations based on the rounding error of the first + // multiplication below, so contracting the multiply with the final add will + // lead to inaccurate final results. Disable contraction for the expanded + // instructions. + auto NewFlags = Flags & ~(MachineInstr::FmContract); + R = B.buildFMul(Ty, Y, C, NewFlags).getReg(0); + auto NegR = B.buildFNeg(Ty, R, NewFlags); + auto FMA0 = B.buildFMA(Ty, Y, C, NegR, NewFlags); + auto FMA1 = B.buildFMA(Ty, Y, CC, FMA0, NewFlags); + R = B.buildFAdd(Ty, R, FMA1, NewFlags).getReg(0); } else { // ch+ct is ln(2)/ln(10) to more than 36 bits const float ch_log10 = 0x1.344000p-2f; @@ -3575,12 +3577,18 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto MaskConst = B.buildConstant(Ty, 0xfffff000); auto YH = B.buildAnd(Ty, Y, MaskConst); auto YT = B.buildFSub(Ty, Y, YH, Flags); - auto YTCT = B.buildFMul(Ty, YT, CT, Flags); + // Our implementation of LOG is not contract safe because we generate + // error-correcting summations based on the rounding error of the first + // multiplication below, so contracting the multiply with the final add will + // lead to inaccurate final results. Disable contraction for the expanded + // instructions. 
+ auto NewFlags = Flags & ~(MachineInstr::FmContract); + auto YTCT = B.buildFMul(Ty, YT, CT, NewFlags); Register Mad0 = - getMad(B, Ty, YH.getReg(0), CT.getReg(0), YTCT.getReg(0), Flags); - Register Mad1 = getMad(B, Ty, YT.getReg(0), CH.getReg(0), Mad0, Flags); - R = getMad(B, Ty, YH.getReg(0), CH.getReg(0), Mad1, Flags); + getMad(B, Ty, YH.getReg(0), CT.getReg(0), YTCT.getReg(0), NewFlags); + Register Mad1 = getMad(B, Ty, YT.getReg(0), CH.getReg(0), Mad0, NewFlags); + R = getMad(B, Ty, YH.getReg(0), CH.getReg(0), Mad1, NewFlags); } const bool IsFiniteOnly = diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 88f202adba07e..33c8829a6d42c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -6742,8 +6742,6 @@ define half @v_log_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_f16_fast: @@ -7405,10 +7403,6 @@ define <2 x half> @v_log_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v2f16_fast: @@ -7674,12 +7668,6 @@ define <3 x half> @v_log_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v3f16_fast: @@ -8006,28 +7994,20 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317218, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317218, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317218, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log_v4f16_fast: diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 6dbb5823a2b0f..16a784f534f8c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -6742,8 +6742,6 @@ define half @v_log10_f16_fast(half %in) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_f16_fast: @@ -7405,10 +7403,6 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v2f16_fast: @@ -7674,12 +7668,6 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v3f16_fast: @@ -8006,28 +7994,20 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209b, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209b, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log10_v4f16_fast: From 19c16487fba7f2f58e028fb773d0160933c5ff1d Mon Sep 17 00:00:00 2001 From: Adel Ejjeh Date: Thu, 20 Nov 2025 17:22:11 -0600 Subject: [PATCH 4/5] Simplify comments --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 ++++------ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 ++++++-------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2288d3196d2de..b87c8fc53915a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2805,9 +2805,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); // Our implementation of LOG is not contract safe because we generate - // error-correcting summations based on the rounding error of the first - // multiplication below, so contracting the multiply with the final add will - // lead to inaccurate final results. Disable contraction for the expanded + // error-correcting summations for which contraction may lead to an increase + // in the error of the approximation. Disable contraction for the expanded // instructions. Flags.setAllowContract(false); R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); @@ -2833,9 +2832,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); // Our implementation of LOG is not contract safe because we generate - // error-correcting summations based on the rounding error of the first - // multiplication below, so contracting the multiply with the final add will - // lead to inaccurate final results. Disable contraction for the expanded + // error-correcting summations for which contraction may lead to an increase + // in the error of the approximation. Disable contraction for the expanded // instructions. Flags.setAllowContract(false); SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 77f0e6448458e..3fbc77c65b876 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3552,10 +3552,9 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log); auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log); // Our implementation of LOG is not contract safe because we generate - // error-correcting summations based on the rounding error of the first - // multiplication below, so contracting the multiply with the final add will - // lead to inaccurate final results. Disable contraction for the expanded - // instructions. + // error-correcting summations for which contraction may lead to an + // increase in the error of the approximation. Disable contraction for the + // expanded instructions. auto NewFlags = Flags & ~(MachineInstr::FmContract); R = B.buildFMul(Ty, Y, C, NewFlags).getReg(0); auto NegR = B.buildFNeg(Ty, R, NewFlags); @@ -3578,10 +3577,9 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto YH = B.buildAnd(Ty, Y, MaskConst); auto YT = B.buildFSub(Ty, Y, YH, Flags); // Our implementation of LOG is not contract safe because we generate - // error-correcting summations based on the rounding error of the first - // multiplication below, so contracting the multiply with the final add will - // lead to inaccurate final results. Disable contraction for the expanded - // instructions. + // error-correcting summations for which contraction may lead to an + // increase in the error of the approximation. Disable contraction for the + // expanded instructions. 
auto NewFlags = Flags & ~(MachineInstr::FmContract); auto YTCT = B.buildFMul(Ty, YT, CT, NewFlags); From 1b6c2589f66bc1a49a9c6fc198522fc3b7be7257 Mon Sep 17 00:00:00 2001 From: Adel Ejjeh Date: Fri, 21 Nov 2025 08:51:11 -0600 Subject: [PATCH 5/5] Update comments --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 14 ++++++-------- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b87c8fc53915a..f1f6ff619cc71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2804,10 +2804,9 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); - // Our implementation of LOG is not contract safe because we generate - // error-correcting summations for which contraction may lead to an increase - // in the error of the approximation. Disable contraction for the expanded - // instructions. + // Our implementation of LOG is not contract safe because we add correction + // terms for which contraction may lead to an increase in the error of the + // approximation. Disable contraction for the expanded instructions. Flags.setAllowContract(false); R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags); @@ -2831,10 +2830,9 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst); SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); - // Our implementation of LOG is not contract safe because we generate - // error-correcting summations for which contraction may lead to an increase - // in the error of the approximation. Disable contraction for the expanded - // instructions. + // Our implementation of LOG is not contract safe because we add correction + // terms for which contraction may lead to an increase in the error of the + // approximation. Disable contraction for the expanded instructions. Flags.setAllowContract(false); SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3fbc77c65b876..7443ba05961f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3551,10 +3551,9 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log); auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log); - // Our implementation of LOG is not contract safe because we generate - // error-correcting summations for which contraction may lead to an - // increase in the error of the approximation. Disable contraction for the - // expanded instructions. + // Our implementation of LOG is not contract safe because we add correction + // terms for which contraction may lead to an increase in the error of the + // approximation. Disable contraction for the expanded instructions. 
auto NewFlags = Flags & ~(MachineInstr::FmContract); R = B.buildFMul(Ty, Y, C, NewFlags).getReg(0); auto NegR = B.buildFNeg(Ty, R, NewFlags); @@ -3576,10 +3575,9 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, auto MaskConst = B.buildConstant(Ty, 0xfffff000); auto YH = B.buildAnd(Ty, Y, MaskConst); auto YT = B.buildFSub(Ty, Y, YH, Flags); - // Our implementation of LOG is not contract safe because we generate - // error-correcting summations for which contraction may lead to an - // increase in the error of the approximation. Disable contraction for the - // expanded instructions. + // Our implementation of LOG is not contract safe because we add correction + // terms for which contraction may lead to an increase in the error of the + // approximation. Disable contraction for the expanded instructions. auto NewFlags = Flags & ~(MachineInstr::FmContract); auto YTCT = B.buildFMul(Ty, YT, CT, NewFlags);
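
The rationale in the patch comments can be seen in a small standalone example. The sketch below is not part of the patch series; it mirrors the compensated multiply that LowerFLOGCommon and legalizeFlogCommon expand to, using the f32 constants that appear in the llvm.log SDAG checks (0x3f317217 and 0x3377d1cf for ln(2)); the helper names are illustrative. Build it with -ffp-contract=off so the compiler does not contract the final add on its own: once that add is fused, the exact product Y*C re-enters the sum and the recovered rounding error is no longer cancelled.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret an IEEE-754 bit pattern as a float, so the constants below
// match the literals in the test checks exactly.
static float from_bits(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

// Compensated multiply used by the log expansion: approximate Y * (C + CC),
// where C carries the high bits of the conversion constant and CC the low
// bits. Err recovers the exact rounding error of the leading product.
static float logScaleCompensated(float Y, float C, float CC) {
  float R = Y * C;                    // rounded leading product
  float Err = std::fmaf(Y, C, -R);    // exact rounding error of R
  float Corr = std::fmaf(Y, CC, Err); // low-order term plus that error
  return R + Corr;                    // the add that must not be contracted
}

// What a contract-enabled combiner may turn the final add into: the exact
// product Y*C re-enters the sum, so Err no longer cancels the rounding
// error of R and the result can move by up to that error.
static float logScaleContracted(float Y, float C, float CC) {
  float R = Y * C;
  float Err = std::fmaf(Y, C, -R);
  float Corr = std::fmaf(Y, CC, Err);
  return std::fmaf(Y, C, Corr);
}

int main() {
  const float C = from_bits(0x3f317217u);  // high part of ln(2), as in the f32 checks
  const float CC = from_bits(0x3377d1cfu); // low-order correction term
  // Report inputs where contraction changes the last bit of the result.
  for (float Y = 1.0f; Y < 2.0f; Y += 0x1.0p-6f) {
    float A = logScaleCompensated(Y, C, CC);
    float B = logScaleContracted(Y, C, CC);
    if (A != B)
      std::printf("Y=%a: %a vs %a\n", Y, A, B);
  }
  return 0;
}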