
[LLVM] Make use of s_flbit_i32_b64 and s_ff1_i32_b64 #75158

Merged: 2 commits into llvm:main from s_flbit_i32_b64 on Dec 25, 2023

Conversation

Acim-Maravic (Contributor)

Update DAG ISel to support the 64-bit versions S_FF1_I32_B64 and S_FLBIT_I32_B64.
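As background (not part of the PR description itself): the pattern this lowering targets is a ctlz/cttz intrinsic applied to a uniform 64-bit value. A minimal IR sketch with illustrative names:

    define amdgpu_kernel void @uniform_ctlz_i64(ptr addrspace(1) %out, i64 %val) {
      %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
      store i64 %ctlz, ptr addrspace(1) %out
      ret void
    }
    declare i64 @llvm.ctlz.i64(i64, i1)

With this change the uniform i64 case can select a single s_flbit_i32_b64 (or s_ff1_i32_b64 for cttz) instead of splitting the operand into two 32-bit halves.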

llvmbot (Collaborator) commented on Dec 12, 2023

@llvm/pr-subscribers-backend-amdgpu

Author: Acim Maravic (Acim-Maravic)

Changes

Update DAG ISel to support the 64-bit versions S_FF1_I32_B64 and S_FLBIT_I32_B64.


Patch is 176.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75158.diff

21 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+11)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+65)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll (+21-60)
  • (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll (+26-52)
  • (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll (+75-216)
  • (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll (+14-40)
  • (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll (+14-40)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+30-46)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+787-16)
  • (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (+21-31)
  • (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+24-27)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+30-75)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+12-39)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+12-39)
  • (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+30-75)
  • (modified) llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll (+4-16)
  • (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+5-20)
  • (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+35-56)
  • (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+28-46)
  • (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+32-44)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fcbdf51b03c1fc..7c0947945c9ad0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3067,6 +3067,17 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
     return NewOpr;
   }
 
+  //  64-bit scalar ctlz/cttz should use S_FLBIT_I32_B64/S_FF1_I32_B64
+  //  64-bit scalar version produce 32-bit result
+  if (!(Src->isDivergent()) && Src.getValueType() == MVT::i64) {
+    SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
+    if (!ZeroUndef) {
+      const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
+      NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
+    }
+    return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
+  }
+
   SDValue Lo, Hi;
   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a06fa88b6b102..88a95ba9b00107 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6849,6 +6849,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
     Inst.eraseFromParent();
     return;
 
+  case AMDGPU::S_FLBIT_I32_B64:
+    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::S_FLBIT_I32_B32);
+    Inst.eraseFromParent();
+    return;
+  case AMDGPU::S_FF1_I32_B64:
+    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::S_FF1_I32_B32);
+    Inst.eraseFromParent();
+    return;
+  
   case AMDGPU::S_LSHL_B32:
     if (ST.hasOnlyRevVALUShifts()) {
       NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
@@ -7834,6 +7843,62 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 }
 
+void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Instr, unsigned Opcode, MachineDominatorTree *MDT) const {
+  MachineBasicBlock &MBB = *Instr.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  MachineBasicBlock::iterator MII = Instr;
+  const DebugLoc &DL = Instr.getDebugLoc();
+
+  MachineOperand &Dest = Instr.getOperand(0);
+  MachineOperand &Src = Instr.getOperand(1);
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  bool isCtlz = Opcode == AMDGPU::S_FLBIT_I32_B32;
+
+  
+  const TargetRegisterClass *SrcRC = Src.isReg() ?
+    MRI.getRegClass(Src.getReg()) :
+    &AMDGPU::SGPR_32RegClass;
+
+  const TargetRegisterClass *SrcSubRC =
+      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+                                                      AMDGPU::sub0, SrcSubRC);
+  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+                                                      AMDGPU::sub1, SrcSubRC);
+  
+  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+  MachineInstr *M1 = BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0);
+  MachineInstr *M2 = BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1);
+  MachineInstr *M3 = nullptr;
+  MachineInstr *M4 = nullptr;
+
+  if (isCtlz)
+    M3 = BuildMI(MBB, MII, DL, get(AMDGPU::S_ADD_I32), MidReg3).addReg(MidReg1).addImm(32);
+  else 
+    M3 = BuildMI(MBB, MII, DL, get(AMDGPU::S_ADD_I32), MidReg3).addReg(MidReg2).addImm(32);
+
+  if (isCtlz)
+    M4 = BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN3_U32_e64), MidReg4).addReg(MidReg3).addReg(MidReg2).addImm(64);
+  else 
+    M4 = BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN3_U32_e64), MidReg4).addReg(MidReg3).addReg(MidReg1).addImm(64);
+
+  MRI.replaceRegWith(Dest.getReg(), MidReg4);
+
+  Worklist.insert(M1);
+  Worklist.insert(M2);
+  Worklist.insert(M3);
+  Worklist.insert(M4);
+  
+  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
+}
+
 void SIInstrInfo::addUsersToMoveToVALUWorklist(
     Register DstReg, MachineRegisterInfo &MRI,
     SIInstrWorklist &Worklist) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0ce31ac6d54ec5..effd891eb26edc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -147,6 +147,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                             MachineInstr &Inst) const;
   void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
+  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Instr, unsigned Opcode, MachineDominatorTree *MDT = nullptr) const;
   void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                       MachineInstr &Inst) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
index f8f50c7cb23a5a..12574c41f7cd31 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
@@ -458,13 +458,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX8-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX8-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 32
-; GFX8-NEXT:    s_min_u32 s5, s6, s5
+; GFX8-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX8-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX8-NEXT:    s_add_i32 s4, s4, s8
 ; GFX8-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -502,13 +499,10 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX9-NEXT:    ; implicit-def: $vgpr1
 ; GFX9-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX9-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX9-NEXT:    s_add_i32 s5, s5, 32
-; GFX9-NEXT:    s_min_u32 s5, s6, s5
+; GFX9-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX9-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX9-NEXT:    s_add_i32 s4, s4, s8
 ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -545,10 +539,7 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX10W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX10W64-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX10W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX10W64-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX10W64-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX10W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX10W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX10W64-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX10W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX10W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX10W64-NEXT:    v_writelane_b32 v1, s4, s5
@@ -627,16 +618,12 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX11W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX11W64-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX11W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX11W64-NEXT:    s_ctz_i32_b32 s5, s3
-; GFX11W64-NEXT:    s_ctz_i32_b32 s6, s2
-; GFX11W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX11W64-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX11W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX11W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX11W64-NEXT:    v_writelane_b32 v1, s4, s5
 ; GFX11W64-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[6:7]
-; GFX11W64-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11W64-NEXT:    s_add_i32 s4, s4, s8
 ; GFX11W64-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX11W64-NEXT:    s_cbranch_scc1 .LBB2_1
@@ -739,13 +726,10 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:  .LBB3_1: ; %ComputeLoop
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX8-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX8-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 32
-; GFX8-NEXT:    s_min_u32 s5, s6, s5
+; GFX8-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX8-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX8-NEXT:    s_add_i32 s4, s4, s8
 ; GFX8-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -785,13 +769,10 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
 ; GFX9-NEXT:    ; implicit-def: $vgpr1
 ; GFX9-NEXT:  .LBB3_1: ; %ComputeLoop
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX9-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX9-NEXT:    s_add_i32 s5, s5, 32
-; GFX9-NEXT:    s_min_u32 s5, s6, s5
+; GFX9-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX9-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX9-NEXT:    s_add_i32 s4, s4, s8
 ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -830,10 +811,7 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
 ; GFX10W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX10W64-NEXT:  .LBB3_1: ; %ComputeLoop
 ; GFX10W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX10W64-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX10W64-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX10W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX10W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX10W64-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX10W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX10W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX10W64-NEXT:    v_writelane_b32 v1, s4, s5
@@ -918,16 +896,12 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
 ; GFX11W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX11W64-NEXT:  .LBB3_1: ; %ComputeLoop
 ; GFX11W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX11W64-NEXT:    s_ctz_i32_b32 s5, s3
-; GFX11W64-NEXT:    s_ctz_i32_b32 s6, s2
-; GFX11W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX11W64-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX11W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX11W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX11W64-NEXT:    v_writelane_b32 v1, s4, s5
 ; GFX11W64-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[6:7]
-; GFX11W64-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11W64-NEXT:    s_add_i32 s4, s4, s8
 ; GFX11W64-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX11W64-NEXT:    s_cbranch_scc1 .LBB3_1
@@ -1540,13 +1514,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:  .LBB7_1: ; %ComputeLoop
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX8-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX8-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 32
-; GFX8-NEXT:    s_min_u32 s5, s6, s5
+; GFX8-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX8-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX8-NEXT:    s_mov_b32 m0, s5
 ; GFX8-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX8-NEXT:    s_add_i32 s4, s4, s8
 ; GFX8-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -1584,13 +1555,10 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX9-NEXT:    ; implicit-def: $vgpr1
 ; GFX9-NEXT:  .LBB7_1: ; %ComputeLoop
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX9-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX9-NEXT:    s_add_i32 s5, s5, 32
-; GFX9-NEXT:    s_min_u32 s5, s6, s5
+; GFX9-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX9-NEXT:    s_lshl_b64 s[6:7], 1, s5
-; GFX9-NEXT:    s_mov_b32 m0, s5
 ; GFX9-NEXT:    v_writelane_b32 v1, s4, m0
 ; GFX9-NEXT:    s_add_i32 s4, s4, s8
 ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[6:7]
@@ -1627,10 +1595,7 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX10W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX10W64-NEXT:  .LBB7_1: ; %ComputeLoop
 ; GFX10W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX10W64-NEXT:    s_ff1_i32_b32 s5, s3
-; GFX10W64-NEXT:    s_ff1_i32_b32 s6, s2
-; GFX10W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX10W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX10W64-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX10W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX10W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX10W64-NEXT:    v_writelane_b32 v1, s4, s5
@@ -1709,16 +1674,12 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
 ; GFX11W64-NEXT:    ; implicit-def: $vgpr1
 ; GFX11W64-NEXT:  .LBB7_1: ; %ComputeLoop
 ; GFX11W64-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX11W64-NEXT:    s_ctz_i32_b32 s5, s3
-; GFX11W64-NEXT:    s_ctz_i32_b32 s6, s2
-; GFX11W64-NEXT:    s_add_i32 s5, s5, 32
-; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11W64-NEXT:    s_min_u32 s5, s6, s5
+; GFX11W64-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX11W64-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX11W64-NEXT:    v_readlane_b32 s8, v0, s5
 ; GFX11W64-NEXT:    s_lshl_b64 s[6:7], 1, s5
 ; GFX11W64-NEXT:    v_writelane_b32 v1, s4, s5
 ; GFX11W64-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[6:7]
-; GFX11W64-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11W64-NEXT:    s_add_i32 s4, s4, s8
 ; GFX11W64-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX11W64-NEXT:    s_cbranch_scc1 .LBB7_1
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 81fd166e3779f8..d64112e6972de3 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -525,15 +525,12 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX8-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX8-NEXT:    s_ff1_i32_b32 s5, s2
-; GFX8-NEXT:    s_add_i32 s4, s4, 32
-; GFX8-NEXT:    s_min_u32 s7, s5, s4
-; GFX8-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8-NEXT:    s_lshl_b64 s[4:5], 1, s7
-; GFX8-NEXT:    s_mov_b32 m0, s7
+; GFX8-NEXT:    s_ff1_i32_b64 s4, s[2:3]
+; GFX8-NEXT:    s_mov_b32 m0, s4
+; GFX8-NEXT:    v_readlane_b32 s7, v0, s4
+; GFX8-NEXT:    s_lshl_b64 s[4:5], 1, s4
 ; GFX8-NEXT:    v_writelane_b32 v1, s6, m0
-; GFX8-NEXT:    s_add_i32 s6, s6, s8
+; GFX8-NEXT:    s_add_i32 s6, s6, s7
 ; GFX8-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX8-NEXT:    s_cbranch_scc1 .LBB2_1
@@ -574,15 +571,12 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX9-NEXT:    ; implicit-def: $vgpr1
 ; GFX9-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX9-NEXT:    s_ff1_i32_b32 s5, s2
-; GFX9-NEXT:    s_add_i32 s4, s4, 32
-; GFX9-NEXT:    s_min_u32 s7, s5, s4
-; GFX9-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9-NEXT:    s_lshl_b64 s[4:5], 1, s7
-; GFX9-NEXT:    s_mov_b32 m0, s7
+; GFX9-NEXT:    s_ff1_i32_b64 s4, s[2:3]
+; GFX9-NEXT:    s_mov_b32 m0, s4
+; GFX9-NEXT:    v_readlane_b32 s7, v0, s4
+; GFX9-NEXT:    s_lshl_b64 s[4:5], 1, s4
 ; GFX9-NEXT:    v_writelane_b32 v1, s6, m0
-; GFX9-NEXT:    s_add_i32 s6, s6, s8
+; GFX9-NEXT:    s_add_i32 s6, s6, s7
 ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB2_1
@@ -623,10 +617,7 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1064-NEXT:    ; implicit-def: $vgpr1
 ; GFX1064-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1064-NEXT:    s_ff1_i32_b32 s5, s2
-; GFX1064-NEXT:    s_add_i32 s4, s4, 32
-; GFX1064-NEXT:    s_min_u32 s7, s5, s4
+; GFX1064-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064-NEXT:    s_lshl_b64 s[4:5], 1, s7
 ; GFX1064-NEXT:    v_writelane_b32 v1, s6, s7
@@ -721,16 +712,12 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1164-NEXT:    ; implicit-def: $vgpr1
 ; GFX1164-NEXT:  .LBB2_1: ; %ComputeLoop
 ; GFX1164-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX1164-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1164-NEXT:    s_ctz_i32_b32 s5, s2
-; GFX1164-NEXT:    s_add_i32 s4, s4, 32
-; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164-NEXT:    s_min_u32 s7, s5, s4
+; GFX1164-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1164-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164-NEXT:    s_lshl_b64 s[4:5], 1, s7
 ; GFX1164-NEXT:    v_writelane_b32 v1, s6, s7
 ; GFX1164-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[4:5]
-; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1164-NEXT:    s_add_i32 s6, s6, s8
 ; GFX1164-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX1164-NEXT:    s_cbranch_scc1 .LBB2_1
@@ -2038,15 +2025,12 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:  .LBB8_1: ; %ComputeLoop
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX8-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX8-NEXT:    s_ff1_i32_b32 s5, s2
-; GFX8-NEXT:    s_add_i32 s4, s4, 32
-; GFX8-NEXT:    s_min_u32 s7, s5, s4
-; GFX8-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8-NEXT:    s_lshl_b64 s[4:5], 1, s7
-; GFX8-NEXT:    s_mov_b32 m0, s7
+; GFX8-NEXT:    s_ff1_i32_b64 s4, s[2:3]
+; GFX8-NEXT:    s_mov_b32 m0, s4
+; GFX8-NEXT:    v_readlane_b32 s7, v0, s4
+; GFX8-NEXT:    s_lshl_b64 s[4:5], 1, s4
 ; GFX8-NEXT:    v_writelane_b32 v1, s6, m0
-; GFX8-NEXT:    s_add_i32 s6, s6, s8
+; GFX8-NEXT:    s_add_i32 s6, s6, s7
 ; GFX8-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX8-NEXT:    s_cbranch_scc1 .LBB8_1
@@ -2087,15 +2071,12 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX9-NEXT:    ; implicit-def: $vgpr1
 ; GFX9-NEXT:  .LBB8_1: ; %ComputeLoop
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX9-NEXT:    s_ff1_i32_b32 s5, s2
-; GFX9-NEXT:    s_add_i32 s4, s4, 32
-; GFX9-NEXT:    s_min_u32 s7, s5, s4
-; GFX9-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9-NEXT:    s_lshl_b64 s[4:5], 1, s7
-; GFX9-NEXT:    s_mov_b32 m0, s7
+; GFX9-NEXT:    s_ff1_i32_b64 s4, s[2:3]
+; GFX9-NEXT:    s_mov_b32 m0, s4
+; GFX9-NEXT:    v_readlane_b32 s7, v0, s4
+; GFX9-NEXT:    s_lshl_b64 s[4:5], 1, s4
 ; GFX9-NEXT:    v_writelane_b32 v1, s6, m0
-; GFX9-NEXT:    s_add_i32 s6, s6, s8
+; GFX9-NEXT:    s_add_i32 s6, s6, s7
 ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    s_cmp_lg_u64 s[2:3], 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB8_1
@@ -2136,10 +2117,7 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1064-NEXT:    ; implicit-def: $vgpr1
 ; GFX1064-NEXT:  .LBB8_1: ; %ComputeLoop
 ; GFX1064-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX1064-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1064-NEXT:    s_ff1_i32_b32 s5, ...
[truncated]

github-actions bot commented Dec 12, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

Acim-Maravic force-pushed the s_flbit_i32_b64 branch 2 times, most recently from d435def to 11e8923 (December 12, 2023, 16:33)
@@ -3067,6 +3067,17 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
return NewOpr;
}

// 64-bit scalar ctlz/cttz should use S_FLBIT_I32_B64/S_FF1_I32_B64
// 64-bit scalar version produce 32-bit result
if (!(Src->isDivergent()) && Src.getValueType() == MVT::i64) {
tsymalla (Contributor) commented on Dec 12, 2023:

The outer parentheses around Src->isDivergent() should not be required.
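Spelled out, the condition the nit refers to would simply read (a sketch of the suggestion, not the committed diff):

    if (!Src->isDivergent() && Src.getValueType() == MVT::i64)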

Another reviewer (Contributor) commented:

The type check should be an assert, since these ops should only be legal for i32 and i64.

But anyway this block should be merged with the block above - using "if (i32 or uniform i64) {...}". Only the final zext might need to be conditional.
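A rough sketch of the shape this suggestion points at, reusing the surrounding names from the patch (NewOpc, SL, ZeroUndef); it illustrates the review idea, not the code that was eventually committed:

    // Only i32 and i64 should reach this lowering.
    assert(Src.getValueType() == MVT::i32 || Src.getValueType() == MVT::i64);
    bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
    if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
      // Both the 32-bit and the scalar 64-bit opcodes produce a 32-bit result.
      SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
      if (!ZeroUndef) {
        // Clamp the "input was all zeros" result to the source bit width.
        const SDValue Width = DAG.getConstant(
            Src.getValueType().getScalarSizeInBits(), SL, MVT::i32);
        NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Width);
      }
      // A ZERO_EXTEND to the same type folds away, so this is a no-op for i32.
      return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
    }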

MachineOperand &Src = Instr.getOperand(1);

const MCInstrDesc &InstDesc = get(Opcode);
bool isCtlz = Opcode == AMDGPU::S_FLBIT_I32_B32;
A reviewer (Contributor) commented:

Should start with capital I (IsCtlz)

@@ -7834,6 +7843,56 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
A reviewer (Contributor) commented:

Can you add a comment showing what code this generates? I don't understand what the final min3 with 64 is for.

If the input is all zeros then S_FLBIT_I32_B64/S_FF1_I32_B64 would return -1, so this expansion should do the same.

Also, are there any tests for this code? If not, could you add some? See move-to-valu-* for examples.
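For reference, the zero-input behaviour being described, as worked examples (summarized from the ISA semantics, not quoted from the thread):

    // Both scalar 64-bit instructions return -1 when no bit is set:
    //   S_FLBIT_I32_B64 0x0000000000000000 -> 0xFFFFFFFF (-1)
    //   S_FF1_I32_B64   0x0000000000000000 -> 0xFFFFFFFF (-1)
    // Otherwise they return a bit position counted over the full 64 bits:
    //   S_FLBIT_I32_B64 0x00000000FFFFFFFF -> 32  (32 leading zeros)
    //   S_FF1_I32_B64   0xFFFFFFFF00000000 -> 32  (lowest set bit is bit 32)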

Acim-Maravic (Contributor, Author) replied:

I added a comment. The final add with 64 was there because I thought that when the input is all zeroes it should return 64; I have changed that now. I have also added tests, I hope that is okay now.

Acim-Maravic force-pushed the s_flbit_i32_b64 branch 4 times, most recently from 440f05e to 6cb1e3f (December 14, 2023, 15:31)
Comment on lines 3069 to 3071
if (Is64BitScalar)
return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
return NewOpr;
A reviewer (Contributor) commented:

I think ZERO_EXTEND to the same type is optimized out as a no-op, so you could write this more simply:

Suggested change, replacing

    if (Is64BitScalar)
      return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
    return NewOpr;

with

    return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);

MachineInstr *M1 = BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0);
MachineInstr *M2 = BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1);

BuildMI(MBB, MII, DL, get(AMDGPU::S_ADD_I32), MidReg3)
A reviewer (Contributor) commented:

Doesn't this need to be an unsigned saturating add?

Acim-Maravic (Contributor, Author) replied:

Yes, you are right, I had missed that it should produce the expected (-1) result for zero.

Acim-Maravic (Contributor, Author) added:

After that, SIInstrInfo::getVALUOp is called, which for S_ADD_I32 returns V_ADD_U32_e64 or V_ADD_CO_U32_e32, and those are both unsigned saturating adds. Correct me if I am wrong?

A reviewer (Contributor) replied:

No. They are only saturating adds if you specify the "clamp" output modifier. Otherwise they have normal 2's complement wrap around behaviour.
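To make that concrete, a hedged sketch of a saturating VALU add built by hand; V_ADD_U32_e64 takes a trailing clamp operand, and setting it to 1 gives unsigned saturation instead of wrap-around. The operand list here is an assumption for illustration, not a quote of the committed patch:

    // uaddsat(ffbh(lo), 32): when the low half is zero, V_FFBH_U32 returns -1
    // (0xFFFFFFFF). A wrapping add of 32 would turn that into 31; a clamped
    // (saturating) add keeps it at 0xFFFFFFFF, so the following min still
    // picks the correct high-half result.
    BuildMI(MBB, MII, DL, get(AMDGPU::V_ADD_U32_e64), MidReg3)
        .addReg(MidReg1)
        .addImm(32)
        .addImm(1); // clamp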

MachineInstr &Inst, unsigned Opcode,
MachineDominatorTree *MDT) const {
// (S_FLBIT_I32_B64 hi:lo) ->
// -> (umin (V_FFBH_U32_e32 hi), (uaddsat (V_FFBH_U32_e32 lo), 32))
A reviewer (Contributor) commented:

This looks better than the generic expansion for ctlz/cttz, maybe something similar could also be implemented there?

Also, RegBankSelect should get similar treatment for GlobalISel

jayfoad (Contributor) left a review:

LGTM with test tweaks.

Should also do GlobalISel as Matt mentioned, but that can be a separate patch.

@@ -0,0 +1,308 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
A reviewer (Contributor) commented:

I don't think you need this file. The .ll version is simpler and tests the same thing.

; GFX10-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
%val = load i64, ptr addrspace(1) %arrayidx, align 1
%ctlz = tail call i64 @llvm.ctlz.i64(i64 %val, i1 true) nounwind readnone
A reviewer (Contributor) commented:

Can you also add a test for the i1 false version, and the same for cttz?
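A sketch of the kind of extra coverage being asked for (illustrative function names, not the tests that were actually added):

    define amdgpu_kernel void @s_ctlz_i64(ptr addrspace(1) %out, i64 %val) {
      %r = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
      store i64 %r, ptr addrspace(1) %out
      ret void
    }

    define amdgpu_kernel void @s_cttz_i64(ptr addrspace(1) %out, i64 %val) {
      %r = call i64 @llvm.cttz.i64(i64 %val, i1 false)
      store i64 %r, ptr addrspace(1) %out
      ret void
    }

    declare i64 @llvm.ctlz.i64(i64, i1)
    declare i64 @llvm.cttz.i64(i64, i1)

The i1 false variants are the ones that exercise the UMIN clamp, which the zero-undef path skips.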

Acim-Maravic (Contributor, Author), replying to the review above ("LGTM with test tweaks. Should also do GlobalISel as Matt mentioned, but that can be a separate patch."):

I updated the tests.

I will do GlobalISel in another patch.

; SI-NEXT: s_add_i32 s4, s4, 32
; SI-NEXT: v_min3_u32 v0, s4, v0, 64
; SI-NEXT: s_flbit_i32_b64 s4, s[4:5]
; SI-NEXT: s_min_u32 s4, s4, 32
A reviewer (Contributor) commented:

32 is wrong here and many other places. Should be 64.

jayfoad (Contributor) left a review:

LGTM, thanks. Just one nit inline.

SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
if (!ZeroUndef) {
const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
if (!Is64BitScalar) {
A reviewer (Contributor) commented:

Don't really need this if - you could do something like SDValue ConstVal = DAG.getConstant(VT.getScalarSizeInBits(), SL, MVT::i32).
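In context, the suggested form would look roughly like this (a sketch of the nit, not the exact committed lines):

    const SDValue ConstVal = DAG.getConstant(
        Src.getValueType().getScalarSizeInBits(), SL, MVT::i32);
    NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);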

Acim-Maravic (Contributor, Author), replying to "LGTM, thanks. Just one nit inline.":

Thank you for the help and support.

mbrkusanin merged commit 48f36c6 into llvm:main on Dec 25, 2023
4 checks passed