Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promote the Pseudo Opcode of instructions that deduce the sign extension for extsw from 32 bits to 64 bits when eliminating the extsw instruction in PPCMIPeepholes optimization. #85451

Open
wants to merge 13 commits into
base: main
Choose a base branch
from

Conversation

diggerlin
Copy link
Contributor

@diggerlin diggerlin commented Mar 15, 2024

the patch will fix the bug which description in the issue #71030.

the bug only occurs in 64bit occurs when we spill. Since we don't know when the spill will happen, all instructions in the chain used to deduce sign extension for eliminating 'extsw' will need to be promoted to 64-bit pseudo instructions. The following instruction will promoted in PPCMIPeepholes: EXTSH, LHA, ISEL to EXTSH8, LHA8, ISEL8

@llvmbot
Copy link
Collaborator

llvmbot commented Mar 15, 2024

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

Changes

Patch is 49.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85451.diff

12 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+135)
  • (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.h (+5)
  • (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+2-3)
  • (modified) llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (+1)
  • (modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir (+1-1)
  • (modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir (+4-4)
  • (added) llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir (+698)
  • (modified) llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll (+2)
  • (modified) llvm/test/CodeGen/PowerPC/select-constant-xor.ll (+4)
  • (modified) llvm/test/CodeGen/PowerPC/sext_elimination.mir (+8-2)
  • (modified) llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll (+3-1)
  • (modified) llvm/test/CodeGen/PowerPC/store-forward-be64.ll (+1)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5f5eb31a5a85fa..0e9bdaf37d079d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5219,6 +5219,141 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // We limit the max depth to track incoming values of PHIs or binary ops
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
+
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+                                                  MachineRegisterInfo *MRI,
+                                                  unsigned BinOpDepth,
+                                                  LiveVariables *LV) const {
+  if (MRI->getRegClass(Reg) == &PPC::G8RCRegClass)
+    return;
+
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI)
+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsRelplaceIntr = false;
+  switch (Opcode) {
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::PHI:
+  case PPC::ISEL:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::OR)
+        IsRelplaceIntr = true;
+      unsigned OperandEnd = 3, OperandStride = 1;
+      if (MI->getOpcode() == PPC::PHI) {
+        OperandEnd = MI->getNumOperands();
+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+        assert(MI->getOperand(I).isReg() && "Operand must be register");
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+    }
+    break;
+    // case PPC::COPY:
+  case PPC::ORI:
+  case PPC::XORI:
+  case PPC::ORI8:
+  case PPC::XORI8:
+  case PPC::ORIS:
+  case PPC::XORIS:
+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    if (Opcode == PPC::ORI || Opcode == PPC::XORI || Opcode == PPC::ORIS ||
+        Opcode == PPC::ORIS || Opcode == PPC::XORIS)
+      IsRelplaceIntr = true;
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+    break;
+  }
+  case PPC::AND:
+  case PPC::AND8: {
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::AND)
+        IsRelplaceIntr = true;
+      Register SrcReg1 = MI->getOperand(1).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
+      Register SrcReg2 = MI->getOperand(2).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  if ((TII->isSExt32To64(Opcode) && !TII->isZExt32To64(Opcode)) ||
+      IsRelplaceIntr) {
+    DebugLoc DL = MI->getDebugLoc();
+    auto MBB = MI->getParent();
+
+    // If the oprand of the instruction is Register which isPPC::GRCRegClass, we
+    // need to promot the Oprande to PPC::G8RCRegClass.
+    DenseMap<unsigned, Register> PromoteRegs;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
+      MachineOperand &Oprand = MI->getOperand(i);
+      if (Oprand.isReg()) {
+        Register OprandReg = Oprand.getReg();
+        if (!OprandReg.isVirtual())
+          continue;
+        if (MRI->getRegClass(OprandReg) == &PPC::GPRCRegClass) {
+          Register TmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+          Register DstTmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+
+          BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
+          BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
+              .addReg(TmpReg)
+              .addReg(OprandReg)
+              .addImm(PPC::sub_32);
+          PromoteRegs[i] = DstTmpReg;
+        } else {
+          PromoteRegs[i] = OprandReg;
+        }
+      }
+    }
+
+    Register NewReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+    Register SrcReg = MI->getOperand(0).getReg();
+
+    // Most of the opcode of 64-bit instruction equal to the opcode of 32-bit
+    // version of same instruction plus one. But there are some exception:
+    // PPC::ANDC_rec, PPC::ANDI_rec, PPC::ANDIS_rec.
+    unsigned NewOpcode = Opcode + 1;
+
+    if (Opcode == PPC::ANDC_rec)
+      NewOpcode = PPC::ANDC8_rec;
+    if (Opcode == PPC::ANDI_rec)
+      NewOpcode = PPC::ANDI8_rec;
+    if (Opcode == PPC::ANDIS_rec)
+      NewOpcode = PPC::ANDIS8_rec;
+
+    BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+    MachineBasicBlock::instr_iterator Iter(MI);
+    --Iter;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++)
+      if (PromoteRegs.find(i) != PromoteRegs.end())
+        MachineInstrBuilder(*Iter->getMF(), Iter)
+            .addReg(PromoteRegs[i], RegState::Kill);
+      else
+        Iter->addOperand(MI->getOperand(i));
+
+    for (auto Iter = PromoteRegs.begin(); Iter != PromoteRegs.end(); Iter++)
+      LV->recomputeForSingleDefVirtReg(Iter->second);
+    MI->eraseFromParent();
+    BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
+        .addReg(NewReg, RegState::Kill, PPC::sub_32);
+    LV->recomputeForSingleDefVirtReg(NewReg);
+    return;
+  }
+  return;
+}
+
 // The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
 // does not count all of the recursions. The parameter BinOpDepth is incremented
 // only when isSignOrZeroExtended calls itself more than once. This is done to
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 045932dc0d3ba1..f6e79707913c7b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -17,6 +17,7 @@
 #include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
@@ -610,6 +611,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
+  void replaceInstrAfterElimExt32To64(const Register &Reg,
+                                      MachineRegisterInfo *MRI,
+                                      unsigned BinOpDepth,
+                                      LiveVariables *LV) const;
 
   bool convertToImmediateForm(MachineInstr &MI,
                               SmallSet<Register, 4> &RegsToUpdate,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 82da1a3c305983..7c94add841402a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2408,7 +2408,7 @@ defm SRW  : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                      [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
 defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                       "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
 }
 
 def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
@@ -2423,8 +2423,7 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 let hasSideEffects = 0 in {
 defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
                         "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
-                        SExt32To64;
+                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
 defm CNTLZW : XForm_11r<31,  26, (outs gprc:$RA), (ins gprc:$RST),
                         "cntlzw", "$RA, $RST", IIC_IntGeneral,
                         [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 494e4b52a5b5eb..76b9c19db2b3eb 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1037,6 +1037,7 @@ bool PPCMIPeephole::simplifyCode() {
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
           // sign-extended.
+          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index dfbf412a939212..bcc1d29a3f6ea3 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -604,7 +604,7 @@ body:             |
     %2 = LI 48
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: LI 0
+    ; CHECK: LI8 0
     ; CHECK-LATE: li 3, 0
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 761316ed7726d7..f095ffa85f02db 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1348,7 +1348,7 @@ body:             |
     %1 = LI 77
     %2 = ADDI killed %1, 44
     %3 = EXTSW_32_64 killed %2
-    ; CHECK: LI 121
+    ; CHECK: LI8 121
     ; CHECK-LATE: li 3, 121
     $x3 = COPY %3
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -3573,7 +3573,7 @@ body:             |
 
     %0 = LI 777
     %1 = ORI %0, 88
-    ; CHECK: LI 857
+    ; CHECK: LI8 857
     ; CHECK-LATE: li 3, 857
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4145,7 +4145,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 17
     %4 = RLWINM killed %2, 4, 20, 27
-    ; CHECK: LI 272
+    ; CHECK: LI8 272
     ; CHECK-LATE: li 3, 272
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -6456,7 +6456,7 @@ body:             |
 
     %0 = LI 871
     %1 = XORI %0, 17
-    ; CHECK: LI 886
+    ; CHECK: LI8 886
     ; CHECK-LATE: li 3, 886
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
new file mode 100644
index 00000000000000..1b54ba7a38b816
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -0,0 +1,698 @@
+# RUN: llc -run-pass=ppc-mi-peepholes  -mtriple powerpc64-ibm-aix-xcoff %s -o - \
+# RUN:   -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = '71030_tmp_reduce-O2.ll'
+  source_filename = "71030_tmp_reduce.c"
+  target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+  target triple = "powerpc64-ibm-aix-xcoff"
+  
+  @globalShortValue = local_unnamed_addr global i16 1, align 2
+  @globalCharValue = local_unnamed_addr global i8 0, align 1
+  @largeNumber = local_unnamed_addr global i64 -3664682556119382352, align 8
+  @someIntValue = local_unnamed_addr global i32 378441747, align 4
+  @unitIncrement = local_unnamed_addr global i32 1, align 4
+  @computedResultUll = local_unnamed_addr global i64 0, align 8
+  @computedResultShort = local_unnamed_addr global i16 0, align 2
+  @computedResultUChar = local_unnamed_addr global i8 0, align 1
+  @computedResultBool = local_unnamed_addr global i8 0, align 1
+  @computedResultChar = local_unnamed_addr global i8 0, align 1
+  @shortArray = local_unnamed_addr global [8 x i16] zeroinitializer, align 2
+  @charArray = local_unnamed_addr global [8 x [8 x [8 x i8]]] zeroinitializer, align 1
+  @longArray = local_unnamed_addr global [8 x [8 x i64]] zeroinitializer, align 8
+  @resultArray = local_unnamed_addr global [8 x [8 x i16]] zeroinitializer, align 2
+  @ullArray = local_unnamed_addr global [8 x i64] zeroinitializer, align 8
+  @intArray = local_unnamed_addr global [8 x [8 x [8 x i32]]] zeroinitializer, align 4
+  @_MergedGlobals = private constant <{ [29 x i8], [46 x i8] }> <{ [29 x i8] c"Computed Result (ULL): %llx\0A\00", [46 x i8] c"Computed convert largeNumber&&&& (ULL): %llx\0A\00" }>, align 1
+  
+  @.str.1 = private alias [29 x i8], ptr @_MergedGlobals
+  @.str = private alias [46 x i8], getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1)
+  
+  ; Function Attrs: nofree nounwind
+  define noundef signext i32 @main() local_unnamed_addr #0 {
+  entry:
+    store i16 -1, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %0 = load i64, ptr @largeNumber, align 8, !tbaa !7
+    %conv = trunc i64 %0 to i32
+    %sext = shl i32 %conv, 16
+    %conv1 = ashr exact i32 %sext, 16
+    %sub = add nsw i32 %conv1, -1705
+    %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1), i32 noundef signext %sub)
+    %1 = load i16, ptr @globalShortValue, align 2, !tbaa !3
+    %2 = load i32, ptr @someIntValue, align 4, !tbaa !9
+    %3 = trunc i32 %2 to i8
+    %conv20 = add i8 %3, -19
+    %4 = load i32, ptr @unitIncrement, align 4
+    %5 = load i8, ptr @globalCharValue, align 1
+    %conv45 = sext i8 %5 to i32
+    %computedResultShort.promoted = load i16, ptr @computedResultShort, align 2, !tbaa !3
+    %resultArray.promoted = load i16, ptr @resultArray, align 2, !tbaa !3
+    %computedResultChar.promoted149 = load i8, ptr @computedResultChar, align 1, !tbaa !11
+    %6 = sext i8 %conv20 to i64
+    %7 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %8 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2), align 2
+    %conv46 = sext i16 %8 to i32
+    %cond54 = tail call i32 @llvm.smin.i32(i32 %conv45, i32 %conv46)
+    %tobool = icmp ne i32 %cond54, 0
+    %conv55 = zext i1 %tobool to i8
+    %9 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @ullArray, i64 0, i64 3), align 8
+    %tobool72 = icmp ne i64 %9, 0
+    %frombool = zext i1 %tobool72 to i8
+    %smax = tail call i64 @llvm.smax.i64(i64 %6, i64 4)
+    %10 = add nuw nsw i64 %smax, 3
+    %11 = sub i64 %10, %6
+    %12 = lshr i64 %11, 2
+    %13 = add nuw nsw i64 %12, 1
+    %n.vec = and i64 %13, 9223372036854775806
+    %14 = shl i64 %n.vec, 2
+    %ind.end = add i64 %14, %6
+    %15 = shl i64 %6, 2
+    %16 = shl i64 %6, 3
+    %17 = add nsw i64 %16, -64
+    %scevgep30 = getelementptr i8, ptr @longArray, i64 %17
+    %18 = add nsw i64 %15, 64
+    %scevgep31 = getelementptr i8, ptr @intArray, i64 %18
+    %19 = lshr i64 %13, 1
+    %20 = shl nuw nsw i64 %19, 1
+    %21 = add nsw i64 %20, -2
+    %22 = lshr i64 %21, 1
+    %23 = add nuw i64 %22, 1
+    br label %for.body16
+  
+  for.cond.cleanup15:                               ; preds = %for.cond.cleanup25
+    %24 = tail call i16 @llvm.smin.i16(i16 %1, i16 %7)
+    %conv11.le = sext i16 %24 to i64
+    store i64 %conv11.le, ptr @computedResultUll, align 8, !tbaa !7
+    %call97 = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @_MergedGlobals, i64 noundef %conv11.le)
+    ret i32 0
+  
+  for.body16:                                       ; preds = %for.cond.cleanup25, %entry
+    %lsr.iv29 = phi i32 [ %lsr.iv.next, %for.cond.cleanup25 ], [ 8, %entry ]
+    %conv36.lcssa132140 = phi i16 [ %computedResultShort.promoted, %entry ], [ %conv36.lcssa131, %for.cond.cleanup25 ]
+    %and.lcssa135139 = phi i16 [ %resultArray.promoted, %entry ], [ %and.lcssa134, %for.cond.cleanup25 ]
+    %conv81118.lcssa.lcssa137138 = phi i8 [ %computedResultChar.promoted149, %entry ], [ %conv81118.lcssa.lcssa136, %for.cond.cleanup25 ]
+    %25 = icmp slt i8 %conv20, 8
+    br i1 %25, label %for.body31.lr.ph, label %for.cond.cleanup25
+  
+  for.body31.lr.ph:                                 ; preds = %for.body16
+    %26 = icmp ult i64 %11, 4
+    store i8 %conv55, ptr @computedResultUChar, align 1, !tbaa !11
+    br i1 %26, label %for.body31.preheader, label %vector.body.preheader
+  
+  vector.body.preheader:                            ; preds = %for.body31.lr.ph
+    call void @llvm.set.loop.iterations.i64(i64 %23)
+    br label %vector.body
+  
+  vector.body:                                      ; preds = %vector.body.preheader, %vector.body
+    %vec.phi = phi i16 [ %44, %vector.body ], [ %conv36.lcssa132140, %vector.body.preheader ]
+    %vec.phi159 = phi i16 [ %45, %vector.body ], [ 0, %vector.body.preheader ]
+    %vec.phi160 = phi i16 [ %46, %vector.body ], [ %and.lcssa135139, %vector.body.preheader ]
+    %vec.phi161 = phi i16 [ %47, %vector.body ], [ -1, %vector.body.preheader ]
+    %vec.phi162 = phi i8 [ %48, %vector.body ], [ %conv81118.lcssa.lcssa137138, %vector.body.preheader ]
+    %vec.phi163 = phi i8 [ %49, %vector.body ], [ 0, %vector.body.preheader ]
+    %27 = phi ptr [ %scevgep30, %vector.body.preheader ], [ %31, %vector.body ]
+    %28 = phi ptr [ %scevgep31, %vector.body.preheader ], [ %29, %vector.body ]
+    %29 = getelementptr i8, ptr %28, i64 32
+    %30 = getelementptr i8, ptr %29, i64 16
+    %31 = getelementptr i8, ptr %27, i64 64
+    %32 = getelementptr i8, ptr %31, i64 32
+    %33 = trunc i32 %4 to i16
+    %34 = load i64, ptr %31, align 8, !tbaa !7
+    %35 = load i64, ptr %32, align 8, !tbaa !7
+    %36 = trunc i64 %34 to i16
+    %37 = trunc i64 %35 to i16
+    %38 = load i32, ptr %29, align 4, !tbaa !9
+    %39 = load i32, ptr %30, align 4, !tbaa !9
+    %40 = trunc i32 %38 to i8
+    %41 = trunc i32 %39 to i8
+    %42 = mul i8 %40, -6
+    %43 = mul i8 %41, -6
+    %44 = sub i16 %vec.phi, %33
+    %45 = sub i16 %vec.phi159, %33
+    %46 = and i16 %vec.phi160, %36
+    %47 = and i16 %vec.phi161, %37
+    %48 = add i8 %42, %vec.phi162
+    %49 = add i8 %43, %vec.phi163
+    %50 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %50, label %vector.body, label %middle.block, !llvm.loop !12
+  
+  middle.block:                                     ; preds = %vector.body
+    %51 = icmp eq i64 %13, %n.vec
+    %bin.rdx = add i16 %45, %44
+    %bin.rdx164 = and i16 %47, %46
+    %bin.rdx165 = add i8 %49, %48
+    br i1 %51, label %for.cond21.for.cond.cleanup25_crit_edge, label %for.body31.preheader
+  
+  for.body31.preheader:                             ; preds = %middle.block, %for.body31.lr.ph
+    %indvars.iv.ph = phi i64 [ %6, %for.body31.lr.ph ], [ %ind.end, %middle.block ]
+    %conv36121128.ph = phi i16 [ %conv36.lcssa132140, %for.body31.lr.ph ], [ %bin.rdx, %middle.block ]
+    %and122127.ph = phi i16 [ %and.lcssa135139, %for.body31.lr.ph ], [ %bin.rdx164, %middle.block ]
+    %conv81118.lcssa124126.ph = phi i8 [ %conv81118.lcssa.lcssa137138, %for.body31.lr.ph ], [ %bin.rdx165, %middle.block ]
+    %52 = shl i64 %indvars.iv.ph, 2
+    %53 = shl i64 %indvars.iv.ph, 3
+    %scevgep = getelementptr i8, ptr getelementptr ([8 x [8 x i64]], ptr @longArray, i64 -1, i64 7, i64 4), i64 %53
+    %scevgep32 = getelementptr i8, ptr getelementptr inbounds ([8 x [8 x [8 x i32]]], ptr @intArray, i64 0, i64 0, i64 2, i64 4), i64 %52
+    %smax33 = call i64 @llvm.smax.i64(i64 %indvars.iv.ph, i64 4)
+    %54 = add i64 %smax33, 3
+    %55 = sub i64 %54, %indvars.iv.ph
+    %56 = lshr i64 %55, 2
+    %57 = add nuw nsw i64 %56, 1
+    call void @llvm.set.loop.iterations.i64(i64 %57)
+    br label %for.body31
+  
+  for.cond21.for.cond.cleanup25_crit_edge:          ; preds = %for.body31, %middle.block
+    %conv36.lcssa = phi i16 [ %bin.rdx, %middle.block ], [ %conv36, %for.body31 ]
+    %and.lcssa = phi i16 [ %bin.rdx164, %middle.block ], [ %and, %for.body31 ]
+    %.lcssa = phi i8 [ %bin.rdx165, %middle.block ], [ %67, %for.body31 ]
+    %58 = trunc i16 %1 to i8
+    store i16 %conv36.lcssa, ptr @computedResultShort, align 2, !tbaa !3
+    store i8 %58, ptr getelementptr inbounds ([8 x [8 x [8 x i8]]], ptr @charArray, i64 0, i64 2, i64 0, i64 3), align 1, !tbaa !11
+    store i16 %and.lcssa, ptr @resultArray, align 2, !tbaa !3
+    store i8 %frombool, ptr @computedResultBool, align 1, !tbaa !16
+    store i8 %.lcssa, ptr @computedResultChar, align...
[truncated]

@diggerlin diggerlin changed the title first implement of fixing issue 71030 fix a bug of PPCMIPeepholes which description in issue 71030 Mar 15, 2024
Copy link
Contributor

@amy-kwan amy-kwan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initial review, and I plan to review again.

return;

unsigned Opcode = MI->getOpcode();
bool IsRelplaceIntr = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool IsRelplaceIntr = false;
bool IsReplaceInstr = false;

Do we mean this?

DenseMap<unsigned, Register> PromoteRegs;
DenseMap<unsigned, Register> ReCalRegs;
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
MachineOperand &Oprand = MI->getOperand(i);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
MachineOperand &Oprand = MI->getOperand(i);
MachineOperand &Operand = MI->getOperand(i);

I think we can use Operand instead of Oprand here and below.

@@ -2408,7 +2408,7 @@ defm SRW : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
[(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
[(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask why SExt32To64 is removed from just SRAW and SRAWI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no 64bit pseudo-code instruction for SRAW and SRAWI , that there is no SRAW8 and SRAWI8 pseudo-code instruction

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Group code review comment:
Is the removal of the SExt32To64 flag truly correct? We're not sure if this instruction really makes sense in 32-bit mode. The sign extend flag should depend on the behaviour of the instruction itself (if the instruction is meant to sign extend, then presumably the flag should still be present).

It would be good investigate this.

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp Show resolved Hide resolved
@nemanjai
Copy link
Member

nemanjai commented Apr 4, 2024

I think it would be worthwhile investigating whether this can be accomplished significantly more easily and succinctly by implementing another InstrMapping similar to the record-form ones we have (getRecordFormOpcode, getNonRecordFormOpcode). If we have a pair of mappings similar to those but that represent get64BitMnemonic and get32BitMnemonic, we should be able to just use those rather than a big switch.

@diggerlin
Copy link
Contributor Author

I think it would be worthwhile investigating whether this can be accomplished significantly more easily and succinctly by implementing another InstrMapping similar to the record-form ones we have (getRecordFormOpcode, getNonRecordFormOpcode). If we have a pair of mappings similar to those but that represent get64BitMnemonic and get32BitMnemonic, we should be able to just use those rather than a big switch.

I added a

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
...
}

Copy link
Contributor

@amy-kwan amy-kwan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would good to update the title and description, rather than "fix a bug".

@@ -0,0 +1,702 @@
# RUN: llc -run-pass=ppc-mi-peepholes -mtriple powerpc64-ibm-aix-xcoff %s -o - \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we commit this test case first before this patch? Then, it will be easier to see how this patch changes the test case.

@@ -2408,7 +2408,7 @@ defm SRW : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
[(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
[(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Group code review comment:
Is the removal of the SExt32To64 flag truly correct? We're not sure if this instruction really makes sense in 32-bit mode. The sign extend flag should depend on the behaviour of the instruction itself (if the instruction is meant to sign extend, then presumably the flag should still be present).

It would be good investigate this.

let FilterClass = "SExt32To64";
// Instructions with the same opcode.
let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "form a column" mean?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let FilterClass = "SExt32To64"

means we will deal with the instruction which only has flag SExt32To64

let RowFields = ["Inst"];

means that if two instruction has the same Pseduo Opcode will be on the same row.

let ColFields = ["Interpretation64Bit"];

means that the instruction for 64bit is one column, 32-bit is another column.

it will generate something like this

int get64BitInstrFromSignedExt32BitInstr(uint16_t Opcode) {
static const uint16_t get64BitInstrFromSignedExt32BitInstrTable[][2] = {
  { PPC::ANDI_rec, PPC::ANDI8_rec },
  { PPC::EXTSB, PPC::EXTSB8 },
  { PPC::EXTSB8_32_64, PPC::EXTSB8 },
  { PPC::EXTSB_rec, PPC::EXTSB8_rec },
  { PPC::EXTSH, PPC::EXTSH8 },
  { PPC::EXTSH8_32_64, PPC::EXTSH8 },
  { PPC::EXTSH_rec, PPC::EXTSH8_rec },
  { PPC::EXTSW, PPC::EXTSW_32_64 },
  { PPC::EXTSW_rec, PPC::EXTSW_32_64_rec },
  { PPC::LBZ, PPC::LBZ8 },
  { PPC::LBZX, PPC::LBZX8 },
  { PPC::LHA, PPC::LHA8 },
  { PPC::LHAX, PPC::LHAX8 },
  { PPC::LHZ, PPC::LHZ8 },
  { PPC::LHZX, PPC::LHZX8 },
  { PPC::LI, PPC::LI8 },

let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
let ColFields = ["Interpretation64Bit"];
// The key column are not the Interpretation64Bit-form instructions.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// The key column are not the Interpretation64Bit-form instructions.
// The key column are not the Interpretation64Bit-form instructions.

@diggerlin diggerlin changed the title fix a bug of PPCMIPeepholes which description in issue 71030 promote PseduoCode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization Apr 24, 2024
@diggerlin diggerlin changed the title promote PseduoCode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization promote Pseduo Opcode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization Apr 25, 2024
diggerlin added a commit that referenced this pull request Apr 30, 2024
Add pre-commit MIR test for PR "[Promote Pseudo Opcode from 32-bit to
64-bit after eliminating the extsw instruction in PPCMIPeepholes
optimization](#85451)" which
fixes bug reported in the issue "[Inconsistent Output at -O1 and -O2
Optimization Levels on PowerPC64 Due to Complex Type Casting and Nested
Loop Structure](#71030)".
…w instruction in PPCMIPeepholes optimization
Copy link
Contributor

@amy-kwan amy-kwan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some group code review comments.

@@ -1051,6 +1051,7 @@ bool PPCMIPeephole::simplifyCode() {
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple comments regarding this line:

  • This call does not make sense with the comment on 1052-1053. It would be better to put this call above the comment.
  • The naming of the function seems unclear. We can consider renaming the function from replaceInstrAfter to promoteInstrBefore, or something similar.
  • Add new documentation for the function.

@@ -5234,6 +5234,218 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment to describe this function.

OperandStride = 2;
}

for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to use < instead of != here.

return;

unsigned Opcode = MI->getOpcode();
bool IsReplaceInstr = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we update the name of this variable? Something like IsPromotedInstr?

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp Show resolved Hide resolved
Copy link
Contributor

@kamaub kamaub left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename the PR to "Promote 32bit Pseduo Opcodes to 64bits when extsw is eliminated".

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp Outdated Show resolved Hide resolved
Comment on lines 1057 to 1060
// eliminated. If there is no promotion, it will use the 'stw' instead
// of 'std', and 'lwz' instead of 'ld' when spilling, since the
// register class is 32-bits. Consequently, the high 32-bit
// information will be lost.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might flow nicer if we put the what will happen if we don't promote first and then say how we promote after.

Comment on lines 5244 to 5245
// and defined registers in the instruction may also need to be promoted from
// 32-bit to 64-bit based on the promoted instruction description. If a used
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should mention the tablegen flag used to map PPC instructions and the opcodes defined by the issignorzeroextended().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put the new adding comment in the code which is in the middle of the function instead of the description of the function.

Comment on lines 5238 to 5241
// The `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
// than once. This is done to prevent exponential recursion. The function will
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably only mention the recursion after explaining how we follow the register uses below.

case PPC::COPY: {
Register SrcReg = MI->getOperand(1).getReg();
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should copy the explanation comments for these conditions from issignorzeroextended() as well

IsNonSignedExtInstrPromoted = true;
};

switch (Opcode) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all the cases for opcode switch statements in PromoteInstr32To64ForEmliEXTSW coming from the union of definedBySignExtendingOp() and isSignExtended()/isSignOrZeroExtended()?

Copy link
Contributor Author

@diggerlin diggerlin May 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes,
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp#L1051
-->https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.h#L617

bool isSignExtended(const unsigned Reg,
                      const MachineRegisterInfo *MRI) const {
    return isSignOrZeroExtended(Reg, 0, MRI).first;
  }

-->
PPCInstrInfo::isSignOrZeroExtended() in the PPCInstrInfo.cpp will call

-->
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp#L5253
definedBySignExtendingOp()

@diggerlin diggerlin changed the title promote Pseduo Opcode from 32bit to 64bits after eliminating the extsw instruction in PPCMIPeepholes optimization Promote the Pseudo Opcode of instructions that deduce the sign extension for extsw from 32 bits to 64 bits when eliminating the extsw instruction in PPCMIPeepholes optimization. May 9, 2024
Copy link

github-actions bot commented May 15, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 70ada5b178a14363dc6d30fbf531e47d3933a086 8550317de4c96a7bef3f030a0f7ab609331b97df -- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/lib/Target/PowerPC/PPCInstrInfo.h llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9c22642954..5b5741d202 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5418,7 +5418,7 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
     BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
     MachineBasicBlock::instr_iterator Iter(MI);
     --Iter;
-    MachineInstrBuilder  MIBuilder(*Iter->getMF(), Iter);
+    MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
     for (unsigned i = 1; i < MI->getNumOperands(); i++) {
       if (PromoteRegs.find(i) != PromoteRegs.end())
         MIBuilder.addReg(PromoteRegs[i], RegState::Kill);

Copy link
Contributor

@amy-kwan amy-kwan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initial review of comments within the patch.
Planning another review.

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp Outdated Show resolved Hide resolved
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
// than once. This is done to prevent exponential recursion.
void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,

}
break;
case PPC::COPY: {
// Refer to the logic of the `case PPC::COPY` statement in the function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Refer to the logic of the `case PPC::COPY` statement in the function
// Refers to the logic of the `case PPC::COPY` statement in the function

// are sign- or zero-extended.
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
// If this is a copy from another register, we recursively promote source.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// If this is a copy from another register, we recursively promote source.
// If this is a copy from another register, we recursively promote the source.

Comment on lines 5303 to 5304
// From here on everything is SVR4ABI. COPY will be eliminated in other
// pass, we do not need promote COPY pseduo opcode.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// From here on everything is SVR4ABI. COPY will be eliminated in other
// pass, we do not need promote COPY pseduo opcode.
// From here on everything is SVR4ABI. COPY will be eliminated in the other
// pass, we do not need promote the COPY pseudo opcode.

// pass, we do not need promote COPY pseduo opcode.

if (SrcReg != PPC::X3)
// If this is a copy from another register, we recursively promote source.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// If this is a copy from another register, we recursively promote source.
// If this is a copy from another register, we recursively promote the source.

diggerlin and others added 2 commits May 28, 2024 15:48
@diggerlin diggerlin requested review from amy-kwan and kamaub May 28, 2024 20:13
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants