Skip to content

Commit

Permalink
[PowerPC] Mask constant operands in ValueBit tracking (#67653)
Browse files Browse the repository at this point in the history
In IR or C code, a shift amount equal to or larger than the value size
is undefined behavior. In practice, however, backend lowering for
shift_parts produces add/sub of shift amounts, so constant shift amounts
may be negative or larger than the value size; the result then depends
on the ISA definition.

The PowerPC ISA specifies that only the lowest 7 bits of the shift
amount (6 bits for 32-bit instructions) are used: if the highest of
those bits is 1, the result is zero; otherwise the low 6 bits (or 5 for
32-bit instructions) are used as the shift amount.

This commit emulates that behavior and avoids an out-of-bounds array
access in the bit permutation selector's value-bits calculation.

(cherry picked from commit 292d9e8)
  • Loading branch information
ecnelises authored and tstellar committed Feb 20, 2024
1 parent 12114d2 commit 3b4b047
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 13 deletions.
37 changes: 24 additions & 13 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,8 @@ class BitPermutationSelector {
default: break;
case ISD::ROTL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned RotAmt = V.getConstantOperandVal(1);
assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

Expand All @@ -1648,31 +1649,41 @@ class BitPermutationSelector {
case ISD::SHL:
case PPCISD::SHL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
// sld takes 7 bits, slw takes 6.
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

for (unsigned i = ShiftAmt; i < NumBits; ++i)
Bits[i] = LHSBits[i - ShiftAmt];

for (unsigned i = 0; i < ShiftAmt; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
if (ShiftAmt >= NumBits) {
for (unsigned i = 0; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
} else {
for (unsigned i = ShiftAmt; i < NumBits; ++i)
Bits[i] = LHSBits[i - ShiftAmt];
for (unsigned i = 0; i < ShiftAmt; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
}

return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SRL:
case PPCISD::SRL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
// srd takes lowest 7 bits, srw takes 6.
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);

const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
Bits[i] = LHSBits[i + ShiftAmt];

for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
if (ShiftAmt >= NumBits) {
for (unsigned i = 0; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
} else {
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
Bits[i] = LHSBits[i + ShiftAmt];
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
}

return std::make_pair(Interesting = true, &Bits);
}
Expand Down
132 changes: 132 additions & 0 deletions llvm/test/CodeGen/PowerPC/pr59074.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64
; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32
; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64
; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32

; Regression test for PR59074: verify that llc does not crash with an
; out-of-bounds array access while compiling this function.
define void @pr59074(ptr %0) {
; LE64-LABEL: pr59074:
; LE64: # %bb.0: # %entry
; LE64-NEXT: lwz 6, 0(3)
; LE64-NEXT: li 7, 12
; LE64-NEXT: ld 4, 16(3)
; LE64-NEXT: ld 5, 24(3)
; LE64-NEXT: addi 6, 6, -12
; LE64-NEXT: std 4, 16(3)
; LE64-NEXT: std 5, 24(3)
; LE64-NEXT: srd 6, 7, 6
; LE64-NEXT: li 7, 0
; LE64-NEXT: std 7, 8(3)
; LE64-NEXT: std 6, 0(3)
; LE64-NEXT: blr
;
; LE32-LABEL: pr59074:
; LE32: # %bb.0: # %entry
; LE32-NEXT: stwu 1, -80(1)
; LE32-NEXT: .cfi_def_cfa_offset 80
; LE32-NEXT: lwz 4, 0(3)
; LE32-NEXT: xxlxor 0, 0, 0
; LE32-NEXT: li 5, 4
; LE32-NEXT: addi 6, 1, 16
; LE32-NEXT: li 7, 0
; LE32-NEXT: li 8, 12
; LE32-NEXT: xxswapd 0, 0
; LE32-NEXT: addi 4, 4, -12
; LE32-NEXT: rlwinm 9, 4, 29, 28, 31
; LE32-NEXT: stxvd2x 0, 6, 5
; LE32-NEXT: stw 7, 44(1)
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
; LE32-NEXT: lwzux 5, 9, 6
; LE32-NEXT: li 6, 7
; LE32-NEXT: lwz 7, 8(9)
; LE32-NEXT: nand 6, 4, 6
; LE32-NEXT: lwz 8, 4(9)
; LE32-NEXT: clrlwi 4, 4, 29
; LE32-NEXT: lwz 9, 12(9)
; LE32-NEXT: clrlwi 6, 6, 27
; LE32-NEXT: subfic 11, 4, 32
; LE32-NEXT: srw 5, 5, 4
; LE32-NEXT: slwi 10, 7, 1
; LE32-NEXT: srw 7, 7, 4
; LE32-NEXT: slw 6, 10, 6
; LE32-NEXT: srw 10, 8, 4
; LE32-NEXT: slw 8, 8, 11
; LE32-NEXT: slw 11, 9, 11
; LE32-NEXT: srw 4, 9, 4
; LE32-NEXT: or 5, 8, 5
; LE32-NEXT: or 7, 11, 7
; LE32-NEXT: or 6, 10, 6
; LE32-NEXT: stw 4, 12(3)
; LE32-NEXT: stw 7, 8(3)
; LE32-NEXT: stw 5, 0(3)
; LE32-NEXT: stw 6, 4(3)
; LE32-NEXT: addi 1, 1, 80
; LE32-NEXT: blr
;
; BE64-LABEL: pr59074:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lwz 6, 12(3)
; BE64-NEXT: li 7, 12
; BE64-NEXT: ld 4, 24(3)
; BE64-NEXT: ld 5, 16(3)
; BE64-NEXT: addi 6, 6, -12
; BE64-NEXT: std 4, 24(3)
; BE64-NEXT: std 5, 16(3)
; BE64-NEXT: srd 6, 7, 6
; BE64-NEXT: li 7, 0
; BE64-NEXT: std 7, 0(3)
; BE64-NEXT: std 6, 8(3)
; BE64-NEXT: blr
;
; BE32-LABEL: pr59074:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lwz 4, 12(3)
; BE32-NEXT: xxlxor 0, 0, 0
; BE32-NEXT: addi 5, 1, -64
; BE32-NEXT: li 6, 12
; BE32-NEXT: li 7, 0
; BE32-NEXT: addi 8, 1, -48
; BE32-NEXT: li 10, 7
; BE32-NEXT: stxvw4x 0, 0, 5
; BE32-NEXT: addi 4, 4, -12
; BE32-NEXT: stw 6, -36(1)
; BE32-NEXT: stw 7, -40(1)
; BE32-NEXT: stw 7, -44(1)
; BE32-NEXT: rlwinm 9, 4, 29, 28, 31
; BE32-NEXT: stw 7, -48(1)
; BE32-NEXT: sub 5, 8, 9
; BE32-NEXT: nand 6, 4, 10
; BE32-NEXT: clrlwi 4, 4, 29
; BE32-NEXT: clrlwi 6, 6, 27
; BE32-NEXT: lwz 7, 4(5)
; BE32-NEXT: lwz 8, 8(5)
; BE32-NEXT: lwz 9, 0(5)
; BE32-NEXT: lwz 5, 12(5)
; BE32-NEXT: slwi 10, 7, 1
; BE32-NEXT: srw 11, 8, 4
; BE32-NEXT: srw 7, 7, 4
; BE32-NEXT: srw 5, 5, 4
; BE32-NEXT: slw 6, 10, 6
; BE32-NEXT: subfic 10, 4, 32
; BE32-NEXT: srw 4, 9, 4
; BE32-NEXT: slw 8, 8, 10
; BE32-NEXT: slw 10, 9, 10
; BE32-NEXT: or 6, 11, 6
; BE32-NEXT: or 7, 10, 7
; BE32-NEXT: or 5, 8, 5
; BE32-NEXT: stw 4, 0(3)
; BE32-NEXT: stw 6, 8(3)
; BE32-NEXT: stw 5, 12(3)
; BE32-NEXT: stw 7, 4(3)
; BE32-NEXT: blr
entry:
; Load both i128 lanes from memory.
%v1 = load <2 x i128>, <2 x i128>* %0
; Overwrite lane 0 with the constant 12; lane 1 keeps its loaded value.
%v2 = insertelement <2 x i128> %v1, i128 12, i32 0
; Lane 0 becomes (loaded value - 12); lane 1 becomes zero (x - x).
%v3 = sub <2 x i128> %v1, %v2
; Lane 0 shifts the constant 12 right by a run-time amount that is not known
; to be in range; lane 1 is shifted by zero.
%v4 = lshr <2 x i128> %v2, %v3
; Write the shifted vector back over the input.
store <2 x i128> %v4, <2 x i128>* %0
ret void
}

0 comments on commit 3b4b047

Please sign in to comment.