Skip to content

Commit

Permalink
[InstCombine] Fold memcmp of constant arrays and variable size
Browse files Browse the repository at this point in the history
The memcmp simplifier can currently fold to a constant only those calls whose
arrays and sizes are all constant.  This change adds the ability to simplify
memcmp(A, B, N) calls with constant A and B and variable N to the pseudocode
equivalent of

N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)

where Pos is the offset of the first mismatch between A and B.

Differential Revision: https://reviews.llvm.org/D127766
  • Loading branch information
msebor committed Jun 17, 2022
1 parent fc6b228 commit 5fb67e3
Show file tree
Hide file tree
Showing 3 changed files with 400 additions and 27 deletions.
72 changes: 45 additions & 27 deletions llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
Expand Up @@ -1142,6 +1142,45 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
CI->getType());
}

// Optimize a memcmp call CI with constant arrays LHS and RHS and nonconstant
// Size.
//
// Returns:
//   - a zero constant of CI's type when LHS and RHS are the same value,
//   - nullptr when the contents of either array cannot be determined,
//   - otherwise either a zero constant (no mismatch within the shorter
//     array) or a select on Size, as described in the body.
// DL is not used by this function at present.
static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
                                    Value *Size, IRBuilderBase &B,
                                    const DataLayout &DL) {
  if (LHS == RHS) // memcmp(s,s,x) -> 0
    return Constant::getNullValue(CI->getType());

  // Request the untrimmed contents of both arrays (TrimAtNul is false).
  StringRef LStr, RStr;
  if (!getConstantStringInfo(LHS, LStr, 0, /*TrimAtNul=*/false) ||
      !getConstantStringInfo(RHS, RStr, 0, /*TrimAtNul=*/false))
    return nullptr;

  // If the contents of both constant arrays are known, fold a call to
  // memcmp(A, B, N) to
  //   N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
  // where Pos is the first mismatch between A and B, determined below.

  Value *Zero = ConstantInt::get(CI->getType(), 0);

  const uint64_t MinSize = std::min(LStr.size(), RStr.size());
  for (uint64_t Pos = 0; Pos < MinSize; ++Pos) {
    if (LStr[Pos] == RStr[Pos])
      continue;

    // The bytes are compared as unsigned char and the result is normalized
    // to -1 or +1.
    using UChar = unsigned char;
    const int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;

    Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
    Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
    // IRes may be negative, so create it as a signed constant rather than
    // relying on truncation of a zero-extended 64-bit value.
    Value *Res = ConstantInt::getSigned(CI->getType(), IRes);
    return B.CreateSelect(Cmp, Zero, Res);
  }

  // One array is a leading part of the other of equal or greater size.
  // Fold the result to zero.  Size is assumed to be in bounds, since
  // otherwise the call would be undefined.
  return Zero;
}

// Optimize a memcmp call CI with constant size Len.
static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
uint64_t Len, IRBuilderBase &B,
const DataLayout &DL) {
Expand Down Expand Up @@ -1196,25 +1235,6 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
}
}

// Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
// TODO: This is limited to i8 arrays.
StringRef LHSStr, RHSStr;
if (getConstantStringInfo(LHS, LHSStr) &&
getConstantStringInfo(RHS, RHSStr)) {
// Make sure we're not reading out-of-bounds memory.
if (Len > LHSStr.size() || Len > RHSStr.size())
return nullptr;
// Fold the memcmp and normalize the result. This way we get consistent
// results across multiple platforms.
uint64_t Ret = 0;
int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
if (Cmp < 0)
Ret = -1;
else if (Cmp > 0)
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}

return nullptr;
}

Expand All @@ -1224,19 +1244,17 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);

if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());

annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
// Handle constant lengths.

if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, B, DL))
return Res;

// Handle constant Size.
ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
if (!LenC)
return nullptr;

if (Value *Res =
optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL))
return Res;
return nullptr;
return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
}

Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
Expand Down
249 changes: 249 additions & 0 deletions llvm/test/Transforms/InstCombine/memcmp-5.ll
@@ -0,0 +1,249 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
;
; Exercise folding of memcmp calls with constant arrays and nonconstant
; sizes.

declare i32 @memcmp(i8*, i8*, i64)

@ax = external constant [8 x i8]
@a01230123 = constant [8 x i8] c"01230123"
@b01230123 = constant [8 x i8] c"01230123"
@c01230129 = constant [8 x i8] c"01230129"
@d9123012 = constant [7 x i8] c"9123012"


; Exercise memcmp(A, B, N) folding of arrays with the same bytes.
; Both @a01230123 and @b01230123 are initialized to "01230123".  Each call
; below compares a against b at an offset from 0 through 5 using the
; nonconstant size %n, and stores the result in the %pcmp element whose
; index equals the offset.

define void @fold_memcmp_a_b_n(i32* %pcmp, i64 %n) {
; CHECK-LABEL: @fold_memcmp_a_b_n(
; CHECK-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32
; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[S0_1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32
; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2
; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3
; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_3]], align 4
; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4
; CHECK-NEXT: store i32 0, i32* [[S0_4]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32
; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5
; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_5]], align 4
; CHECK-NEXT: ret void
;

%p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0

%q0 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 0
%q1 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 1
%q2 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 2
%q3 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 3
%q4 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 4
%q5 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 5

; Fold memcmp(a, b, n) to 0: the two arrays hold identical bytes.
%c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n)
%s0_0 = getelementptr i32, i32* %pcmp, i64 0
store i32 %c0_0, i32* %s0_0

; Fold memcmp(a, b + 1, n) to N != 0 ? -1 : 0: the first bytes already
; differ ('0' in a is less than '1' at b + 1).
%c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n)
%s0_1 = getelementptr i32, i32* %pcmp, i64 1
store i32 %c0_1, i32* %s0_1

; Fold memcmp(a, b + 2, n) to N != 0 ? -1 : 0 ('0' is less than '2').
%c0_2 = call i32 @memcmp(i8* %p0, i8* %q2, i64 %n)
%s0_2 = getelementptr i32, i32* %pcmp, i64 2
store i32 %c0_2, i32* %s0_2

; Fold memcmp(a, b + 3, n) to N != 0 ? -1 : 0 ('0' is less than '3').
%c0_3 = call i32 @memcmp(i8* %p0, i8* %q3, i64 %n)
%s0_3 = getelementptr i32, i32* %pcmp, i64 3
store i32 %c0_3, i32* %s0_3

; Fold memcmp(a, b + 4, n) to 0: the four bytes at b + 4 ("0123") equal
; the leading four bytes of a, so there is no mismatch within bounds.
%c0_4 = call i32 @memcmp(i8* %p0, i8* %q4, i64 %n)
%s0_4 = getelementptr i32, i32* %pcmp, i64 4
store i32 %c0_4, i32* %s0_4

; Fold memcmp(a, b + 5, n) to N != 0 ? -1 : 0 ('0' is less than '1').
%c0_5 = call i32 @memcmp(i8* %p0, i8* %q5, i64 %n)
%s0_5 = getelementptr i32, i32* %pcmp, i64 5
store i32 %c0_5, i32* %s0_5

ret void
}

; Verify that a memcmp() call involving a constant array with unknown
; contents is not folded.  @ax is declared external with no initializer,
; so its bytes are not available to the simplifier.

define void @call_memcmp_a_ax_n(i32* %pcmp, i64 %n) {
; CHECK-LABEL: @call_memcmp_a_ax_n(
; CHECK-NEXT: [[C0_0:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @a01230123, i64 0, i64 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @ax, i64 0, i64 0), i64 [[N:%.*]])
; CHECK-NEXT: store i32 [[C0_0]], i32* [[PCMP:%.*]], align 4
; CHECK-NEXT: ret void
;

%p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0
%q0 = getelementptr [8 x i8], [8 x i8]* @ax, i64 0, i64 0

; Do not fold memcmp(a, ax, n): the call must remain in the output.
%c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n)
%s0_0 = getelementptr i32, i32* %pcmp, i64 0
store i32 %c0_0, i32* %s0_0

ret void
}


; Exercise memcmp(A, C, N) folding of arrays with the same leading bytes
; but a difference in the trailing byte.
; @a01230123 holds "01230123" and @c01230129 holds "01230129".  Each call
; below compares a against c at an offset from 0 through 5 using the
; nonconstant size %n, and stores the result in the %pcmp element whose
; index equals the offset.

define void @fold_memcmp_a_c_n(i32* %pcmp, i64 %n) {
; CHECK-LABEL: @fold_memcmp_a_c_n(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N:%.*]], 7
; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32
; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32
; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2
; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_2]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32
; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3
; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_3]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 3
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4
; CHECK-NEXT: store i32 [[TMP10]], i32* [[S0_4]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32
; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5
; CHECK-NEXT: store i32 [[TMP12]], i32* [[S0_5]], align 4
; CHECK-NEXT: ret void
;

%p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0

%q0 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 0
%q1 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 1
%q2 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 2
%q3 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 3
%q4 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 4
%q5 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 5

; Fold memcmp(a, c, n) to N > 7 ? -1 : 0: the first mismatch is at
; offset 7 ('3' in a is less than '9' in c).
%c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n)
%s0_0 = getelementptr i32, i32* %pcmp, i64 0
store i32 %c0_0, i32* %s0_0

; Fold memcmp(a, c + 1, n) to N != 0 ? -1 : 0 ('0' is less than '1').
%c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n)
%s0_1 = getelementptr i32, i32* %pcmp, i64 1
store i32 %c0_1, i32* %s0_1

; Fold memcmp(a, c + 2, n) to N != 0 ? -1 : 0 ('0' is less than '2').
%c0_2 = call i32 @memcmp(i8* %p0, i8* %q2, i64 %n)
%s0_2 = getelementptr i32, i32* %pcmp, i64 2
store i32 %c0_2, i32* %s0_2

; Fold memcmp(a, c + 3, n) to N != 0 ? -1 : 0 ('0' is less than '3').
%c0_3 = call i32 @memcmp(i8* %p0, i8* %q3, i64 %n)
%s0_3 = getelementptr i32, i32* %pcmp, i64 3
store i32 %c0_3, i32* %s0_3

; Fold memcmp(a, c + 4, n) to N > 3 ? -1 : 0: "0123" and "0129" first
; differ at offset 3.
%c0_4 = call i32 @memcmp(i8* %p0, i8* %q4, i64 %n)
%s0_4 = getelementptr i32, i32* %pcmp, i64 4
store i32 %c0_4, i32* %s0_4

; Fold memcmp(a, c + 5, n) to N != 0 ? -1 : 0 ('0' is less than '1').
%c0_5 = call i32 @memcmp(i8* %p0, i8* %q5, i64 %n)
%s0_5 = getelementptr i32, i32* %pcmp, i64 5
store i32 %c0_5, i32* %s0_5

ret void
}


; Exercise memcmp(A, D, N) folding of arrays of different sizes and
; a difference in the leading byte.
; @a01230123 holds the 8 bytes "01230123" and @d9123012 holds the 7 bytes
; "9123012".  The four results are stored in %pcmp elements 0 through 3.

define void @fold_memcmp_a_d_n(i32* %pcmp, i64 %n) {
; CHECK-LABEL: @fold_memcmp_a_d_n(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32
; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32
; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4
; CHECK-NEXT: [[S1_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2
; CHECK-NEXT: store i32 0, i32* [[S1_1]], align 4
; CHECK-NEXT: [[S6_6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3
; CHECK-NEXT: store i32 0, i32* [[S6_6]], align 4
; CHECK-NEXT: ret void
;

%p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0
%p1 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 1
%p6 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 6

%q0 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 0
%q1 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 1
%q6 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 6

; Fold memcmp(a, d, n) to N != 0 ? -1 : 0 ('0' is less than '9').
%c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n)
%s0_0 = getelementptr i32, i32* %pcmp, i64 0
store i32 %c0_0, i32* %s0_0

; Fold memcmp(a, d + 1, n) to N != 0 ? -1 : 0 ('0' is less than '1').
%c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n)
%s0_1 = getelementptr i32, i32* %pcmp, i64 1
store i32 %c0_1, i32* %s0_1

; Fold memcmp(a + 1, d + 1, n) to 0: the six bytes at d + 1 ("123012")
; equal the leading six bytes at a + 1.
%c1_1 = call i32 @memcmp(i8* %p1, i8* %q1, i64 %n)
%s1_1 = getelementptr i32, i32* %pcmp, i64 2
store i32 %c1_1, i32* %s1_1

; Fold memcmp(a + 6, d + 6, n) to 0: the single byte at d + 6 ('2')
; equals the byte at a + 6.
%c6_6 = call i32 @memcmp(i8* %p6, i8* %q6, i64 %n)
%s6_6 = getelementptr i32, i32* %pcmp, i64 3
store i32 %c6_6, i32* %s6_6

ret void
}


; Exercise memcmp(A, D, N) folding of arrays that differ in the leading
; byte and a nonzero size.  With the size known to be nonzero the result
; is expected to fold all the way to the constant -1.

define void @fold_memcmp_a_d_nz(i32* %pcmp, i64 %n) {
; CHECK-LABEL: @fold_memcmp_a_d_nz(
; CHECK-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4
; CHECK-NEXT: ret void
;

%p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0
%q0 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 0
; Setting the low bit makes %nz nonzero for every value of %n.
%nz = or i64 %n, 1

; Fold memcmp(a, d, nz) to -1 ('0' in a is less than '9' in d).
%c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %nz)
%s0_0 = getelementptr i32, i32* %pcmp, i64 0
store i32 %c0_0, i32* %s0_0

ret void
}

0 comments on commit 5fb67e3

Please sign in to comment.