-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][SME] Lower memchr to __arm_sc_memchr in streaming[-compatible] functions #168896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
[AArch64][SME] Lower memchr to __arm_sc_memchr in streaming[-compatible] functions. This allows us to avoid some streaming-mode switches.
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-aarch64 — Author: Benjamin Maxwell (MacDue). Changes: This allows us to avoid some streaming-mode switches. Full diff: https://github.com/llvm/llvm-project/pull/168896.diff — 4 files affected:
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index ce7e836f66446..71e0edf03a16d 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -315,6 +315,7 @@ def MEMCMP : RuntimeLibcall;
def MEMCPY : RuntimeLibcall;
def MEMMOVE : RuntimeLibcall;
def MEMSET : RuntimeLibcall;
+def MEMCHR : RuntimeLibcall;
def CALLOC : RuntimeLibcall;
def BZERO : RuntimeLibcall;
def STRLEN : RuntimeLibcall;
@@ -997,6 +998,7 @@ def fesetmode : RuntimeLibcallImpl<FESETMODE>;
def memcpy : RuntimeLibcallImpl<MEMCPY>;
def memmove : RuntimeLibcallImpl<MEMMOVE>;
def memset : RuntimeLibcallImpl<MEMSET>;
+def memchr : RuntimeLibcallImpl<MEMCHR>;
// DSEPass can emit calloc if it finds a pair of malloc/memset
def calloc : RuntimeLibcallImpl<CALLOC>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 48e03ad853d26..38c7a3d55f856 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -156,29 +156,35 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(unsigned Opcode, SelectionDAG &DAG,
}
SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall(
- SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op0, SDValue Op1,
SDValue Size, RTLIB::Libcall LC) const {
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const AArch64TargetLowering *TLI = STI.getTargetLowering();
TargetLowering::ArgListTy Args;
- Args.emplace_back(Dst, PointerType::getUnqual(*DAG.getContext()));
+ Args.emplace_back(Op0, PointerType::getUnqual(*DAG.getContext()));
RTLIB::Libcall NewLC;
switch (LC) {
case RTLIB::MEMCPY: {
NewLC = RTLIB::SC_MEMCPY;
- Args.emplace_back(Src, PointerType::getUnqual(*DAG.getContext()));
+ Args.emplace_back(Op1, PointerType::getUnqual(*DAG.getContext()));
break;
}
case RTLIB::MEMMOVE: {
NewLC = RTLIB::SC_MEMMOVE;
- Args.emplace_back(Src, PointerType::getUnqual(*DAG.getContext()));
+ Args.emplace_back(Op1, PointerType::getUnqual(*DAG.getContext()));
break;
}
case RTLIB::MEMSET: {
NewLC = RTLIB::SC_MEMSET;
- Args.emplace_back(DAG.getZExtOrTrunc(Src, DL, MVT::i32),
+ Args.emplace_back(DAG.getZExtOrTrunc(Op1, DL, MVT::i32),
+ Type::getInt32Ty(*DAG.getContext()));
+ break;
+ }
+ case RTLIB::MEMCHR: {
+ NewLC = RTLIB::SC_MEMCHR;
+ Args.emplace_back(DAG.getZExtOrTrunc(Op1, DL, MVT::i32),
Type::getInt32Ty(*DAG.getContext()));
break;
}
@@ -194,7 +200,11 @@ SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall(
PointerType *RetTy = PointerType::getUnqual(*DAG.getContext());
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
TLI->getLibcallCallingConv(NewLC), RetTy, Symbol, std::move(Args));
- return TLI->LowerCallTo(CLI).second;
+
+ auto [Result, ChainOut] = TLI->LowerCallTo(CLI);
+ if (LC == RTLIB::MEMCHR)
+ return DAG.getMergeValues({Result, ChainOut}, DL);
+ return ChainOut;
}
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -255,6 +265,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
return SDValue();
}
+std::pair<SDValue, SDValue> AArch64SelectionDAGInfo::EmitTargetCodeForMemchr(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Src,
+ SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const {
+ auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+ SMEAttrs Attrs = AFI->getSMEFnAttrs();
+ if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) {
+ SDValue Result = EmitStreamingCompatibleMemLibCall(
+ DAG, dl, Chain, Src, Char, Length, RTLIB::MEMCHR);
+ return std::make_pair(Result.getValue(0), Result.getValue(1));
+ }
+ return std::make_pair(SDValue(), SDValue());
+}
+
static const int kSetTagLoopThreshold = 176;
static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 42c2797ebdd17..656a58c1dc1bf 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -53,14 +53,19 @@ class AArch64SelectionDAGInfo : public SelectionDAGGenTargetInfo {
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
+ std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const override;
+
SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
MachinePointerInfo DstPtrInfo,
bool ZeroData) const override;
SDValue EmitStreamingCompatibleMemLibCall(SelectionDAG &DAG, const SDLoc &DL,
- SDValue Chain, SDValue Dst,
- SDValue Src, SDValue Size,
+ SDValue Chain, SDValue Op0,
+ SDValue Op1, SDValue Size,
RTLIB::Libcall LC) const;
};
} // namespace llvm
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index 9c66b38c46973..fc4ae272046a0 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -153,6 +153,160 @@ entry:
ret void
}
+define ptr @se_memchr(ptr %src, i64 %n) "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: se_memchr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x1
+; CHECK-NEXT: mov w1, #5 // =0x5
+; CHECK-NEXT: bl __arm_sc_memchr
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-NO-SME-ROUTINES-LABEL: se_memchr:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NO-SME-ROUTINES-NEXT: cntd x9
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: str x9, [sp, #80] // 8-byte Spill
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset vg, -16
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w30, -24
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w29, -32
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b8, -40
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b9, -48
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b10, -56
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b11, -64
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b12, -72
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b13, -80
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b14, -88
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b15, -96
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x1
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #5 // =0x5
+; CHECK-NO-SME-ROUTINES-NEXT: bl memchr
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore vg
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w30
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w29
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b8
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b9
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b10
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b11
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b12
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b13
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b14
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b15
+; CHECK-NO-SME-ROUTINES-NEXT: ret
+;
+; CHECK-MOPS-LABEL: se_memchr:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 16
+; CHECK-MOPS-NEXT: .cfi_offset w30, -16
+; CHECK-MOPS-NEXT: mov x2, x1
+; CHECK-MOPS-NEXT: mov w1, #5 // =0x5
+; CHECK-MOPS-NEXT: bl __arm_sc_memchr
+; CHECK-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-MOPS-NEXT: ret
+entry:
+ %res = tail call ptr @memchr(ptr %src, i32 5, i64 %n)
+ ret ptr %res
+}
+
+define ptr @sc_memchr(ptr %src, i64 %n) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: sc_memchr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x2, x1
+; CHECK-NEXT: mov w1, #5 // =0x5
+; CHECK-NEXT: bl __arm_sc_memchr
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; CHECK-NO-SME-ROUTINES-LABEL: sc_memchr:
+; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NO-SME-ROUTINES-NEXT: cntd x9
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w19, -8
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset vg, -16
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w30, -24
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset w29, -32
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b8, -40
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b9, -48
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b10, -56
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b11, -64
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b12, -72
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b13, -80
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b14, -88
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_offset b15, -96
+; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x1
+; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB4_2
+; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB4_2: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #5 // =0x5
+; CHECK-NO-SME-ROUTINES-NEXT: bl memchr
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB4_4
+; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB4_4: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldr x19, [sp, #88] // 8-byte Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w19
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore vg
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w30
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore w29
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b8
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b9
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b10
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b11
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b12
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b13
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b14
+; CHECK-NO-SME-ROUTINES-NEXT: .cfi_restore b15
+; CHECK-NO-SME-ROUTINES-NEXT: ret
+;
+; CHECK-MOPS-LABEL: sc_memchr:
+; CHECK-MOPS: // %bb.0: // %entry
+; CHECK-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 16
+; CHECK-MOPS-NEXT: .cfi_offset w30, -16
+; CHECK-MOPS-NEXT: mov x2, x1
+; CHECK-MOPS-NEXT: mov w1, #5 // =0x5
+; CHECK-MOPS-NEXT: bl __arm_sc_memchr
+; CHECK-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-MOPS-NEXT: ret
+entry:
+ %res = tail call ptr @memchr(ptr %src, i32 5, i64 %n)
+ ret ptr %res
+}
+
define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: sc_memcpy:
; CHECK: // %bb.0: // %entry
@@ -179,15 +333,15 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR
; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
-; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry
; CHECK-NO-SME-ROUTINES-NEXT: smstop sm
-; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_2: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB5_2: // %entry
; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy
-; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_4
+; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry
; CHECK-NO-SME-ROUTINES-NEXT: smstart sm
-; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_4: // %entry
+; CHECK-NO-SME-ROUTINES-NEXT: .LBB5_4: // %entry
; CHECK-NO-SME-ROUTINES-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -283,3 +437,4 @@ entry:
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+declare ptr @memchr(ptr, i32, i64)
|
🐧 Linux x64 Test Results
|
> Before:
>     TLI->getLibcallCallingConv(NewLC), RetTy, Symbol, std::move(Args));
>     return TLI->LowerCallTo(CLI).second;
>
> After:
>     auto [Result, ChainOut] = TLI->LowerCallTo(CLI);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this mean the return value of memset, memmove, etc are discarded?
I know that the returned pointers are just the destination and are almost certainly ignored at the programmer's level, but if there's some extra-paranoid code out there which verifies the return value what happens? Is it handled elsewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's right. However, the hooks for the calls other than memchr are only used to lower the intrinsics, which return void. The memchr hook is used to replace the standard libcall (and there's no memchr intrinsic).
This allows us to avoid some streaming-mode switches.