[CGP] Add generic TargetLowering::shouldAlignPointerArgs() implementation

This function was added for ARM targets, but aligning global/stack pointer
arguments passed to memcpy/memmove/memset can improve code size and
performance for all targets that don't have fast unaligned accesses.
This adds a generic implementation that adjusts the alignment to pointer
size if unaligned accesses are slow.
Review D134168 suggests that this significantly improves performance on
synthetic benchmarks such as Dhrystone on RV32 as it avoids memcpy() calls.
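
To illustrate the effect (mirroring the new CodeGenPrepare test added at the end of this diff): on riscv32, where unaligned scalar accesses are not fast, CodeGenPrepare now rewrites

  %dst = alloca [45 x i8], align 1
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 dereferenceable(31) @str, i32 31, i1 false)

into

  %dst = alloca [45 x i8], align 4
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %dst, ptr align 4 dereferenceable(31) @str, i32 31, i1 false)

and raises the alignment of the @str global itself to 4 (8 on riscv64). With aligned arguments the backend can expand small copies using word-sized loads and stores instead of a memcpy() libcall, as the memcpy-inline.ll changes below show.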

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D134282
arichardson committed Feb 9, 2023
1 parent f28c28e commit bd87a24
Showing 10 changed files with 138 additions and 59 deletions.
8 changes: 4 additions & 4 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1932,10 +1932,10 @@ class TargetLoweringBase {
  /// the object whose address is being passed. If so then MinSize is set to the
  /// minimum size the object must be to be aligned and PrefAlign is set to the
  /// preferred alignment.
  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
                                      Align & /*PrefAlign*/) const {
    return false;
  }
  virtual bool
  shouldUpdatePointerArgAlignment(const CallInst *CI, unsigned &MinSize,
                                  Align &PrefAlign,
                                  const TargetTransformInfo &TTI) const;

  //===--------------------------------------------------------------------===//
  /// \name Helpers for TargetTransformInfo implementations
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2221,10 +2221,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
  }

  // Align the pointer arguments to this call if the target thinks it's a good
  // idea
  // idea (generally only useful for memcpy/memmove/memset).
  unsigned MinSize;
  Align PrefAlign;
  if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
  if (TLI->shouldUpdatePointerArgAlignment(CI, MinSize, PrefAlign, *TTI)) {
    for (auto &Arg : CI->args()) {
      // We want to align both objects whose address is used directly and
      // objects whose address is used in casts and GEPs, though it only makes
37 changes: 37 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
@@ -948,6 +949,42 @@ bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
  return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}

bool TargetLoweringBase::shouldUpdatePointerArgAlignment(
    const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
    const TargetTransformInfo &TTI) const {
  // For now, we only adjust alignment for memcpy/memmove/memset calls.
  auto *MemCI = dyn_cast<MemIntrinsic>(CI);
  if (!MemCI)
    return false;
  auto AddrSpace = MemCI->getDestAddressSpace();
  // We assume that scalar register sized values can be loaded/stored
  // efficiently. If this is not the case for a given target it should override
  // this function.
  auto PrefSizeBits =
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
  PrefAlign = Align(PrefSizeBits / 8);
  // When building with -Oz, we only increase the alignment if the object is
  // at least 8 bytes in size to avoid increased stack/global padding.
  // Otherwise, we require at least PrefAlign bytes to be copied.
  MinSize = PrefAlign.value();
  if (CI->getFunction()->hasMinSize())
    MinSize = std::max(MinSize, 8u);

  // XXX: we could determine the MachineMemOperand flags instead of assuming
  // load+store (but it probably makes no difference for supported targets).
  unsigned FastUnalignedAccess = 0;
  if (allowsMisalignedMemoryAccesses(
          LLT::scalar(PrefSizeBits), AddrSpace, Align(1),
          MachineMemOperand::MOStore | MachineMemOperand::MOLoad,
          &FastUnalignedAccess) &&
      FastUnalignedAccess) {
    // If unaligned loads&stores are fast, there is no need to adjust
    // alignment.
    return false;
  }
  return true; // unaligned accesses are not possible or slow.
}

void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
  // If the command-line option was specified, ignore this request.
  if (!JumpIsExpensiveOverride.getNumOccurrences())
5 changes: 3 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1920,8 +1920,9 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                               Align &PrefAlign) const {
bool ARMTargetLowering::shouldUpdatePointerArgAlignment(
    const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
    const TargetTransformInfo &TTI) const {
  if (!isa<MemIntrinsic>(CI))
    return false;
  MinSize = 8;
5 changes: 3 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -572,8 +572,9 @@ class VectorType;
    const TargetRegisterClass *
    getRegClassFor(MVT VT, bool isDivergent = false) const override;

    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                Align &PrefAlign) const override;
    bool shouldUpdatePointerArgAlignment(
        const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
        const TargetTransformInfo &TTI) const override;

    /// createFastISel - This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
61 changes: 23 additions & 38 deletions llvm/test/CodeGen/RISCV/memcpy-inline.ll
@@ -295,50 +295,35 @@ entry:
}

define void @t6() nounwind {
; RV32ALIGNED-LABEL: t6:
; RV32ALIGNED: # %bb.0: # %entry
; RV32ALIGNED-NEXT: addi sp, sp, -16
; RV32ALIGNED-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
; RV32ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
; RV32ALIGNED-NEXT: lui a1, %hi(.L.str6)
; RV32ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
; RV32ALIGNED-NEXT: li a2, 14
; RV32ALIGNED-NEXT: call memcpy@plt
; RV32ALIGNED-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ALIGNED-NEXT: addi sp, sp, 16
; RV32ALIGNED-NEXT: ret
; RV32-LABEL: t6:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a0, %hi(spool.splbuf)
; RV32-NEXT: li a1, 88
; RV32-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
; RV32-NEXT: lui a1, 361862
; RV32-NEXT: addi a1, a1, -1960
; RV32-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
; RV32-NEXT: lui a1, 362199
; RV32-NEXT: addi a1, a1, 559
; RV32-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
; RV32-NEXT: lui a1, 460503
; RV32-NEXT: addi a1, a1, 1071
; RV32-NEXT: sw a1, %lo(spool.splbuf)(a0)
; RV32-NEXT: ret
;
; RV64ALIGNED-LABEL: t6:
; RV64ALIGNED: # %bb.0: # %entry
; RV64ALIGNED-NEXT: addi sp, sp, -16
; RV64ALIGNED-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
; RV64ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
; RV64ALIGNED-NEXT: lui a1, %hi(.L.str6)
; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
; RV64ALIGNED-NEXT: li a2, 14
; RV64ALIGNED-NEXT: call memcpy@plt
; RV64ALIGNED-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ALIGNED-NEXT: addi sp, sp, 16
; RV64ALIGNED-NEXT: li a1, 88
; RV64ALIGNED-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
; RV64ALIGNED-NEXT: lui a1, %hi(.LCPI6_0)
; RV64ALIGNED-NEXT: ld a1, %lo(.LCPI6_0)(a1)
; RV64ALIGNED-NEXT: lui a2, 361862
; RV64ALIGNED-NEXT: addiw a2, a2, -1960
; RV64ALIGNED-NEXT: sw a2, %lo(spool.splbuf+8)(a0)
; RV64ALIGNED-NEXT: sd a1, %lo(spool.splbuf)(a0)
; RV64ALIGNED-NEXT: ret
;
; RV32UNALIGNED-LABEL: t6:
; RV32UNALIGNED: # %bb.0: # %entry
; RV32UNALIGNED-NEXT: lui a0, %hi(spool.splbuf)
; RV32UNALIGNED-NEXT: li a1, 88
; RV32UNALIGNED-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
; RV32UNALIGNED-NEXT: lui a1, 361862
; RV32UNALIGNED-NEXT: addi a1, a1, -1960
; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
; RV32UNALIGNED-NEXT: lui a1, 362199
; RV32UNALIGNED-NEXT: addi a1, a1, 559
; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
; RV32UNALIGNED-NEXT: lui a1, 460503
; RV32UNALIGNED-NEXT: addi a1, a1, 1071
; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf)(a0)
; RV32UNALIGNED-NEXT: ret
;
; RV64UNALIGNED-LABEL: t6:
; RV64UNALIGNED: # %bb.0: # %entry
; RV64UNALIGNED-NEXT: lui a0, %hi(.L.str6)
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/WebAssembly/bulk-memory.ll
@@ -154,7 +154,7 @@ define void @memset_1024(ptr %dest, i8 %val) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
@@ -171,7 +171,7 @@ define void @memcpy_alloca_src(ptr %dst) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
@@ -188,7 +188,7 @@ define void @memcpy_alloca_dst(ptr %src) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
@@ -157,7 +157,7 @@ define void @memset_1024(ptr %dest, i8 %val) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
@@ -174,7 +174,7 @@ define void @memcpy_alloca_src(ptr %dst) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
@@ -191,7 +191,7 @@ define void @memcpy_alloca_dst(ptr %src) {
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
@@ -134,7 +134,7 @@ define i64 @test_return_i2(i64 %i.coerce) {
; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 8), (load (s8) from %ir.2, align 8)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4)
; ALL: $rax = COPY [[LOAD]](s64)
; ALL: RET 0, implicit $rax
@@ -166,9 +166,9 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1)
; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 8), (load (s8) from %ir.3, align 8)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 8), (load (s8) from %ir.5, align 8)
; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 8)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
@@ -210,7 +210,7 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
@@ -0,0 +1,55 @@
; RUN: opt -mtriple=riscv32 -data-layout="e-m:e-p:32:32" -S -codegenprepare < %s \
; RUN: | FileCheck %s '-D#NEW_ALIGNMENT=4'
; RUN: opt -mtriple=riscv64 -data-layout="e-m:e-p:64:64" -S -codegenprepare < %s \
; RUN: | FileCheck %s '-D#NEW_ALIGNMENT=8'

@str = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align 1


declare void @use(ptr %arg)


; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align [[#NEW_ALIGNMENT]]

define void @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DST:%.*]] = alloca [45 x i8], align [[#NEW_ALIGNMENT]]
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align [[#NEW_ALIGNMENT]] [[DST]], ptr align [[#NEW_ALIGNMENT]] dereferenceable(31) @str, i32 31, i1 false)
; CHECK-NEXT: ret void

entry:
  %dst = alloca [45 x i8], align 1
  tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr align 1 dereferenceable(31) @str, i32 31, i1 false)
  ret void
}

; negative test - check that we don't align objects that are too small
define void @no_align(ptr %src) {
; CHECK-LABEL: @no_align(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DST:%.*]] = alloca [3 x i8], align 1
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[SRC:%.*]], i32 31, i1 false)
; CHECK-NEXT: ret void
;
entry:
  %dst = alloca [3 x i8], align 1
  tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr %src, i32 31, i1 false)
  ret void
}

; negative test - check that minsize requires at least 8 byte object size
define void @no_align_minsize(ptr %src) minsize {
; CHECK-LABEL: @no_align_minsize(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DST:%.*]] = alloca [7 x i8], align 1
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[SRC:%.*]], i32 31, i1 false)
; CHECK-NEXT: ret void
;
entry:
  %dst = alloca [7 x i8], align 1
  tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr %src, i32 31, i1 false)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
