From 569f60b57e904083c7563f7b4636963a54ec2017 Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Mon, 18 Aug 2025 09:28:29 -0700 Subject: [PATCH 1/2] [RISCV] Unaligned vec mem => prefer alt opc vec Return `true` in `RISCVTTIImpl::preferAlternateOpcodeVectorization` if subtarget supports unaligned memory accesses. --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 4 ++++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index c707fb110b10c..5e300182657d5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2731,6 +2731,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const { return RVVMinTripCount; } +bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const { + return ST->enableUnalignedVectorMem(); +} + TTI::AddressingModeKind RISCVTTIImpl::getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 3236b2a35c853..06fd8bb38e20a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -132,7 +132,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase { unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override; - bool preferAlternateOpcodeVectorization() const override { return false; } + bool preferAlternateOpcodeVectorization() const override; bool preferEpilogueVectorization() const override { // Epilogue vectorization is usually unprofitable - tail folding or From efdd5b55317df43fb3f346c476c5d2bca4829a82 Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Fri, 29 Aug 2025 14:00:48 -0700 Subject: [PATCH 2/2] updated checks in complex-loads.ll correctly. --- .../SLPVectorizer/RISCV/complex-loads.ll | 511 ++++++++++++++---- 1 file changed, 395 insertions(+), 116 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 027968296d003..5ebe44206c702 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -275,135 +275,414 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; UNALIGNED_VEC_MEM-LABEL: define i32 @test( ; UNALIGNED_VEC_MEM-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] { ; UNALIGNED_VEC_MEM-NEXT: entry: +; UNALIGNED_VEC_MEM-NEXT: [[TMP54:%.*]] = load i8, ptr [[PIX1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV:%.*]] = zext i8 [[TMP54]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[TMP58:%.*]] = load i8, ptr [[PIX2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV2:%.*]] = zext i8 [[TMP58]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB:%.*]] = sub i32 [[CONV]], [[CONV2]] ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i32 ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV6:%.*]] = zext i8 [[TMP3]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB7:%.*]] = sub i32 [[CONV4]], [[CONV6]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL:%.*]] = shl i32 [[SUB7]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[SUB]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV9:%.*]] = zext i8 [[TMP4]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX10:%.*]] = getelementptr i8, ptr [[PIX2]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV11:%.*]] = zext i8 [[TMP5]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB12:%.*]] = sub i32 [[CONV9]], [[CONV11]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i8, ptr [[PIX1]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX13]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV14:%.*]] = zext i8 [[TMP6]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX15:%.*]] = getelementptr i8, ptr [[PIX2]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV16:%.*]] = zext i8 [[TMP7]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB17:%.*]] = sub i32 [[CONV14]], [[CONV16]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL18:%.*]] = shl i32 [[SUB17]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD19:%.*]] = add i32 [[SHL18]], [[SUB12]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX20:%.*]] = getelementptr i8, ptr [[PIX1]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX20]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV21:%.*]] = zext i8 [[TMP8]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i8, ptr [[PIX2]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX22]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV23:%.*]] = zext i8 [[TMP9]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB24:%.*]] = sub i32 [[CONV21]], [[CONV23]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX25:%.*]] = getelementptr i8, ptr [[PIX1]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX25]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV26:%.*]] = zext i8 [[TMP10]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX27:%.*]] = getelementptr i8, ptr [[PIX2]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX27]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV28:%.*]] = zext i8 [[TMP11]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB29:%.*]] = sub i32 [[CONV26]], [[CONV28]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL30:%.*]] = shl i32 [[SUB29]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD31:%.*]] = add i32 [[SHL30]], [[SUB24]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV33:%.*]] = zext i8 [[TMP12]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX34:%.*]] = getelementptr i8, ptr [[PIX2]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX34]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV35:%.*]] = zext i8 [[TMP13]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB36:%.*]] = sub i32 [[CONV33]], [[CONV35]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX37:%.*]] = getelementptr i8, ptr [[PIX1]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX37]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV38:%.*]] = zext i8 [[TMP14]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX39:%.*]] = getelementptr i8, ptr [[PIX2]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX39]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV40:%.*]] = zext i8 [[TMP15]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB41:%.*]] = sub i32 [[CONV38]], [[CONV40]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL42:%.*]] = shl i32 [[SUB41]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD43:%.*]] = add i32 [[SHL42]], [[SUB36]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD44:%.*]] = add i32 [[ADD19]], [[ADD]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB45:%.*]] = sub i32 [[ADD]], [[ADD19]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD46:%.*]] = add i32 [[ADD43]], [[ADD31]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB47:%.*]] = sub i32 [[ADD31]], [[ADD43]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD48:%.*]] = add i32 [[ADD46]], [[ADD44]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB51:%.*]] = sub i32 [[ADD44]], [[ADD46]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD55:%.*]] = add i32 [[SUB47]], [[SUB45]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB59:%.*]] = sub i32 [[SUB45]], [[SUB47]] ; UNALIGNED_VEC_MEM-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]] ; UNALIGNED_VEC_MEM-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; UNALIGNED_VEC_MEM-NEXT: [[TMP16:%.*]] = load i8, ptr [[ADD_PTR3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP16]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[TMP17:%.*]] = load i8, ptr [[ADD_PTR644]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV2_1:%.*]] = zext i8 [[TMP17]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB_1:%.*]] = sub i32 [[CONV_1]], [[CONV2_1]] ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX3_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV4_1:%.*]] = zext i8 [[TMP18]] to i32 ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX5_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV6_1:%.*]] = zext i8 [[TMP19]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB7_1:%.*]] = sub i32 [[CONV4_1]], [[CONV6_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL_1:%.*]] = shl i32 [[SUB7_1]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_1:%.*]] = add i32 [[SHL_1]], [[SUB_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX8_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV9_1:%.*]] = zext i8 [[TMP20]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX10_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP21:%.*]] = load i8, ptr [[ARRAYIDX10_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV11_1:%.*]] = zext i8 [[TMP21]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB12_1:%.*]] = sub i32 [[CONV9_1]], [[CONV11_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX13_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV14_1:%.*]] = zext i8 [[TMP22]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX15_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP23:%.*]] = load i8, ptr [[ARRAYIDX15_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV16_1:%.*]] = zext i8 [[TMP23]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB17_1:%.*]] = sub i32 [[CONV14_1]], [[CONV16_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL18_1:%.*]] = shl i32 [[SUB17_1]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD19_1:%.*]] = add i32 [[SHL18_1]], [[SUB12_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX20_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP24:%.*]] = load i8, ptr [[ARRAYIDX20_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV21_1:%.*]] = zext i8 [[TMP24]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX22_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX22_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV23_1:%.*]] = zext i8 [[TMP25]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB24_1:%.*]] = sub i32 [[CONV21_1]], [[CONV23_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX25_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP26:%.*]] = load i8, ptr [[ARRAYIDX25_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV26_1:%.*]] = zext i8 [[TMP26]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX27_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV28_1:%.*]] = zext i8 [[TMP27]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB29_1:%.*]] = sub i32 [[CONV26_1]], [[CONV28_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL30_1:%.*]] = shl i32 [[SUB29_1]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD31_1:%.*]] = add i32 [[SHL30_1]], [[SUB24_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP28:%.*]] = load i8, ptr [[ARRAYIDX32_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP28]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX34_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP29:%.*]] = load i8, ptr [[ARRAYIDX34_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV35_1:%.*]] = zext i8 [[TMP29]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB36_1:%.*]] = sub i32 [[CONV33_1]], [[CONV35_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX37_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP30:%.*]] = load i8, ptr [[ARRAYIDX37_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV38_1:%.*]] = zext i8 [[TMP30]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX39_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP31:%.*]] = load i8, ptr [[ARRAYIDX39_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV40_1:%.*]] = zext i8 [[TMP31]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB41_1:%.*]] = sub i32 [[CONV38_1]], [[CONV40_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL42_1:%.*]] = shl i32 [[SUB41_1]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD43_1:%.*]] = add i32 [[SHL42_1]], [[SUB36_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD44_1:%.*]] = add i32 [[ADD19_1]], [[ADD_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB45_1:%.*]] = sub i32 [[ADD_1]], [[ADD19_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD46_1:%.*]] = add i32 [[ADD43_1]], [[ADD31_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB47_1:%.*]] = sub i32 [[ADD31_1]], [[ADD43_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD48_1:%.*]] = add i32 [[ADD46_1]], [[ADD44_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB51_1:%.*]] = sub i32 [[ADD44_1]], [[ADD46_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD55_1:%.*]] = add i32 [[SUB47_1]], [[SUB45_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB59_1:%.*]] = sub i32 [[SUB45_1]], [[SUB47_1]] ; UNALIGNED_VEC_MEM-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; UNALIGNED_VEC_MEM-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] +; UNALIGNED_VEC_MEM-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB_2:%.*]] = sub i32 [[CONV_2]], [[CONV2_2]] ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB7_2:%.*]] = sub i32 [[CONV4_2]], [[CONV6_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL_2:%.*]] = shl i32 [[SUB7_2]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_2:%.*]] = add i32 [[SHL_2]], [[SUB_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB12_2:%.*]] = sub i32 [[CONV9_2]], [[CONV11_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP38]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX15_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX15_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP39]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB17_2:%.*]] = sub i32 [[CONV14_2]], [[CONV16_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL18_2:%.*]] = shl i32 [[SUB17_2]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD19_2:%.*]] = add i32 [[SHL18_2]], [[SUB12_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX20_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP40]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP41]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB24_2:%.*]] = sub i32 [[CONV21_2]], [[CONV23_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP42]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP43]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB29_2:%.*]] = sub i32 [[CONV26_2]], [[CONV28_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL30_2:%.*]] = shl i32 [[SUB29_2]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD31_2:%.*]] = add i32 [[SHL30_2]], [[SUB24_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP44:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP44]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP45]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB36_2:%.*]] = sub i32 [[CONV33_2]], [[CONV35_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP46]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX39_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP47:%.*]] = load i8, ptr [[ARRAYIDX39_2]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP47]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB41_2:%.*]] = sub i32 [[CONV38_2]], [[CONV40_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL42_2:%.*]] = shl i32 [[SUB41_2]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD43_2:%.*]] = add i32 [[SHL42_2]], [[SUB36_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD44_2:%.*]] = add i32 [[ADD19_2]], [[ADD_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB45_2:%.*]] = sub i32 [[ADD_2]], [[ADD19_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD46_2:%.*]] = add i32 [[ADD43_2]], [[ADD31_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB47_2:%.*]] = sub i32 [[ADD31_2]], [[ADD43_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD48_2:%.*]] = add i32 [[ADD46_2]], [[ADD44_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB51_2:%.*]] = sub i32 [[ADD44_2]], [[ADD46_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD55_2:%.*]] = add i32 [[SUB47_2]], [[SUB45_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB59_2:%.*]] = sub i32 [[SUB45_2]], [[SUB47_2]] +; UNALIGNED_VEC_MEM-NEXT: [[TMP48:%.*]] = load i8, ptr null, align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[TMP49:%.*]] = load i8, ptr null, align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP49]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB_3:%.*]] = sub i32 [[CONV_3]], [[CONV2_3]] ; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP50:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP50]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX5_4:%.*]] = getelementptr i8, ptr null, i64 4 +; UNALIGNED_VEC_MEM-NEXT: [[TMP51:%.*]] = load i8, ptr [[ARRAYIDX5_4]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP51]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB7_3:%.*]] = sub i32 [[CONV4_3]], [[CONV6_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL_3:%.*]] = shl i32 [[SUB7_3]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_3:%.*]] = add i32 [[SHL_3]], [[SUB_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr i8, ptr null, i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP52:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP52]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX10_3:%.*]] = getelementptr i8, ptr null, i64 1 +; UNALIGNED_VEC_MEM-NEXT: [[TMP53:%.*]] = load i8, ptr [[ARRAYIDX10_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP53]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB12_3:%.*]] = sub i32 [[CONV9_3]], [[CONV11_3]] ; UNALIGNED_VEC_MEM-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV14_3:%.*]] = zext i8 [[TMP0]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX15_3:%.*]] = getelementptr i8, ptr null, i64 5 +; UNALIGNED_VEC_MEM-NEXT: [[TMP55:%.*]] = load i8, ptr [[ARRAYIDX15_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV16_3:%.*]] = zext i8 [[TMP55]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB17_3:%.*]] = sub i32 [[CONV14_3]], [[CONV16_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL18_3:%.*]] = shl i32 [[SUB17_3]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD19_3:%.*]] = add i32 [[SHL18_3]], [[SUB12_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP56:%.*]] = load i8, ptr [[ARRAYIDX20_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV21_3:%.*]] = zext i8 [[TMP56]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 +; UNALIGNED_VEC_MEM-NEXT: [[TMP57:%.*]] = load i8, ptr [[ARRAYIDX22_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV23_3:%.*]] = zext i8 [[TMP57]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB24_3:%.*]] = sub i32 [[CONV21_3]], [[CONV23_3]] ; UNALIGNED_VEC_MEM-NEXT: [[TMP1:%.*]] = load i8, ptr null, align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP8]], [[TMP10]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP12:%.*]] = shl <4 x i32> [[TMP11]], splat (i32 16) -; UNALIGNED_VEC_MEM-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[TMP24]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP26:%.*]] = sub <4 x i32> [[TMP23]], [[TMP25]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP29:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP30:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP31:%.*]] = sub <4 x i32> [[TMP28]], [[TMP30]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP32:%.*]] = shl <4 x i32> [[TMP31]], splat (i32 16) -; UNALIGNED_VEC_MEM-NEXT: [[TMP33:%.*]] = add <4 x i32> [[TMP32]], [[TMP26]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP45:%.*]] = zext <4 x i8> [[TMP44]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP46:%.*]] = sub <4 x i32> [[TMP43]], [[TMP45]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP48:%.*]] = zext <4 x i8> [[TMP47]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP49:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP50:%.*]] = zext <4 x i8> [[TMP49]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP51:%.*]] = sub <4 x i32> [[TMP48]], [[TMP50]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP52:%.*]] = shl <4 x i32> [[TMP51]], splat (i32 16) -; UNALIGNED_VEC_MEM-NEXT: [[TMP53:%.*]] = add <4 x i32> [[TMP52]], [[TMP46]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) -; UNALIGNED_VEC_MEM-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP65:%.*]] = load <4 x i8>, ptr null, align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP66:%.*]] = zext <4 x i8> [[TMP65]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP67:%.*]] = sub <4 x i32> [[TMP64]], [[TMP66]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0 -; UNALIGNED_VEC_MEM-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP71:%.*]] = shufflevector <2 x i8> [[TMP62]], <2 x i8> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP72:%.*]] = shufflevector <4 x i8> [[TMP70]], <4 x i8> [[TMP71]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP73:%.*]] = zext <4 x i8> [[TMP72]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP74:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; UNALIGNED_VEC_MEM-NEXT: [[TMP75:%.*]] = zext <4 x i8> [[TMP74]] to <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP76:%.*]] = shufflevector <4 x i32> [[TMP75]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP77:%.*]] = sub <4 x i32> [[TMP73]], [[TMP76]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP78:%.*]] = shl <4 x i32> [[TMP77]], splat (i32 16) -; UNALIGNED_VEC_MEM-NEXT: [[TMP79:%.*]] = add <4 x i32> [[TMP78]], [[TMP68]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP80:%.*]] = shufflevector <4 x i32> [[TMP79]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP81:%.*]] = add <4 x i32> [[TMP79]], [[TMP80]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP82:%.*]] = sub <4 x i32> [[TMP79]], [[TMP80]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP83:%.*]] = shufflevector <4 x i32> [[TMP81]], <4 x i32> [[TMP82]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP84:%.*]] = shufflevector <4 x i32> [[TMP83]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP85:%.*]] = add <4 x i32> [[TMP83]], [[TMP84]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP86:%.*]] = sub <4 x i32> [[TMP83]], [[TMP84]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP87:%.*]] = shufflevector <4 x i32> [[TMP85]], <4 x i32> [[TMP86]], <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP88:%.*]] = shufflevector <4 x i32> [[TMP41]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <4 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP90:%.*]] = add <4 x i32> [[TMP88]], [[TMP89]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP91:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP92:%.*]] = shufflevector <4 x i32> [[TMP91]], <4 x i32> [[TMP90]], <8 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP93:%.*]] = add <4 x i32> [[TMP87]], [[TMP61]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP94:%.*]] = sub <4 x i32> [[TMP61]], [[TMP87]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP95:%.*]] = shufflevector <4 x i32> [[TMP94]], <4 x i32> poison, <8 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP96:%.*]] = shufflevector <4 x i32> [[TMP93]], <4 x i32> poison, <8 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <8 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP98:%.*]] = add <8 x i32> [[TMP97]], [[TMP92]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP99:%.*]] = sub <8 x i32> [[TMP92]], [[TMP97]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP100:%.*]] = shufflevector <8 x i32> [[TMP98]], <8 x i32> [[TMP99]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP102:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP103:%.*]] = shufflevector <16 x i32> [[TMP101]], <16 x i32> [[TMP102]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP104:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP105:%.*]] = shufflevector <16 x i32> [[TMP103]], <16 x i32> [[TMP104]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP106:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP107:%.*]] = shufflevector <16 x i32> [[TMP105]], <16 x i32> [[TMP106]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP108:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP109:%.*]] = shufflevector <16 x i32> [[TMP107]], <16 x i32> [[TMP108]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP110:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP111:%.*]] = shufflevector <16 x i32> [[TMP109]], <16 x i32> [[TMP110]], <16 x i32> -; UNALIGNED_VEC_MEM-NEXT: [[TMP112:%.*]] = lshr <16 x i32> [[TMP111]], splat (i32 15) -; UNALIGNED_VEC_MEM-NEXT: [[TMP113:%.*]] = and <16 x i32> [[TMP112]], splat (i32 65537) -; UNALIGNED_VEC_MEM-NEXT: [[TMP114:%.*]] = mul <16 x i32> [[TMP113]], splat (i32 65535) -; UNALIGNED_VEC_MEM-NEXT: [[TMP115:%.*]] = add <16 x i32> [[TMP114]], [[TMP100]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP116:%.*]] = xor <16 x i32> [[TMP115]], [[TMP111]] -; UNALIGNED_VEC_MEM-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP116]]) +; UNALIGNED_VEC_MEM-NEXT: [[CONV26_3:%.*]] = zext i8 [[TMP1]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6 +; UNALIGNED_VEC_MEM-NEXT: [[TMP59:%.*]] = load i8, ptr [[ARRAYIDX27_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV28_3:%.*]] = zext i8 [[TMP59]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB29_3:%.*]] = sub i32 [[CONV26_3]], [[CONV28_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL30_3:%.*]] = shl i32 [[SUB29_3]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD31_3:%.*]] = add i32 [[SHL30_3]], [[SUB24_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX32_3:%.*]] = getelementptr i8, ptr null, i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX32_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV33_3:%.*]] = zext i8 [[TMP60]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX34_3:%.*]] = getelementptr i8, ptr null, i64 3 +; UNALIGNED_VEC_MEM-NEXT: [[TMP61:%.*]] = load i8, ptr [[ARRAYIDX34_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV35_3:%.*]] = zext i8 [[TMP61]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB36_3:%.*]] = sub i32 [[CONV33_3]], [[CONV35_3]] +; UNALIGNED_VEC_MEM-NEXT: [[TMP62:%.*]] = load i8, ptr null, align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV38_3:%.*]] = zext i8 [[TMP62]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[ARRAYIDX39_3:%.*]] = getelementptr i8, ptr null, i64 7 +; UNALIGNED_VEC_MEM-NEXT: [[TMP63:%.*]] = load i8, ptr [[ARRAYIDX39_3]], align 1 +; UNALIGNED_VEC_MEM-NEXT: [[CONV40_3:%.*]] = zext i8 [[TMP63]] to i32 +; UNALIGNED_VEC_MEM-NEXT: [[SUB41_3:%.*]] = sub i32 [[CONV38_3]], [[CONV40_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHL42_3:%.*]] = shl i32 [[SUB41_3]], 16 +; UNALIGNED_VEC_MEM-NEXT: [[ADD43_3:%.*]] = add i32 [[SHL42_3]], [[SUB36_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD44_3:%.*]] = add i32 [[ADD19_3]], [[ADD_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB45_3:%.*]] = sub i32 [[ADD_3]], [[ADD19_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD46_3:%.*]] = add i32 [[ADD43_3]], [[ADD31_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB47_3:%.*]] = sub i32 [[ADD31_3]], [[ADD43_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD48_3:%.*]] = add i32 [[ADD46_3]], [[ADD44_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB51_3:%.*]] = sub i32 [[ADD44_3]], [[ADD46_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB59_3:%.*]] = sub i32 [[SUB45_3]], [[SUB47_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I:%.*]] = lshr i32 [[CONV_3]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[CONV_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I49:%.*]] = lshr i32 [[ADD46_2]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[ADD46_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I54:%.*]] = lshr i32 [[ADD46_1]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[ADD46_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I59:%.*]] = lshr i32 [[ADD46]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[ADD46]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD78_1:%.*]] = add i32 [[ADD55_1]], [[ADD55]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB86_1:%.*]] = sub i32 [[ADD55]], [[ADD55_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD94_1:%.*]] = add i32 [[ADD55_3]], [[ADD55_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB102_1:%.*]] = sub i32 [[ADD55_2]], [[ADD55_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD103_1:%.*]] = add i32 [[ADD94_1]], [[ADD78_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB104_1:%.*]] = sub i32 [[ADD78_1]], [[ADD94_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD105_1:%.*]] = add i32 [[SUB102_1]], [[SUB86_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB106_1:%.*]] = sub i32 [[SUB86_1]], [[SUB102_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[CONV9_2]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I_1:%.*]] = mul i32 [[AND_I_1]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[CONV9_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I49_1:%.*]] = lshr i32 [[CONV_2]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I50_1:%.*]] = and i32 [[SHR_I49_1]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I51_1:%.*]] = mul i32 [[AND_I50_1]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I52_1:%.*]] = add i32 [[MUL_I51_1]], [[ADD105_1]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I53_1:%.*]] = xor i32 [[ADD_I52_1]], [[CONV_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[SUB47_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I59_1:%.*]] = lshr i32 [[SUB47]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[SUB47]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I53_1]], [[ADD113]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB51_1]], [[SUB51]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB51_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD94_2:%.*]] = add i32 [[SUB51_3]], [[SUB51_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB102_2:%.*]] = sub i32 [[SUB51_2]], [[SUB51_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD103_2:%.*]] = add i32 [[ADD94_2]], [[ADD78_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB104_2:%.*]] = sub i32 [[ADD78_2]], [[ADD94_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I_2:%.*]] = lshr i32 [[CONV9_1]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I_2:%.*]] = and i32 [[SHR_I_2]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I_2:%.*]] = mul i32 [[AND_I_2]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I_2:%.*]] = add i32 [[MUL_I_2]], [[ADD103_2]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I_2:%.*]] = xor i32 [[ADD_I_2]], [[CONV9_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I54_2:%.*]] = lshr i32 [[CONV21_1]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I55_2:%.*]] = and i32 [[SHR_I54_2]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I56_2:%.*]] = mul i32 [[AND_I55_2]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I57_2:%.*]] = add i32 [[MUL_I56_2]], [[SUB104_2]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I58_2:%.*]] = xor i32 [[ADD_I57_2]], [[CONV21_1]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I59_2:%.*]] = lshr i32 [[ADD44]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[ADD44]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[XOR_I_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[XOR_I58_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD78_3:%.*]] = add i32 [[SUB59_1]], [[SUB59]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB59_1]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD94_3:%.*]] = add i32 [[SUB59_3]], [[SUB59_2]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB102_3:%.*]] = sub i32 [[SUB59_2]], [[SUB59_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD103_3:%.*]] = add i32 [[ADD94_3]], [[ADD78_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB104_3:%.*]] = sub i32 [[ADD78_3]], [[ADD94_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I_3:%.*]] = lshr i32 [[CONV9]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I_3:%.*]] = and i32 [[SHR_I_3]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I_3:%.*]] = mul i32 [[AND_I_3]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I_3:%.*]] = add i32 [[MUL_I_3]], [[ADD103_3]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I_3:%.*]] = xor i32 [[ADD_I_3]], [[CONV9]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I54_3:%.*]] = lshr i32 [[CONV21]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I55_3:%.*]] = and i32 [[SHR_I54_3]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I56_3:%.*]] = mul i32 [[AND_I55_3]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I57_3:%.*]] = add i32 [[MUL_I56_3]], [[SUB104_3]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I58_3:%.*]] = xor i32 [[ADD_I57_3]], [[CONV21]] +; UNALIGNED_VEC_MEM-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 +; UNALIGNED_VEC_MEM-NEXT: [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537 +; UNALIGNED_VEC_MEM-NEXT: [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535 +; UNALIGNED_VEC_MEM-NEXT: [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]] +; UNALIGNED_VEC_MEM-NEXT: [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[XOR_I_3]] +; UNALIGNED_VEC_MEM-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[XOR_I58_3]] +; UNALIGNED_VEC_MEM-NEXT: [[TMP117:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]] ; UNALIGNED_VEC_MEM-NEXT: ret i32 [[TMP117]] ; entry: