diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
new file mode 100644
index 0000000000000..2704a4439f0d5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
+
+; The fshl intrinsic cost model overestimates the scalar cost, causing this test to vectorize when it is not beneficial to do so.
+define i64 @fshl(i64 %or1, i64 %or2, i64 %or3) {
+; CHECK-LABEL: define i64 @fshl
+; CHECK-SAME: (i64 [[OR1:%.*]], i64 [[OR2:%.*]], i64 [[OR3:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[OR2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[OR3]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[OR1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 17, i64 21>)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> <i64 0, i64 poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP2]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP7]], [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
+; CHECK-NEXT:    [[ADD3:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[XOR5:%.*]] = xor i64 [[ADD3]], [[TMP12]]
+; CHECK-NEXT:    ret i64 [[XOR5]]
+;
+entry:
+  %or4 = tail call i64 @llvm.fshl.i64(i64 %or2, i64 0, i64 1)
+  %xor1 = xor i64 %or4, 0
+  %or5 = tail call i64 @llvm.fshl.i64(i64 %or3, i64 0, i64 2)
+  %xor2 = xor i64 %or5, %or1
+  %add1 = add i64 %xor1, %or1
+  %add2 = add i64 0, %xor2
+  %or6 = tail call i64 @llvm.fshl.i64(i64 %or1, i64 %or2, i64 17)
+  %xor3 = xor i64 %or6, %add1
+  %or7 = tail call i64 @llvm.fshl.i64(i64 0, i64 0, i64 21)
+  %xor4 = xor i64 %or7, %add2
+  %add3 = or i64 %xor3, %add2
+  %xor5 = xor i64 %add3, %xor4
+  ret i64 %xor5
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
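
To observe the overestimate this test pins down, one option (a standalone sketch, not part of the patch; the function names are illustrative) is to run LLVM's cost-model printer on the scalar and vectorized fshl forms with the same triple and compare the reported costs:

; Sketch: print TTI's per-instruction cost for scalar i64 fshl vs. <2 x i64> fshl.
; RUN: opt -mtriple=arm64-apple-ios -passes="print<cost-model>" -disable-output %s 2>&1

define i64 @scalar_fshl(i64 %a, i64 %b) {
  ; Constant-shift i64 fshl lowers to a single EXTR on AArch64.
  %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 17)
  ret i64 %r
}

define <2 x i64> @vector_fshl(<2 x i64> %a, <2 x i64> %b) {
  ; The vector form the SLP vectorizer builds in the test above.
  %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 17, i64 21>)
  ret <2 x i64> %r
}

declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

If the printed cost of the scalar call is higher than a single EXTR justifies, SLP's scalar-vs-vector comparison tips toward the vector form, which is exactly the behaviour the CHECK lines above capture.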