Skip to content

Commit

Permalink
[Test][SLP] Add tests for PR52275
Browse files Browse the repository at this point in the history
  • Loading branch information
anton-afanasyev committed Feb 15, 2022
1 parent dd145f9 commit f16a9df
Showing 1 changed file with 135 additions and 3 deletions.
138 changes: 135 additions & 3 deletions llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
@@ -1,15 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s
; RUN: opt < %s -slp-vectorizer -S -mcpu=core-i7 | FileCheck %s
; RUN: opt < %s -slp-vectorizer -slp-threshold=-100 -S -mcpu=core-i7 | FileCheck %s --check-prefix=FORCE_SLP

define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
; CHECK-LABEL: @pr52275(
define <4 x i8> @test(<4 x i8> %v, i8* %x) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
; CHECK-NEXT: ret <4 x i8> [[V2]]
;
; FORCE_SLP-LABEL: @test(
; FORCE_SLP-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
; FORCE_SLP-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
; FORCE_SLP-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
; FORCE_SLP-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; FORCE_SLP-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; FORCE_SLP-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
; FORCE_SLP-NEXT: ret <4 x i8> [[V2]]
;
%x0 = load i8, i8* %x, align 4
%g1 = getelementptr inbounds i8, i8* %x, i64 1
Expand All @@ -19,3 +29,125 @@ define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
%v2 = add <4 x i8> %v0, %v1
ret <4 x i8> %v2
}

; test2: two adjacent i32 values are loaded from %t1 (element 0 and, via the
; getelementptr, element 1), each is truncated to i8, and the results are
; inserted into lanes 0 and 1 of the incoming <2 x i8> %t6. The final add uses
; both the fully built vector (%t10) and the intermediate one (%t8), so the
; intermediate insertelement has a second user besides the next insertelement.
;
; The autogenerated assertions below are identical for both run-line prefixes:
; a single <2 x i32> load, a vector trunc, and an add of that trunc with itself.
;
; NOTE(review): in the input, lane 1 of %t8 still holds lane 1 of %t6, whereas
; the recorded output uses the truncated load for both add operands. This
; appears to capture the behaviour reported in PR52275 rather than the desired
; result -- confirm, and regenerate the assertions once the fix lands.
define <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[TMP3]], [[TMP3]]
; CHECK-NEXT: ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test2(
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; FORCE_SLP-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; FORCE_SLP-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; FORCE_SLP-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[TMP3]], [[TMP3]]
; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
;
%t3 = load i32, i32* %t1, align 4
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
%t5 = load i32, i32* %t4, align 4
%t7 = trunc i32 %t3 to i8
%t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
%t9 = trunc i32 %t5 to i8
%t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1
; Second operand is the partially built vector %t8, not %t10.
%t11 = add <2 x i8> %t10, %t8
ret <2 x i8> %t11
}

; test_reorder: same load/trunc/insertelement chain as a plain two-lane build,
; but the lanes are filled in reverse: the value loaded from element 0 (%t7)
; goes into lane 1 and the value loaded from element 1 (%t9) goes into lane 0.
; The final add again uses both the complete vector (%t10) and the
; intermediate one (%t8).
;
; The autogenerated assertions below are identical for both run-line prefixes:
; a <2 x i32> load, a vector trunc, a shufflevector with mask <1, 0> that swaps
; the two lanes, and an add of the shuffled vector with itself.
;
; NOTE(review): in the input, lane 0 of %t8 still holds lane 0 of %t6, whereas
; the recorded output uses the shuffled truncated load for both add operands.
; This appears to capture the behaviour reported in PR52275 -- confirm, and
; regenerate the assertions once the fix lands.
define <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_reorder(
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[TMP4]], [[TMP4]]
; CHECK-NEXT: ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_reorder(
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; FORCE_SLP-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; FORCE_SLP-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; FORCE_SLP-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; FORCE_SLP-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <2 x i32> <i32 1, i32 0>
; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[TMP4]], [[TMP4]]
; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
;
%t3 = load i32, i32* %t1, align 4
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
%t5 = load i32, i32* %t4, align 4
%t7 = trunc i32 %t3 to i8
; First loaded value lands in lane 1, second in lane 0 (reversed order).
%t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
%t9 = trunc i32 %t5 to i8
%t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0
; Second operand is the partially built vector %t8, not %t10.
%t11 = add <2 x i8> %t10, %t8
ret <2 x i8> %t11
}

; test_subvector: the two truncated loads fill only lanes 0 and 1 of a
; four-lane vector %t6; lanes 2 and 3 are never written by this function.
; The final add uses both the vector with two lanes inserted (%t10) and the
; intermediate one with a single lane inserted (%t8).
;
; The autogenerated assertions below are identical for both run-line prefixes:
; a <2 x i32> load and vector trunc, a shufflevector that widens the two-lane
; result to four lanes (mask <0, 1, undef, undef>), a second shufflevector
; that blends it into %t6 with mask <4, 5, 2, 3> (lanes 0-1 from the widened
; value, lanes 2-3 from %t6), and an add of the blended vector with itself.
;
; NOTE(review): in the input, lane 1 of %t8 still holds lane 1 of %t6, whereas
; the recorded output uses the blended vector for both add operands. This
; appears to capture the behaviour reported in PR52275 -- confirm, and
; regenerate the assertions once the fix lands.
define <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_subvector(
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[T101:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T101]], [[T101]]
; CHECK-NEXT: ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector(
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; FORCE_SLP-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; FORCE_SLP-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; FORCE_SLP-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; FORCE_SLP-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; FORCE_SLP-NEXT: [[T101:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T101]], [[T101]]
; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
;
%t3 = load i32, i32* %t1, align 4
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
%t5 = load i32, i32* %t4, align 4
%t7 = trunc i32 %t3 to i8
; Only lanes 0 and 1 of the four-lane vector are replaced.
%t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
%t9 = trunc i32 %t5 to i8
%t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1
; Second operand is the partially built vector %t8, not %t10.
%t11 = add <4 x i8> %t10, %t8
ret <4 x i8> %t11
}

; test_subvector_reorder: combines the sub-vector and reversed-lane cases.
; The value loaded from element 0 (%t7) is inserted into lane 3 and the value
; loaded from element 1 (%t9) into lane 2 of the four-lane vector %t6; lanes 0
; and 1 are never written by this function. The final add uses both the
; vector with two lanes inserted (%t10) and the intermediate one (%t8).
;
; The autogenerated assertions below are identical for both run-line prefixes:
; a <2 x i32> load and vector trunc, a shufflevector that swaps and widens the
; two lanes (mask <1, 0, undef, undef>), a second shufflevector that blends
; them into %t6 with mask <0, 1, 4, 5> (lanes 0-1 from %t6, lanes 2-3 from the
; widened value), and an add of the blended vector with itself.
;
; NOTE(review): in the input, lane 2 of %t8 still holds lane 2 of %t6, whereas
; the recorded output uses the blended vector for both add operands. This
; appears to capture the behaviour reported in PR52275 -- confirm, and
; regenerate the assertions once the fix lands.
define <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_subvector_reorder(
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[T81:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T81]], [[T81]]
; CHECK-NEXT: ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector_reorder(
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
; FORCE_SLP-NEXT: [[TMP1:%.*]] = bitcast i32* [[T1]] to <2 x i32>*
; FORCE_SLP-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; FORCE_SLP-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
; FORCE_SLP-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; FORCE_SLP-NEXT: [[T81:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T81]], [[T81]]
; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
;
%t3 = load i32, i32* %t1, align 4
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
%t5 = load i32, i32* %t4, align 4
%t7 = trunc i32 %t3 to i8
; First loaded value lands in lane 3, second in lane 2 (reversed, upper half).
%t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
%t9 = trunc i32 %t5 to i8
%t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2
; Second operand is the partially built vector %t8, not %t10.
%t11 = add <4 x i8> %t10, %t8
ret <4 x i8> %t11
}

0 comments on commit f16a9df

Please sign in to comment.