46 changes: 20 additions & 26 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,20 @@
define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
; CHECK-LABEL: @phis(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb1:
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ]
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
; CHECK-NEXT: ret <4 x half> [[O3]]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x half> [[TMP8]]
;
entry:
%a0 = extractelement <4 x half> %in1, i64 0
Expand Down Expand Up @@ -52,23 +49,20 @@ bb1:
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
; CHECK-LABEL: @phis_reverse(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb1:
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ]
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
; CHECK-NEXT: ret <4 x half> [[O3]]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x half> [[TMP8]]
;
entry:
%a0 = extractelement <4 x half> %in1, i64 0
Expand Down
129 changes: 28 additions & 101 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,10 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,VI %s

define half @reduction_half4(<4 x half> %a) {
; GFX9-LABEL: @reduction_half4(
; GFX9-NEXT: entry:
; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[A:%.*]])
; GFX9-NEXT: ret half [[TMP0]]
;
; VI-LABEL: @reduction_half4(
; VI-NEXT: entry:
; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT: ret half [[ADD3]]
; GCN-LABEL: @reduction_half4(
; GCN-NEXT: entry:
; GCN-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[A:%.*]])
; GCN-NEXT: ret half [[TMP0]]
;
entry:
%elt0 = extractelement <4 x half> %a, i64 0
Expand All @@ -33,29 +22,10 @@ entry:
}

define half @reduction_half8(<8 x half> %vec8) {
; GFX9-LABEL: @reduction_half8(
; GFX9-NEXT: entry:
; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[VEC8:%.*]])
; GFX9-NEXT: ret half [[TMP0]]
;
; VI-LABEL: @reduction_half8(
; VI-NEXT: entry:
; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0
; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
; VI-NEXT: ret half [[ADD7]]
; GCN-LABEL: @reduction_half8(
; GCN-NEXT: entry:
; GCN-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[VEC8:%.*]])
; GCN-NEXT: ret half [[TMP0]]
;
entry:
%elt0 = extractelement <8 x half> %vec8, i64 0
Expand Down Expand Up @@ -86,38 +56,25 @@ define half @reduction_half16(<16 x half> %vec16) {
;
; VI-LABEL: @reduction_half16(
; VI-NEXT: entry:
; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0
; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
; VI-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 8
; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]]
; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]]
; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]]
; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]]
; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]]
; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]]
; VI-NEXT: [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]]
; VI-NEXT: [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]]
; VI-NEXT: [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]]
; VI-NEXT: [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]]
; VI-NEXT: [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]]
; VI-NEXT: [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]]
; VI-NEXT: [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]]
; VI-NEXT: [[ADD15:%.*]] = fadd fast half [[ELT15]], [[ADD14]]
; VI-NEXT: ret half [[ADD15]]
; VI-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VI-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[TMP0]])
; VI-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT8]]
; VI-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT9]], [[ELT10]]
; VI-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT11]], [[ELT12]]
; VI-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[ELT13]], [[ELT14]]
; VI-NEXT: [[OP_RDX4:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
; VI-NEXT: [[OP_RDX5:%.*]] = fadd fast half [[OP_RDX2]], [[OP_RDX3]]
; VI-NEXT: [[OP_RDX6:%.*]] = fadd fast half [[OP_RDX4]], [[OP_RDX5]]
; VI-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX6]], [[ELT15]]
; VI-NEXT: ret half [[OP_RDX7]]
;
entry:
%elt0 = extractelement <16 x half> %vec16, i64 0
Expand Down Expand Up @@ -183,21 +140,10 @@ entry:
}

define i16 @reduction_v4i16(<4 x i16> %a) {
; GFX9-LABEL: @reduction_v4i16(
; GFX9-NEXT: entry:
; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]])
; GFX9-NEXT: ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_v4i16(
; VI-NEXT: entry:
; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1
; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2
; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3
; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
; VI-NEXT: ret i16 [[ADD3]]
; GCN-LABEL: @reduction_v4i16(
; GCN-NEXT: entry:
; GCN-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]])
; GCN-NEXT: ret i16 [[TMP0]]
;
entry:
%elt0 = extractelement <4 x i16> %a, i64 0
Expand All @@ -213,29 +159,10 @@ entry:
}

define i16 @reduction_v8i16(<8 x i16> %vec8) {
; GFX9-LABEL: @reduction_v8i16(
; GFX9-NEXT: entry:
; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]])
; GFX9-NEXT: ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_v8i16(
; VI-NEXT: entry:
; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]]
; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]]
; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]]
; VI-NEXT: [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]]
; VI-NEXT: [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]]
; VI-NEXT: [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]]
; VI-NEXT: [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]]
; VI-NEXT: ret i16 [[ADD7]]
; GCN-LABEL: @reduction_v8i16(
; GCN-NEXT: entry:
; GCN-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]])
; GCN-NEXT: ret i16 [[TMP0]]
;
entry:
%elt0 = extractelement <8 x i16> %vec8, i64 0
Expand Down