44 changes: 22 additions & 22 deletions llvm/test/Analysis/CostModel/X86/ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v2i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v2i64'
Expand Down Expand Up @@ -128,11 +128,11 @@ define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v4i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i64'
Expand Down Expand Up @@ -165,11 +165,11 @@ define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i64'
Expand Down Expand Up @@ -202,11 +202,11 @@ define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v4i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v4i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v4i32'
Expand Down Expand Up @@ -239,11 +239,11 @@ define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i32'
Expand Down Expand Up @@ -276,11 +276,11 @@ define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i32'
Expand Down Expand Up @@ -313,11 +313,11 @@ define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v8i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v8i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v8i16'
Expand Down Expand Up @@ -350,11 +350,11 @@ define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v16i16'
Expand Down Expand Up @@ -387,11 +387,11 @@ define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v32i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i16'
Expand Down Expand Up @@ -424,7 +424,7 @@ define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v16i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v16i8'
Expand Down Expand Up @@ -461,11 +461,11 @@ define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v32i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v32i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v32i8'
Expand Down Expand Up @@ -498,11 +498,11 @@ define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; SSE42-LABEL: 'var_ctpop_v64i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; AVX1-LABEL: 'var_ctpop_v64i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctpop
;
; AVX2-LABEL: 'var_ctpop_v64i8'
Expand Down
62 changes: 10 additions & 52 deletions llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,26 +143,11 @@ define void @ctpop_4i32() #0 {
; SSE42-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; SSE42-NEXT: ret void
;
; AVX1-LABEL: @ctpop_4i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
; AVX1-NEXT: [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
; AVX1-NEXT: [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
; AVX1-NEXT: [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
; AVX1-NEXT: [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
; AVX1-NEXT: store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
; AVX1-NEXT: store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
; AVX1-NEXT: store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
; AVX1-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctpop_4i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
; AVX2-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
; AVX2-NEXT: ret void
; AVX-LABEL: @ctpop_4i32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
; AVX-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
Expand Down Expand Up @@ -216,38 +201,11 @@ define void @ctpop_8i32() #0 {
; SSE42-NEXT: store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; SSE42-NEXT: ret void
;
; AVX1-LABEL: @ctpop_8i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
; AVX1-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
; AVX1-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
; AVX1-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
; AVX1-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
; AVX1-NEXT: [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
; AVX1-NEXT: [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
; AVX1-NEXT: [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
; AVX1-NEXT: [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
; AVX1-NEXT: [[CTPOP4:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD4]])
; AVX1-NEXT: [[CTPOP5:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD5]])
; AVX1-NEXT: [[CTPOP6:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD6]])
; AVX1-NEXT: [[CTPOP7:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD7]])
; AVX1-NEXT: store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
; AVX1-NEXT: store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
; AVX1-NEXT: store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
; AVX1-NEXT: store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
; AVX1-NEXT: store i32 [[CTPOP4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
; AVX1-NEXT: store i32 [[CTPOP5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
; AVX1-NEXT: store i32 [[CTPOP6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
; AVX1-NEXT: store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctpop_8i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
; AVX2-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; AVX2-NEXT: ret void
; AVX-LABEL: @ctpop_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
; AVX-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-3dnow.s
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ pswapd (%rax), %mm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 38.00 38.00 - - - - - 8.00 17.50 17.50 3.00 3.00 2.00 8.00 45.50 1.50 1.00 1.00 - 38.00 38.00 - -
# CHECK-NEXT: 38.00 38.00 - - - - - 8.00 17.50 17.50 1.00 1.00 2.00 8.00 44.50 2.50 1.00 1.00 2.00 38.00 38.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
Expand Down Expand Up @@ -219,5 +219,5 @@ pswapd (%rax), %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - 1.00 - 1.00 - - - - 1.50 1.50 - - pmulhrw (%rax), %mm2
# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - prefetch (%rax)
# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - prefetchw (%rax)
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - pswapd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - pswapd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - pswapd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - pswapd (%rax), %mm2
192 changes: 96 additions & 96 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-avx1.s

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-mmx.s
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ pxor (%rax), %mm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 70.00 70.00 - - - 2.00 - - 3.50 3.50 45.00 45.00 6.00 2.00 19.50 35.50 27.00 27.00 24.00 69.00 69.00 - 2.00
# CHECK-NEXT: 70.00 70.00 - - - 2.00 - - 3.50 3.50 27.00 27.00 6.00 2.00 10.50 44.50 27.00 27.00 42.00 69.00 69.00 - 2.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
Expand All @@ -310,12 +310,12 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - 1.50 1.50 - - movq (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - 0.50 0.50 - - - - 1.00 - - - - - - - - movq %mm0, %rcx
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 1.00 - - 1.50 1.50 - - - - 1.00 movq %mm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - packsswb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - packsswb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - packssdw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - packssdw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - packuswb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - packuswb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - packsswb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - packsswb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - packssdw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - packssdw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - packuswb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - packuswb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - paddb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - paddb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - paddd %mm0, %mm2
Expand Down Expand Up @@ -392,17 +392,17 @@ pxor (%rax), %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - psubusw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - psubw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - psubw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpckhbw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpckhbw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpckhdq %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpckhdq (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpckhwd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpckhwd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpcklbw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpcklbw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpckldq %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpckldq (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - punpcklwd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - punpcklwd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpckhbw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpckhbw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpckhdq %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpckhdq (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpckhwd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpckhwd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpcklbw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpcklbw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpckldq %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpckldq (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - punpcklwd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - punpcklwd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - pxor %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - pxor (%rax), %mm2
8 changes: 4 additions & 4 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse1.s
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ xorps (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 97.00 97.00 - - - 15.00 - 18.00 79.00 79.00 18.50 18.50 2.00 25.00 58.50 33.50 17.50 17.50 - 84.00 84.00 - 26.00
# CHECK-NEXT: 97.00 97.00 - - - 15.00 - 18.00 79.00 79.00 15.50 15.50 2.00 25.00 57.00 35.00 17.50 17.50 3.00 84.00 84.00 - 26.00

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
Expand Down Expand Up @@ -387,7 +387,7 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 4.50 4.50 - - - - - 1.00 - - - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - 4.50 4.50 - - - - - 1.00 - - - 1.50 1.50 - - divss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - - - - - - 0.50 0.50 - - ldmxcsr (%rax)
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - maskmovq %mm0, %mm1
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - maskmovq %mm0, %mm1
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - maxps %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - 0.50 0.50 - - - - 1.00 - - - - 1.50 1.50 - - maxps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - maxss %xmm0, %xmm2
Expand Down Expand Up @@ -444,8 +444,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - prefetchnta (%rax)
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - psadbw %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - psadbw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - pshufw $1, (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - pshufw $1, (%rax), %mm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - rcpps %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - 0.50 0.50 - - - - - 1.00 - - - 1.50 1.50 - - rcpps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - rcpss %xmm0, %xmm2
Expand Down
62 changes: 31 additions & 31 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse2.s

Large diffs are not rendered by default.

86 changes: 43 additions & 43 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-sse41.s

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-ssse3.s
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 9 1.50 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 1 8 1.50 * pshufb (%rax), %mm2
# CHECK-NEXT: 1 3 1.50 pshufb %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 pshufb %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.50 * pshufb (%rax), %xmm2
# CHECK-NEXT: 1 2 0.50 psignb %mm0, %mm2
# CHECK-NEXT: 1 7 1.50 * psignb (%rax), %mm2
Expand Down Expand Up @@ -198,7 +198,7 @@ psignw (%rax), %xmm2

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 48.00 48.00 - - - - - - - - 69.00 69.00 8.00 - 24.00 16.00 12.00 12.00 - 48.00 48.00 - -
# CHECK-NEXT: 48.00 48.00 - - - - - - - - 60.00 60.00 8.00 - 20.00 20.00 12.00 12.00 8.00 48.00 48.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
Expand All @@ -214,10 +214,10 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - pabsw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - pabsw %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - pabsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - palignr $1, %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - palignr $1, (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - palignr $1, %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - palignr $1, (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - - - - - phaddd %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - 1.50 1.50 - - phaddd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - - - - - phaddd %xmm0, %xmm2
Expand Down Expand Up @@ -250,10 +250,10 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - 1.00 - 1.00 - - - - 1.50 1.50 - - pmulhrsw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - 1.00 - 1.00 - - - - - - - - pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - 1.00 - 1.00 - - - - 1.50 1.50 - - pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - pshufb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - pshufb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - - - - - pshufb %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - 1.50 1.50 - - pshufb (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - pshufb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - pshufb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - pshufb %xmm0, %xmm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - pshufb (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - psignb %mm0, %mm2
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - psignb (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - psignb %xmm0, %xmm2
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/tools/llvm-mca/X86/BdVer2/resources-xop.s
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 9 1.50 * vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 4 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 9 1.50 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 2 1.50 vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 2 1.00 vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 7 1.50 * vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 7 1.50 * vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 3 1.00 vprotb %xmm0, %xmm1, %xmm3
Expand Down Expand Up @@ -396,7 +396,7 @@ vpshlw %xmm0, (%rax), %xmm3

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: 115.50 115.50 - - - - - - 18.00 18.00 42.50 42.50 32.00 12.00 49.50 107.50 23.00 23.00 44.00 115.50 115.50 - -
# CHECK-NEXT: 115.50 115.50 - - - - - - - - 29.00 29.00 32.00 12.00 36.00 124.00 23.00 23.00 74.00 115.50 115.50 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
Expand All @@ -412,12 +412,12 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 2.00 - - - - - - - vfrczsd (%rax), %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 10.00 - - - - - - - vfrczss %xmm0, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 2.00 - - - - - - - vfrczss (%rax), %xmm3
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - vpcmov %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - vpcmov (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - 1.50 1.50 - - vpcmov %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - - - - - vpcmov %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - 1.50 1.50 - - vpcmov (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 2.00 2.00 - - 0.50 0.50 - - - 1.50 1.50 - - vpcmov %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vpcmov %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpcmov (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpcmov %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - 2.00 - - - - vpcmov %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpcmov (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpcmov %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - vpcomltb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - vpcomltb (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - vpcomltd %xmm0, %xmm1, %xmm3
Expand All @@ -434,18 +434,18 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - vpcomltuw (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - vpcomltw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - vpcomltw (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - - - - - vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - 1.50 1.50 - - vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - 1.50 1.50 - - vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - - - - - vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 1.50 1.50 - - vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 1.50 1.50 - - vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - - - - - vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - 1.50 1.50 - - vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 1.00 1.00 - - - - 0.50 0.50 - - - 1.50 1.50 - - vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - - - - - vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 1.50 1.50 - - vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 1.50 1.50 - - vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - 2.00 - - - - vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 - - 2.00 - - - - vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 2.00 - - 2.00 1.50 1.50 - - vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - vphaddbd %xmm0, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - 1.50 1.50 - - vphaddbd (%rax), %xmm3
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - 0.50 0.50 - - - - - vphaddbq %xmm0, %xmm3
Expand Down Expand Up @@ -500,9 +500,9 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - 1.00 - 1.00 - - - - 1.50 1.50 - - vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - 1.00 - 1.00 - - - - - - - - vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - 1.00 - 1.00 - - - - 1.50 1.50 - - vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - - - - - vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - 1.50 1.50 - - vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - 1.50 1.50 - - 0.50 0.50 - - - 1.50 1.50 - - vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vprotb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vprotb (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - - 1.00 - - 1.00 1.50 1.50 - - vprotb %xmm0, (%rax), %xmm3
Expand Down
62 changes: 31 additions & 31 deletions llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 614
# CHECK-NEXT: Total Cycles: 516
# CHECK-NEXT: Total uOps: 1100

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.79
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 6.0
# CHECK-NEXT: uOps Per Cycle: 2.13
# CHECK-NEXT: IPC: 1.16
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand Down Expand Up @@ -61,33 +61,33 @@

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: - - - - - - - - 6.00 6.00 - - - - 2.61 3.39 - - - - - - -
# CHECK-NEXT: - - - - - - - - 5.00 5.00 - - - - 2.00 4.00 - - 1.00 - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - - - 1.58 0.42 - - - - - 1.00 - - - - - - - vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 0.44 1.56 - - - - 0.61 0.39 - - - - - - - vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 1.58 0.42 - - - - - 1.00 - - - - - - - vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: - - - - - - - - 0.40 1.60 - - - - 1.00 - - - - - - - - vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: - - - - - - - - 1.58 0.42 - - - - - 1.00 - - - - - - - vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: - - - - - - - - 0.42 1.58 - - - - 1.00 - - - - - - - - vaddps %ymm4, %ymm5, %ymm0
# CHECK-NEXT: - - - - - - - - 1.02 0.98 - - - - - 1.00 - - - - - - - vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - - - 1.00 - - 1.00 - - - - vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.94 1.06 - - - - - 1.00 - - - - - - - vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: - - - - - - - - 1.04 0.96 - - - - 1.00 - - - - - - - - vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - 1.00 - - - - - - - vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 - - - - - - - - vaddps %ymm4, %ymm5, %ymm0

# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 01
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789

# CHECK: [0,0] DeeeeeER . . . . .. vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [0,1] DeeeE--R . . . . .. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: [0,2] .D==eeeeeER . . . .. vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: [0,3] .D=======eeeeeER . . .. vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: [0,4] . D===========eeeeeER . .. vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: [0,5] . D================eeeeeER .. vaddps %ymm4, %ymm5, %ymm0
# CHECK-NEXT: [1,0] . D====================eeeeeER. vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [1,1] . DeeeE----------------------R. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: [1,2] . D==eeeeeE-----------------R. vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: [1,3] . D=======eeeeeE------------R. vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: [1,4] . D===========eeeeeE--------R vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: [1,5] . D================eeeeeE---R vaddps %ymm4, %ymm5, %ymm0
# CHECK: [0,0] DeeeeeER . . . . . . vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [0,1] D=eeeE-R . . . . . . vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: [0,2] .D===eeeeeER . . . . . vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: [0,3] .D========eeeeeER . . . . vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: [0,4] . D============eeeeeER . . . vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: [0,5] . D=================eeeeeER . . vaddps %ymm4, %ymm5, %ymm0
# CHECK-NEXT: [1,0] . D=====================eeeeeER. vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [1,1] . DeeeE-----------------------R. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: [1,2] . D==eeeeeE------------------R. vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: [1,3] . D=======eeeeeE-------------R. vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: [1,4] . D===========eeeeeE---------R vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: [1,5] . D================eeeeeE----R vaddps %ymm4, %ymm5, %ymm0

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -96,10 +96,10 @@
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 11.0 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: 2. 2 3.0 0.0 8.5 vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: 3. 2 8.0 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: 4. 2 12.0 0.0 4.0 vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: 5. 2 17.0 0.0 1.5 vaddps %ymm4, %ymm5, %ymm0
# CHECK-NEXT: 2 8.7 0.3 5.3 <total>
# CHECK-NEXT: 0. 2 11.5 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1. 2 1.5 1.5 12.0 vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2
# CHECK-NEXT: 2. 2 3.5 0.0 9.0 vmulps %ymm2, %ymm3, %ymm4
# CHECK-NEXT: 3. 2 8.5 0.0 6.5 vaddps %ymm4, %ymm5, %ymm6
# CHECK-NEXT: 4. 2 12.5 0.0 4.5 vmulps %ymm6, %ymm3, %ymm4
# CHECK-NEXT: 5. 2 17.5 0.0 2.0 vaddps %ymm4, %ymm5, %ymm0
# CHECK-NEXT: 2 9.2 0.3 5.7 <total>
60 changes: 30 additions & 30 deletions llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ vaddps %ymm1, %ymm1, %ymm0

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 255
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 2.35
# CHECK-NEXT: IPC: 1.18
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK-NEXT: uOps Per Cycle: 2.93
# CHECK-NEXT: IPC: 1.46
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -59,7 +59,7 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 5 1.00 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 2 1.00 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 2 1.50 vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 2 1.00 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
Expand Down Expand Up @@ -88,13 +88,13 @@ vaddps %ymm1, %ymm1, %ymm0

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: - - - - - - - - 2.48 2.52 1.00 1.00 - - 1.02 1.98 1.00 1.00 - - - - -
# CHECK-NEXT: - - - - - - - - 1.00 1.00 2.00 2.00 - - 1.00 - 2.00 2.00 - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - - - 0.98 1.02 - - - - 1.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 0.02 1.98 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - 2.00 - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - 2.00 - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 0
Expand All @@ -105,10 +105,10 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [0,2] .D=eeE-R . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] .DeeeeeER . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeeE--R . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D===eeER. vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,2] . D==eeER . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . DeeeeeER vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . DeeE---R vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . D=eeE-R vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,1] . D=eeE--R vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . D==eeER vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -118,21 +118,21 @@ vaddps %ymm1, %ymm1, %ymm0

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 1.0 2.7 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 2.7 0.3 0.7 vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.6 0.8 1.1 <total>
# CHECK-NEXT: 1. 3 1.3 1.3 2.3 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 2.7 0.0 0.3 vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.7 0.8 0.9 <total>

# CHECK: [1] Code Region - ZERO-IDIOM-2

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 255
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 2.35
# CHECK-NEXT: IPC: 1.18
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK-NEXT: uOps Per Cycle: 2.93
# CHECK-NEXT: IPC: 1.46
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -145,7 +145,7 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 5 1.00 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 2 1.00 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 2 1.50 vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 2 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0.0] - PdAGLU01
Expand Down Expand Up @@ -174,13 +174,13 @@ vaddps %ymm1, %ymm1, %ymm0

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18]
# CHECK-NEXT: - - - - - - - - 2.48 2.52 1.00 1.00 - - 1.02 1.98 1.00 1.00 - - - - -
# CHECK-NEXT: - - - - - - - - 1.00 1.00 2.00 2.00 - - 1.00 - 2.00 2.00 - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions:
# CHECK-NEXT: - - - - - - - - 0.98 1.02 - - - - 1.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - 1.00 1.00 - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - 0.02 1.98 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - 2.00 - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - 2.00 - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 0
Expand All @@ -191,10 +191,10 @@ vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [0,2] .D=eeE-R . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] .DeeeeeER . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeeE--R . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . D===eeER. vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,2] . D==eeER . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . DeeeeeER vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . DeeE---R vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . D=eeE-R vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,1] . D=eeE--R vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . D==eeER vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -204,9 +204,9 @@ vaddps %ymm1, %ymm1, %ymm0

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 1.0 2.7 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 2.7 0.3 0.7 vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.6 0.8 1.1 <total>
# CHECK-NEXT: 1. 3 1.3 1.3 2.3 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 2.7 0.0 0.3 vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.7 0.8 0.9 <total>

# CHECK: [2] Code Region - ZERO-IDIOM-3

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BDVER2: Dispatch Width: 4
# BDVER2-NEXT: uOps Per Cycle: 0.20
# BDVER2-NEXT: IPC: 0.20
# BDVER2-NEXT: Block RThroughput: 2.0
# BDVER2-NEXT: Block RThroughput: 1.5

# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.40
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3
# BDVER2: Dispatch Width: 4
# BDVER2-NEXT: uOps Per Cycle: 0.20
# BDVER2-NEXT: IPC: 0.20
# BDVER2-NEXT: Block RThroughput: 2.0
# BDVER2-NEXT: Block RThroughput: 1.5

# BDWELL: Dispatch Width: 4
# BDWELL-NEXT: uOps Per Cycle: 0.40
Expand Down