From 3c9064ed963ebfc59e4164cbea0b932734785813 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 31 Mar 2020 19:58:29 +0100 Subject: [PATCH] [X86] Run XOP vector rotation tests with/without AVX2 I noticed this while reviewing D77152 - by only testing bdver4 we weren't checking an XOP target that only had AVX1 --- llvm/test/CodeGen/X86/rotate_vec.ll | 62 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll index d2d6462486160..fbaf2d0f09141 100644 --- a/llvm/test/CodeGen/X86/rotate_vec.ll +++ b/llvm/test/CodeGen/X86/rotate_vec.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,XOP +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,AVX512 define <4 x i32> @rot_v4i32_splat(<4 x i32> %x) { @@ -77,10 +78,20 @@ define <4 x i32> @rot_v4i32_non_splat_2masks(<4 x i32> %x) { } define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) { -; CHECK-LABEL: rot_v4i32_zero_non_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 -; CHECK-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_zero_non_splat: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_zero_non_splat: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vbroadcastss %xmm0, %xmm0 +; XOPAVX2-NEXT: retq +; +; AVX512-LABEL: rot_v4i32_zero_non_splat: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 +; AVX512-NEXT: retq %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 3>) %2 = shufflevector <4 x i32> %1, <4 x i32> 
undef, <4 x i32> zeroinitializer ret <4 x i32> %2 @@ -97,12 +108,19 @@ define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) { } define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) { -; XOP-LABEL: rot_v4i32_mask_ashr0: -; XOP: # %bb.0: -; XOP-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: vprotd $1, %xmm0, %xmm0 -; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_mask_ashr0: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_mask_ashr0: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr0: ; AVX512: # %bb.0: @@ -118,13 +136,21 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) { } define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) { -; XOP-LABEL: rot_v4i32_mask_ashr1: -; XOP: # %bb.0: -; XOP-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOP-NEXT: vprotd $1, %xmm0, %xmm0 -; XOP-NEXT: vpbroadcastd %xmm0, %xmm0 -; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_mask_ashr1: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0 +; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_mask_ashr1: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0 +; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr1: ; AVX512: # %bb.0: