diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 636e88898a55e..3907e864bed1e 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -387,6 +387,12 @@ class PowerPC_VSX_Sca_DDD_Intrinsic
                           [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                           [IntrNoMem]>;
 
+/// PowerPC_VSX_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_VSX_WWW_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix, [llvm_v4i32_ty],
+                          [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Altivec Intrinsic Definitions.
 
@@ -1214,6 +1220,7 @@ def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
 def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
 def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
 def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
+def int_ppc_vsx_xvrlw : PowerPC_VSX_WWW_Intrinsic<"xvrlw">;
 def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">;
 
 let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index dfbbba0116f25..e417ffe6d3677 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -420,8 +420,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
       : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
                       "vucmprlh $VRT, $VRA, $VRB", []>;
 
-  def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                            "xvrlw $XT, $XA, $XB", []>;
+  def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                            "xvrlw $XT, $XA, $XB",
+                            [(set v4i32:$XT,
+                                  (int_ppc_vsx_xvrlw v4i32:$XA, v4i32:$XB))]>;
 
   // AES Acceleration Instructions
   def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
@@ -550,6 +552,13 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB),
           (STXVPRL $XTp, $RA, $RB)>;
 def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB),
           (STXVPRLL $XTp, $RA, $RB)>;
+// Select XVRLW for v4i32 rotates when VSX is available on ISA Future.
+// The Altivec VRLW pattern is intentionally left without an IsNotISAFuture
+// predicate so that rotl stays selectable with -mattr=-vsx; the added
+// complexity makes this pattern win whenever both are legal.
+let Predicates = [HasVSX, IsISAFuture], AddedComplexity = 400 in {
+  def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
+            (v4i32 (XVRLW v4i32:$vA, v4i32:$vB))>;
+}
 
 //---------------------------- Instruction aliases ---------------------------//
 // Predicate combinations available:
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index 12078adbbc2f3..383dcdb06c331 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ 
b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32 ; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64 ; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: llc < %s -mcpu=future -mtriple=powerpc64le-- | FileCheck %s --check-prefix=FUTURE declare i8 @llvm.fshl.i8(i8, i8, i8) declare i16 @llvm.fshl.i16(i16, i16, i16) @@ -24,6 +25,13 @@ define i8 @rotl_i8_const_shift(i8 %x) { ; CHECK-NEXT: rlwimi 4, 3, 3, 0, 28 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotl_i8_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotlwi 4, 3, 27 +; FUTURE-NEXT: rlwimi 4, 3, 3, 0, 28 +; FUTURE-NEXT: mr 3, 4 +; FUTURE-NEXT: blr %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) ret i8 %f } @@ -43,6 +51,11 @@ define i64 @rotl_i64_const_shift(i64 %x) { ; CHECK64: # %bb.0: ; CHECK64-NEXT: rotldi 3, 3, 3 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotl_i64_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotldi 3, 3, 3 +; FUTURE-NEXT: blr %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) ret i64 %f } @@ -60,6 +73,17 @@ define i16 @rotl_i16(i16 %x, i16 %z) { ; CHECK-NEXT: srw 4, 5, 4 ; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotl_i16: +; FUTURE: # %bb.0: +; FUTURE-NEXT: clrlwi 6, 4, 28 +; FUTURE-NEXT: neg 4, 4 +; FUTURE-NEXT: clrlwi 5, 3, 16 +; FUTURE-NEXT: clrlwi 4, 4, 28 +; FUTURE-NEXT: slw 3, 3, 6 +; FUTURE-NEXT: srw 4, 5, 4 +; FUTURE-NEXT: or 3, 3, 4 +; FUTURE-NEXT: blr %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) ret i16 %f } @@ -69,6 +93,11 @@ define i32 @rotl_i32(i32 %x, i32 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: rotlw 3, 3, 4 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotl_i32: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotlw 3, 3, 4 +; FUTURE-NEXT: blr %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) ret i32 %f } @@ -100,6 +129,11 @@ define i64 @rotl_i64(i64 %x, i64 
%z) { ; CHECK64: # %bb.0: ; CHECK64-NEXT: rotld 3, 3, 4 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotl_i64: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotld 3, 3, 4 +; FUTURE-NEXT: blr %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -124,6 +158,11 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK64: # %bb.0: ; CHECK64-NEXT: vrlw 2, 2, 3 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotl_v4i32: +; FUTURE: # %bb.0: +; FUTURE-NEXT: xvrlw 34, 34, 35 +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) ret <4 x i32> %f } @@ -150,6 +189,12 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) { ; CHECK64-NEXT: vspltisw 3, 3 ; CHECK64-NEXT: vrlw 2, 2, 3 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotl_v4i32_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: vspltisw 3, 3 +; FUTURE-NEXT: xvrlw 34, 34, 35 +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) ret <4 x i32> %f } @@ -163,6 +208,13 @@ define i8 @rotr_i8_const_shift(i8 %x) { ; CHECK-NEXT: rlwimi 4, 3, 5, 0, 26 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_i8_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotlwi 4, 3, 29 +; FUTURE-NEXT: rlwimi 4, 3, 5, 0, 26 +; FUTURE-NEXT: mr 3, 4 +; FUTURE-NEXT: blr %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) ret i8 %f } @@ -172,6 +224,11 @@ define i32 @rotr_i32_const_shift(i32 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: rotlwi 3, 3, 29 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_i32_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: rotlwi 3, 3, 29 +; FUTURE-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) ret i32 %f } @@ -189,6 +246,17 @@ define i16 @rotr_i16(i16 %x, i16 %z) { ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: or 3, 5, 3 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_i16: +; FUTURE: # %bb.0: +; FUTURE-NEXT: clrlwi 6, 4, 28 +; FUTURE-NEXT: neg 4, 4 +; FUTURE-NEXT: clrlwi 5, 3, 16 +; FUTURE-NEXT: clrlwi 4, 4, 28 +; FUTURE-NEXT: srw 5, 5, 
6 +; FUTURE-NEXT: slw 3, 3, 4 +; FUTURE-NEXT: or 3, 5, 3 +; FUTURE-NEXT: blr %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) ret i16 %f } @@ -199,6 +267,12 @@ define i32 @rotr_i32(i32 %x, i32 %z) { ; CHECK-NEXT: neg 4, 4 ; CHECK-NEXT: rotlw 3, 3, 4 ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_i32: +; FUTURE: # %bb.0: +; FUTURE-NEXT: neg 4, 4 +; FUTURE-NEXT: rotlw 3, 3, 4 +; FUTURE-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) ret i32 %f } @@ -231,6 +305,12 @@ define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK64-NEXT: neg 4, 4 ; CHECK64-NEXT: rotld 3, 3, 4 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotr_i64: +; FUTURE: # %bb.0: +; FUTURE-NEXT: neg 4, 4 +; FUTURE-NEXT: rotld 3, 3, 4 +; FUTURE-NEXT: blr %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -263,6 +343,12 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK64-NEXT: vsubuwm 3, 4, 3 ; CHECK64-NEXT: vrlw 2, 2, 3 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotr_v4i32: +; FUTURE: # %bb.0: +; FUTURE-NEXT: vnegw 3, 3 +; FUTURE-NEXT: xvrlw 34, 34, 35 +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) ret <4 x i32> %f } @@ -293,6 +379,12 @@ define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) { ; CHECK64-NEXT: vsubuwm 3, 4, 3 ; CHECK64-NEXT: vrlw 2, 2, 3 ; CHECK64-NEXT: blr +; +; FUTURE-LABEL: rotr_v4i32_const_shift: +; FUTURE: # %bb.0: +; FUTURE-NEXT: xxspltiw 0, 29 +; FUTURE-NEXT: xvrlw 34, 34, 0 +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) ret <4 x i32> %f } @@ -301,6 +393,10 @@ define i32 @rotl_i32_shift_by_bitwidth(i32 %x) { ; CHECK-LABEL: rotl_i32_shift_by_bitwidth: ; CHECK: # %bb.0: ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotl_i32_shift_by_bitwidth: +; FUTURE: # %bb.0: +; FUTURE-NEXT: blr %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32) ret i32 %f } @@ -309,6 +405,10 @@ define i32 @rotr_i32_shift_by_bitwidth(i32 %x) { ; CHECK-LABEL: rotr_i32_shift_by_bitwidth: ; 
CHECK: # %bb.0: ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_i32_shift_by_bitwidth: +; FUTURE: # %bb.0: +; FUTURE-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32) ret i32 %f } @@ -317,6 +417,10 @@ define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) { ; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth: ; CHECK: # %bb.0: ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotl_v4i32_shift_by_bitwidth: +; FUTURE: # %bb.0: +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) ret <4 x i32> %f } @@ -325,6 +429,10 @@ define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) { ; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth: ; CHECK: # %bb.0: ; CHECK-NEXT: blr +; +; FUTURE-LABEL: rotr_v4i32_shift_by_bitwidth: +; FUTURE: # %bb.0: +; FUTURE-NEXT: blr %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) ret <4 x i32> %f } diff --git a/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll b/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll new file mode 100644 index 0000000000000..03b1456f0c036 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: testVRLWMI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrlw v2, v2, v3 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.vsx.xvrlw(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> 
%0 +} diff --git a/llvm/test/CodeGen/PowerPC/vector-rotates.ll b/llvm/test/CodeGen/PowerPC/vector-rotates.ll index 2de8804ba8e24..38e273634da2a 100644 --- a/llvm/test/CodeGen/PowerPC/vector-rotates.ll +++ b/llvm/test/CodeGen/PowerPC/vector-rotates.ll @@ -5,6 +5,9 @@ ; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \ ; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | \ ; RUN: FileCheck --check-prefix=CHECK-P7 %s +; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \ +; RUN: -verify-machineinstrs -mcpu=future < %s | \ +; RUN: FileCheck --check-prefix=CHECK-FUTURE %s define <16 x i8> @rotl_v16i8(<16 x i8> %a) { ; CHECK-P8-LABEL: rotl_v16i8: @@ -23,6 +26,14 @@ define <16 x i8> @rotl_v16i8(<16 x i8> %a) { ; CHECK-P7-NEXT: lxvw4x vs35, 0, r3 ; CHECK-P7-NEXT: vrlb v2, v2, v3 ; CHECK-P7-NEXT: blr +; +; CHECK-FUTURE-LABEL: rotl_v16i8: +; CHECK-FUTURE: # %bb.0: # %entry +; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-FUTURE-NEXT: lxv vs35, 0(r3) +; CHECK-FUTURE-NEXT: vrlb v2, v2, v3 +; CHECK-FUTURE-NEXT: blr entry: %b = shl <16 x i8> %a, %c = lshr <16 x i8> %a, @@ -47,6 +58,14 @@ define <8 x i16> @rotl_v8i16(<8 x i16> %a) { ; CHECK-P7-NEXT: lxvw4x vs35, 0, r3 ; CHECK-P7-NEXT: vrlh v2, v2, v3 ; CHECK-P7-NEXT: blr +; +; CHECK-FUTURE-LABEL: rotl_v8i16: +; CHECK-FUTURE: # %bb.0: # %entry +; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-FUTURE-NEXT: lxv vs35, 0(r3) +; CHECK-FUTURE-NEXT: vrlh v2, v2, v3 +; CHECK-FUTURE-NEXT: blr entry: %b = shl <8 x i16> %a, %c = lshr <8 x i16> %a, @@ -71,6 +90,14 @@ define <4 x i32> @rotl_v4i32_0(<4 x i32> %a) { ; CHECK-P7-NEXT: lxvw4x vs35, 0, r3 ; CHECK-P7-NEXT: vrlw v2, v2, v3 ; CHECK-P7-NEXT: blr +; +; CHECK-FUTURE-LABEL: rotl_v4i32_0: +; CHECK-FUTURE: # %bb.0: # %entry +; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; 
CHECK-FUTURE-NEXT: lxv vs0, 0(r3) +; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0 +; CHECK-FUTURE-NEXT: blr entry: %b = shl <4 x i32> %a, %c = lshr <4 x i32> %a, @@ -94,6 +121,12 @@ define <4 x i32> @rotl_v4i32_1(<4 x i32> %a) { ; CHECK-P7-NEXT: vsubuwm v3, v4, v3 ; CHECK-P7-NEXT: vrlw v2, v2, v3 ; CHECK-P7-NEXT: blr +; +; CHECK-FUTURE-LABEL: rotl_v4i32_1: +; CHECK-FUTURE: # %bb.0: # %entry +; CHECK-FUTURE-NEXT: xxspltiw vs0, 23 +; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0 +; CHECK-FUTURE-NEXT: blr entry: %b = shl <4 x i32> %a, %c = lshr <4 x i32> %a, @@ -124,6 +157,14 @@ define <2 x i64> @rotl_v2i64(<2 x i64> %a) { ; CHECK-P7-NEXT: addi r3, r1, -16 ; CHECK-P7-NEXT: lxvd2x vs34, 0, r3 ; CHECK-P7-NEXT: blr +; +; CHECK-FUTURE-LABEL: rotl_v2i64: +; CHECK-FUTURE: # %bb.0: # %entry +; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-FUTURE-NEXT: lxv vs35, 0(r3) +; CHECK-FUTURE-NEXT: vrld v2, v2, v3 +; CHECK-FUTURE-NEXT: blr entry: %b = shl <2 x i64> %a, %c = lshr <2 x i64> %a,