From aa5bc6edb36bb24da916ad27e9ac50d73e51265c Mon Sep 17 00:00:00 2001 From: Dong Bo Date: Mon, 8 Feb 2021 02:12:29 +0000 Subject: [PATCH] 8258953: AArch64: move NEON instructions to aarch64_neon.ad Reviewed-by: njian, aph --- src/hotspot/cpu/aarch64/aarch64.ad | 2424 -------------------- src/hotspot/cpu/aarch64/aarch64_neon.ad | 2423 ++++++++++++++++++- src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 | 824 ++++++- 3 files changed, 3226 insertions(+), 2445 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 9202b340cf0ef..01915b33e9bba 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -16597,2430 +16597,6 @@ instruct tlsLoadP(thread_RegP dst) ins_pipe(pipe_class_empty); %} -// ====================VECTOR INSTRUCTIONS===================================== - -// Load vector (32 bits) -instruct loadV4(vecD dst, vmem4 mem) -%{ - predicate(n->as_LoadVector()->memory_size() == 4); - match(Set dst (LoadVector mem)); - ins_cost(4 * INSN_COST); - format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} - ins_encode( aarch64_enc_ldrvS(dst, mem) ); - ins_pipe(vload_reg_mem64); -%} - -// Load vector (64 bits) -instruct loadV8(vecD dst, vmem8 mem) -%{ - predicate(n->as_LoadVector()->memory_size() == 8); - match(Set dst (LoadVector mem)); - ins_cost(4 * INSN_COST); - format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} - ins_encode( aarch64_enc_ldrvD(dst, mem) ); - ins_pipe(vload_reg_mem64); -%} - -// Load Vector (128 bits) -instruct loadV16(vecX dst, vmem16 mem) -%{ - predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); - match(Set dst (LoadVector mem)); - ins_cost(4 * INSN_COST); - format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} - ins_encode( aarch64_enc_ldrvQ(dst, mem) ); - ins_pipe(vload_reg_mem128); -%} - -// Store Vector (32 bits) -instruct storeV4(vecD src, vmem4 mem) -%{ - predicate(n->as_StoreVector()->memory_size() == 4); - match(Set mem (StoreVector mem src)); - ins_cost(4 * 
INSN_COST); - format %{ "strs $mem,$src\t# vector (32 bits)" %} - ins_encode( aarch64_enc_strvS(src, mem) ); - ins_pipe(vstore_reg_mem64); -%} - -// Store Vector (64 bits) -instruct storeV8(vecD src, vmem8 mem) -%{ - predicate(n->as_StoreVector()->memory_size() == 8); - match(Set mem (StoreVector mem src)); - ins_cost(4 * INSN_COST); - format %{ "strd $mem,$src\t# vector (64 bits)" %} - ins_encode( aarch64_enc_strvD(src, mem) ); - ins_pipe(vstore_reg_mem64); -%} - -// Store Vector (128 bits) -instruct storeV16(vecX src, vmem16 mem) -%{ - predicate(n->as_StoreVector()->memory_size() == 16); - match(Set mem (StoreVector mem src)); - ins_cost(4 * INSN_COST); - format %{ "strq $mem,$src\t# vector (128 bits)" %} - ins_encode( aarch64_enc_strvQ(src, mem) ); - ins_pipe(vstore_reg_mem128); -%} - -instruct replicate8B(vecD dst, iRegIorL2I src) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (ReplicateB src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (8B)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg64); -%} - -instruct replicate16B(vecX dst, iRegIorL2I src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 16); - match(Set dst (ReplicateB src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (16B)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg128); -%} - -instruct replicate8B_imm(vecD dst, immI con) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (ReplicateB con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(8B)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); - %} - ins_pipe(vmovi_reg_imm64); -%} - -instruct replicate16B_imm(vecX dst, immI con) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 16); - match(Set dst 
(ReplicateB con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(16B)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); - %} - ins_pipe(vmovi_reg_imm128); -%} - -instruct replicate4S(vecD dst, iRegIorL2I src) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (ReplicateS src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (4S)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg64); -%} - -instruct replicate8S(vecX dst, iRegIorL2I src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 8); - match(Set dst (ReplicateS src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (8S)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg128); -%} - -instruct replicate4S_imm(vecD dst, immI con) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (ReplicateS con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(4H)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); - %} - ins_pipe(vmovi_reg_imm64); -%} - -instruct replicate8S_imm(vecX dst, immI con) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 8); - match(Set dst (ReplicateS con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(8H)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); - %} - ins_pipe(vmovi_reg_imm128); -%} - -instruct replicate2I(vecD dst, iRegIorL2I src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateI src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2I)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg64); -%} - -instruct replicate4I(vecX dst, iRegIorL2I 
src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateI src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (4I)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg128); -%} - -instruct replicate2I_imm(vecD dst, immI con) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateI con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(2I)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); - %} - ins_pipe(vmovi_reg_imm64); -%} - -instruct replicate4I_imm(vecX dst, immI con) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateI con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(4I)" %} - ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); - %} - ins_pipe(vmovi_reg_imm128); -%} - -instruct replicate2L(vecX dst, iRegL src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateL src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2L)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); - %} - ins_pipe(vdup_reg_reg128); -%} - -instruct replicate2L_zero(vecX dst, immI0 zero) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateI zero)); - ins_cost(INSN_COST); - format %{ "movi $dst, $zero\t# vector(4I)" %} - ins_encode %{ - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg)); - %} - ins_pipe(vmovi_reg_imm128); -%} - -instruct replicate2F(vecD dst, vRegF src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateF src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2F)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vdup_reg_freg64); 
-%} - -instruct replicate4F(vecX dst, vRegF src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateF src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (4F)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vdup_reg_freg128); -%} - -instruct replicate2D(vecX dst, vRegD src) -%{ - predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateD src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2D)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vdup_reg_dreg128); -%} - -// ====================REDUCTION ARITHMETIC==================================== - -instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2) -%{ - predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (AddReductionVI isrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP tmp2); - format %{ "umov $tmp, $vsrc, S, 0\n\t" - "umov $tmp2, $vsrc, S, 1\n\t" - "addw $tmp, $isrc, $tmp\n\t" - "addw $dst, $tmp, $tmp2\t# add reduction2I" - %} - ins_encode %{ - __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); - __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); - __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register); - __ addw($dst$$Register, $tmp$$Register, $tmp2$$Register); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) -%{ - predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (AddReductionVI isrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP vtmp, TEMP itmp); - format %{ "addv $vtmp, T4S, $vsrc\n\t" - "umov $itmp, $vtmp, S, 0\n\t" - "addw $dst, $itmp, $isrc\t# add reduction4I" - %} - ins_encode %{ - __ addv(as_FloatRegister($vtmp$$reg), __ T4S, - 
as_FloatRegister($vsrc$$reg)); - __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); - __ addw($dst$$Register, $itmp$$Register, $isrc$$Register); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) -%{ - predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (MulReductionVI isrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "umov $tmp, $vsrc, S, 0\n\t" - "mul $dst, $tmp, $isrc\n\t" - "umov $tmp, $vsrc, S, 1\n\t" - "mul $dst, $tmp, $dst\t# mul reduction2I" - %} - ins_encode %{ - __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); - __ mul($dst$$Register, $tmp$$Register, $isrc$$Register); - __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); - __ mul($dst$$Register, $tmp$$Register, $dst$$Register); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) -%{ - predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); - match(Set dst (MulReductionVI isrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP vtmp, TEMP itmp, TEMP dst); - format %{ "ins $vtmp, D, $vsrc, 0, 1\n\t" - "mulv $vtmp, T2S, $vtmp, $vsrc\n\t" - "umov $itmp, $vtmp, S, 0\n\t" - "mul $dst, $itmp, $isrc\n\t" - "umov $itmp, $vtmp, S, 1\n\t" - "mul $dst, $itmp, $dst\t# mul reduction4I" - %} - ins_encode %{ - __ ins(as_FloatRegister($vtmp$$reg), __ D, - as_FloatRegister($vsrc$$reg), 0, 1); - __ mulv(as_FloatRegister($vtmp$$reg), __ T2S, - as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg)); - __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); - __ mul($dst$$Register, $itmp$$Register, $isrc$$Register); - __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1); - __ mul($dst$$Register, $itmp$$Register, $dst$$Register); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_add2F(vRegF dst, vRegF fsrc, vecD 
vsrc, vecD tmp) -%{ - match(Set dst (AddReductionVF fsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "fadds $dst, $fsrc, $vsrc\n\t" - "ins $tmp, S, $vsrc, 0, 1\n\t" - "fadds $dst, $dst, $tmp\t# add reduction2F" - %} - ins_encode %{ - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 1); - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_add4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp) -%{ - match(Set dst (AddReductionVF fsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "fadds $dst, $fsrc, $vsrc\n\t" - "ins $tmp, S, $vsrc, 0, 1\n\t" - "fadds $dst, $dst, $tmp\n\t" - "ins $tmp, S, $vsrc, 0, 2\n\t" - "fadds $dst, $dst, $tmp\n\t" - "ins $tmp, S, $vsrc, 0, 3\n\t" - "fadds $dst, $dst, $tmp\t# add reduction4F" - %} - ins_encode %{ - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 1); - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 2); - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 3); - __ fadds(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_mul2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp) -%{ - match(Set dst (MulReductionVF fsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "fmuls $dst, $fsrc, $vsrc\n\t" - "ins $tmp, S, $vsrc, 0, 1\n\t" - "fmuls $dst, $dst, 
$tmp\t# mul reduction2F" - %} - ins_encode %{ - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 1); - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_mul4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp) -%{ - match(Set dst (MulReductionVF fsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "fmuls $dst, $fsrc, $vsrc\n\t" - "ins $tmp, S, $vsrc, 0, 1\n\t" - "fmuls $dst, $dst, $tmp\n\t" - "ins $tmp, S, $vsrc, 0, 2\n\t" - "fmuls $dst, $dst, $tmp\n\t" - "ins $tmp, S, $vsrc, 0, 3\n\t" - "fmuls $dst, $dst, $tmp\t# mul reduction4F" - %} - ins_encode %{ - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 1); - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 2); - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ S, - as_FloatRegister($vsrc$$reg), 0, 3); - __ fmuls(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_add2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) -%{ - match(Set dst (AddReductionVD dsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "faddd $dst, $dsrc, $vsrc\n\t" - "ins $tmp, D, $vsrc, 0, 1\n\t" - "faddd $dst, $dst, $tmp\t# add reduction2D" - %} - ins_encode %{ - __ faddd(as_FloatRegister($dst$$reg), - as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ D, - 
as_FloatRegister($vsrc$$reg), 0, 1); - __ faddd(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct reduce_mul2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) -%{ - match(Set dst (MulReductionVD dsrc vsrc)); - ins_cost(INSN_COST); - effect(TEMP tmp, TEMP dst); - format %{ "fmuld $dst, $dsrc, $vsrc\n\t" - "ins $tmp, D, $vsrc, 0, 1\n\t" - "fmuld $dst, $dst, $tmp\t# mul reduction2D" - %} - ins_encode %{ - __ fmuld(as_FloatRegister($dst$$reg), - as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg)); - __ ins(as_FloatRegister($tmp$$reg), __ D, - as_FloatRegister($vsrc$$reg), 0, 1); - __ fmuld(as_FloatRegister($dst$$reg), - as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -// ====================VECTOR ARITHMETIC======================================= - -// --------------------------------- ADD -------------------------------------- - -instruct vadd8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (AddVB src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - -instruct vadd16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (AddVB src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vadd4S(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (AddVS src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# 
vector (4H)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - -instruct vadd8S(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVS src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vadd2I(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVI src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - -instruct vadd4I(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVI src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vadd2L(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVL src1 src2)); - ins_cost(INSN_COST); - format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} - ins_encode %{ - __ addv(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vadd2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ fadd(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - 
ins_pipe(vdop_fp64); -%} - -instruct vadd4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fadd(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vadd2D(vecX dst, vecX src1, vecX src2) -%{ - match(Set dst (AddVD src1 src2)); - ins_cost(INSN_COST); - format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ fadd(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -// --------------------------------- SUB -------------------------------------- - -instruct vsub8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (SubVB src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - -instruct vsub16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (SubVB src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vsub4S(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (SubVS src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - 
-instruct vsub8S(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (SubVS src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vsub2I(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SubVI src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop64); -%} - -instruct vsub4I(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SubVI src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vsub2L(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SubVL src1 src2)); - ins_cost(INSN_COST); - format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} - ins_encode %{ - __ subv(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop128); -%} - -instruct vsub2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SubVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ fsub(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp64); -%} - -instruct vsub4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SubVF src1 src2)); - 
ins_cost(INSN_COST); - format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fsub(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vsub2D(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SubVD src1 src2)); - ins_cost(INSN_COST); - format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ fsub(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -// --------------------------------- MUL -------------------------------------- - -instruct vmul8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (MulVB src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul64); -%} - -instruct vmul16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (MulVB src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul128); -%} - -instruct vmul4S(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (MulVS src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul64); -%} - -instruct vmul8S(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (MulVS 
src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul128); -%} - -instruct vmul2I(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (MulVI src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul64); -%} - -instruct vmul4I(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (MulVI src1 src2)); - ins_cost(INSN_COST); - format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ mulv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmul128); -%} - -instruct vmul2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (MulVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ fmul(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp64); -%} - -instruct vmul4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (MulVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fmul(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -instruct vmul2D(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (MulVD src1 src2)); - ins_cost(INSN_COST); - format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ 
fmul(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// --------------------------------- MLA -------------------------------------- - -instruct vmla4S(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (AddVS dst (MulVS src1 src2))); - ins_cost(INSN_COST); - format %{ "mlav $dst,$src1,$src2\t# vector (4H)" %} - ins_encode %{ - __ mlav(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla64); -%} - -instruct vmla8S(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVS dst (MulVS src1 src2))); - ins_cost(INSN_COST); - format %{ "mlav $dst,$src1,$src2\t# vector (8H)" %} - ins_encode %{ - __ mlav(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla128); -%} - -instruct vmla2I(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVI dst (MulVI src1 src2))); - ins_cost(INSN_COST); - format %{ "mlav $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ mlav(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla64); -%} - -instruct vmla4I(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVI dst (MulVI src1 src2))); - ins_cost(INSN_COST); - format %{ "mlav $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ mlav(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla128); -%} - -// dst + src1 * src2 -instruct vmla2F(vecD dst, vecD src1, vecD src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 2); - match(Set dst (FmaVF dst (Binary src1 src2))); - format %{ "fmla 
$dst,$src1,$src2\t# vector (2S)" %} - ins_cost(INSN_COST); - ins_encode %{ - __ fmla(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp64); -%} - -// dst + src1 * src2 -instruct vmla4F(vecX dst, vecX src1, vecX src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 4); - match(Set dst (FmaVF dst (Binary src1 src2))); - format %{ "fmla $dst,$src1,$src2\t# vector (4S)" %} - ins_cost(INSN_COST); - ins_encode %{ - __ fmla(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// dst + src1 * src2 -instruct vmla2D(vecX dst, vecX src1, vecX src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 2); - match(Set dst (FmaVD dst (Binary src1 src2))); - format %{ "fmla $dst,$src1,$src2\t# vector (2D)" %} - ins_cost(INSN_COST); - ins_encode %{ - __ fmla(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// --------------------------------- MLS -------------------------------------- - -instruct vmls4S(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (SubVS dst (MulVS src1 src2))); - ins_cost(INSN_COST); - format %{ "mlsv $dst,$src1,$src2\t# vector (4H)" %} - ins_encode %{ - __ mlsv(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla64); -%} - -instruct vmls8S(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (SubVS dst (MulVS src1 src2))); - ins_cost(INSN_COST); - format %{ "mlsv $dst,$src1,$src2\t# vector (8H)" %} - ins_encode %{ - __ mlsv(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla128); -%} - -instruct vmls2I(vecD dst, vecD src1, vecD src2) -%{ - 
predicate(n->as_Vector()->length() == 2); - match(Set dst (SubVI dst (MulVI src1 src2))); - ins_cost(INSN_COST); - format %{ "mlsv $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ mlsv(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla64); -%} - -instruct vmls4I(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SubVI dst (MulVI src1 src2))); - ins_cost(INSN_COST); - format %{ "mlsv $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ mlsv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmla128); -%} - -// dst - src1 * src2 -instruct vmls2F(vecD dst, vecD src1, vecD src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 2); - match(Set dst (FmaVF dst (Binary (NegVF src1) src2))); - match(Set dst (FmaVF dst (Binary src1 (NegVF src2)))); - format %{ "fmls $dst,$src1,$src2\t# vector (2S)" %} - ins_cost(INSN_COST); - ins_encode %{ - __ fmls(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp64); -%} - -// dst - src1 * src2 -instruct vmls4F(vecX dst, vecX src1, vecX src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 4); - match(Set dst (FmaVF dst (Binary (NegVF src1) src2))); - match(Set dst (FmaVF dst (Binary src1 (NegVF src2)))); - format %{ "fmls $dst,$src1,$src2\t# vector (4S)" %} - ins_cost(INSN_COST); - ins_encode %{ - __ fmls(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// dst - src1 * src2 -instruct vmls2D(vecX dst, vecX src1, vecX src2) %{ - predicate(UseFMA && n->as_Vector()->length() == 2); - match(Set dst (FmaVD dst (Binary (NegVD src1) src2))); - match(Set dst (FmaVD dst (Binary src1 (NegVD src2)))); - format %{ "fmls $dst,$src1,$src2\t# vector (2D)" %} - ins_cost(INSN_COST); - 
ins_encode %{ - __ fmls(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// --------------- Vector Multiply-Add Shorts into Integer -------------------- - -instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{ - predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); - match(Set dst (MulAddVS2VI src1 src2)); - ins_cost(INSN_COST); - effect(TEMP_DEF dst, TEMP tmp); - format %{ "smullv $tmp, $src1, $src2\t# vector (4H)\n\t" - "smullv $dst, $src1, $src2\t# vector (8H)\n\t" - "addpv $dst, $tmp, $dst\t# vector (4S)\n\t" %} - ins_encode %{ - __ smullv(as_FloatRegister($tmp$$reg), __ T4H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - __ smullv(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - __ addpv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($tmp$$reg), - as_FloatRegister($dst$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// --------------------------------- DIV -------------------------------------- - -instruct vdiv2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (DivVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} - ins_encode %{ - __ fdiv(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp64); -%} - -instruct vdiv4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (DivVF src1 src2)); - ins_cost(INSN_COST); - format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fdiv(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -instruct vdiv2D(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (DivVD 
src1 src2)); - ins_cost(INSN_COST); - format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ fdiv(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vmuldiv_fp128); -%} - -// --------------------------------- SQRT ------------------------------------- - -instruct vsqrt2F(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SqrtVF src)); - format %{ "fsqrt $dst, $src\t# vector (2F)" %} - ins_encode %{ - __ fsqrt(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp64); -%} - -instruct vsqrt4F(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (SqrtVF src)); - format %{ "fsqrt $dst, $src\t# vector (4F)" %} - ins_encode %{ - __ fsqrt(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vsqrt_fp128); -%} - -instruct vsqrt2D(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (SqrtVD src)); - format %{ "fsqrt $dst, $src\t# vector (2D)" %} - ins_encode %{ - __ fsqrt(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vsqrt_fp128); -%} - -// --------------------------------- NEG -------------------------------------- - -instruct vneg2F(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (NegVF src)); - ins_cost(INSN_COST * 3); - format %{ "fneg $dst,$src\t# vector (2S)" %} - ins_encode %{ - __ fneg(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp64); -%} - -instruct vneg4F(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (NegVF src)); - ins_cost(INSN_COST * 3); - format %{ "fneg $dst,$src\t# vector (4S)" %} - ins_encode %{ - __ fneg(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp128); -%} - -instruct vneg2D(vecX dst, vecX src) -%{ - 
predicate(n->as_Vector()->length() == 2); - match(Set dst (NegVD src)); - ins_cost(INSN_COST * 3); - format %{ "fneg $dst,$src\t# vector (2D)" %} - ins_encode %{ - __ fneg(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp128); -%} - -// --------------------------------- AND -------------------------------------- - -instruct vand8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 4 || - n->as_Vector()->length_in_bytes() == 8); - match(Set dst (AndV src1 src2)); - ins_cost(INSN_COST); - format %{ "and $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ andr(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vand16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 16); - match(Set dst (AndV src1 src2)); - ins_cost(INSN_COST); - format %{ "and $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ andr(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical128); -%} - -// --------------------------------- OR --------------------------------------- - -instruct vor8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 4 || - n->as_Vector()->length_in_bytes() == 8); - match(Set dst (OrV src1 src2)); - ins_cost(INSN_COST); - format %{ "and $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ orr(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vor16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 16); - match(Set dst (OrV src1 src2)); - ins_cost(INSN_COST); - format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ orr(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - 
as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical128); -%} - -// --------------------------------- XOR -------------------------------------- - -instruct vxor8B(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 4 || - n->as_Vector()->length_in_bytes() == 8); - match(Set dst (XorV src1 src2)); - ins_cost(INSN_COST); - format %{ "xor $dst,$src1,$src2\t# vector (8B)" %} - ins_encode %{ - __ eor(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vxor16B(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length_in_bytes() == 16); - match(Set dst (XorV src1 src2)); - ins_cost(INSN_COST); - format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} - ins_encode %{ - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vlogical128); -%} - -// ------------------------------ Shift --------------------------------------- -instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 4 || - n->as_Vector()->length_in_bytes() == 8); - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "dup $dst, $cnt\t# shift count vector (8B)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg)); - %} - ins_pipe(vdup_reg_reg64); -%} - -instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 16); - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "dup $dst, $cnt\t# shift count vector (16B)" %} - ins_encode %{ - __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); - %} - ins_pipe(vdup_reg_reg128); -%} - -instruct vsll8B(vecD dst, vecD src, vecD shift) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (LShiftVB src shift)); - ins_cost(INSN_COST); - 
format %{ "sshl $dst,$src,$shift\t# vector (8B)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsll16B(vecX dst, vecX src, vecX shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (LShiftVB src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (16B)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift128); -%} - -// Right shifts with vector shift count on aarch64 SIMD are implemented -// as left shift by negative shift count. -// There are two cases for vector shift count. -// -// Case 1: The vector shift count is from replication. -// | | -// LoadVector RShiftCntV -// | / -// RShiftVI -// Note: In inner loop, multiple neg instructions are used, which can be -// moved to outer loop and merge into one neg instruction. -// -// Case 2: The vector shift count is from loading. -// This case isn't supported by middle-end now. But it's supported by -// panama/vectorIntrinsics(JEP 338: Vector API). 
-// | | -// LoadVector LoadVector -// | / -// RShiftVI -// - -instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (RShiftVB src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (8B)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (RShiftVB src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (16B)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (URShiftVB src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (8B)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (URShiftVB src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (16B)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - 
as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (LShiftVB src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (8B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) { - __ eor(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ shl(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (LShiftVB src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (16B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) { - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ shl(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (RShiftVB src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) sh = 7; - __ sshr(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (RShiftVB src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} - 
ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) sh = 7; - __ sshr(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (URShiftVB src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) { - __ eor(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ ushr(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (URShiftVB src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) { - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ ushr(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsll4S(vecD dst, vecD src, vecD shift) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (LShiftVS src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsll8S(vecX dst, vecX src, vecX shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (LShiftVS src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (8H)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ 
T8H, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (RShiftVS src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (4H)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (RShiftVS src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (8H)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (URShiftVS src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (4H)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (URShiftVS src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (8H)" %} - ins_encode %{ - __ 
negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (LShiftVS src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (4H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) { - __ eor(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ shl(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (LShiftVS src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (8H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) { - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ shl(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (RShiftVS src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) sh = 15; - __ sshr(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (RShiftVS src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, 
$src, $shift\t# vector (8H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) sh = 15; - __ sshr(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2 || - n->as_Vector()->length() == 4); - match(Set dst (URShiftVS src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) { - __ eor(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ ushr(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (URShiftVS src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) { - __ eor(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), - as_FloatRegister($src$$reg)); - } else { - __ ushr(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsll2I(vecD dst, vecD src, vecD shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (LShiftVI src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsll4I(vecX dst, vecX src, vecX shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (LShiftVI src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (4S)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ 
T4S, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (RShiftVI src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (2S)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (RShiftVI src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (4S)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (URShiftVI src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (2S)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T8B, - as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift64); -%} - -instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (URShiftVI src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (4S)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ 
ushl(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (LShiftVI src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (2S)" %} - ins_encode %{ - __ shl(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (LShiftVI src (LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (4S)" %} - ins_encode %{ - __ shl(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (RShiftVI src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (2S)" %} - ins_encode %{ - __ sshr(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (RShiftVI src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} - ins_encode %{ - __ sshr(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (URShiftVI src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (2S)" %} - ins_encode %{ - __ ushr(as_FloatRegister($dst$$reg), __ T2S, - 
as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (URShiftVI src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} - ins_encode %{ - __ ushr(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsll2L(vecX dst, vecX src, vecX shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (LShiftVL src shift)); - ins_cost(INSN_COST); - format %{ "sshl $dst,$src,$shift\t# vector (2D)" %} - ins_encode %{ - __ sshl(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - as_FloatRegister($shift$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (RShiftVL src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "sshl $dst,$src,$tmp\t# vector (2D)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ sshl(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (URShiftVL src shift)); - ins_cost(INSN_COST); - effect(TEMP tmp); - format %{ "negr $tmp,$shift\t" - "ushl $dst,$src,$tmp\t# vector (2D)" %} - ins_encode %{ - __ negr(as_FloatRegister($tmp$$reg), __ T16B, - as_FloatRegister($shift$$reg)); - __ ushl(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - as_FloatRegister($tmp$$reg)); - %} - ins_pipe(vshift128); -%} - -instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (LShiftVL src 
(LShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "shl $dst, $src, $shift\t# vector (2D)" %} - ins_encode %{ - __ shl(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (RShiftVL src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} - ins_encode %{ - __ sshr(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (URShiftVL src (RShiftCntV shift))); - ins_cost(INSN_COST); - format %{ "ushr $dst, $src, $shift\t# vector (2D)" %} - ins_encode %{ - __ ushr(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (8B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) sh = 7; - __ ssra(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (16B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 8) sh = 7; - __ ssra(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{ - 
predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (4H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) sh = 15; - __ ssra(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (8H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh >= 16) sh = 15; - __ ssra(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), sh); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (2S)" %} - ins_encode %{ - __ ssra(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (4S)" %} - ins_encode %{ - __ ssra(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "ssra $dst, $src, $shift\t# vector (2D)" %} - ins_encode %{ - __ ssra(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - 
ins_pipe(vshift128_imm); -%} - -instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (8B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh < 8) { - __ usra(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (16B)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh < 8) { - __ usra(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (4H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh < 16) { - __ usra(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (8H)" %} - ins_encode %{ - int sh = (int)$shift$$constant; - if (sh < 16) { - __ usra(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($src$$reg), sh); - } - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, 
$shift\t# vector (2S)" %} - ins_encode %{ - __ usra(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift64_imm); -%} - -instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (4S)" %} - ins_encode %{ - __ usra(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift)))); - ins_cost(INSN_COST); - format %{ "usra $dst, $src, $shift\t# vector (2D)" %} - ins_encode %{ - __ usra(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg), - (int)$shift$$constant); - %} - ins_pipe(vshift128_imm); -%} - -instruct vmax2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); - match(Set dst (MaxV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmax $dst,$src1,$src2\t# vector (2F)" %} - ins_encode %{ - __ fmax(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp64); -%} - -instruct vmax4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); - match(Set dst (MaxV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmax $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fmax(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vmax2D(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2 && 
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); - match(Set dst (MaxV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmax $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ fmax(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vmin2F(vecD dst, vecD src1, vecD src2) -%{ - predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); - match(Set dst (MinV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmin $dst,$src1,$src2\t# vector (2F)" %} - ins_encode %{ - __ fmin(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp64); -%} - -instruct vmin4F(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); - match(Set dst (MinV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmin $dst,$src1,$src2\t# vector (4S)" %} - ins_encode %{ - __ fmin(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vmin2D(vecX dst, vecX src1, vecX src2) -%{ - predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); - match(Set dst (MinV src1 src2)); - ins_cost(INSN_COST); - format %{ "fmin $dst,$src1,$src2\t# vector (2D)" %} - ins_encode %{ - __ fmin(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src1$$reg), - as_FloatRegister($src2$$reg)); - %} - ins_pipe(vdop_fp128); -%} - -instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{ - predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); - match(Set dst (RoundDoubleModeV src rmode)); - format %{ "frint $dst, $src, $rmode" %} - ins_encode %{ - switch ($rmode$$constant) { - case RoundDoubleModeNode::rmode_rint: - __ 
frintn(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - break; - case RoundDoubleModeNode::rmode_floor: - __ frintm(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - break; - case RoundDoubleModeNode::rmode_ceil: - __ frintp(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); - break; - } - %} - ins_pipe(vdop_fp128); -%} - -instruct vpopcount4I(vecX dst, vecX src) %{ - predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); - match(Set dst (PopCountVI src)); - format %{ - "cnt $dst, $src\t# vector (16B)\n\t" - "uaddlp $dst, $dst\t# vector (16B)\n\t" - "uaddlp $dst, $dst\t# vector (8H)" - %} - ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_class_default); -%} - -instruct vpopcount2I(vecD dst, vecD src) %{ - predicate(UsePopCountInstruction && n->as_Vector()->length() == 2); - match(Set dst (PopCountVI src)); - format %{ - "cnt $dst, $src\t# vector (8B)\n\t" - "uaddlp $dst, $dst\t# vector (8B)\n\t" - "uaddlp $dst, $dst\t# vector (4H)" - %} - ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($dst$$reg)); - %} - ins_pipe(pipe_class_default); -%} - //----------PEEPHOLE RULES----------------------------------------------------- // These must follow all instruction definitions as they use the names // defined in the instructions definitions. 
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad index 2ed8b9488c723..59aa45c2de16e 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon.ad +++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad @@ -30,7 +30,7 @@ // ------------------------------ Load/store/reinterpret ----------------------- -// Load vector (16 bits) +// Load Vector (16 bits) instruct loadV2(vecD dst, memory mem) %{ predicate(n->as_LoadVector()->memory_size() == 2); @@ -3702,3 +3702,2424 @@ instruct vabd2D(vecX dst, vecX src1, vecX src2) %} ins_pipe(vunop_fp128); %} + +// Load Vector (32 bits) +instruct loadV4(vecD dst, vmem4 mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} + ins_encode( aarch64_enc_ldrvS(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Load Vector (64 bits) +instruct loadV8(vecD dst, vmem8 mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} + ins_encode( aarch64_enc_ldrvD(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Load Vector (128 bits) +instruct loadV16(vecX dst, vmem16 mem) +%{ + predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} + ins_encode( aarch64_enc_ldrvQ(dst, mem) ); + ins_pipe(vload_reg_mem128); +%} + +// Store Vector (32 bits) +instruct storeV4(vecD src, vmem4 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strs $mem,$src\t# vector (32 bits)" %} + ins_encode( aarch64_enc_strvS(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Store Vector (64 bits) +instruct storeV8(vecD src, vmem8 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 8); + 
match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strd $mem,$src\t# vector (64 bits)" %} + ins_encode( aarch64_enc_strvD(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Store Vector (128 bits) +instruct storeV16(vecX src, vmem16 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strq $mem,$src\t# vector (128 bits)" %} + ins_encode( aarch64_enc_strvQ(src, mem) ); + ins_pipe(vstore_reg_mem128); +%} + +instruct replicate8B(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate16B(vecX dst, iRegIorL2I src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate8B_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (8B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate16B_imm(vecX dst, immI con) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (16B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate4S(vecD dst, iRegIorL2I src) +%{ + 
predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate8S(vecX dst, iRegIorL2I src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate4S_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (4H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate8S_imm(vecX dst, immI con) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (8H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2I(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate4I(vecX dst, iRegIorL2I src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); + %} + 
ins_pipe(vdup_reg_reg128); +%} + +instruct replicate2I_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (2I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate4I_imm(vecX dst, immI con) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector (4I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2L(vecX dst, iRegL src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate2L_zero(vecX dst, immI0 zero) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector (4I)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg)); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2F(vecD dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_freg64); +%} + +instruct replicate4F(vecX dst, vRegF src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} + ins_encode %{ + __ 
dup(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_freg128); +%} + +instruct replicate2D(vecX dst, vRegD src) +%{ + predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_dreg128); +%} + +// ====================REDUCTION ARITHMETIC==================================== + +instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP tmp2); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $tmp2, $vsrc, S, 1\n\t" + "addw $tmp, $isrc, $tmp\n\t" + "addw $dst, $tmp, $tmp2\t# add reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register); + __ addw($dst$$Register, $tmp$$Register, $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP vtmp, TEMP itmp); + format %{ "addv $vtmp, T4S, $vsrc\n\t" + "umov $itmp, $vtmp, S, 0\n\t" + "addw $dst, $itmp, $isrc\t# add reduction4I" + %} + ins_encode %{ + __ addv(as_FloatRegister($vtmp$$reg), __ T4S, + as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); + __ addw($dst$$Register, $itmp$$Register, $isrc$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul2I(iRegINoSp dst, 
iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "mul $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "mul $dst, $tmp, $dst\t# mul reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ mul($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ mul($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP vtmp, TEMP itmp, TEMP dst); + format %{ "ins $vtmp, D, $vsrc, 0, 1\n\t" + "mulv $vtmp, T2S, $vtmp, $vsrc\n\t" + "umov $itmp, $vtmp, S, 0\n\t" + "mul $dst, $itmp, $isrc\n\t" + "umov $itmp, $vtmp, S, 1\n\t" + "mul $dst, $itmp, $dst\t# mul reduction4I" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp$$reg), __ T2S, + as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); + __ mul($dst$$Register, $itmp$$Register, $isrc$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1); + __ mul($dst$$Register, $itmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp) +%{ + match(Set dst (AddReductionVF fsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fadds $dst, $fsrc, $vsrc\n\t" + "ins $tmp, S, $vsrc, 0, 1\n\t" + "fadds $dst, $dst, $tmp\t# add reduction2F" + 
%} + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp) +%{ + match(Set dst (AddReductionVF fsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fadds $dst, $fsrc, $vsrc\n\t" + "ins $tmp, S, $vsrc, 0, 1\n\t" + "fadds $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 2\n\t" + "fadds $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 3\n\t" + "fadds $dst, $dst, $tmp\t# add reduction4F" + %} + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 2); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 3); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp) +%{ + match(Set dst (MulReductionVF fsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuls $dst, $fsrc, $vsrc\n\t" + "ins $tmp, S, $vsrc, 0, 1\n\t" + "fmuls $dst, $dst, $tmp\t# mul reduction2F" + %} + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ 
fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp) +%{ + match(Set dst (MulReductionVF fsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuls $dst, $fsrc, $vsrc\n\t" + "ins $tmp, S, $vsrc, 0, 1\n\t" + "fmuls $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 2\n\t" + "fmuls $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 3\n\t" + "fmuls $dst, $dst, $tmp\t# mul reduction4F" + %} + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($fsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 2); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 3); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) +%{ + match(Set dst (AddReductionVD dsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "faddd $dst, $dsrc, $vsrc\n\t" + "ins $tmp, D, $vsrc, 0, 1\n\t" + "faddd $dst, $dst, $tmp\t# add reduction2D" + %} + ins_encode %{ + __ faddd(as_FloatRegister($dst$$reg), + as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ faddd(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp) +%{ + 
match(Set dst (MulReductionVD dsrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuld $dst, $dsrc, $vsrc\n\t" + "ins $tmp, D, $vsrc, 0, 1\n\t" + "fmuld $dst, $dst, $tmp\t# mul reduction2D" + %} + ins_encode %{ + __ fmuld(as_FloatRegister($dst$$reg), + as_FloatRegister($dsrc$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ fmuld(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +instruct vadd8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd8S(vecX dst, vecX src1, vecX src2) +%{ + 
predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vadd4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector 
(4S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vadd2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct vsub8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ 
subv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vsub4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + 
+// Double-precision vector subtract, two lanes in a 128-bit (vecX) register.
+instruct vsub2D(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %}
+  ins_encode %{
+    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vdop_fp128);
+%}
+
+// --------------------------------- MUL --------------------------------------
+
+// Lane-wise multiply. Each rule is selected by the lane count of the node.
+// The vecD byte and short rules also accept the half-length vectors
+// (4 bytes / 2 shorts) and still emit the full T8B / T4H arrangement.
+instruct vmul8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (MulVB src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul64);
+%}
+
+instruct vmul16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (MulVB src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul128);
+%}
+
+instruct vmul4S(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul64);
+%}
+
+instruct vmul8S(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul128);
+%}
+
+instruct vmul2I(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul64);
+%}
+
+instruct vmul4I(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmul128);
+%}
+
+instruct vmul2F(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVF src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+instruct vmul4F(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+instruct vmul2D(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %}
+  ins_encode %{
+    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// --------------------------------- MLA --------------------------------------
+
+// Multiply-accumulate. The match rules name dst as the addend as well as the
+// result, i.e. dst = dst + src1 * src2 in a single instruction.
+instruct vmla4S(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav $dst,$src1,$src2\t# vector (4H)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla64);
+%}
+
+instruct vmla8S(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav $dst,$src1,$src2\t# vector (8H)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla128);
+%}
+
+instruct vmla2I(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla64);
+%}
+
+instruct vmla4I(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla128);
+%}
+
+// dst + src1 * src2 (floating-point forms match the FmaV* nodes and are only
+// enabled when UseFMA is set)
+instruct vmla2F(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVF dst (Binary src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "fmla $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+// dst + src1 * src2
+instruct vmla4F(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(UseFMA && n->as_Vector()->length() == 4);
+  match(Set dst (FmaVF dst (Binary src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "fmla $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// dst + src1 * src2
+instruct vmla2D(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVD dst (Binary src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "fmla $dst,$src1,$src2\t# vector (2D)" %}
+  ins_encode %{
+    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// --------------------------------- MLS --------------------------------------
+
+// Multiply-subtract: dst = dst - src1 * src2, with dst again used as both
+// input and output of the match rule.
+instruct vmls4S(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv $dst,$src1,$src2\t# vector (4H)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla64);
+%}
+
+instruct vmls8S(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv $dst,$src1,$src2\t# vector (8H)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla128);
+%}
+
+instruct vmls2I(vecD dst, vecD src1, vecD src2) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla64);
+%}
+
+instruct vmls4I(vecX dst, vecX src1, vecX src2) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmla128);
+%}
+
+// dst - src1 * src2 (matches an FmaVF whose first or second multiplicand is
+// negated; only enabled when UseFMA is set)
+instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
+  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
+  ins_cost(INSN_COST);
+  format %{ "fmls $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+// dst - src1 * src2
+instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 4);
+  match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
+  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
+  ins_cost(INSN_COST);
+  format %{ "fmls $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// dst - src1 * src2
+instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseFMA && n->as_Vector()->length() == 2);
+  match(Set dst (FmaVD dst (Binary (NegVD src1) src2)));
+  match(Set dst (FmaVD dst (Binary src1 (NegVD src2))));
+  ins_cost(INSN_COST);
+  format %{ "fmls $dst,$src1,$src2\t# vector (2D)" %}
+  ins_encode %{
+    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// --------------- Vector Multiply-Add Shorts into Integer --------------------
+
+// MulAddVS2VI on short inputs, emitted as three instructions: two smullv
+// products (T4H into tmp, T8H into dst) followed by an addpv (T4S) of the two.
+// dst is written before the last read of src1/src2, hence TEMP_DEF on dst.
+instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
+  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (MulAddVS2VI src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);
+  format %{ "smullv $tmp, $src1, $src2\t# vector (4H)\n\t"
+            "smullv $dst, $src1, $src2\t# vector (8H)\n\t"
+            "addpv $dst, $tmp, $dst\t# vector (4S)" %}
+  ins_encode %{
+    __ smullv(as_FloatRegister($tmp$$reg), __ T4H,
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+    __ smullv(as_FloatRegister($dst$$reg), __ T8H,
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+    __ addpv(as_FloatRegister($dst$$reg), __ T4S,
+             as_FloatRegister($tmp$$reg),
+             as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// --------------------------------- DIV --------------------------------------
+
+// Lane-wise floating-point divide.
+instruct vdiv2F(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp64);
+%}
+
+instruct vdiv4F(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+instruct vdiv2D(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %}
+  ins_encode %{
+    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
+// --------------------------------- SQRT -------------------------------------
+
+// Lane-wise floating-point square root. These rules declare no ins_cost.
+instruct vsqrt2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SqrtVF src));
+  format %{ "fsqrt $dst, $src\t# vector (2S)" %}
+  ins_encode %{
+    __ fsqrt(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vunop_fp64);
+%}
+
+instruct vsqrt4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SqrtVF src));
+  format %{ "fsqrt $dst, $src\t# vector (4S)" %}
+  ins_encode %{
+    __ fsqrt(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vsqrt_fp128);
+%}
+
+instruct vsqrt2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SqrtVD src));
+  format %{ "fsqrt $dst, $src\t# vector (2D)" %}
+  ins_encode %{
+    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vsqrt_fp128);
+%}
+
+// --------------------------------- NEG --------------------------------------
+
+// Lane-wise floating-point negate. Costed at 3 * INSN_COST, unlike the
+// single-INSN_COST arithmetic rules above.
+instruct vneg2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vunop_fp64);
+%}
+
+instruct vneg4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vunop_fp128);
+%}
+
+instruct vneg2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(vunop_fp128);
+%}
+
+// --------------------------------- AND --------------------------------------
+
+// The bitwise rules (AND/OR/XOR) are selected by the vector size in bytes,
+// not by lane count, and always use a byte arrangement.
+instruct vand8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "and $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ andr(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical64);
+%}
+
+instruct vand16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "and $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ andr(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "orr $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ orr(as_FloatRegister($dst$$reg), __ T8B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical64);
+%}
+
+instruct vor16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "orr $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ orr(as_FloatRegister($dst$$reg), __ T16B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+// XorV is emitted as eor; the format string prints the same mnemonic.
+instruct vxor8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "eor $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ eor(as_FloatRegister($dst$$reg), __ T8B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical64);
+%}
+
+instruct vxor16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "eor $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ eor(as_FloatRegister($dst$$reg), __ T16B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// ------------------------------ Shift ---------------------------------------
+
+// Build a shift-count vector by duplicating the scalar count into every byte
+// lane. The same rule serves both LShiftCntV and RShiftCntV.
+instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
+  ins_encode %{
+    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(vdup_reg_reg64);
+%}
+
+instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "dup $dst, $cnt\t# shift count vector (16B)" %}
+  ins_encode %{
+    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(vdup_reg_reg128);
+%}
+
+// Left shift by a vector count: emitted directly as sshl.
+instruct vsll8B(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (8B)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsll16B(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (16B)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+// Right shifts with vector shift count on aarch64 SIMD are implemented
+// as left shift by negative shift count.
+// There are two cases for vector shift count.
+//
+// Case 1: The vector shift count is from replication.
+//        |            |
+//    LoadVector  RShiftCntV
+//        |       /
+//     RShiftVI
+// Note: In inner loop, multiple neg instructions are used, which can be
+// moved to outer loop and merge into one neg instruction.
+//
+// Case 2: The vector shift count is from loading.
+// This case isn't supported by middle-end now. But it's supported by
+// panama/vectorIntrinsics(JEP 338: Vector API).
+//        |            |
+//    LoadVector  LoadVector
+//        |       /
+//     RShiftVI
+//
+
+// In every register-count right shift below the count is negated into tmp and
+// then used by sshl (arithmetic) or ushl (logical).
+// NOTE(review): negr always uses a byte arrangement (T8B/T16B), even for the
+// wider lane types - presumably because the count vector is a byte-wise dup
+// (see vshiftcnt8B/vshiftcnt16B); confirm.
+instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVB src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (8B)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVB src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (16B)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVB src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (8B)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVB src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (16B)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+// Immediate shifts on byte lanes. A constant of 8 or more is handled
+// explicitly: left and logical-right shifts emit eor of src with itself
+// (all zero bits), arithmetic-right shifts emit a shift by 7 instead.
+instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (8B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) {
+      __ eor(as_FloatRegister($dst$$reg), __ T8B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ shl(as_FloatRegister($dst$$reg), __ T8B,
+             as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (16B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) {
+      __ eor(as_FloatRegister($dst$$reg), __ T16B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ shl(as_FloatRegister($dst$$reg), __ T16B,
+             as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVB src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (8B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) sh = 7;
+    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVB src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (16B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) sh = 7;
+    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVB src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (8B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) {
+      __ eor(as_FloatRegister($dst$$reg), __ T8B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVB src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (16B)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 8) {
+      __ eor(as_FloatRegister($dst$$reg), __ T16B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+// Short-lane shifts by a vector count (same scheme as the byte rules above).
+instruct vsll4S(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (4H)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsll8S(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (8H)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (4H)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (8H)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (4H)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (8H)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+// Immediate shifts on short lanes. A constant of 16 or more is handled like
+// the byte case: eor of src with itself for left / logical-right shifts, and
+// a shift by 15 for arithmetic-right shifts.
+instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (4H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) {
+      __ eor(as_FloatRegister($dst$$reg), __ T8B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ shl(as_FloatRegister($dst$$reg), __ T4H,
+             as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (8H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) {
+      __ eor(as_FloatRegister($dst$$reg), __ T16B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ shl(as_FloatRegister($dst$$reg), __ T8H,
+             as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (4H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) sh = 15;
+    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (8H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) sh = 15;
+    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (4H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) {
+      __ eor(as_FloatRegister($dst$$reg), __ T8B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (8H)" %}
+  ins_encode %{
+    int sh = (int)$shift$$constant;
+    if (sh >= 16) {
+      __ eor(as_FloatRegister($dst$$reg), __ T16B,
+             as_FloatRegister($src$$reg),
+             as_FloatRegister($src$$reg));
+    } else {
+      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+// Int-lane shifts by a vector count.
+instruct vsll2I(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (2S)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsll4I(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (4S)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (2S)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (4S)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (2S)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (4S)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+// Immediate shifts on int lanes: the constant is passed to the assembler
+// unchanged.
+// NOTE(review): unlike the byte/short forms there is no out-of-range handling
+// here - presumably the count is already limited to the lane width before it
+// reaches the matcher; confirm.
+instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (2S)" %}
+  ins_encode %{
+    __ shl(as_FloatRegister($dst$$reg), __ T2S,
+           as_FloatRegister($src$$reg),
+           (int)$shift$$constant);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (4S)" %}
+  ins_encode %{
+    __ shl(as_FloatRegister($dst$$reg), __ T4S,
+           as_FloatRegister($src$$reg),
+           (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (2S)" %}
+  ins_encode %{
+    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (4S)" %}
+  ins_encode %{
+    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (2S)" %}
+  ins_encode %{
+    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (4S)" %}
+  ins_encode %{
+    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+// Long-lane shifts (two lanes, vecX only), by vector count and by immediate.
+instruct vsll2L(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (2D)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "sshl $dst,$src,$tmp\t# vector (2D)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "negr $tmp,$shift\n\t"
+            "ushl $dst,$src,$tmp\t# vector (2D)" %}
+  ins_encode %{
+    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
+            as_FloatRegister($shift$$reg));
+    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($tmp$$reg));
+  %}
+  ins_pipe(vshift128);
+%}
+
+instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src (LShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "shl $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    __ shl(as_FloatRegister($dst$$reg), __ T2D,
+           as_FloatRegister($src$$reg),
+           (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "sshr $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src (RShiftCntV shift)));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            (int)$shift$$constant);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{ /* Shift right and accumulate on eight byte lanes: matches AddVB of dst with RShiftVB of src by a constant and emits ssra on T8B. A count of 8 or more is clamped to 7 before encoding. */ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ ssra(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{ /* Sixteen byte lane form of vsraa8B_imm: ssra on T16B with the same clamp of the count to 7. */ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ ssra(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{ /* Shift right and accumulate on four short lanes (AddVS over RShiftVS): ssra on T4H, with a count of 16 or more clamped to 15. */ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ ssra(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{ /* Eight short lane form of vsraa4S_imm: ssra on T8H with the same clamp of the count to 15. */ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ ssra(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{ /* Shift right and accumulate on two int lanes (AddVI over RShiftVI): ssra on T2S, the constant is passed through without clamping. */ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (2S)"
%} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{ /* Four int lane form of vsraa2I_imm: ssra on T4S, no clamping of the constant. */ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{ /* Shift right and accumulate on two long lanes (AddVL over RShiftVL): ssra on T2D, no clamping of the constant. */ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{ /* Unsigned shift right and accumulate on eight byte lanes: matches AddVB of dst with URShiftVB of src by a constant. usra on T8B is emitted only when the count is below 8; otherwise nothing is emitted and dst keeps its value. */ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh < 8) { + __ usra(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{ /* Sixteen byte lane form of vsrla8B_imm: usra on T16B, again only for counts below 8. */ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh < 8) { + __ usra(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{ /* Unsigned shift right and accumulate on four short lanes (AddVS over URShiftVS): usra on T4H, emitted only for counts below 16. */ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (URShiftVS src (RShiftCntV
shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh < 16) { + __ usra(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{ /* Eight short lane form of vsrla4S_imm: usra on T8H, emitted only for counts below 16. */ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh < 16) { + __ usra(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{ /* Unsigned shift right and accumulate on two int lanes (AddVI over URShiftVI): usra on T2S, emitted unconditionally with the constant as given. */ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{ /* Four int lane form of vsrla2I_imm: usra on T4S, emitted unconditionally. */ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{ /* Unsigned shift right and accumulate on two long lanes (AddVL over URShiftVL): usra on T2D, emitted unconditionally. */ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vmax2F(vecD dst, vecD src1, vecD src2) +%{ /* MaxV for two float lanes: the predicate requires length 2 and element type T_FLOAT, and the encoding is fmax on T2S. */ +
predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (2F)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vmax4F(vecX dst, vecX src1, vecX src2) +%{ /* MaxV for four lanes of element type T_FLOAT: emits fmax on T4S. */ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmax2D(vecX dst, vecX src1, vecX src2) +%{ /* MaxV for two lanes of element type T_DOUBLE: emits fmax on T2D. */ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmax $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmax(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmin2F(vecD dst, vecD src1, vecD src2) +%{ /* MinV for two lanes of element type T_FLOAT: emits fmin on T2S. */ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (2F)" %} + ins_encode %{ + __ fmin(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vmin4F(vecX dst, vecX src1, vecX src2) +%{ /* MinV for four lanes of element type T_FLOAT: emits fmin on T4S. */ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __
fmin(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vmin2D(vecX dst, vecX src1, vecX src2) +%{ /* MinV for two lanes of element type T_DOUBLE: emits fmin on T2D. */ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "fmin $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmin(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{ /* RoundDoubleModeV on two double lanes, selected by the constant rmode: rmode_rint emits frintn, rmode_floor emits frintm and rmode_ceil emits frintp, each on T2D. NOTE(review): the switch has no default case, so any other rmode value emits no instruction; confirm callers never pass one. */ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "frint $dst, $src, $rmode" %} + ins_encode %{ + switch ($rmode$$constant) { + case RoundDoubleModeNode::rmode_rint: + __ frintn(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_floor: + __ frintm(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_ceil: + __ frintp(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + break; + } + %} + ins_pipe(vdop_fp128); +%} + +instruct vpopcount4I(vecX dst, vecX src) %{ /* PopCountVI for four int lanes, gated on UsePopCountInstruction: cnt on T16B from src into dst, then uaddlp on T16B and on T8H, both in place on dst. NOTE(review): presumably the two pairwise widening adds turn per-byte counts into per-int totals; confirm against the Arm ISA description of UADDLP. */ + predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); + match(Set dst (PopCountVI src)); + format %{ + "cnt $dst, $src\t# vector (16B)\n\t" + "uaddlp $dst, $dst\t# vector (16B)\n\t" + "uaddlp $dst, $dst\t# vector (8H)" + %} + ins_encode %{ + __ cnt(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vpopcount2I(vecD dst, vecD src) %{ /* Two int lane form of vpopcount4I: cnt on T8B, then uaddlp on T8B and on T4H, all targeting dst. */ + predicate(UsePopCountInstruction &&
n->as_Vector()->length() == 2); + match(Set dst (PopCountVI src)); + format %{ + "cnt $dst, $src\t# vector (8B)\n\t" + "uaddlp $dst, $dst\t# vector (8B)\n\t" + "uaddlp $dst, $dst\t# vector (4H)" + %} + ins_encode %{ + __ cnt(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 index eb484637b2518..0918114ff760e 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 @@ -57,28 +57,20 @@ dnl // ====================VECTOR INSTRUCTIONS================================== // ------------------------------ Load/store/reinterpret ----------------------- - -// Load vector (16 bits) -instruct loadV2(vecD dst, memory mem) -%{ - predicate(n->as_LoadVector()->memory_size() == 2); - match(Set dst (LoadVector mem)); - ins_cost(4 * INSN_COST); - format %{ "ldrh $dst,$mem\t# vector (16 bits)" %} - ins_encode( aarch64_enc_ldrvH(dst, mem) ); - ins_pipe(vload_reg_mem64); -%} - -// Store Vector (16 bits) -instruct storeV2(vecD src, memory mem) +define(`VLoadStore', ` +// ifelse(load, $3, Load, Store) Vector ($6 bits) +instruct $3V$4`'(vec$5 $7, ifelse($4, 2, memory, vmem$4) mem) %{ - predicate(n->as_StoreVector()->memory_size() == 2); - match(Set mem (StoreVector mem src)); + predicate($8`n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4); + match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src))); ins_cost(4 * INSN_COST); - format %{ "strh $mem,$src\t# vector (16 bits)" %} - ins_encode( aarch64_enc_strvH(src, mem) ); - ins_pipe(vstore_reg_mem64); -%} + format %{ "$1 ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %} + ins_encode( `aarch64_enc_'ifelse(load, $3, ldr,
str)v$2($7, mem) ); + ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64)); +%}')dnl /* VLoadStore arguments: $1 = mnemonic printed by format, $2 = size letter completing the aarch64_enc_ldrv or aarch64_enc_strv encoding name, $3 = load or store, $4 = memory_size in bytes (2 selects the memory operand, other sizes vmem$4), $5 = vec operand class, $6 = width in bits shown in the comments, $7 = vector operand name, $8 = optional predicate prefix. */ +dnl $1 $2 $3 $4 $5 $6 $7 $8 +VLoadStore(ldrh, H, load, 2, D, 16, dst, ) +VLoadStore(strh, H, store, 2, D, 16, src, ) dnl define(`REINTERPRET', ` instruct reinterpret$1`'(vec$1 dst) @@ -1507,3 +1499,795 @@ VFABD(fabd, fabd, 2, F, D, S, 64) VFABD(fabd, fabd, 4, F, X, S, 128) VFABD(fabd, fabd, 2, D, X, D, 128) dnl +VLoadStore(ldrs, S, load, 4, D, 32, dst, ) +VLoadStore(ldrd, D, load, 8, D, 64, dst, ) +VLoadStore(ldrq, Q, load, 16, X, 128, dst, UseSVE == 0 && ) +VLoadStore(strs, S, store, 4, D, 32, src, ) +VLoadStore(strd, D, store, 8, D, 64, src, ) +VLoadStore(strq, Q, store, 16, X, 128, src, ) +dnl +define(`VREPLICATE', ` +instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src)) +%{ + predicate(ifelse($8, UseSVE == 0 && , $8, + $8, , , $8` + ')n->as_Vector()->length() == $3); + match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), __ ifelse( + $2, eor, T16B, T$3`'$9),ifelse( + `$4 $7', `B immI', ` '$con$$constant & 0xff, + `$4 $7', `S immI', ` '$con$$constant & 0xffff, + `$4 $7', `I immI', ` '$con$$constant, + `$2', eor,` + as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg)', + `$7', vRegF,` + as_FloatRegister($src$$reg)', + `$7', vRegD,` + as_FloatRegister($src$$reg)', + ` 'as_Register($src$$reg))); + %} + ins_pipe(ifelse($7, immI0, v$1_reg_imm, + $7, immI, v$1_reg_imm, + $7, iRegIorL2I, v$1_reg_reg, + $7, zero, vmovi_reg_imm, + $7, iRegL, vdup_reg_reg, + $4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64)); +%}')dnl /* VREPLICATE arguments: $1 = mnemonic printed by format and stem of the v$1_reg_imm or v$1_reg_reg pipe class, $2 = assembler method (eor switches the arrangement to T16B and uses dst as both sources), $3 = lane count, $4 = element type letter, $5 = instruct name suffix, $6 = vec operand class, $7 = source operand type (immI0 names the operand zero, immI names it con, anything else src), $8 = optional predicate prefix, $9 = arrangement suffix appended to T$3. B and S immediates are masked with 0xff and 0xffff. */ +dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 +VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, n->as_Vector()->length() == 4 ||, B) +VREPLICATE(dup, dup, 16, B, , X,
iRegIorL2I, UseSVE == 0 && , B) +VREPLICATE(movi, mov, 8, B, _imm, D, immI, n->as_Vector()->length() == 4 ||, B) +VREPLICATE(movi, mov, 16, B, _imm, X, immI, UseSVE == 0 && , B) +VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, n->as_Vector()->length() == 2 ||, H) +VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, UseSVE == 0 && , H) +VREPLICATE(movi, mov, 4, S, _imm, D, immI, n->as_Vector()->length() == 2 ||, H) +VREPLICATE(movi, mov, 8, S, _imm, X, immI, UseSVE == 0 && , H) +VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S) +VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, UseSVE == 0 && , S) +VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S) +VREPLICATE(movi, mov, 4, I, _imm, X, immI, UseSVE == 0 && , S) +VREPLICATE(dup, dup, 2, L, , X, iRegL, UseSVE == 0 && , D) +VREPLICATE(movi, eor, 2, L, _zero, X, immI0, UseSVE == 0 && , D) +VREPLICATE(dup, dup, 2, F, , D, vRegF, , S) +VREPLICATE(dup, dup, 4, F, , X, vRegF, UseSVE == 0 && , S) +VREPLICATE(dup, dup, 2, D, , X, vRegD, UseSVE == 0 && , D) +dnl + +// ====================REDUCTION ARITHMETIC==================================== +dnl +define(`REDUCE_ADD_INT', ` +instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, ifelse($1, 2, iRegINoSp tmp, vecX vtmp), iRegINoSp ifelse($1, 2, tmp2, itmp)) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP ifelse($1, 2, tmp, vtmp), TEMP ifelse($1, 2, tmp2, itmp)); + format %{ ifelse($1, 2, `"umov $tmp, $vsrc, S, 0\n\t" + "umov $tmp2, $vsrc, S, 1\n\t" + "addw $tmp, $isrc, $tmp\n\t" + "addw $dst, $tmp, $tmp2\t# add reduction2I"',`"addv $vtmp, T4S, $vsrc\n\t" + "umov $itmp, $vtmp, S, 0\n\t" + "addw $dst, $itmp, $isrc\t# add reduction4I"') + %} + ins_encode %{ + ifelse($1, 2, `__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register); + __
addw($dst$$Register, $tmp$$Register, $tmp2$$Register);', `__ addv(as_FloatRegister($vtmp$$reg), __ T4S, + as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); + __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);') + %} + ins_pipe(pipe_class_default); +%}')dnl /* REDUCE_ADD_INT arguments: $1 = lane count (2 expands to two umov moves and two addw adds through general registers; any other value expands to addv on T4S, one umov and one addw), $2 = element type letter used in the instruct name, $3 = vec operand class of vsrc. */ +dnl $1 $2 $3 +REDUCE_ADD_INT(2, I, D) +REDUCE_ADD_INT(4, I, X) +dnl +define(`REDUCE_MUL_INT', ` +instruct reduce_mul$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, ifelse($1, 2, iRegINoSp tmp`)', vecX vtmp`,' iRegINoSp itmp`)') +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP ifelse($1, 2, tmp, vtmp), TEMP ifelse($1, 2, dst, itmp`,' TEMP dst)); + format %{ ifelse($1, 2, `"umov $tmp, $vsrc, S, 0\n\t" + "mul $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "mul $dst, $tmp, $dst\t# mul reduction2I"',`"ins $vtmp, D, $vsrc, 0, 1\n\t" + "mulv $vtmp, T2S, $vtmp, $vsrc\n\t" + "umov $itmp, $vtmp, S, 0\n\t" + "mul $dst, $itmp, $isrc\n\t" + "umov $itmp, $vtmp, S, 1\n\t" + "mul $dst, $itmp, $dst\t# mul reduction4I"') + %} + ins_encode %{ + ifelse($1, 2, `__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ mul($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ mul($dst$$Register, $tmp$$Register, $dst$$Register);', `__ ins(as_FloatRegister($vtmp$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp$$reg), __ T2S, + as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); + __ mul($dst$$Register, $itmp$$Register, $isrc$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1); + __ mul($dst$$Register, $itmp$$Register, $dst$$Register);') + %} + ins_pipe(pipe_class_default); +%}')dnl /* REDUCE_MUL_INT arguments: $1 = lane count (2 multiplies isrc by the two lanes moved out with umov; any other value first runs ins with D, 0, 1 and mulv on T2S into vtmp, then multiplies isrc by lanes 0 and 1 of vtmp), $2 = element type letter used in the instruct name, $3 = vec operand class of vsrc. dst is declared TEMP in both expansions. */ +dnl $1 $2 $3 +REDUCE_MUL_INT(2, I, D) +REDUCE_MUL_INT(4, I,
X) +dnl +define(`REDUCE_MULORADD_FORD', ` +instruct reduce_$6$2$3`'(vReg$3 dst, vReg$3 $4src, vec$5 vsrc, vec$5 tmp) +%{ + match(Set dst (ifelse($6, add, Add, Mul)ReductionV$3 $4src vsrc)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "$1 $dst, $$4src, $vsrc\n\t" + "ins $tmp, ifelse($3, F, S, D), $vsrc, 0, 1\n\t" + ifelse($2, 2, `"$1 $dst, $dst, $tmp\t# $6 reduction$2$3"', + `"$1 $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 2\n\t" + "$1 $dst, $dst, $tmp\n\t" + "ins $tmp, S, $vsrc, 0, 3\n\t" + "$1 $dst, $dst, $tmp\t# $6 reduction4F"') + %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), + as_FloatRegister($$4src$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D), + as_FloatRegister($vsrc$$reg), 0, 1); + __ $1(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));ifelse($2, 4, ` + __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D), + as_FloatRegister($vsrc$$reg), 0, 2); + __ $1(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 3); + __ $1(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));') + %} + ins_pipe(pipe_class_default); +%}')dnl /* REDUCE_MULORADD_FORD arguments: $1 = scalar assembler method applied at every step, $2 = lane count (4 appends two further ins and $1 steps for lanes 2 and 3), $3 = element type F or D (F selects ins lane size S, otherwise D), $4 = prefix of the scalar source operand name, $5 = vec operand class, $6 = add or mul, choosing AddReductionV or MulReductionV. NOTE(review): the 4-lane branch hard-codes S and the text reduction4F in its last steps, so it looks usable only with F; confirm before adding other 4-lane uses. */ +dnl $1 $2 $3 $4 $5 $6 +REDUCE_MULORADD_FORD(fadds, 2, F, f, D, add) +REDUCE_MULORADD_FORD(fadds, 4, F, f, X, add) +REDUCE_MULORADD_FORD(fmuls, 2, F, f, D, mul) +REDUCE_MULORADD_FORD(fmuls, 4, F, f, X, mul) +REDUCE_MULORADD_FORD(faddd, 2, D, d, X, add) +REDUCE_MULORADD_FORD(fmuld, 2, D, d, X, mul) + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- +define(`VADD', ` +instruct vadd$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ifelse($2$3, 8B, ` + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8);', + $2$3, 4S, ` +
predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4);', + $2$5, 2D, , ` + predicate(n->as_Vector()->length() == $2);') + match(Set dst (AddV$3 src1 src2)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128)); +%}')dnl /* VADD arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the AddV node and of the instruct name, $4 = vec operand class (D picks the 64 pipe class, anything else 128), $5 = arrangement suffix appended to T$2, with L mapped to D. The 8B and 4S forms also accept half the lane count, and the form whose $2$5 is 2D is generated without a predicate. */ +dnl $1 $2 $3 $4 $5 +VADD(addv, 8, B, D, B) +VADD(addv, 16, B, X, B) +VADD(addv, 4, S, D, H) +VADD(addv, 8, S, X, H) +VADD(addv, 2, I, D, S) +VADD(addv, 4, I, X, S) +VADD(addv, 2, L, X, L) +VADD(fadd, 2, F, D, S) +VADD(fadd, 4, F, X, S) +VADD(fadd, 2, D, X, D) + +// --------------------------------- SUB -------------------------------------- +define(`VSUB', ` +instruct vsub$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ifelse($2$3, 8B, ` + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8);', + $2$3, 4S, ` + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4);',` + predicate(n->as_Vector()->length() == $2);') + match(Set dst (SubV$3 src1 src2)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128)); +%}')dnl /* VSUB arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the SubV node, $4 = vec operand class, $5 = arrangement suffix (L mapped to D). Every form gets a length predicate; 8B and 4S also accept half the lane count. */ +dnl $1 $2 $3 $4 $5 +VSUB(subv, 8, B, D, B) +VSUB(subv, 16, B, X, B) +VSUB(subv, 4, S, D, H) +VSUB(subv, 8, S, X, H) +VSUB(subv, 2, I, D, S) +VSUB(subv, 4, I, X, S) +VSUB(subv, 2, L, X, L) +VSUB(fsub, 2, F, D, S) +VSUB(fsub, 4, F, X, S) +VSUB(fsub, 2, D, X, D) + +// --------------------------------- MUL -------------------------------------- +define(`VMUL', ` +instruct vmul$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ifelse($2$3, 8B, ` + predicate(n->as_Vector()->length() == 4 || +
n->as_Vector()->length() == 8);', + $2$3, 4S, ` + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4);',` + predicate(n->as_Vector()->length() == $2);') + match(Set dst (MulV$3 src1 src2)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmul`'ifelse($3, F, div_fp, $3, D, div_fp)`'ifelse($4, D, 64, 128)); +%}')dnl /* VMUL arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the MulV node, $4 = vec operand class, $5 = arrangement suffix. Element types F and D use the vmuldiv_fp pipe classes, all others vmul. */ +dnl $1 $2 $3 $4 $5 +VMUL(mulv, 8, B, D, B) +VMUL(mulv, 16, B, X, B) +VMUL(mulv, 4, S, D, H) +VMUL(mulv, 8, S, X, H) +VMUL(mulv, 2, I, D, S) +VMUL(mulv, 4, I, X, S) +VMUL(fmul, 2, F, D, S) +VMUL(fmul, 4, F, X, S) +VMUL(fmul, 2, D, X, D) + +// --------------------------------- MLA -------------------------------------- +define(`VMLA', `ifelse($1, fmla, ` +// dst + src1 * src2') +instruct vmla$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ifelse($2$3, 4S, ` + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4);', $1, fmla, ` + predicate(UseFMA && n->as_Vector()->length() == $2);', ` + predicate(n->as_Vector()->length() == $2);') + match(Set dst (ifelse($1, mlav, `AddV'$3` dst (MulV$3 src1 src2)', FmaV$3 `dst (Binary src1 src2)'))); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128)); +%}')dnl /* VMLA arguments: $1 = assembler method (mlav matches AddV$3 of dst with MulV$3, any other value matches FmaV$3; fmla additionally puts UseFMA into the predicate), $2 = lane count, $3 = element type letter, $4 = vec operand class, $5 = arrangement suffix. The 4S form also accepts length 2. */ +dnl $1 $2 $3 $4 $5 +VMLA(mlav, 4, S, D, H) +VMLA(mlav, 8, S, X, H) +VMLA(mlav, 2, I, D, S) +VMLA(mlav, 4, I, X, S) +VMLA(fmla, 2, F, D, S) +VMLA(fmla, 4, F, X, S) +VMLA(fmla, 2, D, X, D) + +// --------------------------------- MLS -------------------------------------- +define(`VMLS', `ifelse($1, fmls, ` +// dst - src1 * src2') +instruct vmls$2$3`'(vec$4 dst, vec$4 src1,
vec$4 src2) %{ifelse($2$3, 4S, ` + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4);', $1, fmls, ` + predicate(UseFMA && n->as_Vector()->length() == $2);', ` + predicate(n->as_Vector()->length() == $2);') + match(Set dst (ifelse($1, mlsv, `SubV'$3` dst (MulV$3 src1 src2)', FmaV$3 `dst (Binary (NegV'$3 `src1) src2))); + match(Set dst (FmaV$3 dst (Binary src1 (NegV'$3 `src2))'))); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128)); +%}')dnl /* VMLS arguments: $1 = assembler method (mlsv matches SubV$3 of dst with MulV$3; any other value produces two match rules on FmaV$3, one with NegV$3 on src1 and one with NegV$3 on src2; fmls additionally puts UseFMA into the predicate), $2 = lane count, $3 = element type letter, $4 = vec operand class, $5 = arrangement suffix. */ +dnl $1 $2 $3 $4 $5 +VMLS(mlsv, 4, S, D, H) +VMLS(mlsv, 8, S, X, H) +VMLS(mlsv, 2, I, D, S) +VMLS(mlsv, 4, I, X, S) +VMLS(fmls, 2, F, D, S) +VMLS(fmls, 4, F, X, S) +VMLS(fmls, 2, D, X, D) + +// --------------- Vector Multiply-Add Shorts into Integer -------------------- + +instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{ + predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulAddVS2VI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "smullv $tmp, $src1, $src2\t# vector (4H)\n\t" + "smullv $dst, $src1, $src2\t# vector (8H)\n\t" + "addpv $dst, $tmp, $dst\t# vector (4S)\n\t" %} + ins_encode %{ + __ smullv(as_FloatRegister($tmp$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + __ smullv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + __ addpv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($tmp$$reg), + as_FloatRegister($dst$$reg)); + %} + ins_pipe(vmuldiv_fp128); +%} + +// --------------------------------- DIV -------------------------------------- +define(`VDIV', ` +instruct vdiv$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ + predicate(n->as_Vector()->length()
== $2); + match(Set dst (DivV$3 src1 src2)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp`'ifelse($4, D, 64, 128)); +%}')dnl /* VDIV arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the DivV node, $4 = vec operand class, $5 = arrangement suffix. Always uses a vmuldiv_fp pipe class. */ +dnl $1 $2 $3 $4 $5 +VDIV(fdiv, 2, F, D, S) +VDIV(fdiv, 4, F, X, S) +VDIV(fdiv, 2, D, X, D) + +// --------------------------------- SQRT ------------------------------------- +define(`VSQRT', ` +instruct vsqrt$2$3`'(vec$4 dst, vec$4 src) +%{ + predicate(n->as_Vector()->length() == $2); + match(Set dst (SqrtV$3 src)); + format %{ "$1 $dst, $src\t# vector ($2$3)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2$5, as_FloatRegister($src$$reg)); + %} + ins_pipe(v`'ifelse($2$3, 2F, unop, sqrt)_fp`'ifelse($4, D, 64, 128)); +%}')dnl /* VSQRT arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the SqrtV node, $4 = vec operand class, $5 = arrangement suffix used in T$2$5. The format comment prints $2$3 rather than $2$5, no ins_cost is set, and the 2F form uses the vunop_fp pipe class while the others use vsqrt_fp. */ +dnl $1 $2 $3 $4 $5 +VSQRT(fsqrt, 2, F, D, S) +VSQRT(fsqrt, 4, F, X, S) +VSQRT(fsqrt, 2, D, X, D) + +// --------------------------------- NEG -------------------------------------- +define(`VNEG', ` +instruct vneg$2$3`'(vec$4 dst, vec$4 src) +%{ + predicate(n->as_Vector()->length() == $2); + match(Set dst (NegV$3 src)); + ins_cost(INSN_COST * 3); + format %{ "$1 $dst,$src\t# vector ($2$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5), + as_FloatRegister($src$$reg)); + %} + ins_pipe(vunop_fp`'ifelse($4, D, 64, 128)); +%}')dnl /* VNEG arguments: $1 = assembler method, $2 = lane count, $3 = element type letter of the NegV node, $4 = vec operand class, $5 = arrangement suffix. Cost is three times INSN_COST. */ +dnl $1 $2 $3 $4 $5 +VNEG(fneg, 2, F, D, S) +VNEG(fneg, 4, F, X, S) +VNEG(fneg, 2, D, X, D) +dnl +define(`VLOGICAL', ` +instruct v$3$5$6`'(vec$7 dst, vec$7 src1, vec$7 src2) +%{ + predicate(ifelse($5, 8, n->as_Vector()->length_in_bytes() == 4 ||` + ')n->as_Vector()->length_in_bytes() == $5); + match(Set dst ($4V src1 src2)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src1,$src2\t# vector ($5$6)" %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), __ T$5$6, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} +
ins_pipe(vlogical`'ifelse($7, D, 64, 128)); +%}')dnl /* VLOGICAL arguments: $1 = mnemonic printed by format, $2 = assembler method, $3 = stem of the instruct name, $4 = prefix of the ideal node ($4V), $5 = vector length in bytes (8 also accepts 4), $6 = arrangement letter appended to T$5, $7 = vec operand class. The predicate tests length_in_bytes rather than the lane count. */ + +// --------------------------------- AND -------------------------------------- +dnl $1 $2 $3 $4 $5 $6 $7 +VLOGICAL(and, andr, and, And, 8, B, D) +VLOGICAL(and, andr, and, And, 16, B, X) + +// --------------------------------- OR --------------------------------------- +VLOGICAL(orr, orr, or, Or, 8, B, D) +VLOGICAL(orr, orr, or, Or, 16, B, X) + +// --------------------------------- XOR -------------------------------------- +VLOGICAL(xor, eor, xor, Xor, 8, B, D) +VLOGICAL(xor, eor, xor, Xor, 16, B, X) + +// ------------------------------ Shift --------------------------------------- +dnl +define(`VSHIFTCNT', ` +instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{ + predicate(ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||` + ')n->as_Vector()->length_in_bytes() == $3); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), __ T$3$4, as_Register($cnt$$reg)); + %} + ins_pipe(vdup_reg_reg`'ifelse($5, D, 64, 128)); +%}')dnl /* VSHIFTCNT arguments: $1 = mnemonic printed by format, $2 = assembler method, $3 = vector length in bytes (8 also accepts 4), $4 = arrangement letter appended to T$3, $5 = vec operand class. One instruct serves both LShiftCntV and RShiftCntV through two match rules. */ +dnl $1 $2 $3 $4 $5 +VSHIFTCNT(dup, dup, 8, B, D) +VSHIFTCNT(dup, dup, 16, B, X) +dnl +define(`VSLL', ` +instruct vsll$3$4`'(vec$6 dst, vec$6 src, vec$6 shift) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set dst (LShiftV$4 src shift)); + ins_cost(INSN_COST); + format %{ "$1 $dst,$src,$shift\t# vector ($3$5)" %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)); +%}')dnl /* VSLL arguments: $1 = mnemonic printed by format, $2 = assembler method, $3 = lane count, $4 = element type letter of the LShiftV node, $5 = arrangement suffix appended to T$3, $6 = vec operand class. The 8B and 4S forms also accept half the lane count. */ +dnl +define(`VSRA', ` +instruct vsra$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set
dst (RShiftV$4 src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "$1 $tmp,$shift\t" + "$2 $dst,$src,$tmp\t# vector ($3$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B), + as_FloatRegister($shift$$reg)); + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)); +%}')dnl /* VSRA arguments: $1 = assembler method applied to the shift count, writing tmp with arrangement T8B or T16B depending on $6, $2 = assembler method that then shifts src by tmp, $3 = lane count, $4 = element type letter of the RShiftV node, $5 = arrangement suffix appended to T$3, $6 = vec operand class. NOTE(review): the two format strings are joined by a tab only, so both mnemonics print on one line; the reduction macros in this file separate steps with a newline and tab. */ +dnl +define(`VSRL', ` +instruct vsrl$3$4`'(vec$6 dst, vec$6 src, vec$6 shift, vec$6 tmp) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set dst (URShiftV$4 src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "$1 $tmp,$shift\t" + "$2 $dst,$src,$tmp\t# vector ($3$5)" %} + ins_encode %{ + __ $1(as_FloatRegister($tmp$$reg), __ T`'ifelse($6, D, 8B, 16B), + as_FloatRegister($shift$$reg)); + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)); +%}')dnl /* VSRL arguments: same layout as VSRA but matching URShiftV$4: $1 = method applied to the count into tmp, $2 = shift method, $3 = lane count, $4 = element type letter, $5 = arrangement suffix, $6 = vec operand class. */ +dnl +define(`VSLL_IMM', ` +instruct vsll$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set dst (LShiftV$4 src (LShiftCntV shift))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} + ins_encode %{ifelse($4, B,` + int sh = (int)$shift$$constant; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', $4, S,` + int sh = (int)$shift$$constant; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else {
+ __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', ` + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + (int)$shift$$constant);') + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); +%}')dnl /* VSLL_IMM arguments: $1 = mnemonic printed by format, $2 = assembler method, $3 = lane count, $4 = element type letter of the LShiftV node, $5 = arrangement suffix, $6 = vec operand class. For B with a count of 8 or more, and for S with a count of 16 or more, the expansion emits eor of src with itself into dst instead of the shift; other element types pass the constant straight through. */ +define(`VSRA_IMM', ` +instruct vsra$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set dst (RShiftV$4 src (RShiftCntV shift))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} + ins_encode %{ifelse($4, B,` + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh);', $4, S,` + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh);', ` + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + (int)$shift$$constant);') + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); +%}')dnl /* VSRA_IMM arguments: same layout as VSLL_IMM but matching RShiftV$4 over RShiftCntV. For B the count is clamped to 7 and for S to 15 before the shift is emitted; other element types pass the constant straight through. */ +dnl +define(`VSRL_IMM', ` +instruct vsrl$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 ||` + ', + $3$4, 4S, n->as_Vector()->length() == 2 ||` + ')n->as_Vector()->length() == $3); + match(Set dst (URShiftV$4 src (RShiftCntV shift))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} + ins_encode %{ifelse($4, B,` + int sh = (int)$shift$$constant; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', $4, S,` + int sh = (int)$shift$$constant; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ ifelse($6, D, T8B, T16B), + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + }
else { + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', ` + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + (int)$shift$$constant);') + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); +%}')dnl /* VSRL_IMM arguments: same layout as VSLL_IMM but matching URShiftV$4 over RShiftCntV. For B with a count of 8 or more, and for S with a count of 16 or more, eor of src with itself is emitted into dst instead of the shift. */ +dnl +define(`VSRLA_IMM', ` +instruct vsrla$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ + predicate(n->as_Vector()->length() == $3); + match(Set dst (AddV$4 dst (URShiftV$4 src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} + ins_encode %{ifelse($4, B,` + int sh = (int)$shift$$constant; + if (sh < 8) { + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', $4, S,` + int sh = (int)$shift$$constant; + if (sh < 16) { + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh); + }', ` + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + (int)$shift$$constant);') + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); +%}')dnl /* VSRLA_IMM arguments: $1 = mnemonic printed by format, $2 = assembler method, $3 = lane count (exact match only), $4 = element type letter of the AddV and URShiftV nodes, $5 = arrangement suffix, $6 = vec operand class. For B the instruction is emitted only when the count is below 8 and for S only when it is below 16; otherwise nothing is emitted. */ +dnl +define(`VSRAA_IMM', ` +instruct vsraa$3$4_imm`'(vec$6 dst, vec$6 src, immI shift) %{ + predicate(n->as_Vector()->length() == $3); + match(Set dst (AddV$4 dst (RShiftV$4 src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "$1 $dst, $src, $shift\t# vector ($3$5)" %} + ins_encode %{ifelse($4, B,` + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh);', $4, S,` + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), sh);', ` + __ $2(as_FloatRegister($dst$$reg), __ T$3$5, + as_FloatRegister($src$$reg), + (int)$shift$$constant);') + %} + ins_pipe(vshift`'ifelse($6, D, 64, 128)_imm); +%}')dnl /* VSRAA_IMM arguments: same layout as VSRLA_IMM but matching AddV$4 over RShiftV$4. For B the count is clamped to 7 and for S to 15, and the instruction is always emitted. */ +dnl $1 $2 $3 $4 $5 $6 +VSLL(sshl, sshl, 8, B, B, D) +VSLL(sshl, sshl, 16, B, B, X) + +// Right shifts with vector shift count on aarch64 SIMD are implemented +// as left shift by negative
shift count.
+// There are two cases for vector shift count.
+//
+// Case 1: The vector shift count is from replication.
+//        |            |
+//    LoadVector  RShiftCntV
+//        |       /
+//     RShiftVI
+// Note: In inner loop, multiple neg instructions are used, which can be
+// moved to outer loop and merge into one neg instruction.
+//
+// Case 2: The vector shift count is from loading.
+// This case isn't supported by middle-end now. But it's supported by
+// panama/vectorIntrinsics(JEP 338: Vector API).
+//        |            |
+//    LoadVector  LoadVector
+//        |       /
+//     RShiftVI
+//
+dnl $1 $2 $3 $4 $5 $6
+dnl Argument legend, as used by the shift templates visible in this section:
+dnl   $1  mnemonic printed by the format string; in the vector-count right-shift
+dnl       templates it is also the first instruction emitted (applied to the count)
+dnl   $2  assembler routine emitted for the shift itself
+dnl   $3  lane count tested by the predicate and used in the T$3$5 arrangement
+dnl   $4  element-type suffix appended to the ideal node and to the rule name
+dnl   $5  lane-size letter of the T$3$5 arrangement
+dnl   $6  vector operand class suffix (vecD or vecX); also selects the 64 or 128 pipe class
+VSRA(negr, sshl, 8, B, B, D)
+VSRA(negr, sshl, 16, B, B, X)
+VSRL(negr, ushl, 8, B, B, D)
+VSRL(negr, ushl, 16, B, B, X)
+VSLL_IMM(shl, shl, 8, B, B, D)
+VSLL_IMM(shl, shl, 16, B, B, X)
+VSRA_IMM(sshr, sshr, 8, B, B, D)
+VSRA_IMM(sshr, sshr, 16, B, B, X)
+VSRL_IMM(ushr, ushr, 8, B, B, D)
+VSRL_IMM(ushr, ushr, 16, B, B, X)
+VSLL(sshl, sshl, 4, S, H, D)
+VSLL(sshl, sshl, 8, S, H, X)
+VSRA(negr, sshl, 4, S, H, D)
+VSRA(negr, sshl, 8, S, H, X)
+VSRL(negr, ushl, 4, S, H, D)
+VSRL(negr, ushl, 8, S, H, X)
+VSLL_IMM(shl, shl, 4, S, H, D)
+VSLL_IMM(shl, shl, 8, S, H, X)
+VSRA_IMM(sshr, sshr, 4, S, H, D)
+VSRA_IMM(sshr, sshr, 8, S, H, X)
+VSRL_IMM(ushr, ushr, 4, S, H, D)
+VSRL_IMM(ushr, ushr, 8, S, H, X)
+VSLL(sshl, sshl, 2, I, S, D)
+VSLL(sshl, sshl, 4, I, S, X)
+VSRA(negr, sshl, 2, I, S, D)
+VSRA(negr, sshl, 4, I, S, X)
+VSRL(negr, ushl, 2, I, S, D)
+VSRL(negr, ushl, 4, I, S, X)
+VSLL_IMM(shl, shl, 2, I, S, D)
+VSLL_IMM(shl, shl, 4, I, S, X)
+VSRA_IMM(sshr, sshr, 2, I, S, D)
+VSRA_IMM(sshr, sshr, 4, I, S, X)
+VSRL_IMM(ushr, ushr, 2, I, S, D)
+VSRL_IMM(ushr, ushr, 4, I, S, X)
+VSLL(sshl, sshl, 2, L, D, X)
+VSRA(negr, sshl, 2, L, D, X)
+VSRL(negr, ushl, 2, L, D, X)
+VSLL_IMM(shl, shl, 2, L, D, X)
+VSRA_IMM(sshr, sshr, 2, L, D, X)
+VSRL_IMM(ushr, ushr, 2, L, D, X)
+VSRAA_IMM(ssra, ssra, 8, B, B, D)
+VSRAA_IMM(ssra, ssra, 16, B, B, X)
+VSRAA_IMM(ssra, ssra, 4, S, H, D)
+VSRAA_IMM(ssra, ssra, 8, S, H, X)
+VSRAA_IMM(ssra, ssra, 2, I, S, D)
+VSRAA_IMM(ssra, ssra, 4, I, S, X)
+VSRAA_IMM(ssra, ssra, 2, L, D, X)
+VSRLA_IMM(usra, usra, 8, B, B, D)
+VSRLA_IMM(usra, usra, 16, B, B, X)
+VSRLA_IMM(usra, usra, 4, S, H, D)
+VSRLA_IMM(usra, usra, 8, S, H, X)
+VSRLA_IMM(usra, usra, 2, I, S, D)
+VSRLA_IMM(usra, usra, 4, I, S, X)
+VSRLA_IMM(usra, usra, 2, L, D, X)
+dnl
+dnl VMINMAX: rule template for floating-point vector min/max.
+dnl   $1  lower-case operation; the rule is named v$1$3 plus F or D and emits f$1
+dnl   $2  ideal-node prefix; the rule matches the node named $2V
+dnl   $3  lane count required by the predicate
+dnl   $4  used only inside the format string, after the lane count
+dnl   $5  arrangement letter for T$3$5; S selects T_FLOAT and the F name suffix,
+dnl       anything else selects T_DOUBLE and the D name suffix
+dnl   $6  vector operand class suffix; D selects vdop_fp64, otherwise vdop_fp128
+define(`VMINMAX', `
+instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)
+%{
+  predicate(n->as_Vector()->length() == $3 && n->bottom_type()->is_vect()->element_basic_type() == T_`'ifelse($5, S, FLOAT, DOUBLE));
+  match(Set dst ($2V src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "f$1 $dst,$src1,$src2\t# vector ($3$4)" %}
+  ins_encode %{
+    __ f$1(as_FloatRegister($dst$$reg), __ T$3$5,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vdop_fp`'ifelse($6, D, 64, 128));
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VMINMAX(max, Max, 2, F, S, D)
+VMINMAX(max, Max, 4, S, S, X)
+VMINMAX(max, Max, 2, D, D, X)
+VMINMAX(min, Min, 2, F, S, D)
+VMINMAX(min, Min, 4, S, S, X)
+VMINMAX(min, Min, 2, D, D, X)
+
+dnl vround2D_reg: hand-written rule (not a template) matching RoundDoubleModeV on a
+dnl   two-lane T_DOUBLE vector. The immediate rmode selects the instruction:
+dnl   rmode_rint -> frintn, rmode_floor -> frintm, rmode_ceil -> frintp, all on T2D.
+dnl   NOTE(review): the switch has no default, so any other rmode value emits no
+dnl   instruction at all - confirm that no other mode can reach this rule.
+instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
+  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "frint $dst, $src, $rmode" %}
+  ins_encode %{
+    switch ($rmode$$constant) {
+      case RoundDoubleModeNode::rmode_rint:
+        __ frintn(as_FloatRegister($dst$$reg), __ T2D,
+                  as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_floor:
+        __ frintm(as_FloatRegister($dst$$reg), __ T2D,
+                  as_FloatRegister($src$$reg));
+        break;
+      case RoundDoubleModeNode::rmode_ceil:
+        __ frintp(as_FloatRegister($dst$$reg), __ T2D,
+                  as_FloatRegister($src$$reg));
+        break;
+    }
+  %}
+  ins_pipe(vdop_fp128);
+%}
+dnl
+dnl VPOPCOUNT: rule template matching PopCountVI, guarded by UsePopCountInstruction.
+dnl   Emits cnt on the T$3B arrangement from src into dst, then two uaddlp steps
+dnl   on dst in place: the first on T$3B, the second on T$4H.
+dnl   $1  lane count required by the predicate and used in the rule name
+dnl   $2  type letter appended to the rule name
+dnl   $3  lane count of the byte arrangement
+dnl   $4  lane count of the halfword arrangement
+dnl   $5  vector operand class suffix (vecD or vecX)
+define(`VPOPCOUNT', `
+instruct vpopcount$1$2`'(vec$5 dst, vec$5 src) %{
+  predicate(UsePopCountInstruction && n->as_Vector()->length() == $1);
+  match(Set dst (PopCountVI src));
+  format %{
+    "cnt $dst, $src\t# vector ($3B)\n\t"
+    "uaddlp $dst, $dst\t# vector ($3B)\n\t"
+    "uaddlp $dst, $dst\t# vector ($4H)"
+  %}
+  ins_encode %{
+    __ cnt(as_FloatRegister($dst$$reg), __ T$3B,
+           as_FloatRegister($src$$reg));
+    __ uaddlp(as_FloatRegister($dst$$reg), __ T$3B,
+              as_FloatRegister($dst$$reg));
+    __ uaddlp(as_FloatRegister($dst$$reg), __ T$4H,
+              as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VPOPCOUNT(4, I, 16, 8, X)
+VPOPCOUNT(2, I, 8, 4, D)