Skip to content

Commit

Permalink
8239549: AArch64: Backend support for MulAddVS2VI node
Browse files Browse the repository at this point in the history
Reviewed-by: aph
  • Loading branch information
Pengfei Li committed Feb 26, 2020
1 parent e6f0c6b commit 934db29
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 10 deletions.
61 changes: 55 additions & 6 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2074,15 +2074,24 @@ const bool Matcher::match_rule_supported(int opcode) {
return ret_value; // Per default match rules are supported.
}

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
//
// Returns false when match_rule_supported(opcode) rejects the opcode, or when
// one of the vector-length restrictions listed in the switch below is not met.
// Returns true in every other case. The element type bt is not consulted by
// any of the current cases.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  // An opcode without a basic match rule can never be supported as a vector rule.
  if (!match_rule_supported(opcode)) {
    return false;
  }

  // Special cases which require vector length
  switch (opcode) {
    case Op_MulAddVS2VI: {
      // Only a vector length of 4 is accepted for this node.
      if (vlen != 4) {
        return false;
      }
      break;
    }
  }

  return true; // Per default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
Expand Down Expand Up @@ -10555,6 +10564,22 @@ instruct smnegL(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2, immL0 zero) %{
ins_pipe(imac_reg_reg);
%}

// Combined Multiply-Add Shorts into Integer (dst = src1 * src2 + src3 * src4)

// Scalar rule for the MulAddS2I node. The encoding first issues mulw with
// src1 and src2 into rscratch1, then issues maddw with src3, src4 and
// rscratch1, writing the result to dst.
instruct muladdS2I(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3, iRegIorL2I src4) %{
  match(Set dst (MulAddS2I (Binary src1 src2) (Binary src3 src4)));

  ins_cost(INSN_COST * 5);
  format %{ "mulw rscratch1, $src1, $src2\n\t"
            "maddw $dst, $src3, $src4, rscratch1" %}

  ins_encode %{
    // Resolve the allocated operand registers once, up front.
    Register dst_reg  = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    Register src3_reg = as_Register($src3$$reg);
    Register src4_reg = as_Register($src4$$reg);

    // rscratch1 carries the first product into the multiply-add.
    __ mulw(rscratch1, src1_reg, src2_reg);
    __ maddw(dst_reg, src3_reg, src4_reg, rscratch1);
  %}

  ins_pipe(imac_reg_reg);
%}

// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
Expand Down Expand Up @@ -16935,6 +16960,30 @@ instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
ins_pipe(vmuldiv_fp128);
%}

// --------------- Vector Multiply-Add Shorts into Integer --------------------

// Vector rule for the MulAddVS2VI node, selected only when the first input's
// vector element type is T_SHORT. Two smullv instructions (arrangements T4H
// and T8H) write tmp and dst respectively, and an addpv with arrangement T4S
// combines tmp and dst into dst.
// NOTE(review): presumably T4H / T8H select the low / high halves of the
// source vectors -- confirm against the assembler's smullv encoding.
instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulAddVS2VI src1 src2));
  ins_cost(INSN_COST);
  // tmp is a scratch vector register for the first widening multiply.
  effect(TEMP tmp);
  format %{ "smullv $tmp, $src1, $src2\t# vector (4H)\n\t"
            "smullv $dst, $src1, $src2\t# vector (8H)\n\t"
            "addpv $dst, $tmp, $dst\t# vector (4S)\n\t" %}
  ins_encode %{
    // Resolve the allocated operand registers once, up front.
    FloatRegister dst_reg  = as_FloatRegister($dst$$reg);
    FloatRegister src1_reg = as_FloatRegister($src1$$reg);
    FloatRegister src2_reg = as_FloatRegister($src2$$reg);
    FloatRegister tmp_reg  = as_FloatRegister($tmp$$reg);

    __ smullv(tmp_reg, __ T4H, src1_reg, src2_reg);
    __ smullv(dst_reg, __ T8H, src1_reg, src2_reg);
    __ addpv(dst_reg, __ T4S, tmp_reg, dst_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}

// --------------------------------- DIV --------------------------------------

instruct vdiv2F(vecD dst, vecD src1, vecD src2)
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2259,6 +2259,8 @@ void mvnw(Register Rd, Register Rm,
INSN(mlsv, 1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(sshl, 0, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(ushl, 1, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(addpv, 0, 0b101111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S

Expand Down
8 changes: 6 additions & 2 deletions src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -451,6 +451,10 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
}

if (FLAG_IS_DEFAULT(AlignVector)) {
AlignVector = AvoidUnalignedAccesses;
}
#endif
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
/**
* @test
* @bug 8214751
* @summary Add C2 x86 Superword support for VNNI VPDPWSSD Instruction
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
* @summary Test operations in C2 MulAddS2I and MulAddVS2VI nodes.
*
* @run main/othervm -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
Expand Down

0 comments on commit 934db29

Please sign in to comment.