Skip to content

Commit

Permalink
tcg/loongarch64: Lower basic tcg vec ops to LSX
Browse files Browse the repository at this point in the history
LSX support on host cpu is detected via hwcap.

Lower the following ops to LSX:

- dup_vec
- dupi_vec
- dupm_vec
- ld_vec
- st_vec

Signed-off-by: Jiajie Chen <c@jia.je>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230908022302.180442-3-c@jia.je>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
jiegec authored and rth7680 committed Sep 15, 2023
1 parent af88a28 commit 16288de
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 2 deletions.
2 changes: 2 additions & 0 deletions tcg/loongarch64/tcg-target-con-set.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
C_O0_I2(w, r)
C_O1_I1(r, r)
C_O1_I1(w, r)
C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
Expand Down
1 change: 1 addition & 0 deletions tcg/loongarch64/tcg-target-con-str.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('w', ALL_VECTOR_REGS)

/*
* Define constraint letters for constants:
Expand Down
219 changes: 218 additions & 1 deletion tcg/loongarch64/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "../tcg-ldst.c.inc"
#include <asm/hwcap.h>

bool use_lsx_instructions;

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"zero",
Expand Down Expand Up @@ -65,7 +67,39 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
"s5",
"s6",
"s7",
"s8"
"s8",
"vr0",
"vr1",
"vr2",
"vr3",
"vr4",
"vr5",
"vr6",
"vr7",
"vr8",
"vr9",
"vr10",
"vr11",
"vr12",
"vr13",
"vr14",
"vr15",
"vr16",
"vr17",
"vr18",
"vr19",
"vr20",
"vr21",
"vr22",
"vr23",
"vr24",
"vr25",
"vr26",
"vr27",
"vr28",
"vr29",
"vr30",
"vr31",
};
#endif

Expand Down Expand Up @@ -102,6 +136,15 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_A2,
TCG_REG_A1,
TCG_REG_A0,

/* Vector registers */
TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
/* V24 - V31 are callee-saved, and skipped. */
};

static const int tcg_target_call_iarg_regs[] = {
Expand Down Expand Up @@ -135,6 +178,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_WSZ 0x2000

#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)

static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
Expand Down Expand Up @@ -1486,6 +1530,154 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
}

/*
 * Emit the vreplgr2vr.{b,h,w,d} instruction that matches the element
 * size @vece, with @rd as destination and @rs as source register.
 *
 * @type is not consulted.  Always returns true; an element size other
 * than MO_8/MO_16/MO_32/MO_64 hits g_assert_not_reached().
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    if (vece == MO_8) {
        tcg_out_opc_vreplgr2vr_b(s, rd, rs);
    } else if (vece == MO_16) {
        tcg_out_opc_vreplgr2vr_h(s, rd, rs);
    } else if (vece == MO_32) {
        tcg_out_opc_vreplgr2vr_w(s, rd, rs);
    } else if (vece == MO_64) {
        tcg_out_opc_vreplgr2vr_d(s, rd, rs);
    } else {
        g_assert_not_reached();
    }
    return true;
}

/*
 * Emit a vldrepl.{b,h,w,d} load from @base + @offset into vector
 * register @r, choosing the instruction variant from @vece.
 *
 * The displacement is passed to the emitter scaled down by the element
 * size (offset >> vece).  When @offset lies outside [-0x800, 0x7ff] or
 * is not a multiple of the element size, the full address is first
 * computed into TCG_REG_TMP0 and a zero displacement is used instead.
 *
 * @type is not consulted.  Always returns true; an unknown @vece hits
 * g_assert_not_reached().
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    /* Directly encodable: in range and aligned to the element size. */
    bool encodable = offset >= -0x800 && offset <= 0x7ff &&
                     (offset & ((1 << vece) - 1)) == 0;

    if (!encodable) {
        /* Fold the whole displacement into the scratch base register. */
        tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
        base = TCG_REG_TMP0;
        offset = 0;
    }

    /* The instruction immediate counts elements, not bytes. */
    offset >>= vece;

    switch (vece) {
    case MO_8:
        tcg_out_opc_vldrepl_b(s, r, base, offset);
        break;
    case MO_16:
        tcg_out_opc_vldrepl_h(s, r, base, offset);
        break;
    case MO_32:
        tcg_out_opc_vldrepl_w(s, r, base, offset);
        break;
    case MO_64:
        tcg_out_opc_vldrepl_d(s, r, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/*
 * Load the constant @v64 into vector register @rd for element size
 * @vece.
 *
 * The low (8 << vece) bits of @v64 are sign-extended to form the
 * element value.  If that value fits in [-0x200, 0x1FF], a single vldi
 * is emitted whose immediate is (vece << 10) | low 10 bits of @v64.
 * Otherwise the element value is materialised in TCG_REG_TMP0 and the
 * matching vreplgr2vr.{b,h,w,d} is emitted.
 *
 * @type is not consulted.  An unknown @vece on the fallback path hits
 * g_assert_not_reached().
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    int64_t elem = sextract64(v64, 0, 8 << vece);
    bool fits_vldi = elem >= -0x200 && elem <= 0x1FF;

    if (fits_vldi) {
        /* Try vldi if imm can fit */
        uint32_t vldi_imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);

        tcg_out_opc_vldi(s, rd, vldi_imm);
    } else {
        /* TODO: vldi patterns when imm 12 is set */

        /* Fallback to vreplgr2vr */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, elem);
        if (vece == MO_8) {
            tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
        } else if (vece == MO_16) {
            tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
        } else if (vece == MO_32) {
            tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
        } else if (vece == MO_64) {
            tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
        } else {
            g_assert_not_reached();
        }
    }
}

/*
 * Emit host code for a single vector opcode.
 *
 * @s:          code generation context
 * @opc:        opcode to emit; st_vec, ld_vec and dupm_vec are handled,
 *              anything else hits g_assert_not_reached()
 * @vecl:       vector length selector; vecl + TCG_TYPE_V64 must equal
 *              TCG_TYPE_V128 (checked with tcg_debug_assert)
 * @vece:       element size, only used by dupm_vec
 * @args:       operands: args[0] is the vector register, args[1] the
 *              base register and args[2] the byte displacement
 * @const_args: not consulted
 *
 * For ld_vec/st_vec, the immediate forms vld/vst are used when the
 * displacement fits in [-0x800, 0x7ff]; otherwise the displacement is
 * loaded into TCG_REG_TMP0 and the indexed forms vldx/vstx are used.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    /*
     * Keep the displacement in a signed type.  Range-checking the raw
     * TCGArg against -0x800 would convert the constant to unsigned when
     * TCGArg is an unsigned type, so the test could never succeed and
     * the immediate forms below would be unreachable.
     */
    intptr_t offset = args[2];
    TCGReg temp = TCG_REG_TMP0;

    /* Currently only supports V128 */
    tcg_debug_assert(type == TCG_TYPE_V128);

    switch (opc) {
    case INDEX_op_st_vec:
        /* Try to fit vst imm */
        if (offset >= -0x800 && offset <= 0x7ff) {
            tcg_out_opc_vst(s, a0, a1, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, temp, offset);
            tcg_out_opc_vstx(s, a0, a1, temp);
        }
        break;
    case INDEX_op_ld_vec:
        /* Try to fit vld imm */
        if (offset >= -0x800 && offset <= 0x7ff) {
            tcg_out_opc_vld(s, a0, a1, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, temp, offset);
            tcg_out_opc_vldx(s, a0, a1, temp);
        }
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Report whether the backend handles vector opcode @opc.
 *
 * Returns 1 for ld_vec, st_vec, dup_vec and dupm_vec, and 0 for every
 * other opcode.  @type and @vece are not consulted.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    return opc == INDEX_op_ld_vec ||
           opc == INDEX_op_st_vec ||
           opc == INDEX_op_dup_vec ||
           opc == INDEX_op_dupm_vec;
}

/*
 * Expansion hook for vector opcodes.
 *
 * No opcode is expanded by this backend yet (tcg_can_emit_vec_op only
 * ever returns 0 or 1), so any call here is treated as a programming
 * error and hits g_assert_not_reached().
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type,
                       unsigned vece, TCGArg a0, ...)
{
    g_assert_not_reached();
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
switch (op) {
Expand Down Expand Up @@ -1627,6 +1819,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_movcond_i64:
return C_O1_I4(r, rZ, rJ, rZ, rZ);

case INDEX_op_ld_vec:
case INDEX_op_dupm_vec:
case INDEX_op_dup_vec:
return C_O1_I1(w, r);

case INDEX_op_st_vec:
return C_O0_I2(w, r);

default:
g_assert_not_reached();
}
Expand Down Expand Up @@ -1708,6 +1908,10 @@ static void tcg_target_init(TCGContext *s)
exit(EXIT_FAILURE);
}

if (hwcap & HWCAP_LOONGARCH_LSX) {
use_lsx_instructions = 1;
}

tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

Expand All @@ -1723,6 +1927,18 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);

if (use_lsx_instructions) {
tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
}

s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
Expand All @@ -1731,6 +1947,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

typedef struct {
Expand Down
38 changes: 37 additions & 1 deletion tcg/loongarch64/tcg-target.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#define LOONGARCH_TCG_TARGET_H

#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 32
#define TCG_TARGET_NB_REGS 64

#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

Expand Down Expand Up @@ -68,13 +68,25 @@ typedef enum {
TCG_REG_S7,
TCG_REG_S8,

TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,

/* aliases */
TCG_AREG0 = TCG_REG_S0,
TCG_REG_TMP0 = TCG_REG_T8,
TCG_REG_TMP1 = TCG_REG_T7,
TCG_REG_TMP2 = TCG_REG_T6,
TCG_VEC_TMP0 = TCG_REG_V23,
} TCGReg;

extern bool use_lsx_instructions;

/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
Expand Down Expand Up @@ -161,6 +173,30 @@ typedef enum {

#define TCG_TARGET_HAS_qemu_ldst_i128 0

/*
 * Host vector widths.  Only 128-bit vectors are offered, and only when
 * use_lsx_instructions has been set at runtime (the backend sets it
 * from the LSX hwcap bit during target init).
 */
#define TCG_TARGET_HAS_v64 0
#define TCG_TARGET_HAS_v128 use_lsx_instructions
#define TCG_TARGET_HAS_v256 0

/* Optional vector operations: none are advertised by this backend yet. */
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
#define TCG_TARGET_HAS_nand_vec 0
#define TCG_TARGET_HAS_nor_vec 0
#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_mul_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 0
#define TCG_TARGET_HAS_bitsel_vec 0
#define TCG_TARGET_HAS_cmpsel_vec 0

#define TCG_TARGET_DEFAULT_MO (0)

#define TCG_TARGET_NEED_LDST_LABELS
Expand Down
12 changes: 12 additions & 0 deletions tcg/loongarch64/tcg-target.opc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright (c) 2023 Jiajie Chen
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*
* See the COPYING file in the top-level directory for details.
*
* Target-specific opcodes for host vector expansion. These will be
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
* consider these to be UNSPEC with names.
*/

0 comments on commit 16288de

Please sign in to comment.