Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
target/mips/mxu: Add S32SLT D16SLT D16AVG[R] Q8AVG[R] insns
These instructions are part of pool1, see the grand tree above
in the file. Q8ADD is part of pool1 too but belong to another
category of instructions, thus will be made in later patches.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Message-Id: <20230608104222.1520143-8-lis8215@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
  • Loading branch information
SiarheiVolkau authored and philmd committed Jul 10, 2023
1 parent fc34c76 commit ff7936f
Showing 1 changed file with 243 additions and 1 deletion.
244 changes: 243 additions & 1 deletion target/mips/tcg/mxu_translate.c
Expand Up @@ -358,6 +358,7 @@ enum {
OPC_MXU__POOL00 = 0x03,
OPC_MXU_S32MSUB = 0x04,
OPC_MXU_S32MSUBU = 0x05,
OPC_MXU__POOL01 = 0x06,
OPC_MXU_D16MUL = 0x08,
OPC_MXU_D16MAC = 0x0A,
OPC_MXU__POOL04 = 0x10,
Expand Down Expand Up @@ -391,6 +392,18 @@ enum {
OPC_MXU_Q8SLTU = 0x07,
};

/*
* MXU pool 01
*/
enum {
OPC_MXU_S32SLT = 0x00,
OPC_MXU_D16SLT = 0x01,
OPC_MXU_D16AVG = 0x02,
OPC_MXU_D16AVGR = 0x03,
OPC_MXU_Q8AVG = 0x04,
OPC_MXU_Q8AVGR = 0x05,
};

/*
* MXU pool 04 05 06 07 08 09 10 11
*/
Expand Down Expand Up @@ -1152,11 +1165,15 @@ static void gen_mxu_S32XOR(DisasContext *ctx)


/*
* MXU instruction category max/min
* MXU instruction category max/min/avg
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32MAX D16MAX Q8MAX
* S32MIN D16MIN Q8MIN
* S32SLT D16SLT Q8SLT
* Q8SLTU
* D16AVG Q8AVG
* D16AVGR Q8AVGR
*/

/*
Expand Down Expand Up @@ -1462,6 +1479,199 @@ static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
}
}

/*
* S32SLT
* Update XRa with the signed "set less than" comparison of XRb and XRc.
* a.k.a. XRa = XRb < XRc ? 1 : 0;
*/
static void gen_mxu_S32SLT(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;

pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);

if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}

/*
* D16SLT
* Update XRa with the signed "set less than" comparison of XRb and XRc
* on per-word basis.
* a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
*/
static void gen_mxu_D16SLT(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;

pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);

if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();

gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_sextract_tl(t0, t3, 16, 16);
tcg_gen_sextract_tl(t1, t4, 16, 16);
tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
tcg_gen_shli_tl(t2, t0, 16);
tcg_gen_sextract_tl(t0, t3, 0, 16);
tcg_gen_sextract_tl(t1, t4, 0, 16);
tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
}
}

/*
* D16AVG
* Update XRa with the signed average of XRb and XRc
* on per-word basis, rounding down.
* a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
*
* D16AVGR
* Update XRa with the signed average of XRb and XRc
* on per-word basis, math rounding 4/5.
* a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
*/
static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
{
uint32_t pad, XRc, XRb, XRa;

pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);

if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to same */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();

gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_sextract_tl(t0, t3, 16, 16);
tcg_gen_sextract_tl(t1, t4, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shli_tl(t2, t0, 15);
tcg_gen_andi_tl(t2, t2, 0xffff0000);
tcg_gen_sextract_tl(t0, t3, 0, 16);
tcg_gen_sextract_tl(t1, t4, 0, 16);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shri_tl(t0, t0, 1);
tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
gen_store_mxu_gpr(t2, XRa);
}
}

/*
* Q8AVG
* Update XRa with the signed average of XRb and XRc
* on per-byte basis, rounding down.
* a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
*
* Q8AVGR
* Update XRa with the signed average of XRb and XRc
* on per-word basis, math rounding 4/5.
* a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
*/
static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
{
uint32_t pad, XRc, XRb, XRa;

pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);

if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to same */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();

gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_movi_tl(t2, 0);

for (int i = 0; i < 4; i++) {
tcg_gen_extract_tl(t0, t3, 8 * i, 8);
tcg_gen_extract_tl(t1, t4, 8 * i, 8);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shri_tl(t0, t0, 1);
tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
}
gen_store_mxu_gpr(t2, XRa);
}
}


/*
* MXU instruction category: align
Expand Down Expand Up @@ -1769,6 +1979,35 @@ static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
return true;
}

static void decode_opc_mxu__pool01(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);

switch (opcode) {
case OPC_MXU_S32SLT:
gen_mxu_S32SLT(ctx);
break;
case OPC_MXU_D16SLT:
gen_mxu_D16SLT(ctx);
break;
case OPC_MXU_D16AVG:
gen_mxu_d16avg(ctx, false);
break;
case OPC_MXU_D16AVGR:
gen_mxu_d16avg(ctx, true);
break;
case OPC_MXU_Q8AVG:
gen_mxu_q8avg(ctx, false);
break;
case OPC_MXU_Q8AVGR:
gen_mxu_q8avg(ctx, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
uint32_t reversed = extract32(ctx->opcode, 20, 1);
Expand Down Expand Up @@ -2015,6 +2254,9 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
case OPC_MXU_D16MAC:
gen_mxu_d16mac(ctx);
break;
case OPC_MXU__POOL01:
decode_opc_mxu__pool01(ctx);
break;
case OPC_MXU__POOL04:
decode_opc_mxu__pool04(ctx);
break;
Expand Down

0 comments on commit ff7936f

Please sign in to comment.