99 changes: 53 additions & 46 deletions target/hexagon/gen_tcg_funcs.py
Expand Up @@ -37,7 +37,7 @@ def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
elif regtype == "C":
f.write(f" const int {regN} = insn->regno[{regno}] + HEX_REG_SA0;\n")
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
f.write(f" TCGv_i64 {regtype}{regid}V = " f"get_result_gpr_pair(ctx, {regN});\n")


Expand All @@ -53,7 +53,7 @@ def genptr_decl_writable(f, tag, regtype, regid, regno):
f.write(f" const int {regN} = insn->regno[{regno}];\n")
f.write(f" TCGv {regtype}{regid}V = tcg_temp_new();\n")
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_decl(f, tag, regtype, regid, regno):
Expand All @@ -71,7 +71,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
elif regid in {"d", "e", "x", "y"}:
genptr_decl_writable(f, tag, regtype, regid, regno)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "P":
if regid in {"s", "t", "u", "v"}:
f.write(
Expand All @@ -80,7 +80,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
elif regid in {"d", "e", "x"}:
genptr_decl_writable(f, tag, regtype, regid, regno)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "C":
if regid == "ss":
f.write(f" TCGv_i64 {regtype}{regid}V = " f"tcg_temp_new_i64();\n")
Expand All @@ -96,7 +96,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
elif regid == "d":
genptr_decl_writable(f, tag, regtype, regid, regno)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "M":
if regid == "u":
f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n")
Expand All @@ -105,7 +105,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
"HEX_REG_M0];\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "V":
if regid in {"dd"}:
f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n")
Expand Down Expand Up @@ -159,7 +159,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
f"{regtype}{regid}V_off);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "Q":
if regid in {"d", "e", "x"}:
f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n")
Expand All @@ -180,28 +180,28 @@ def genptr_decl(f, tag, regtype, regid, regno):
if not hex_common.skip_qemu_helper(tag):
f.write(f" TCGv_ptr {regtype}{regid}V = " "tcg_temp_new_ptr();\n")
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_decl_new(f, tag, regtype, regid, regno):
if regtype == "N":
if regid in {"s", "t"}:
f.write(
f" TCGv {regtype}{regid}N = "
f"hex_new_value[insn->regno[{regno}]];\n"
f"get_result_gpr(ctx, insn->regno[{regno}]);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "P":
if regid in {"t", "u", "v"}:
f.write(
f" TCGv {regtype}{regid}N = "
f"hex_new_pred_value[insn->regno[{regno}]];\n"
f"ctx->new_pred_value[insn->regno[{regno}]];\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "O":
if regid == "s":
f.write(
Expand All @@ -218,9 +218,9 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
f"tcg_constant_tl({regtype}{regid}N_num);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i):
Expand All @@ -232,9 +232,9 @@ def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i):
elif hex_common.is_new_val(regtype, regid, tag):
genptr_decl_new(f, tag, regtype, regid, i)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)


def genptr_decl_imm(f, immlett):
Expand Down Expand Up @@ -266,15 +266,15 @@ def genptr_src_read(f, tag, regtype, regid):
f"hex_gpr[{regtype}{regid}N]);\n"
)
elif regid not in {"s", "t", "u", "v"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "P":
if regid == "x":
f.write(
f" tcg_gen_mov_tl({regtype}{regid}V, "
f"hex_pred[{regtype}{regid}N]);\n"
)
elif regid not in {"s", "t", "u", "v"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "C":
if regid == "ss":
f.write(
Expand All @@ -287,10 +287,10 @@ def genptr_src_read(f, tag, regtype, regid):
f"{regtype}{regid}V);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "M":
if regid != "u":
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "V":
if regid in {"uu", "vv", "xx"}:
f.write(f" tcg_gen_gvec_mov(MO_64, {regtype}{regid}V_off,\n")
Expand All @@ -311,7 +311,7 @@ def genptr_src_read(f, tag, regtype, regid):
f.write(f" vreg_src_off(ctx, {regtype}{regid}N),\n")
f.write(" sizeof(MMVector), sizeof(MMVector));\n")
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "Q":
if regid in {"s", "t", "u", "v"}:
if not hex_common.skip_qemu_helper(tag):
Expand All @@ -326,23 +326,23 @@ def genptr_src_read(f, tag, regtype, regid):
)
f.write(" sizeof(MMQReg), sizeof(MMQReg));\n")
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_src_read_new(f, regtype, regid):
if regtype == "N":
if regid not in {"s", "t"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "P":
if regid not in {"t", "u", "v"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "O":
if regid != "s":
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_src_read_opn(f, regtype, regid, tag):
Expand All @@ -354,9 +354,9 @@ def genptr_src_read_opn(f, regtype, regid, tag):
elif hex_common.is_new_val(regtype, regid, tag):
genptr_src_read_new(f, regtype, regid)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)


def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i):
Expand All @@ -370,9 +370,9 @@ def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i):
elif hex_common.is_new_val(regtype, regid, tag):
f.write(f"{regtype}{regid}N")
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)


def gen_helper_decl_imm(f, immlett):
Expand All @@ -387,7 +387,8 @@ def gen_helper_call_imm(f, immlett):


def genptr_dst_write_pair(f, tag, regtype, regid):
f.write(f" gen_log_reg_write_pair({regtype}{regid}N, " f"{regtype}{regid}V);\n")
f.write(f" gen_log_reg_write_pair(ctx, {regtype}{regid}N, "
f"{regtype}{regid}V);\n")


def genptr_dst_write(f, tag, regtype, regid):
Expand All @@ -396,18 +397,19 @@ def genptr_dst_write(f, tag, regtype, regid):
genptr_dst_write_pair(f, tag, regtype, regid)
elif regid in {"d", "e", "x", "y"}:
f.write(
f" gen_log_reg_write({regtype}{regid}N, " f"{regtype}{regid}V);\n"
f" gen_log_reg_write(ctx, {regtype}{regid}N, "
f"{regtype}{regid}V);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "P":
if regid in {"d", "e", "x"}:
f.write(
f" gen_log_pred_write(ctx, {regtype}{regid}N, "
f"{regtype}{regid}V);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "C":
if regid == "dd":
f.write(
Expand All @@ -420,9 +422,9 @@ def genptr_dst_write(f, tag, regtype, regid):
f"{regtype}{regid}V);\n"
)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"):
Expand All @@ -438,12 +440,12 @@ def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"):
f"{regtype}{regid}N, {newv});\n"
)
elif regid not in {"dd", "d", "x"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
elif regtype == "Q":
if regid not in {"d", "e", "x"}:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)
else:
print("Bad register parse: ", regtype, regid)
hex_common.bad_register(regtype, regid)


def genptr_dst_write_opn(f, regtype, regid, tag):
Expand All @@ -466,7 +468,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag):
else:
genptr_dst_write(f, tag, regtype, regid)
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)


##
Expand All @@ -481,7 +483,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag):
## TCGv RsV = hex_gpr[insn->regno[1]];
## TCGv RtV = hex_gpr[insn->regno[2]];
## <GEN>
## gen_log_reg_write(RdN, RdV);
## gen_log_reg_write(ctx, RdN, RdV);
## }
##
## where <GEN> depends on hex_common.skip_qemu_helper(tag)
Expand Down Expand Up @@ -530,7 +532,7 @@ def gen_tcg_func(f, tag, regs, imms):
elif hex_common.is_new_val(regtype, regid, tag):
declared.append(f"{regtype}{regid}N")
else:
print("Bad register parse: ", regtype, regid, toss, numregs)
hex_common.bad_register(regtype, regid, toss, numregs)

## Handle immediates
for immlett, bits, immshift in imms:
Expand All @@ -548,10 +550,13 @@ def gen_tcg_func(f, tag, regs, imms):
if hex_common.need_pkt_has_multi_cof(tag):
f.write(" TCGv pkt_has_multi_cof = ")
f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n")
if hex_common.need_pkt_need_commit(tag):
f.write(" TCGv pkt_need_commit = ")
f.write("tcg_constant_tl(ctx->need_commit);\n")
if hex_common.need_part1(tag):
f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n")
if hex_common.need_slot(tag):
f.write(" TCGv slot = tcg_constant_tl(insn->slot);\n")
f.write(" TCGv slotval = gen_slotval(ctx);\n")
if hex_common.need_PC(tag):
f.write(" TCGv PC = tcg_constant_tl(ctx->pkt->pc);\n")
if hex_common.helper_needs_next_PC(tag):
Expand Down Expand Up @@ -594,12 +599,14 @@ def gen_tcg_func(f, tag, regs, imms):

if hex_common.need_pkt_has_multi_cof(tag):
f.write(", pkt_has_multi_cof")
if hex_common.need_pkt_need_commit(tag):
f.write(", pkt_need_commit")
if hex_common.need_PC(tag):
f.write(", PC")
if hex_common.helper_needs_next_PC(tag):
f.write(", next_PC")
if hex_common.need_slot(tag):
f.write(", slot")
f.write(", slotval")
if hex_common.need_part1(tag):
f.write(", part1")
f.write(");\n")
Expand Down
35 changes: 35 additions & 0 deletions target/hexagon/gen_tcg_hvx.h
Expand Up @@ -128,6 +128,41 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
sizeof(MMVector), sizeof(MMVector))

/*
* TCG override for V6_vassign_tmp: expands to a single gvec move that
* copies sizeof(MMVector) bytes from offset VuV_off to offset VdV_off.
* The SHORTCODE argument is not used by the expansion.
*/
#define fGEN_TCG_V6_vassign_tmp(SHORTCODE) \
tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
sizeof(MMVector), sizeof(MMVector))

/*
* TCG override for V6_vcombine_tmp: emits two gvec moves, VvV into the
* first MMVector at VddV_off and VuV into the second MMVector at
* VddV_off + sizeof(MMVector). No source/destination overlap handling
* is performed by this macro. SHORTCODE is not used by the expansion.
*/
#define fGEN_TCG_V6_vcombine_tmp(SHORTCODE) \
do { \
tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
sizeof(MMVector), sizeof(MMVector)); \
tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \
sizeof(MMVector), sizeof(MMVector)); \
} while (0)

/*
* Vector combine
*
* Writes VvV to the first MMVector at VddV_off and VuV to the second
* MMVector at VddV_off + sizeof(MMVector).
*
* Be careful that the source and dest don't overlap:
* when VddV_off == VuV_off, the first move (VvV into VddV_off) would
* overwrite VuV before it has been copied. In that case VuV is first
* saved to the vtmp field of CPUHexagonState and the second move reads
* from that saved copy instead.
*/
#define fGEN_TCG_V6_vcombine(SHORTCODE) \
do { \
if (VddV_off != VuV_off) { \
tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
sizeof(MMVector), sizeof(MMVector)); \
tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \
sizeof(MMVector), sizeof(MMVector)); \
} else { \
intptr_t tmpoff = offsetof(CPUHexagonState, vtmp); \
tcg_gen_gvec_mov(MO_64, tmpoff, VuV_off, \
sizeof(MMVector), sizeof(MMVector)); \
tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
sizeof(MMVector), sizeof(MMVector)); \
tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), tmpoff, \
sizeof(MMVector), sizeof(MMVector)); \
} \
} while (0)

/* Vector conditional move */
#define fGEN_TCG_VEC_CMOV(PRED) \
do { \
Expand Down
347 changes: 289 additions & 58 deletions target/hexagon/genptr.c

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion target/hexagon/genptr.h
Expand Up @@ -35,7 +35,9 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot);
void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot);
TCGv gen_read_reg(TCGv result, int num);
TCGv gen_read_preg(TCGv pred, uint8_t num);
void gen_log_reg_write(int rnum, TCGv val);
TCGv get_result_gpr(DisasContext *ctx, int rnum);
TCGv get_result_pred(DisasContext *ctx, int pnum);
void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val);
void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val);
void gen_set_usr_field(DisasContext *ctx, int field, TCGv val);
void gen_set_usr_fieldi(DisasContext *ctx, int field, int x);
Expand All @@ -58,4 +60,6 @@ void gen_set_half(int N, TCGv result, TCGv src);
void gen_set_half_i64(int N, TCGv_i64 result, TCGv src);
void probe_noshuf_load(TCGv va, int s, int mi);

extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS];

#endif
6 changes: 4 additions & 2 deletions target/hexagon/helper.h
Expand Up @@ -21,16 +21,18 @@
DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32)
DEF_HELPER_1(debug_start_packet, void, env)
DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int)
DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int)
DEF_HELPER_FLAGS_5(debug_commit_end, TCG_CALL_NO_WG, void, env, i32, int, int, int)
DEF_HELPER_2(commit_store, void, env, int)
DEF_HELPER_3(gather_store, void, env, i32, int)
DEF_HELPER_1(commit_hvx_stores, void, env)
DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
DEF_HELPER_2(sfinvsqrta, i64, env, f32)
DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64)
DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32)
DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64)
DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64)

/* Floating point */
DEF_HELPER_2(conv_sf2df, f64, env, f32)
Expand Down
19 changes: 16 additions & 3 deletions target/hexagon/hex_common.py
Expand Up @@ -30,6 +30,9 @@
overrides = {} # tags with helper overrides
idef_parser_enabled = {} # tags enabled for idef-parser

def bad_register(*args):
    """Abort with a "Bad register parse" error that lists *args*.

    Each positional argument is converted with ``str`` and the results are
    joined with ", " to form the message, so callers can pass any mix of
    register type, register id and extra parse fields.

    Raises:
        Exception: always; the message is
            "Bad register parse: <arg1>, <arg2>, ...".
    """
    args_str = ", ".join(map(str, args))
    raise Exception(f"Bad register parse: {args_str}")

# We should do this as a hash for performance,
# but to keep order let's keep it as a list.
Expand Down Expand Up @@ -97,6 +100,12 @@ def calculate_attribs():
add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR")
add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD")
add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE")
add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0')
add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0')
add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0')
add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1')
add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1')
add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3')

# Recurse down macros, find attributes from sub-macros
macroValues = list(macros.values())
Expand Down Expand Up @@ -241,9 +250,10 @@ def is_new_val(regtype, regid, tag):

def need_slot(tag):
if (
("A_CONDEXEC" in attribdict[tag] and "A_JUMP" not in attribdict[tag])
or "A_STORE" in attribdict[tag]
or "A_LOAD" in attribdict[tag]
"A_CVI_SCATTER" not in attribdict[tag]
and "A_CVI_GATHER" not in attribdict[tag]
and ("A_STORE" in attribdict[tag]
or "A_LOAD" in attribdict[tag])
):
return 1
else:
Expand All @@ -270,6 +280,9 @@ def need_pkt_has_multi_cof(tag):
return "A_COF" in attribdict[tag]


def need_pkt_need_commit(tag):
    """Return whether *tag* has the A_IMPLICIT_WRITES_USR attribute.

    The attribute is looked up in the module-level ``attribdict`` entry
    for *tag*.
    """
    return "A_IMPLICIT_WRITES_USR" in attribdict[tag]

def need_condexec_reg(tag, regs):
if "A_CONDEXEC" in attribdict[tag]:
for regtype, regid, toss, numregs in regs:
Expand Down
6 changes: 4 additions & 2 deletions target/hexagon/iclass.c
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -51,8 +51,10 @@ SlotMask find_iclass_slots(Opcode opcode, int itype)
return SLOTS_0;
} else if ((opcode == J2_trap0) ||
(opcode == Y2_isync) ||
(opcode == J2_pause) || (opcode == J4_hintjumpr)) {
(opcode == J2_pause)) {
return SLOTS_2;
} else if (opcode == J4_hintjumpr) {
return SLOTS_23;
} else if (GET_ATTRIB(opcode, A_CRSLOT23)) {
return SLOTS_23;
} else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) {
Expand Down
37 changes: 32 additions & 5 deletions target/hexagon/idef-parser/idef-parser.lex
Expand Up @@ -401,12 +401,39 @@ STRING_LIT \"(\\.|[^"\\])*\"
}
return SIGN;
}
"0x"{HEX_DIGIT}+ |
{DIGIT}+ { yylval->rvalue.type = IMMEDIATE;
yylval->rvalue.bit_width = 32;
yylval->rvalue.signedness = SIGNED;
"0x"{HEX_DIGIT}+ { uint64_t value = strtoull(yytext, NULL, 0);
yylval->rvalue.type = IMMEDIATE;
yylval->rvalue.imm.type = VALUE;
yylval->rvalue.imm.value = strtoull(yytext, NULL, 0);
yylval->rvalue.imm.value = value;
if (value <= INT_MAX) {
yylval->rvalue.bit_width = sizeof(int) * 8;
yylval->rvalue.signedness = SIGNED;
} else if (value <= UINT_MAX) {
yylval->rvalue.bit_width = sizeof(unsigned int) * 8;
yylval->rvalue.signedness = UNSIGNED;
} else if (value <= LONG_MAX) {
yylval->rvalue.bit_width = sizeof(long) * 8;
yylval->rvalue.signedness = SIGNED;
} else if (value <= ULONG_MAX) {
yylval->rvalue.bit_width = sizeof(unsigned long) * 8;
yylval->rvalue.signedness = UNSIGNED;
} else {
g_assert_not_reached();
}
return IMM; }
{DIGIT}+ { int64_t value = strtoll(yytext, NULL, 0);
yylval->rvalue.type = IMMEDIATE;
yylval->rvalue.imm.type = VALUE;
yylval->rvalue.imm.value = value;
if (value >= INT_MIN && value <= INT_MAX) {
yylval->rvalue.bit_width = sizeof(int) * 8;
yylval->rvalue.signedness = SIGNED;
} else if (value >= LONG_MIN && value <= LONG_MAX) {
yylval->rvalue.bit_width = sizeof(long) * 8;
yylval->rvalue.signedness = SIGNED;
} else {
g_assert_not_reached();
}
return IMM; }
"0x"{HEX_DIGIT}+"ULL" |
{DIGIT}+"ULL" { yylval->rvalue.type = IMMEDIATE;
Expand Down
6 changes: 2 additions & 4 deletions target/hexagon/idef-parser/idef-parser.y
Expand Up @@ -594,8 +594,6 @@ rvalue : FAIL
| CAST rvalue
{
@1.last_column = @2.last_column;
/* Assign target signedness */
$2.signedness = $1.signedness;
$$ = gen_cast_op(c, &@1, &$2, $1.bit_width, $1.signedness);
}
| rvalue EQ rvalue
Expand Down Expand Up @@ -685,15 +683,15 @@ rvalue : FAIL
yyassert(c, &@1, $5.type == IMMEDIATE &&
$5.imm.type == VALUE,
"SXT expects immediate values\n");
$$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, SIGNED);
$$ = gen_extend_op(c, &@1, &$3, 64, &$7, SIGNED);
}
| ZXT '(' rvalue ',' IMM ',' rvalue ')'
{
@1.last_column = @8.last_column;
yyassert(c, &@1, $5.type == IMMEDIATE &&
$5.imm.type == VALUE,
"ZXT expects immediate values\n");
$$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, UNSIGNED);
$$ = gen_extend_op(c, &@1, &$3, 64, &$7, UNSIGNED);
}
| '(' rvalue ')'
{
Expand Down
67 changes: 32 additions & 35 deletions target/hexagon/idef-parser/parser-helpers.c
Expand Up @@ -167,8 +167,9 @@ void reg_print(Context *c, YYLTYPE *locp, HexReg *reg)
EMIT(c, "hex_gpr[%u]", reg->id);
}

void imm_print(Context *c, YYLTYPE *locp, HexImm *imm)
void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue)
{
HexImm *imm = &rvalue->imm;
switch (imm->type) {
case I:
EMIT(c, "i");
Expand All @@ -177,7 +178,21 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm)
EMIT(c, "%ciV", imm->id);
break;
case VALUE:
EMIT(c, "((int64_t) %" PRIu64 "ULL)", (int64_t) imm->value);
if (rvalue->bit_width == 32) {
if (rvalue->signedness == UNSIGNED) {
EMIT(c, "((uint32_t) 0x%" PRIx32 ")", (uint32_t) imm->value);
} else {
EMIT(c, "((int32_t) 0x%" PRIx32 ")", (int32_t) imm->value);
}
} else if (rvalue->bit_width == 64) {
if (rvalue->signedness == UNSIGNED) {
EMIT(c, "((uint64_t) 0x%" PRIx64 "ULL)", (uint64_t) imm->value);
} else {
EMIT(c, "((int64_t) 0x%" PRIx64 "LL)", (int64_t) imm->value);
}
} else {
g_assert_not_reached();
}
break;
case QEMU_TMP:
EMIT(c, "qemu_tmp_%" PRIu64, imm->index);
Expand Down Expand Up @@ -213,7 +228,7 @@ void rvalue_print(Context *c, YYLTYPE *locp, void *pointer)
tmp_print(c, locp, &rvalue->tmp);
break;
case IMMEDIATE:
imm_print(c, locp, &rvalue->imm);
imm_print(c, locp, rvalue);
break;
case VARID:
var_print(c, locp, &rvalue->var);
Expand Down Expand Up @@ -386,13 +401,10 @@ HexValue gen_rvalue_extend(Context *c, YYLTYPE *locp, HexValue *rvalue)

if (rvalue->type == IMMEDIATE) {
HexValue res = gen_imm_qemu_tmp(c, locp, 64, rvalue->signedness);
bool is_unsigned = (rvalue->signedness == UNSIGNED);
const char *sign_suffix = is_unsigned ? "u" : "";
gen_c_int_type(c, locp, 64, rvalue->signedness);
OUT(c, locp, " ", &res, " = ");
OUT(c, locp, "(", sign_suffix, "int64_t) ");
OUT(c, locp, "(", sign_suffix, "int32_t) ");
OUT(c, locp, rvalue, ";\n");
OUT(c, locp, " ", &res, " = (");
gen_c_int_type(c, locp, 64, rvalue->signedness);
OUT(c, locp, ")", rvalue, ";\n");
return res;
} else {
HexValue res = gen_tmp(c, locp, 64, rvalue->signedness);
Expand Down Expand Up @@ -959,33 +971,18 @@ HexValue gen_cast_op(Context *c,
unsigned target_width,
HexSignedness signedness)
{
HexValue res;
assert_signedness(c, locp, src->signedness);
if (src->bit_width == target_width) {
return *src;
} else if (src->type == IMMEDIATE) {
HexValue res = *src;
res.bit_width = target_width;
res.signedness = signedness;
return res;
res = *src;
} else if (src->bit_width < target_width) {
res = gen_rvalue_extend(c, locp, src);
} else {
HexValue res = gen_tmp(c, locp, target_width, signedness);
/* Truncate */
if (src->bit_width > target_width) {
OUT(c, locp, "tcg_gen_trunc_i64_tl(", &res, ", ", src, ");\n");
} else {
assert_signedness(c, locp, src->signedness);
if (src->signedness == UNSIGNED) {
/* Extend unsigned */
OUT(c, locp, "tcg_gen_extu_i32_i64(",
&res, ", ", src, ");\n");
} else {
/* Extend signed */
OUT(c, locp, "tcg_gen_ext_i32_i64(",
&res, ", ", src, ");\n");
}
}
return res;
/* src->bit_width > target_width */
res = gen_rvalue_truncate(c, locp, src);
}
res.signedness = signedness;
return res;
}


Expand Down Expand Up @@ -1123,7 +1120,7 @@ HexValue gen_extend_op(Context *c,
HexValue *value,
HexSignedness signedness)
{
unsigned bit_width = (dst_width = 64) ? 64 : 32;
unsigned bit_width = (dst_width == 64) ? 64 : 32;
HexValue value_m = *value;
HexValue src_width_m = *src_width;

Expand Down Expand Up @@ -1318,7 +1315,7 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value)
value_m = rvalue_materialize(c, locp, &value_m);
OUT(c,
locp,
"gen_log_reg_write(", &reg->reg.id, ", ",
"gen_log_reg_write(ctx, ", &reg->reg.id, ", ",
&value_m, ");\n");
}

Expand Down Expand Up @@ -1854,7 +1851,7 @@ HexValue gen_rvalue_pred(Context *c, YYLTYPE *locp, HexValue *pred)
*pred = gen_tmp(c, locp, 32, UNSIGNED);
if (is_dotnew) {
OUT(c, locp, "tcg_gen_mov_i32(", pred,
", hex_new_pred_value[");
", ctx->new_pred_value[");
OUT(c, locp, pred_str, "]);\n");
} else {
OUT(c, locp, "gen_read_preg(", pred, ", ", pred_str, ");\n");
Expand Down
2 changes: 1 addition & 1 deletion target/hexagon/idef-parser/parser-helpers.h
Expand Up @@ -80,7 +80,7 @@ void reg_compose(Context *c, YYLTYPE *locp, HexReg *reg, char reg_id[5]);

void reg_print(Context *c, YYLTYPE *locp, HexReg *reg);

void imm_print(Context *c, YYLTYPE *locp, HexImm *imm);
void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue);

void var_print(Context *c, YYLTYPE *locp, HexVar *var);

Expand Down
7 changes: 6 additions & 1 deletion target/hexagon/imported/branch.idef
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -34,6 +34,9 @@ Q6INSN(J2_jump,"jump #r22:2",ATTRIBS(A_JDIR), "direct unconditional jump",
Q6INSN(J2_jumpr,"jumpr Rs32",ATTRIBS(A_JINDIR), "indirect unconditional jump",
{fJUMPR(RsN,RsV,COF_TYPE_JUMPR);})

Q6INSN(J2_jumprh,"jumprh Rs32",ATTRIBS(A_JINDIR, A_HINTED_COF), "indirect unconditional jump",
{fJUMPR(RsN,RsV,COF_TYPE_JUMPR);})

#define OLDCOND_JUMP(TAG,OPER,OPER2,ATTRIB,DESCR,SEMANTICS) \
Q6INSN(TAG##t,"if (Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLD(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLD(PuV)) { SEMANTICS; }}) \
Q6INSN(TAG##f,"if (!Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLDNOT(PuV)) { SEMANTICS; }}) \
Expand Down Expand Up @@ -196,6 +199,8 @@ Q6INSN(J2_callrt,"if (Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional
Q6INSN(J2_callrf,"if (!Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional call if false",
{fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0);if (fLSBOLDNOT(PuV)) { fCALLR(RsV); }})

Q6INSN(J2_callrh,"callrh Rs32",ATTRIBS(CINDIR_STD, A_HINTED_COF), "hinted indirect unconditional call",
{ fCALLR(RsV); })



Expand Down
21 changes: 16 additions & 5 deletions target/hexagon/imported/encode_pp.def
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -382,14 +382,23 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd")
DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd")
DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd")

DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP00---- -00ddddd")
DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd")



DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd")
DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd")

DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd")
DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd")

DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd")
DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd")

DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP01---- -00ddddd")
DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd")
DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd")

DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd")
DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii")
DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii")

Expand Down Expand Up @@ -479,8 +488,8 @@ STD_PST_ENC(rinew, "1 101","10ttt")
/* x bus/cache */
/* x store/cache */
DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii")
DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ------dd")
DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ------dd")
DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd")
DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd")
DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------")


Expand Down Expand Up @@ -515,13 +524,15 @@ DEF_FIELD32(ICLASS_J" 110- -------- PP-!---- --------",J_PT,"Predict-taken")

DEF_FIELDROW_DESC32(ICLASS_J" 0000 -------- PP------ --------","[#0] PC=(Rs), R31=return")
DEF_ENC32(J2_callr, ICLASS_J" 0000 101sssss PP------ --------")
DEF_ENC32(J2_callrh, ICLASS_J" 0000 110sssss PP------ --------")

DEF_FIELDROW_DESC32(ICLASS_J" 0001 -------- PP------ --------","[#1] if (Pu) PC=(Rs), R31=return")
DEF_ENC32(J2_callrt, ICLASS_J" 0001 000sssss PP----uu --------")
DEF_ENC32(J2_callrf, ICLASS_J" 0001 001sssss PP----uu --------")

DEF_FIELDROW_DESC32(ICLASS_J" 0010 -------- PP------ --------","[#2] PC=(Rs); ")
DEF_ENC32(J2_jumpr, ICLASS_J" 0010 100sssss PP------ --------")
DEF_ENC32(J2_jumprh, ICLASS_J" 0010 110sssss PP------ --------")
DEF_ENC32(J4_hintjumpr, ICLASS_J" 0010 101sssss PP------ --------")

DEF_FIELDROW_DESC32(ICLASS_J" 0011 -------- PP------ --------","[#3] if (Pu) PC=(Rs) ")
Expand Down
20 changes: 19 additions & 1 deletion target/hexagon/imported/ldst.idef
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -128,6 +128,24 @@ Q6INSN(S2_allocframe,"allocframe(Rx32,#u11:3):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEM

#define A_RETURN A_RESTRICT_COF_MAX1,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOSLOT1_STORE,A_RET_TYPE,A_DEALLOCRET

/**** Load Acquire Store Release Instructions****/



Q6INSN(L2_loadw_aq,"Rd32=memw_aq(Rs32)",ATTRIBS(A_REGWRSIZE_4B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_4B,A_LOAD),"Load Acquire Word",
{ fEA_REG(RsV); fLOAD(1,4,u,EA,RdV); })
Q6INSN(L4_loadd_aq,"Rdd32=memd_aq(Rs32)",ATTRIBS(A_REGWRSIZE_8B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_8B,A_LOAD),"Load Acquire Double integer",
{ fEA_REG(RsV); fLOAD(1,8,u,EA,RddV); })

Q6INSN(R6_release_at_vi,"release(Rs32):at",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); })
Q6INSN(R6_release_st_vi,"release(Rs32):st",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); })

Q6INSN(S2_storew_rl_at_vi,"memw_rl(Rs32):at=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); })
Q6INSN(S4_stored_rl_at_vi,"memd_rl(Rs32):at=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); })

Q6INSN(S2_storew_rl_st_vi,"memw_rl(Rs32):st=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); })
Q6INSN(S4_stored_rl_st_vi,"memd_rl(Rs32):st=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); })

Q6INSN(L2_deallocframe,"Rdd32=deallocframe(Rs32):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_DEALLOCFRAME), "Deallocate stack frame",
{ fHIDE(size8u_t tmp;) fEA_REG(RsV);
fLOAD(1,8,u,EA,tmp);
Expand Down
16 changes: 15 additions & 1 deletion target/hexagon/imported/mmvec/encode_ext.def
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -257,6 +257,11 @@ DEF_ENC(V6_vasruhubrndsat, ICLASS_CJ" 1 000 vvv vvttt PP 0 uuuuu 111 ddd
DEF_ENC(V6_vasruwuhsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 100 ddddd") //
DEF_ENC(V6_vasruhubsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 101 ddddd") //

DEF_ENC(V6_vasrvuhubrndsat,"00011101000vvvvvPP0uuuuu011ddddd")
DEF_ENC(V6_vasrvuhubsat,"00011101000vvvvvPP0uuuuu010ddddd")
DEF_ENC(V6_vasrvwuhrndsat,"00011101000vvvvvPP0uuuuu001ddddd")
DEF_ENC(V6_vasrvwuhsat,"00011101000vvvvvPP0uuuuu000ddddd")

/***************************************************************
*
* Group #1, Uses Q6 Rt32
Expand Down Expand Up @@ -716,6 +721,7 @@ DEF_ENC(V6_vaddclbw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 001 ddddd") //

DEF_ENC(V6_vavguw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 010 ddddd") //
DEF_ENC(V6_vavguwrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 011 ddddd") //
DEF_ENC(V6_vassign_tmp,"00011110--0---01PP0uuuuu110ddddd")
DEF_ENC(V6_vavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 100 ddddd") //
DEF_ENC(V6_vavgbrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 101 ddddd") //
DEF_ENC(V6_vnavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 110 ddddd") //
Expand All @@ -730,6 +736,8 @@ DEF_ENC(V6_vmaxb, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 101 ddddd") //
DEF_ENC(V6_vsatuwuh, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") //
DEF_ENC(V6_vdealb4w, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") //

DEF_ENC(V6_v6mpyvubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii xxxxx")
DEF_ENC(V6_v6mpyhubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii xxxxx")

DEF_ENC(V6_vmpyowh_rnd, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") //
DEF_ENC(V6_vshuffeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") //
Expand All @@ -739,6 +747,11 @@ DEF_ENC(V6_vshufoh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 100 ddddd") //
DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") //
DEF_ENC(V6_vshufoeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") //
DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") //
DEF_ENC(V6_vcombine_tmp,"00011110101vvvvvPP0uuuuu111ddddd")

DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd")
DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd")


DEF_ENC(V6_vmpyieoh, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") //
DEF_ENC(V6_vadduwsat, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") //
Expand Down Expand Up @@ -789,6 +802,7 @@ DEF_ENC(V6_vrounduhub, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 011 ddddd") //
DEF_ENC(V6_vrounduwuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 100 ddddd") //
DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd")
DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd")
DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd")


#endif /* NO MMVEC */
321 changes: 320 additions & 1 deletion target/hexagon/imported/mmvec/ext.idef

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions target/hexagon/internal.h
Expand Up @@ -33,6 +33,8 @@

int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n);
int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n);

void hexagon_debug_vreg(CPUHexagonState *env, int regnum);
void hexagon_debug_qreg(CPUHexagonState *env, int regnum);
Expand Down
57 changes: 24 additions & 33 deletions target/hexagon/macros.h
Expand Up @@ -44,8 +44,17 @@
reg_field_info[FIELD].offset)

#define SET_USR_FIELD(FIELD, VAL) \
fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL))
do { \
if (pkt_need_commit) { \
fINSERT_BITS(env->new_value_usr, \
reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL)); \
} else { \
fINSERT_BITS(env->gpr[HEX_REG_USR], \
reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL)); \
} \
} while (0)
#endif

#ifdef QEMU_GENERATE
Expand Down Expand Up @@ -164,14 +173,14 @@
#define MEM_STORE8(VA, DATA, SLOT) \
MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
#else
#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, slot, VA))
#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, slot, VA))
#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, slot, VA))
#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, slot, VA))
#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, slot, VA))
#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, slot, VA))
#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA))

#define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
#define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT)
Expand Down Expand Up @@ -227,12 +236,8 @@ static inline void gen_cancel(uint32_t slot)

#ifdef QEMU_GENERATE
#define fLSBNEW(PVAL) tcg_gen_andi_tl(LSB, (PVAL), 1)
#define fLSBNEW0 tcg_gen_andi_tl(LSB, hex_new_pred_value[0], 1)
#define fLSBNEW1 tcg_gen_andi_tl(LSB, hex_new_pred_value[1], 1)
#else
#define fLSBNEW(PVAL) ((PVAL) & 1)
#define fLSBNEW0 (env->new_pred_value[0] & 1)
#define fLSBNEW1 (env->new_pred_value[1] & 1)
#endif

#ifdef QEMU_GENERATE
Expand Down Expand Up @@ -347,10 +352,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)

#define fREAD_LR() (env->gpr[HEX_REG_LR])

#define fWRITE_LR(A) log_reg_write(env, HEX_REG_LR, A)
#define fWRITE_FP(A) log_reg_write(env, HEX_REG_FP, A)
#define fWRITE_SP(A) log_reg_write(env, HEX_REG_SP, A)

#define fREAD_SP() (env->gpr[HEX_REG_SP])
#define fREAD_LC0 (env->gpr[HEX_REG_LC0])
#define fREAD_LC1 (env->gpr[HEX_REG_LC1])
Expand All @@ -375,24 +376,10 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
#define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC)
#define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR)
#define fHINTJR(TARGET) { /* Not modelled in qemu */}
#define fWRITE_LOOP_REGS0(START, COUNT) \
do { \
log_reg_write(env, HEX_REG_LC0, COUNT); \
log_reg_write(env, HEX_REG_SA0, START); \
} while (0)
#define fWRITE_LOOP_REGS1(START, COUNT) \
do { \
log_reg_write(env, HEX_REG_LC1, COUNT); \
log_reg_write(env, HEX_REG_SA1, START);\
} while (0)

#define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1)
#define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL))
#define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG))
#define fWRITE_P0(VAL) log_pred_write(env, 0, VAL)
#define fWRITE_P1(VAL) log_pred_write(env, 1, VAL)
#define fWRITE_P2(VAL) log_pred_write(env, 2, VAL)
#define fWRITE_P3(VAL) log_pred_write(env, 3, VAL)
#define fPART1(WORK) if (part1) { WORK; return; }
#define fCAST4u(A) ((uint32_t)(A))
#define fCAST4s(A) ((int32_t)(A))
Expand Down Expand Up @@ -661,7 +648,11 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
reg_field_info[FIELD].offset)

#ifdef QEMU_GENERATE
#define fDCZEROA(REG) tcg_gen_mov_tl(hex_dczero_addr, (REG))
#define fDCZEROA(REG) \
do { \
ctx->dczero_addr = tcg_temp_new(); \
tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \
} while (0)
#endif

#define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \
Expand Down
9 changes: 8 additions & 1 deletion target/hexagon/mmvec/macros.h
@@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -346,4 +346,11 @@
#define fUARCH_NOTE_PUMP_2X()

#define IV1DEAD()

/*
 * Build a coefficient in COE from the 32-bit word VAL for position POS:
 *   - the low 8 bits are byte POS of VAL (bits POS*8 .. POS*8+7),
 *   - the bits above them come from the 2-bit field at bit 24 + 2*POS,
 *     sign-extended (sextract32) and shifted left by 8.
 *
 * VAL and POS are parenthesized so that expression arguments
 * (e.g. POS given as "i + 1") bind as intended.
 *
 * NOTE(review): the ';' after "while (0)" is deliberately left in place
 * because the call sites are not visible from this header and may be
 * written without their own terminating ';'.  It does mean the macro is
 * not safe as the unbraced body of an if/else - confirm against the users
 * before removing it.
 */
#define fGET10BIT(COE, VAL, POS) \
    do { \
        COE = (sextract32((VAL), 24 + 2 * (POS), 2) << 8) | \
              extract32((VAL), (POS) * 8, 8); \
    } while (0);

#endif
154 changes: 104 additions & 50 deletions target/hexagon/op_helper.c
Expand Up @@ -52,38 +52,6 @@ G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
do_raise_exception_err(env, excp, 0);
}

/*
 * Record a pending write of val to register rnum.
 *
 * The value is stored in env->new_value[rnum]; env->gpr[rnum] is only read
 * here (to report "NO CHANGE" in the debug log), never modified.
 * When HEX_DEBUG is enabled, env->reg_written[rnum] is also set so that the
 * debug commit helper can tell which registers were written.
 */
void log_reg_write(CPUHexagonState *env, int rnum,
target_ulong val)
{
HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
rnum, val, val);
if (val == env->gpr[rnum]) {
HEX_DEBUG_LOG(" NO CHANGE");
}
HEX_DEBUG_LOG("\n");

env->new_value[rnum] = val;
if (HEX_DEBUG) {
/* Do this so HELPER(debug_commit_end) will know */
env->reg_written[rnum] = 1;
}
}

/*
 * Record a pending write of val to predicate register pnum.
 *
 * Only the low 8 bits of val are kept.  The first write to a predicate
 * stores the value and marks the predicate in env->pred_written; any
 * further write to the same predicate is AND-ed into the stored value.
 */
static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
{
    const target_ulong low_byte = val & 0xff;
    const int pred_mask = 1 << pnum;

    HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
                  " (0x" TARGET_FMT_lx ")\n",
                  pnum, val, val);

    if (!(env->pred_written & pred_mask)) {
        /* First write to this predicate: store it and mark it written */
        env->new_pred_value[pnum] = low_byte;
        env->pred_written |= pred_mask;
        return;
    }

    /* Multiple writes to the same preg are and'ed together */
    env->new_pred_value[pnum] &= low_byte;
}

void log_store32(CPUHexagonState *env, target_ulong addr,
target_ulong val, int width, int slot)
{
Expand Down Expand Up @@ -235,14 +203,14 @@ static void print_store(CPUHexagonState *env, int slot)
}

/* This function is a handy place to set a breakpoint */
void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC,
int pred_written, int has_st0, int has_st1)
{
bool reg_printed = false;
bool pred_printed = false;
int i;

HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
env->this_PC);
HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC);
HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);

for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
Expand All @@ -252,18 +220,18 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
reg_printed = true;
}
HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
i, env->new_value[i], env->new_value[i]);
i, env->gpr[i], env->gpr[i]);
}
}

for (i = 0; i < NUM_PREGS; i++) {
if (env->pred_written & (1 << i)) {
if (pred_written & (1 << i)) {
if (!pred_printed) {
HEX_DEBUG_LOG("Predicates written\n");
pred_printed = true;
}
HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
i, env->new_pred_value[i]);
i, env->pred[i]);
}
}

Expand Down Expand Up @@ -384,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
}

int64_t HELPER(vacsh_val)(CPUHexagonState *env,
int64_t RxxV, int64_t RssV, int64_t RttV)
int64_t RxxV, int64_t RssV, int64_t RttV,
uint32_t pkt_need_commit)
{
for (int i = 0; i < 4; i++) {
int xv = sextract64(RxxV, i * 16, 16);
Expand Down Expand Up @@ -416,6 +385,87 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
return PeV;
}

/*
 * Compute the 64-bit register result (RddV) of the CABAC decode-bin
 * operation from the two 64-bit source operands.
 *
 * Inputs as read below:
 *   RssV word 0 -> range, RssV word 1 -> offset
 *   RttV word 1 -> state (fEXTRACTU_RANGE ..., 5, 0) and
 *                  valMPS (fEXTRACTU_RANGE ..., 8, 8)
 *   RttV word 0 -> bitpos (fEXTRACTU_RANGE ..., 4, 0)
 * NOTE(review): the numeric arguments are presumably hi/lo bit positions;
 * confirm against the fEXTRACTU_RANGE definition.
 *
 * The result starts from a next-state table entry and then has valMPS, the
 * new range (bits passed to fINSERT_RANGE as 31, 23) and the new offset
 * (word 1) inserted.  The matching predicate result is computed separately
 * by HELPER(cabacdecbin_pred) from the same inputs.
 */
int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV)
{
int64_t RddV = 0;
size4u_t state;
size4u_t valMPS;
size4u_t bitpos;
size4u_t range;
size4u_t offset;
size4u_t rLPS;
size4u_t rMPS;

state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
range = fGETWORD(0, RssV);
offset = fGETWORD(1, RssV);

/* calculate rLPS */
range <<= bitpos;
offset <<= bitpos;
/* table column is selected by bits 30:29 of the shifted range */
rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
rLPS = rLPS << 23; /* left aligned */

/* calculate rMPS */
rMPS = (range & 0xff800000) - rLPS;

/* most probable region */
if (offset < rMPS) {
RddV = AC_next_state_MPS_64[state];
fINSERT_RANGE(RddV, 8, 8, valMPS);
fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23));
fSETWORD(1, RddV, offset);
}
/* least probable region */
else {
RddV = AC_next_state_LPS_64[state];
/* valMPS is flipped only when state is 0 */
fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS)));
fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23));
fSETWORD(1, RddV, (offset - rMPS));
}
return RddV;
}

/*
 * Compute the predicate result of the CABAC decode-bin operation.
 *
 * The inputs are decoded exactly as in HELPER(cabacdecbin_val): range and
 * offset come from RssV, state/valMPS/bitpos from RttV.  The result is
 * valMPS when the shifted offset lies in the most probable region
 * (offset < rMPS), otherwise valMPS with its low bit flipped.
 */
int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV)
{
    size4u_t state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
    size4u_t valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
    size4u_t bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
    size4u_t range = fGETWORD(0, RssV);
    size4u_t offset = fGETWORD(1, RssV);
    size4u_t rLPS;
    size4u_t rMPS;
    int32_t p0;

    /* Normalize range and offset before looking up rLPS */
    range <<= bitpos;
    offset <<= bitpos;
    rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
    rLPS = rLPS << 23; /* left aligned */

    /* Whatever is left of the (masked) range is the MPS sub-range */
    rMPS = (range & 0xff800000) - rLPS;

    /* Most probable region keeps valMPS, least probable region flips it */
    p0 = (offset < rMPS) ? valMPS : (valMPS ^ 1);
    return p0;
}

static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
bool is_predicated)
{
Expand Down Expand Up @@ -516,41 +566,45 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
* If the load is in slot 0 and there is a store in slot1 (that
* wasn't cancelled), we have to do the store first.
*/
static void check_noshuf(CPUHexagonState *env, uint32_t slot,
target_ulong vaddr, int size)
static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr, int size)
{
if (slot == 0 && env->pkt_has_store_s1 &&
if (slot == 0 && pkt_has_store_s1 &&
((env->slot_cancelled & (1 << 1)) == 0)) {
HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
HELPER(commit_store)(env, 1);
}
}

uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr)
{
uintptr_t ra = GETPC();
check_noshuf(env, slot, vaddr, 1);
check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1);
return cpu_ldub_data_ra(env, vaddr, ra);
}

uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr)
{
uintptr_t ra = GETPC();
check_noshuf(env, slot, vaddr, 2);
check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2);
return cpu_lduw_data_ra(env, vaddr, ra);
}

uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr)
{
uintptr_t ra = GETPC();
check_noshuf(env, slot, vaddr, 4);
check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4);
return cpu_ldl_data_ra(env, vaddr, ra);
}

uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr)
{
uintptr_t ra = GETPC();
check_noshuf(env, slot, vaddr, 8);
check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8);
return cpu_ldq_data_ra(env, vaddr, ra);
}

Expand Down
16 changes: 8 additions & 8 deletions target/hexagon/op_helper.h
Expand Up @@ -19,15 +19,15 @@
#define HEXAGON_OP_HELPER_H

/* Misc functions */
void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr);
uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr);
uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr);
uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr);
uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
uint32_t slot, target_ulong vaddr);

uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);
uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);
uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);
uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);

void log_reg_write(CPUHexagonState *env, int rnum,
target_ulong val);
void log_store64(CPUHexagonState *env, target_ulong addr,
int64_t val, int width, int slot);
void log_store32(CPUHexagonState *env, target_ulong addr,
Expand Down
275 changes: 192 additions & 83 deletions target/hexagon/translate.c

Large diffs are not rendered by default.

52 changes: 45 additions & 7 deletions target/hexagon/translate.h
Expand Up @@ -38,10 +38,12 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
Expand All @@ -55,13 +57,22 @@ typedef struct DisasContext {
DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
DECLARE_BITMAP(vregs_read, NUM_VREGS);
int qreg_log[NUM_QREGS];
int qreg_log_idx;
DECLARE_BITMAP(qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool need_pkt_has_store_s1;
bool short_circuit;
bool has_hvx_helper;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
TCGv branch_taken;
TCGv dczero_addr;
} DisasContext;

static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
Expand All @@ -73,6 +84,11 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
}
}

/* Mark predicate register pnum as read in ctx->pregs_read. */
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
set_bit(pnum, ctx->pregs_read);
}

static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{
Expand All @@ -99,6 +115,17 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
ctx_log_reg_write(ctx, rnum + 1, is_predicated);
}

/* Mark register rnum as read in ctx->regs_read. */
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
set_bit(rnum, ctx->regs_read);
}

/*
 * Mark both halves of a register pair as read: rnum first, then rnum + 1.
 */
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
    for (int half = 0; half < 2; half++) {
        ctx_log_reg_read(ctx, rnum + half);
    }
}

intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
Expand Down Expand Up @@ -139,27 +166,38 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
}

/* Mark vector register rnum as read in ctx->vregs_read. */
static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
{
set_bit(rnum, ctx->vregs_read);
}

/*
 * Mark both vector registers of a pair as read.  The two members are
 * addressed as rnum ^ 0 and rnum ^ 1, i.e. rnum itself and the register
 * whose number differs from it only in the lowest bit.
 */
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
    for (int low_bit = 0; low_bit < 2; low_bit++) {
        ctx_log_vreg_read(ctx, rnum ^ low_bit);
    }
}

/*
 * Append rnum to the list of written Q registers (ctx->qreg_log) and
 * advance ctx->qreg_log_idx.  No bounds check is performed here.
 */
static inline void ctx_log_qreg_write(DisasContext *ctx,
                                      int rnum)
{
    ctx->qreg_log[ctx->qreg_log_idx++] = rnum;
}

/* Mark Q register qnum as read in ctx->qregs_read. */
static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
{
set_bit(qnum, ctx->qregs_read);
}

extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
extern TCGv hex_pred[NUM_PREGS];
extern TCGv hex_this_PC;
extern TCGv hex_slot_cancelled;
extern TCGv hex_branch_taken;
extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
extern TCGv hex_new_value_usr;
extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
extern TCGv hex_new_pred_value[NUM_PREGS];
extern TCGv hex_pred_written;
extern TCGv hex_store_addr[STORES_MAX];
extern TCGv hex_store_width[STORES_MAX];
extern TCGv hex_store_val32[STORES_MAX];
extern TCGv_i64 hex_store_val64[STORES_MAX];
extern TCGv hex_dczero_addr;
extern TCGv hex_llsc_addr;
extern TCGv hex_llsc_val;
extern TCGv_i64 hex_llsc_val_i64;
Expand Down
16 changes: 12 additions & 4 deletions tests/guest-debug/run-test.py
Expand Up @@ -26,11 +26,12 @@ def get_args():
parser.add_argument("--qargs", help="Qemu arguments for test")
parser.add_argument("--binary", help="Binary to debug",
required=True)
parser.add_argument("--test", help="GDB test script",
required=True)
parser.add_argument("--test", help="GDB test script")
parser.add_argument("--gdb", help="The gdb binary to use",
default=None)
parser.add_argument("--gdb-args", help="Additional gdb arguments")
parser.add_argument("--output", help="A file to redirect output to")
parser.add_argument("--stderr", help="A file to redirect stderr to")

return parser.parse_args()

Expand Down Expand Up @@ -58,6 +59,10 @@ def log(output, msg):
output = open(args.output, "w")
else:
output = None
if args.stderr:
stderr = open(args.stderr, "w")
else:
stderr = None

socket_dir = TemporaryDirectory("qemu-gdbstub")
socket_name = os.path.join(socket_dir.name, "gdbstub.socket")
Expand All @@ -77,20 +82,23 @@ def log(output, msg):

# Now launch gdb with our test and collect the result
gdb_cmd = "%s %s" % (args.gdb, args.binary)
if args.gdb_args:
gdb_cmd += " %s" % (args.gdb_args)
# run quietly and ignore .gdbinit
gdb_cmd += " -q -n -batch"
# disable prompts in case of crash
gdb_cmd += " -ex 'set confirm off'"
# connect to remote
gdb_cmd += " -ex 'target remote %s'" % (socket_name)
# finally the test script itself
gdb_cmd += " -x %s" % (args.test)
if args.test:
gdb_cmd += " -x %s" % (args.test)


sleep(1)
log(output, "GDB CMD: %s" % (gdb_cmd))

result = subprocess.call(gdb_cmd, shell=True, stdout=output)
result = subprocess.call(gdb_cmd, shell=True, stdout=output, stderr=stderr)

# A result of greater than 128 indicates a fatal signal (likely a
# crash due to gdb internal failure). That's a problem for GDB and
Expand Down
7 changes: 4 additions & 3 deletions tests/qemu-iotests/245
Expand Up @@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
self.reopen(hd0_opts, {'file': 'hd0-file'})

# Insert (and remove) a compress filter
@iotests.skip_if_unsupported(['compress'])
def test_insert_compress_filter(self):
# Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file)
opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)}
Expand Down Expand Up @@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):

# Check the first byte of the first three L2 entries and verify that
# the second one is compressed (0x40) while the others are not (0x80)
iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
'-c', 'read -P 0x40 0x40008 1',
'-c', 'read -P 0x80 0x40010 1', hd_path[0])
iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
'-c', 'read -P 0x40 0x40008 1',
'-c', 'read -P 0x80 0x40010 1', hd_path[0])

# Swap the disk images of two active block devices
def test_swap_files(self):
Expand Down
9 changes: 1 addition & 8 deletions tests/qemu-iotests/245.out
Expand Up @@ -10,14 +10,7 @@
{"return": {}}
{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
....read 1/1 bytes at offset 262144
1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 1/1 bytes at offset 262152
1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 1/1 bytes at offset 262160
1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

................
....................
----------------------------------------------------------------------
Ran 26 tests

Expand Down
4 changes: 4 additions & 0 deletions tests/qemu-iotests/iotests.py
Expand Up @@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
assert self._qmp is not None
return self._qmp.cmd(cmd, args)

def get_qmp(self) -> QEMUMonitorProtocol:
    """Return the underlying QMP connection object.

    Asserts that the connection has been set up (``self._qmp`` is not
    ``None``) before handing it out.
    """
    monitor = self._qmp
    assert monitor is not None
    return monitor

def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
Expand Down
56 changes: 52 additions & 4 deletions tests/qemu-iotests/tests/graph-changes-while-io
Expand Up @@ -22,19 +22,19 @@
import os
from threading import Thread
import iotests
from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
QemuStorageDaemon
from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
QMPTestCase, QemuStorageDaemon


top = os.path.join(iotests.test_dir, 'top.img')
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')


def do_qemu_img_bench() -> None:
def do_qemu_img_bench(count: int = 2000000) -> None:
"""
Do some I/O requests on `nbd_sock`.
"""
qemu_img('bench', '-f', 'raw', '-c', '2000000',
qemu_img('bench', '-f', 'raw', '-c', str(count),
f'nbd+unix:///node0?socket={nbd_sock}')


Expand Down Expand Up @@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):

bench_thr.join()

def test_commit_while_io(self) -> None:
    """Repeatedly start and cancel a commit job while I/O is in flight.

    A background thread runs ``qemu-img bench`` against the NBD export.
    Meanwhile an overlay is created on top of ``node0`` and, for as long
    as the benchmark thread is alive, a ``block-commit`` job is started
    and immediately cancelled, waiting each time for the job to report
    the ``null`` status before starting the next round.
    """
    # Run qemu-img bench in the background
    bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
    bench_thr.start()

    # Put some data into the image that will become the overlay
    for write_cmd in ('write 0 64k', 'write 128k 64k'):
        qemu_io('-c', write_cmd, top)

    overlay_opts = {
        'driver': imgfmt,
        'node-name': 'overlay',
        'backing': None,
        'file': {
            'driver': 'file',
            'filename': top
        }
    }
    self.assert_qmp(self.qsd.qmp('blockdev-add', overlay_opts),
                    'return', {})

    snapshot_opts = {
        'node': 'node0',
        'overlay': 'overlay',
    }
    self.assert_qmp(self.qsd.qmp('blockdev-snapshot', snapshot_opts),
                    'return', {})

    # While qemu-img bench is running, repeatedly commit overlay to node0
    while bench_thr.is_alive():
        started = self.qsd.qmp('block-commit', {
            'job-id': 'job0',
            'device': 'overlay',
        })
        self.assert_qmp(started, 'return', {})

        stopped = self.qsd.qmp('block-job-cancel', {
            'device': 'job0',
        })
        self.assert_qmp(stopped, 'return', {})

        # Drain events until the job reports status 'null'; every batch
        # returned by get_events() is consumed in full.
        job_gone = False
        while not job_gone:
            for ev in self.qsd.get_qmp().get_events(wait=10.0):
                if (ev['event'] == 'JOB_STATUS_CHANGE'
                        and ev['data']['status'] == 'null'):
                    job_gone = True

    bench_thr.join()

if __name__ == '__main__':
# Format must support raw backing files
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
Expand Down
4 changes: 2 additions & 2 deletions tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -1,5 +1,5 @@
.
..
----------------------------------------------------------------------
Ran 1 tests
Ran 2 tests

OK
21 changes: 21 additions & 0 deletions tests/tcg/hexagon/Makefile.target
Expand Up @@ -45,10 +45,18 @@ HEX_TESTS += fpstuff
HEX_TESTS += overflow
HEX_TESTS += signal_context
HEX_TESTS += reg_mut
HEX_TESTS += read_write_overlap
HEX_TESTS += vector_add_int
HEX_TESTS += scatter_gather
HEX_TESTS += hvx_misc
HEX_TESTS += hvx_histogram
HEX_TESTS += invalid-slots

run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr)

run-invalid-slots: invalid-slots
$(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS) $<)

HEX_TESTS += test_abs
HEX_TESTS += test_bitcnt
Expand Down Expand Up @@ -76,17 +84,30 @@ HEX_TESTS += test_vminh
HEX_TESTS += test_vpmpyh
HEX_TESTS += test_vspliceb

HEX_TESTS += v68_scalar
HEX_TESTS += v68_hvx
HEX_TESTS += v69_hvx
HEX_TESTS += v73_scalar

TESTS += $(HEX_TESTS)

# This test has to be compiled for the -mv67t target
usr: usr.c
$(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS)

# Build this test with -mv71 to exercise the CABAC instruction
misc: misc.c
$(CC) $(CFLAGS) -mv71 -O2 $< -o $@ $(LDFLAGS)
scatter_gather: CFLAGS += -mhvx
vector_add_int: CFLAGS += -mhvx -fvectorize
hvx_misc: hvx_misc.c hvx_misc.h
hvx_misc: CFLAGS += -mhvx
hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant
v68_hvx: v68_hvx.c hvx_misc.h v6mpy_ref.c.inc
v68_hvx: CFLAGS += -mhvx -Wno-unused-function
v69_hvx: v69_hvx.c hvx_misc.h
v69_hvx: CFLAGS += -mhvx -Wno-unused-function
v73_scalar: CFLAGS += -Wno-unused-function

hvx_histogram: hvx_histogram.c hvx_histogram_row.S
$(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS)
54 changes: 54 additions & 0 deletions tests/tcg/hexagon/fpstuff.c
Expand Up @@ -20,6 +20,7 @@
*/

#include <stdio.h>
#include <float.h>

const int FPINVF_BIT = 1; /* Invalid */
const int FPINVF = 1 << FPINVF_BIT;
Expand Down Expand Up @@ -706,6 +707,57 @@ static void check_float2int_convs()
check_fpstatus(usr, FPINVF);
}

/*
 * Check the raw bit patterns produced by the sfmake (32-bit result) and
 * dfmake (64-bit result) instructions for the immediate 0xf, in both the
 * :neg and :pos variants.  Each result is compared against a fixed
 * expected constant with check32()/check64().
 */
static void check_float_consts(void)
{
int res32;
unsigned long long res64;

asm("%0 = sfmake(#%1):neg\n\t" : "=r"(res32) : "i"(0xf));
check32(res32, 0xbc9e0000);

asm("%0 = sfmake(#%1):pos\n\t" : "=r"(res32) : "i"(0xf));
check32(res32, 0x3c9e0000);

asm("%0 = dfmake(#%1):neg\n\t" : "=r"(res64) : "i"(0xf));
check64(res64, 0xbf93c00000000000ULL);

asm("%0 = dfmake(#%1):pos\n\t" : "=r"(res64) : "i"(0xf));
check64(res64, 0x3f93c00000000000ULL);
}

/*
 * Execute the dfmpyll instruction on x and y and return the raw 64-bit
 * contents of the destination register (not reinterpreted as a double).
 */
static inline unsigned long long dfmpyll(double x, double y)
{
unsigned long long res64;
asm("%0 = dfmpyll(%1, %2)" : "=r"(res64) : "r"(x), "r"(y));
return res64;
}

/*
 * Execute the accumulating dfmpylh instruction ("Rxx += dfmpylh(x, y)")
 * and return the raw 64-bit contents of the accumulator afterwards.
 *
 * acc supplies the initial accumulator value; its bit pattern is moved
 * into an integer through a union rather than by casting a double pointer
 * to an unsigned long long pointer, which would break the C effective-type
 * (strict aliasing) rules and is undefined behavior under optimization.
 */
static inline unsigned long long dfmpylh(double acc, double x, double y)
{
    union {
        double d;
        unsigned long long u;
    } acc_bits = { .d = acc };
    unsigned long long res64 = acc_bits.u;

    asm("%0 += dfmpylh(%1, %2)" : "+r"(res64) : "r"(x), "r"(y));
    return res64;
}

/*
 * Exercise dfmpyll and dfmpylh with boundary inputs (DBL_MIN, DBL_MAX,
 * -1.0, 0.0) and compare the raw 64-bit results with fixed expected
 * values.
 */
static void check_dfmpyxx(void)
{
    /* dfmpyll: plain multiply step */
    check64(dfmpyll(DBL_MIN, DBL_MIN), 0ULL);
    check64(dfmpyll(-1.0, DBL_MIN), 0ULL);
    check64(dfmpyll(DBL_MAX, DBL_MAX), 0x1fffffffdULL);

    /* dfmpylh: accumulating step, first argument is the accumulator */
    check64(dfmpylh(DBL_MIN, DBL_MIN, DBL_MIN), 0x10000000000000ULL);
    check64(dfmpylh(-1.0, DBL_MAX, DBL_MIN), 0xc00fffffffe00000ULL);
    check64(dfmpylh(DBL_MAX, 0.0, -1.0), 0x7fefffffffffffffULL);
}

int main()
{
check_compare_exception();
Expand All @@ -718,6 +770,8 @@ int main()
check_sffixupd();
check_sffms();
check_float2int_convs();
check_float_consts();
check_dfmpyxx();

puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
Expand Down
66 changes: 21 additions & 45 deletions tests/tcg/hexagon/hvx_misc.c
Expand Up @@ -342,49 +342,6 @@ static void test_vsubuwsat_dv(void)
check_output_w(__LINE__, 2);
}

/*
 * Run vshuff with v25 as both vector operands and store v25 to output,
 * then build the expected result in C: two copies of the splat vector are
 * shuffled byte-wise for every power-of-two offset whose bit is set in
 * shuff, and the second copy (v1) is used as the expected output.
 */
static void test_vshuff(void)
{
/* Test that vshuff works when the two operands are the same register */
const uint32_t splat = 0x089be55c;
const uint32_t shuff = 0x454fa926;
MMVector v0, v1;

/* Fill expect and output with different patterns before the test */
memset(expect, 0x12, sizeof(MMVector));
memset(output, 0x34, sizeof(MMVector));

asm volatile("v25 = vsplat(%0)\n\t"
"vshuff(v25, v25, %1)\n\t"
"vmem(%2 + #0) = v25\n\t"
: /* no outputs */
: "r"(splat), "r"(shuff), "r"(output)
: "v25", "memory");

/*
* The semantics of Hexagon are the operands are pass-by-value, so create
* two copies of the vsplat result.
*/
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
v0.uw[i] = splat;
v1.uw[i] = splat;
}
/* Do the vshuff operation */
for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) {
if (shuff & offset) {
for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) {
/* swap v0[k] with v1[k + offset] for every k with this bit clear */
if (!(k & offset)) {
uint8_t tmp = v0.ub[k];
v0.ub[k] = v1.ub[k + offset];
v1.ub[k + offset] = tmp;
}
}
}
}
/* Put the result in the expect buffer for verification */
expect[0] = v1;

check_output_b(__LINE__, 1);
}

static void test_load_tmp_predicated(void)
{
void *p0 = buffer0;
Expand Down Expand Up @@ -454,6 +411,25 @@ static void test_load_cur_predicated(void)
check_output_w(__LINE__, BUFSIZE);
}

/*
 * Check vcombine when the destination pair (v3:2) overlaps both sources.
 *
 * For each pair of output vectors, v2 is splatted with the even index and
 * v3 with the odd index, then "v3:2 = vcombine(v2, v3)" is executed and
 * both registers are stored.  The expected result is the two values
 * swapped: the even output vector holds the odd index and vice versa.
 */
static void test_vcombine(void)
{
    for (int pair = 0; pair < BUFSIZE / 2; pair++) {
        const int even = 2 * pair;
        const int odd = 2 * pair + 1;

        asm volatile("v2 = vsplat(%0)\n\t"
                     "v3 = vsplat(%1)\n\t"
                     "v3:2 = vcombine(v2, v3)\n\t"
                     "vmem(%2+#0) = v2\n\t"
                     "vmem(%2+#1) = v3\n\t"
                     :
                     : "r"(even), "r"(odd), "r"(&output[even])
                     : "v2", "v3", "memory");

        for (int lane = 0; lane < MAX_VEC_SIZE_BYTES / 4; lane++) {
            expect[even].w[lane] = odd;
            expect[odd].w[lane] = even;
        }
    }
    check_output_w(__LINE__, BUFSIZE);
}

int main()
{
init_buffers();
Expand Down Expand Up @@ -489,11 +465,11 @@ int main()
test_vadduwsat();
test_vsubuwsat_dv();

test_vshuff();

test_load_tmp_predicated();
test_load_cur_predicated();

test_vcombine();

puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
}
29 changes: 29 additions & 0 deletions tests/tcg/hexagon/invalid-slots.c
@@ -0,0 +1,29 @@
/*
* Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

/* 8-byte, 8-byte-aligned scratch buffer whose address main() loads into r0 */
char mem[8] __attribute__((aligned(8)));

/*
 * Execute a hand-encoded packet that, according to the comments on the
 * .word directives, places two instructions in slot 0 and is therefore
 * invalid.  The encodings are emitted as raw words.
 *
 * NOTE(review): presumably the test harness expects this program to be
 * killed by a signal instead of returning 0 - confirm against the test
 * makefile.
 */
int main()
{
    asm volatile(
        "r0 = #mem\n"
        /* Invalid packet (2 instructions at slot 0): */
        ".word 0xa1804100\n" /* { memw(r0) = r1;      */
        ".word 0x28032804\n" /*   r3 = #0; r4 = #0 }  */
        : : : "r0", "r3", "r4", "memory");
    return 0;
}
47 changes: 47 additions & 0 deletions tests/tcg/hexagon/misc.c
Expand Up @@ -18,6 +18,8 @@
#include <stdio.h>
#include <string.h>

/*
 * Non-zero when the compiler's target architecture version is 71 or lower.
 * The decbin (cabac) helper and its checks are compiled only in that case.
 */
#define CORE_HAS_CABAC (__HEXAGON_ARCH__ <= 71)

typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
Expand Down Expand Up @@ -245,13 +247,15 @@ static void check(int val, int expect)
}
}

#if CORE_HAS_CABAC
/* Compare two 64-bit values; on mismatch report both and count an error. */
static void check64(long long val, long long expect)
{
    if (val == expect) {
        return;
    }
    printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect);
    err++;
}
#endif

/* Reference contents; main() copies them into array before testing */
uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
/* Working copy of init that main() passes to the code under test */
uint32_t array[10];
Expand Down Expand Up @@ -286,6 +290,7 @@ static long long creg_pair(int x, int y)
return retval;
}

#if CORE_HAS_CABAC
static long long decbin(long long x, long long y, int *pred)
{
long long retval;
Expand All @@ -295,6 +300,7 @@ static long long decbin(long long x, long long y, int *pred)
: "r"(x), "r"(y));
return retval;
}
#endif

/* Check that predicates are auto-and'ed in a packet */
static int auto_and(void)
Expand Down Expand Up @@ -385,11 +391,46 @@ void test_count_trailing_zeros_ones(void)
check(ct1p(0xffffff0fffffffffULL), 36);
}

/* Return the result of the "Rd = mpy(Rs, Rt):rnd" instruction for x and y. */
static inline int dpmpyss_rnd_s0(int x, int y)
{
    int rounded;

    asm("%0 = mpy(%1, %2):rnd\n\t"
        : "=r"(rounded)
        : "r"(x), "r"(y));
    return rounded;
}

/*
 * Check mpy(Rs, Rt):rnd on boundary operands.
 *
 * The cases are grouped by the second operand: 0x80000000, -1, 0, 1 and
 * 0x7fffffff.  The expected values follow from
 * (x * y + 0x80000000) >> 32 evaluated in 64 bits.
 */
void test_dpmpyss_rnd_s0(void)
{
    /* y = 0x80000000 */
    check(dpmpyss_rnd_s0(-1, 0x80000000), 1);
    check(dpmpyss_rnd_s0(0, 0x80000000), 0);
    check(dpmpyss_rnd_s0(1, 0x80000000), 0);
    check(dpmpyss_rnd_s0(0x7fffffff, 0x80000000), 0xc0000001);
    /* y = -1 */
    check(dpmpyss_rnd_s0(0x80000000, -1), 1);
    check(dpmpyss_rnd_s0(-1, -1), 0);
    check(dpmpyss_rnd_s0(0, -1), 0);
    check(dpmpyss_rnd_s0(1, -1), 0);
    check(dpmpyss_rnd_s0(0x7fffffff, -1), 0);
    /* y = 0 */
    check(dpmpyss_rnd_s0(0x80000000, 0), 0);
    check(dpmpyss_rnd_s0(-1, 0), 0);
    check(dpmpyss_rnd_s0(0, 0), 0);
    check(dpmpyss_rnd_s0(1, 0), 0);
    /*
     * y = 1.  These three cases used to repeat the y = -1 cases above,
     * which left the small positive multiplier untested.
     */
    check(dpmpyss_rnd_s0(-1, 1), 0);
    check(dpmpyss_rnd_s0(0, 1), 0);
    check(dpmpyss_rnd_s0(1, 1), 0);
    check(dpmpyss_rnd_s0(0x7fffffff, 1), 0);
    /* y = 0x7fffffff */
    check(dpmpyss_rnd_s0(0x80000000, 0x7fffffff), 0xc0000001);
    check(dpmpyss_rnd_s0(-1, 0x7fffffff), 0);
    check(dpmpyss_rnd_s0(0, 0x7fffffff), 0);
    check(dpmpyss_rnd_s0(1, 0x7fffffff), 0);
    check(dpmpyss_rnd_s0(0x7fffffff, 0x7fffffff), 0x3fffffff);
}

int main()
{
int res;
#if CORE_HAS_CABAC
long long res64;
int pred;
#endif

memcpy(array, init, sizeof(array));
S4_storerhnew_rr(array, 4, 0xffff);
Expand Down Expand Up @@ -505,13 +546,17 @@ int main()
res = test_clrtnew(2, 7);
check(res, 7);

#if CORE_HAS_CABAC
res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred);
check64(res64, 0x357980003700010cLL);
check(pred, 0);

res64 = decbin(0xfLL, 0x1bLL, &pred);
check64(res64, 0x78000100LL);
check(pred, 1);
#else
puts("Skipping cabac tests");
#endif

res = auto_and();
check(res, 0);
Expand All @@ -522,6 +567,8 @@ int main()

test_count_trailing_zeros_ones();

test_dpmpyss_rnd_s0();

puts(err ? "FAIL" : "PASS");
return err;
}
136 changes: 136 additions & 0 deletions tests/tcg/hexagon/read_write_overlap.c
@@ -0,0 +1,136 @@
/*
* Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

/*
* Test instructions where the semantics write to the destination
* before all the operand reads have been completed.
*
* These instructions are problematic when we short-circuit the
* register writes because the destination and source operands could
* be the same TCGv.
*
* We test by forcing the read and write to be register r7.
*/

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

int err;

/*
 * Compare x with expect; on mismatch print the source location together
 * with both values and increment the global error count.
 */
static void __check(const char *filename, int line, int x, int expect)
{
    if (x == expect) {
        return;
    }
    printf("ERROR %s:%d - 0x%08x != 0x%08x\n",
           filename, line, x, expect);
    err++;
}

/* Wrapper around __check() that supplies the caller's file and line */
#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect))

/*
 * Copy X into r7, execute the immediate form of insert with r7 as both the
 * destination and the source, then copy r7 into RES.  WIDTH and OFFSET are
 * pasted into the instruction text, so they must be literal constants.
 */
#define insert(RES, X, WIDTH, OFFSET) \
    asm("r7 = %1\n\t" \
        "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \
        "%0 = r7\n\t" \
        : "=r"(RES) : "r"(X) : "r7")

/*
 * Check the immediate form of insert when source and destination are the
 * same register, for several width/offset combinations.
 */
static void test_insert(void)
{
    uint32_t res;

    /* width 8, offset 1 */
    insert(res, 0x12345678, 8, 1);
    check(res, 0x123456f0);
    /* width 0: the value is expected to come back unchanged */
    insert(res, 0x12345678, 0, 1);
    check(res, 0x12345678);
    /* width 20, offset 16: the field extends past bit 31 */
    insert(res, 0x12345678, 20, 16);
    check(res, 0x56785678);
}

/*
 * Run the register-pair form of insert with r7 as both destination and
 * source.  The pair operand carries width in its upper word and offset in
 * its lower word.
 */
static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset)
{
    uint32_t result;
    uint64_t control = ((uint64_t)width << 32) | offset;

    asm("r7 = %1\n\t"
        "r7 = insert(r7, %2)\n\t"
        "%0 = r7\n\t"
        : "=r"(result)
        : "r"(x), "r"(control)
        : "r7");
    return result;
}

/*
 * Check the register-pair form of insert with overlapping source and
 * destination, including width and offset values beyond the 32-bit word
 * size.
 */
static void test_insert_rp(void)
{
    check(insert_rp(0x12345678,   8,  1), 0x123456f0);
    /* widths 63 and 127 are expected to give the same result */
    check(insert_rp(0x12345678,  63,  8), 0x34567878);
    check(insert_rp(0x12345678, 127,  8), 0x34567878);
    check(insert_rp(0x12345678,   8, 24), 0x78345678);
    /* offset 63 is expected to leave the value unchanged, 64 to give zero */
    check(insert_rp(0x12345678,   8, 63), 0x12345678);
    check(insert_rp(0x12345678,   8, 64), 0x00000000);
}

/*
 * Run "Rd = vasrw(Rss, Rt)" with r7 serving as both the shift-amount
 * source (loaded from y) and the destination.
 */
static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y)
{
    uint32_t result;

    asm("r7 = %2\n\t"
        "r7 = vasrw(%1, r7)\n\t"
        "%0 = r7\n\t"
        : "=r"(result)
        : "r"(x), "r"(y)
        : "r7");
    return result;
}

/*
 * Check vasrw with the shift amount held in the destination register,
 * for shift amounts from 5 up to 128.
 */
static void test_asr_r_svw_trun(void)
{
    const uint64_t words = 0x1111111122222222ULL;

    check(asr_r_svw_trun(words, 5), 0x88881111);
    check(asr_r_svw_trun(words, 63), 0x00000000);
    check(asr_r_svw_trun(words, 64), 0x00000000);
    check(asr_r_svw_trun(words, 127), 0x22224444);
    check(asr_r_svw_trun(words, 128), 0x11112222);
    /* Same shift amount with an all-ones upper word */
    check(asr_r_svw_trun(0xffffffff22222222ULL, 128), 0xffff2222);
}

/* Run the swiz instruction with r7 as both source and destination. */
static inline uint32_t swiz(uint32_t x)
{
    uint32_t swapped;

    asm("r7 = %1\n\t"
        "r7 = swiz(r7)\n\t"
        "%0 = r7\n\t"
        : "=r"(swapped)
        : "r"(x)
        : "r7");
    return swapped;
}

/* Check swiz in place: the four bytes are expected in reverse order */
static void test_swiz(void)
{
    check(swiz(0x11223344), 0x44332211);
}

/* Run every overlap test, print PASS/FAIL and report it as exit status. */
int main()
{
    test_insert();
    test_insert_rp();
    test_asr_r_svw_trun();
    test_swiz();

    if (err) {
        puts("FAIL");
        return EXIT_FAILURE;
    }
    puts("PASS");
    return EXIT_SUCCESS;
}
90 changes: 90 additions & 0 deletions tests/tcg/hexagon/v68_hvx.c
@@ -0,0 +1,90 @@
/*
* Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <limits.h>

int err;

#include "hvx_misc.h"

/*
 * Extra vector-aligned input buffers for the v6mpy test; they are filled by
 * init_v6mpy_buffers() and loaded into v3 and v5 by test_v6mpy().
 */
MMVector v6mpy_buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector v6mpy_buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));

/*
 * Fill the two v6mpy input buffers with consecutive word values:
 * v6mpy_buffer0 counts up from 0 and v6mpy_buffer1 counts up from 17.
 */
static void init_v6mpy_buffers(void)
{
    const int words_per_vector = MAX_VEC_SIZE_BYTES / 4;

    for (int vec = 0; vec < BUFSIZE; vec++) {
        for (int word = 0; word < words_per_vector; word++) {
            /* Position of this word in the flattened buffer */
            const int seq = vec * words_per_vector + word;

            v6mpy_buffer0[vec].w[word] = seq;
            v6mpy_buffer1[vec].w[word] = seq + 17;
        }
    }
}

/*
 * Expected result words for test_v6mpy(), one row per output vector.
 * The values are pulled in from a separate include file.
 */
int v6mpy_ref[BUFSIZE][MAX_VEC_SIZE_BYTES / 4] = {
#include "v6mpy_ref.c.inc"
};

/*
 * Run v6mpy on each vector of the four input buffers and compare the low
 * vector of the result pair (v4) with the reference table v6mpy_ref.
 */
static void test_v6mpy(void)
{
    void *p00 = buffer0;
    void *p01 = v6mpy_buffer0;
    void *p10 = buffer1;
    void *p11 = v6mpy_buffer1;
    void *pout = output;

    memset(expect, 0xff, sizeof(expect));
    /* Size the fill by the buffer being filled, not by its sibling */
    memset(output, 0xff, sizeof(output));

    for (int i = 0; i < BUFSIZE; i++) {
        /* v3:2 and v5:4 are the two pair operands; v5:4 is also the result */
        asm("v2 = vmem(%0 + #0)\n\t"
            "v3 = vmem(%1 + #0)\n\t"
            "v4 = vmem(%2 + #0)\n\t"
            "v5 = vmem(%3 + #0)\n\t"
            "v5:4.w = v6mpy(v5:4.ub, v3:2.b, #1):v\n\t"
            "vmem(%4 + #0) = v4\n\t"
            : : "r"(p00), "r"(p01), "r"(p10), "r"(p11), "r"(pout)
            : "v2", "v3", "v4", "v5", "memory");
        /* Advance every pointer by one vector */
        p00 += sizeof(MMVector);
        p01 += sizeof(MMVector);
        p10 += sizeof(MMVector);
        p11 += sizeof(MMVector);
        pout += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
            expect[i].w[j] = v6mpy_ref[i][j];
        }
    }

    check_output_w(__LINE__, BUFSIZE);
}

/* Prepare the input buffers, run the v6mpy test and report PASS/FAIL. */
int main()
{
    init_buffers();
    init_v6mpy_buffers();

    test_v6mpy();

    if (err) {
        puts("FAIL");
        return 1;
    }
    puts("PASS");
    return 0;
}
186 changes: 186 additions & 0 deletions tests/tcg/hexagon/v68_scalar.c
@@ -0,0 +1,186 @@
/*
* Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/*
* Test the scalar core instructions that are new in v68
*/

int err;

/* 32-bit and 64-bit memory operands for the load/store/release tests */
static int buffer32[] = { 1, 2, 3, 4 };
static long long buffer64[] = { 5, 6, 7, 8 };

/*
 * Compare two 32-bit values; on mismatch print the reporting line with both
 * values and increment the global error count.
 */
static void __check32(int line, uint32_t result, uint32_t expect)
{
    if (result == expect) {
        return;
    }
    printf("ERROR at line %d: 0x%08x != 0x%08x\n",
           line, result, expect);
    err++;
}

/* Wrapper around __check32() that supplies the caller's line number */
#define check32(RES, EXP) __check32(__LINE__, RES, EXP)

/*
 * Compare two 64-bit values; on mismatch print the reporting line with both
 * values and increment the global error count.
 */
static void __check64(int line, uint64_t result, uint64_t expect)
{
    if (result == expect) {
        return;
    }
    printf("ERROR at line %d: 0x%016llx != 0x%016llx\n",
           line, result, expect);
    err++;
}

/* Wrapper around __check64() that supplies the caller's line number */
#define check64(RES, EXP) __check64(__LINE__, RES, EXP)

/*
 * Load a 32-bit word from p with the memw_aq instruction.
 *
 * The asm reads memory that is not listed as an operand, so it carries a
 * "memory" clobber, matching the store wrappers in this file.
 */
static inline int loadw_aq(int *p)
{
    int res;
    asm volatile("%0 = memw_aq(%1)\n\t"
                 : "=r"(res) : "r"(p) : "memory");
    return res;
}

/* Check memw_aq against the first two initial values of buffer32. */
static void test_loadw_aq(void)
{
    check32(loadw_aq(&buffer32[0]), 1);
    check32(loadw_aq(&buffer32[1]), 2);
}

/*
 * Load a 64-bit doubleword from p with the memd_aq instruction.
 *
 * The asm reads memory that is not listed as an operand, so it carries a
 * "memory" clobber, matching the store wrappers in this file.
 */
static inline long long loadd_aq(long long *p)
{
    long long res;
    asm volatile("%0 = memd_aq(%1)\n\t"
                 : "=r"(res) : "r"(p) : "memory");
    return res;
}

/* Check memd_aq against the last two initial values of buffer64. */
static void test_loadd_aq(void)
{
    check64(loadd_aq(&buffer64[2]), 7);
    check64(loadd_aq(&buffer64[3]), 8);
}

/* Execute "release(Rs):at" with Rs holding the address p. */
static inline void release_at(int *p)
{
    asm volatile("release(%0):at\n\t"
                 : /* no outputs */
                 : "r"(p));
}

/*
 * Execute release:at on two words of buffer32 and verify that each word
 * still holds its initial value.  buffer32 is a 32-bit buffer, so the
 * 32-bit checker is used.
 */
static void test_release_at(void)
{
    release_at(&buffer32[2]);
    check32(buffer32[2], 3);
    release_at(&buffer32[3]);
    check32(buffer32[3], 4);
}

/* Execute "release(Rs):st" with Rs holding the address p. */
static inline void release_st(int *p)
{
    asm volatile("release(%0):st\n\t"
                 : /* no outputs */
                 : "r"(p));
}

/*
 * Execute release:st on two words of buffer32 and verify that each word
 * still holds its initial value.  buffer32 is a 32-bit buffer, so the
 * 32-bit checker is used.
 */
static void test_release_st(void)
{
    release_st(&buffer32[2]);
    check32(buffer32[2], 3);
    release_st(&buffer32[3]);
    check32(buffer32[3], 4);
}

/* Store the 32-bit value val at p using "memw_rl(Rs):at = Rt". */
static inline void storew_rl_at(int *p, int val)
{
    asm volatile("memw_rl(%0):at = %1\n\t"
                 : /* no outputs */
                 : "r"(p), "r"(val)
                 : "memory");
}

/*
 * Store two words with memw_rl:at and read them back.  buffer32 is a 32-bit
 * buffer, so the 32-bit checker is used.
 */
static void test_storew_rl_at(void)
{
    storew_rl_at(&buffer32[2], 9);
    check32(buffer32[2], 9);
    storew_rl_at(&buffer32[3], 10);
    check32(buffer32[3], 10);
}

/* Store the 64-bit value val at p using "memd_rl(Rs):at = Rtt". */
static inline void stored_rl_at(long long *p, long long val)
{
    asm volatile("memd_rl(%0):at = %1\n\t"
                 : /* no outputs */
                 : "r"(p), "r"(val)
                 : "memory");
}

/* Store two doublewords with memd_rl:at and read them back. */
static void test_stored_rl_at(void)
{
    long long *first = &buffer64[2];
    long long *second = &buffer64[3];

    stored_rl_at(first, 11);
    check64(*first, 11);
    stored_rl_at(second, 12);
    check64(*second, 12);
}

/* Store the 32-bit value val at p using "memw_rl(Rs):st = Rt". */
static inline void storew_rl_st(int *p, int val)
{
    asm volatile("memw_rl(%0):st = %1\n\t"
                 : /* no outputs */
                 : "r"(p), "r"(val)
                 : "memory");
}

/*
 * Store two words with memw_rl:st and read them back.  buffer32 is a 32-bit
 * buffer, so the 32-bit checker is used.
 */
static void test_storew_rl_st(void)
{
    storew_rl_st(&buffer32[0], 13);
    check32(buffer32[0], 13);
    storew_rl_st(&buffer32[1], 14);
    check32(buffer32[1], 14);
}

/* Store the 64-bit value val at p using "memd_rl(Rs):st = Rtt". */
static inline void stored_rl_st(long long *p, long long val)
{
    asm volatile("memd_rl(%0):st = %1\n\t"
                 : /* no outputs */
                 : "r"(p), "r"(val)
                 : "memory");
}

/*
 * Store two doublewords with memd_rl:st and read them back.
 *
 * The two stores use different values so that a store landing in the wrong
 * slot cannot go unnoticed.
 */
static void test_stored_rl_st(void)
{
    stored_rl_st(&buffer64[0], 15);
    check64(buffer64[0], 15);
    stored_rl_st(&buffer64[1], 16);
    check64(buffer64[1], 16);
}

/* Run all v68 scalar tests, print PASS/FAIL and report it as exit status. */
int main()
{
    /* Acquire loads run first, while the buffers hold their initial values */
    test_loadw_aq();
    test_loadd_aq();

    test_release_at();
    test_release_st();

    test_storew_rl_at();
    test_stored_rl_at();
    test_storew_rl_st();
    test_stored_rl_st();

    if (err) {
        puts("FAIL");
        return 1;
    }
    puts("PASS");
    return 0;
}
318 changes: 318 additions & 0 deletions tests/tcg/hexagon/v69_hvx.c
@@ -0,0 +1,318 @@
/*
* Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <limits.h>

int err;

#include "hvx_misc.h"

/*
 * Add the rounding constant for a right shift by SHAMT: 1 << (SHAMT - 1)
 * when SHAMT is positive, nothing otherwise.  The caller does the shift.
 */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))

/*
 * Saturate VAL to an unsigned byte: values that fit in 8 bits pass through,
 * values that are negative as int32_t become 0, all others become 0xff.
 * VAL is evaluated more than once.
 */
#define fVSATUB(VAL) \
    ((((VAL) & 0xffLL) == (VAL)) ? \
     (VAL) : \
     ((((int32_t)(VAL)) < 0) ? 0 : 0xff))

/*
 * Saturate VAL to an unsigned halfword: values that fit in 16 bits pass
 * through, values that are negative as int32_t become 0, all others become
 * 0xffff.  VAL is evaluated more than once.
 */
#define fVSATUH(VAL) \
    ((((VAL) & 0xffffLL) == (VAL)) ? \
     (VAL) : \
     ((((int32_t)(VAL)) < 0) ? 0 : 0xffff))

/*
 * Check "Vd.ub = vasr(Vuu.uh, Vv.ub):rnd:sat".
 *
 * Each iteration consumes two vectors of buffer0 (the v5:4 pair) and one
 * vector of buffer1 (the per-byte shift amounts) and produces one output
 * vector.  The expected value is built with the fVROUND/fVSATUB model.
 */
static void test_vasrvuhubrndsat(void)
{
    void *src = buffer0;
    void *shifts = buffer1;
    void *dst = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE / 2; i++) {
        asm("v4 = vmem(%0 + #0)\n\t"
            "v5 = vmem(%0 + #1)\n\t"
            "v6 = vmem(%1 + #0)\n\t"
            "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
            "vmem(%2) = v5\n\t"
            : : "r"(src), "r"(shifts), "r"(dst)
            : "v4", "v5", "v6", "memory");
        src += sizeof(MMVector) * 2;
        shifts += sizeof(MMVector);
        dst += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
            /* Only the low three bits of each shift byte are used */
            const int shamt0 = buffer1[i].ub[2 * j + 0] & 0x7;
            const int shamt1 = buffer1[i].ub[2 * j + 1] & 0x7;
            /* Even result bytes come from the low vector, odd from the high */
            const uint8_t byte0 =
                fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt0) >> shamt0);
            const uint8_t byte1 =
                fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt1) >> shamt1);

            expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
        }
    }

    check_output_h(__LINE__, BUFSIZE / 2);
}

/*
 * Check "Vd.ub = vasr(Vuu.uh, Vv.ub):sat" (no rounding).
 *
 * Each iteration consumes two vectors of buffer0 (the v5:4 pair) and one
 * vector of buffer1 (the per-byte shift amounts) and produces one output
 * vector.  The expected value is built with the fVSATUB model.
 */
static void test_vasrvuhubsat(void)
{
    void *src = buffer0;
    void *shifts = buffer1;
    void *dst = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE / 2; i++) {
        asm("v4 = vmem(%0 + #0)\n\t"
            "v5 = vmem(%0 + #1)\n\t"
            "v6 = vmem(%1 + #0)\n\t"
            "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
            "vmem(%2) = v5\n\t"
            : : "r"(src), "r"(shifts), "r"(dst)
            : "v4", "v5", "v6", "memory");
        src += sizeof(MMVector) * 2;
        shifts += sizeof(MMVector);
        dst += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
            /* Only the low three bits of each shift byte are used */
            const int shamt0 = buffer1[i].ub[2 * j + 0] & 0x7;
            const int shamt1 = buffer1[i].ub[2 * j + 1] & 0x7;
            /* Even result bytes come from the low vector, odd from the high */
            const uint8_t byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt0);
            const uint8_t byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt1);

            expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
        }
    }

    check_output_h(__LINE__, BUFSIZE / 2);
}

/*
 * Check "Vd.uh = vasr(Vuu.w, Vv.uh):rnd:sat".
 *
 * Each iteration consumes two vectors of buffer0 (the v5:4 pair) and one
 * vector of buffer1 (the per-halfword shift amounts) and produces one
 * output vector.  The expected value is built with the fVROUND/fVSATUH
 * model.
 */
static void test_vasrvwuhrndsat(void)
{
    void *p0 = buffer0;
    void *p1 = buffer1;
    void *pout = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE / 2; i++) {
        asm("v4 = vmem(%0 + #0)\n\t"
            "v5 = vmem(%0 + #1)\n\t"
            "v6 = vmem(%1 + #0)\n\t"
            "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
            "vmem(%2) = v5\n\t"
            : : "r"(p0), "r"(p1), "r"(pout)
            : "v4", "v5", "v6", "memory");
        p0 += sizeof(MMVector) * 2;
        p1 += sizeof(MMVector);
        pout += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
            int shamt;
            uint16_t half0;
            uint16_t half1;

            /* Only the low four bits of each shift halfword are used */
            shamt = buffer1[i].uh[2 * j + 0] & 0xf;
            half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt);
            shamt = buffer1[i].uh[2 * j + 1] & 0xf;
            half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt);
            /*
             * Shift in unsigned arithmetic: half1 would otherwise be
             * promoted to int, and moving a set bit 15 into the sign bit
             * is undefined behaviour.
             */
            expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff);
        }
    }

    check_output_w(__LINE__, BUFSIZE / 2);
}

/*
 * Check "Vd.uh = vasr(Vuu.w, Vv.uh):sat" (no rounding).
 *
 * Each iteration consumes two vectors of buffer0 (the v5:4 pair) and one
 * vector of buffer1 (the per-halfword shift amounts) and produces one
 * output vector.  The expected value is built with the fVSATUH model.
 */
static void test_vasrvwuhsat(void)
{
    void *p0 = buffer0;
    void *p1 = buffer1;
    void *pout = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE / 2; i++) {
        asm("v4 = vmem(%0 + #0)\n\t"
            "v5 = vmem(%0 + #1)\n\t"
            "v6 = vmem(%1 + #0)\n\t"
            "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
            "vmem(%2) = v5\n\t"
            : : "r"(p0), "r"(p1), "r"(pout)
            : "v4", "v5", "v6", "memory");
        p0 += sizeof(MMVector) * 2;
        p1 += sizeof(MMVector);
        pout += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
            int shamt;
            uint16_t half0;
            uint16_t half1;

            /* Only the low four bits of each shift halfword are used */
            shamt = buffer1[i].uh[2 * j + 0] & 0xf;
            half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt);
            shamt = buffer1[i].uh[2 * j + 1] & 0xf;
            half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt);
            /*
             * Shift in unsigned arithmetic: half1 would otherwise be
             * promoted to int, and moving a set bit 15 into the sign bit
             * is undefined behaviour.
             */
            expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff);
        }
    }

    check_output_w(__LINE__, BUFSIZE / 2);
}

/*
 * Check a .tmp vector assignment.
 *
 * v12 is splatted with 1 and v13 with 2.  Inside the packet v12 is assigned
 * from v13 as .tmp, so the vadd in the same packet is expected to see 2,
 * while the vadd in the following packet is expected to see the old value 1.
 * The stored sum is therefore the input word plus 3.
 */
static void test_vassign_tmp(void)
{
    void *src = buffer0;
    void *dst = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE; i++) {
        asm("v3 = vmem(%0 + #0)\n\t"
            "r1 = #1\n\t"
            "v12 = vsplat(r1)\n\t"
            "r1 = #2\n\t"
            "v13 = vsplat(r1)\n\t"
            "{\n\t"
            "   v12.tmp = v13\n\t"
            "   v4.w = vadd(v12.w, v3.w)\n\t"
            "}\n\t"
            "v4.w = vadd(v4.w, v12.w)\n\t"
            "vmem(%1 + #0) = v4\n\t"
            : : "r"(src), "r"(dst)
            : "r1", "v3", "v4", "v12", "v13", "memory");
        src += sizeof(MMVector);
        dst += sizeof(MMVector);

        /* 2 (temporary value in the packet) + 1 (old value afterwards) */
        for (int word = 0; word < MAX_VEC_SIZE_BYTES / 4; word++) {
            expect[i].w[word] = buffer0[i].w[word] + 3;
        }
    }

    check_output_w(__LINE__, BUFSIZE);
}

/*
 * Check a .tmp vcombine into the pair v13:12.
 *
 * v12..v15 are splatted with 1..4.  Inside the packet the pair is written
 * as .tmp from (v15, v14), so reads of v12 and v13 in that packet are
 * expected to see 3 and 4; the following packets are expected to see the
 * old values 1 and 2.  The stored sum is the input word plus
 * 3 + 1 + 2 + 4 = 10.
 */
static void test_vcombine_tmp(void)
{
    void *src = buffer0;
    void *unused_src = buffer1;
    void *dst = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE; i++) {
        /* Operand %1 is passed in but not referenced by the template */
        asm("v3 = vmem(%0 + #0)\n\t"
            "r1 = #1\n\t"
            "v12 = vsplat(r1)\n\t"
            "r1 = #2\n\t"
            "v13 = vsplat(r1)\n\t"
            "r1 = #3\n\t"
            "v14 = vsplat(r1)\n\t"
            "r1 = #4\n\t"
            "v15 = vsplat(r1)\n\t"
            "{\n\t"
            "   v13:12.tmp = vcombine(v15, v14)\n\t"
            "   v4.w = vadd(v12.w, v3.w)\n\t"
            "   v16 = v13\n\t"
            "}\n\t"
            "v4.w = vadd(v4.w, v12.w)\n\t"
            "v4.w = vadd(v4.w, v13.w)\n\t"
            "v4.w = vadd(v4.w, v16.w)\n\t"
            "vmem(%2 + #0) = v4\n\t"
            : : "r"(src), "r"(unused_src), "r"(dst)
            : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
        src += sizeof(MMVector);
        unused_src += sizeof(MMVector);
        dst += sizeof(MMVector);

        for (int word = 0; word < MAX_VEC_SIZE_BYTES / 4; word++) {
            expect[i].w[word] = buffer0[i].w[word] + 10;
        }
    }

    check_output_w(__LINE__, BUFSIZE);
}

/*
 * Check "Vd.uh = vmpy(Vu.uh, Vv.uh):>>16": each result halfword is expected
 * to be the upper 16 bits of the 32-bit unsigned product of the inputs.
 */
static void test_vmpyuhvs(void)
{
    void *p0 = buffer0;
    void *p1 = buffer1;
    void *pout = output;

    memset(expect, 0xaa, sizeof(expect));
    memset(output, 0xbb, sizeof(output));

    for (int i = 0; i < BUFSIZE; i++) {
        asm("v4 = vmem(%0 + #0)\n\t"
            "v5 = vmem(%1 + #0)\n\t"
            "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t"
            "vmem(%2) = v4\n\t"
            : : "r"(p0), "r"(p1), "r"(pout)
            : "v4", "v5", "memory");
        p0 += sizeof(MMVector);
        p1 += sizeof(MMVector);
        pout += sizeof(MMVector);

        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
            /*
             * Multiply as uint32_t: 16-bit operands would otherwise be
             * promoted to signed int, and large products overflow it
             * (undefined behaviour).
             */
            uint32_t product = (uint32_t)buffer0[i].uh[j] * buffer1[i].uh[j];

            expect[i].uh[j] = product >> 16;
        }
    }

    check_output_h(__LINE__, BUFSIZE);
}

/* Run all v69 HVX tests, print PASS/FAIL and report it as exit status. */
int main()
{
    init_buffers();

    /* Narrowing vector shifts with saturation */
    test_vasrvuhubrndsat();
    test_vasrvuhubsat();
    test_vasrvwuhrndsat();
    test_vasrvwuhsat();

    /* Temporary (.tmp) vector destinations */
    test_vassign_tmp();
    test_vcombine_tmp();

    test_vmpyuhvs();

    if (err) {
        puts("FAIL");
        return 1;
    }
    puts("PASS");
    return 0;
}