diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index ff8c26272d5e..1d89e11a1a37 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -25,6 +25,7 @@ #include "mmvec/mmvec.h" #include "qom/object.h" #include "hw/core/cpu.h" +#include "hw/registerfields.h" #define NUM_PREGS 4 #define TOTAL_PER_THREAD_REGS 64 @@ -152,16 +153,18 @@ struct ArchCPU { #include "cpu_bits.h" +FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) + static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) { + uint32_t hex_flags = 0; *pc = env->gpr[HEX_REG_PC]; *cs_base = 0; -#ifdef CONFIG_USER_ONLY - *flags = 0; -#else -#error System mode not supported on Hexagon yet -#endif + if (*pc == env->gpr[HEX_REG_SA0]) { + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1); + } + *flags = hex_flags; } static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 3583f390e935..19697b42a501 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -620,6 +620,9 @@ #define fGEN_TCG_J2_callf(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_NE, riV) +#define fGEN_TCG_J2_endloop0(SHORTCODE) \ + gen_endloop0(ctx) + /* * Compound compare and jump instructions * Here is a primer to understand the tag names diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index ee0f86fab20a..a4a79c8454ef 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -497,6 +497,33 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off, } } +static void gen_set_usr_field(int field, TCGv val) +{ + tcg_gen_deposit_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR], + val, + reg_field_info[field].offset, + reg_field_info[field].width); +} + +static void gen_set_usr_fieldi(int field, int x) +{ + if (reg_field_info[field].width == 1) { + target_ulong bit = 1 << reg_field_info[field].offset; + if ((x & 1) == 1) { + tcg_gen_ori_tl(hex_new_value[HEX_REG_USR], + hex_new_value[HEX_REG_USR], + bit); + } else { + tcg_gen_andi_tl(hex_new_value[HEX_REG_USR], + hex_new_value[HEX_REG_USR], + ~bit); + } + } else { + TCGv val = tcg_constant_tl(x); + gen_set_usr_field(field, val); + } +} + static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2) { TCGv one = tcg_constant_tl(0xff); @@ -636,6 +663,63 @@ static void gen_cond_call(DisasContext *ctx, TCGv pred, gen_set_label(skip); } +static void gen_endloop0(DisasContext *ctx) +{ + TCGv lpcfg = tcg_temp_local_new(); + + GET_USR_FIELD(USR_LPCFG, lpcfg); + + /* + * if (lpcfg == 1) { + * hex_new_pred_value[3] = 0xff; + * hex_pred_written |= 1 << 3; + * } + */ + TCGLabel *label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); + { + tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); + tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + } + gen_set_label(label1); + + /* + * if (lpcfg) { + * SET_USR_FIELD(USR_LPCFG, lpcfg - 1); + * } + */ + TCGLabel *label2 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2); + { + tcg_gen_subi_tl(lpcfg, lpcfg, 1); + SET_USR_FIELD(USR_LPCFG, lpcfg); + } + gen_set_label(label2); + + /* + * If we're in a tight loop, we'll do this at the end of the TB to take + * advantage of direct block chaining. + */ + if (!ctx->is_tight_loop) { + /* + * if (hex_gpr[HEX_REG_LC0] > 1) { + * PC = hex_gpr[HEX_REG_SA0]; + * hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1; + * } + */ + TCGLabel *label3 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3); + { + gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]); + tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0], + hex_gpr[HEX_REG_LC0], 1); + } + gen_set_label(label3); + } + + tcg_temp_free(lpcfg); +} + static void gen_cmp_jumpnv(DisasContext *ctx, TCGCond cond, TCGv val, TCGv src, int pc_off) { diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index f5ef54f03975..75f28e08adb7 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -135,6 +135,8 @@ static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest) static void gen_end_tb(DisasContext *ctx) { + Packet *pkt = ctx->pkt; + gen_exec_counters(ctx); if (ctx->branch_cond != TCG_COND_NEVER) { @@ -147,6 +149,18 @@ static void gen_end_tb(DisasContext *ctx) } else { gen_goto_tb(ctx, 0, ctx->branch_dest); } + } else if (ctx->is_tight_loop && + pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { + /* + * When we're in a tight loop, we defer the endloop0 processing + * to take advantage of direct block chaining + */ + TCGLabel *skip = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); + tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); + gen_goto_tb(ctx, 0, ctx->base.tb->pc); + gen_set_label(skip); + gen_goto_tb(ctx, 1, ctx->next_PC); } else { tcg_gen_lookup_and_goto_ptr(); } @@ -337,6 +351,15 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) */ bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || rnum == HEX_REG_USR; + + /* LC0/LC1 is conditionally written by endloop instructions */ + if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && + (opcode == J2_endloop0 || + opcode == J2_endloop1 || + opcode == J2_endloop01)) { + is_predicated = true; + } + if (is_predicated && !is_preloaded(ctx, rnum)) { tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]); } @@ -420,6 +443,14 @@ static void gen_reg_writes(DisasContext *ctx) int reg_num = ctx->reg_log[i]; tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); + + /* + * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. + * If we write to SA0, we have to turn off tight loop handling. + */ + if (reg_num == HEX_REG_SA0) { + ctx->is_tight_loop = false; + } } } @@ -833,12 +864,14 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + uint32_t hex_flags = dcbase->tb->flags; ctx->mem_idx = MMU_USER_IDX; ctx->num_packets = 0; ctx->num_insns = 0; ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; + ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index aacf0b092162..d971f4f095eb 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -59,6 +59,7 @@ typedef struct DisasContext { bool pre_commit; TCGCond branch_cond; target_ulong branch_dest; + bool is_tight_loop; } DisasContext; static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)