Skip to content

Commit

Permalink
Hexagon (target/hexagon) Use direct block chaining for tight loops
Browse files Browse the repository at this point in the history
Direct block chaining is documented here:
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining

Hexagon inner loops end with the endloop0 instruction.
To go back to the beginning of the loop, this instruction writes to PC
from register SA0 (start address 0).  To use direct block chaining, we
have to assign PC with a constant value.  So, we specialize the code
generation when the start of the translation block is equal to SA0.

When this is the case, we defer the compare/branch from endloop0 to
gen_end_tb.  When this is done, we can assign the start address of the TB
to PC.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Message-Id: <20221108162906.3166-12-tsimpson@quicinc.com>
  • Loading branch information
taylorsimpson committed Dec 16, 2022
1 parent 1b9a7f2 commit 564b204
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 5 deletions.
13 changes: 8 additions & 5 deletions target/hexagon/cpu.h
Expand Up @@ -25,6 +25,7 @@
#include "mmvec/mmvec.h"
#include "qom/object.h"
#include "hw/core/cpu.h"
#include "hw/registerfields.h"

#define NUM_PREGS 4
#define TOTAL_PER_THREAD_REGS 64
Expand Down Expand Up @@ -152,16 +153,18 @@ struct ArchCPU {

#include "cpu_bits.h"

/*
 * TB flag IS_TIGHT_LOOP (shift 0, length 1).  cpu_get_tb_cpu_state() sets it
 * when the PC of the block equals SA0.
 */
FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)

/*
 * Report the state that identifies a translation block.
 *
 * @env:     CPU state to read PC and SA0 from
 * @pc:      receives the current PC
 * @cs_base: always receives 0
 * @flags:   receives the TB flags; IS_TIGHT_LOOP is 1 when PC == SA0,
 *           otherwise 0
 */
static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc,
                                        target_ulong *cs_base, uint32_t *flags)
{
#ifndef CONFIG_USER_ONLY
#error System mode not supported on Hexagon yet
#endif
    uint32_t tb_flags = 0;

    *pc = env->gpr[HEX_REG_PC];
    *cs_base = 0;

    /* Depositing 0 leaves the (already clear) flag bit untouched. */
    tb_flags = FIELD_DP32(tb_flags, TB_FLAGS, IS_TIGHT_LOOP,
                          *pc == env->gpr[HEX_REG_SA0]);
    *flags = tb_flags;
}

static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
Expand Down
3 changes: 3 additions & 0 deletions target/hexagon/gen_tcg.h
Expand Up @@ -620,6 +620,9 @@
#define fGEN_TCG_J2_callf(SHORTCODE) \
gen_cond_call(ctx, PuV, TCG_COND_NE, riV)

/*
 * TCG generation for J2_endloop0: expands to a call to gen_endloop0() with
 * the ctx in scope at the expansion site; the SHORTCODE argument is unused.
 */
#define fGEN_TCG_J2_endloop0(SHORTCODE) \
gen_endloop0(ctx)

/*
* Compound compare and jump instructions
* Here is a primer to understand the tag names
Expand Down
84 changes: 84 additions & 0 deletions target/hexagon/genptr.c
Expand Up @@ -497,6 +497,33 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off,
}
}

/*
 * Deposit val into the USR field identified by 'field'.
 *
 * The write targets hex_new_value[HEX_REG_USR]; the bit position and width
 * of the field come from reg_field_info[field].
 */
static void gen_set_usr_field(int field, TCGv val)
{
    TCGv usr = hex_new_value[HEX_REG_USR];

    tcg_gen_deposit_tl(usr, usr, val,
                       reg_field_info[field].offset,
                       reg_field_info[field].width);
}

static void gen_set_usr_fieldi(int field, int x)
{
if (reg_field_info[field].width == 1) {
target_ulong bit = 1 << reg_field_info[field].offset;
if ((x & 1) == 1) {
tcg_gen_ori_tl(hex_new_value[HEX_REG_USR],
hex_new_value[HEX_REG_USR],
bit);
} else {
tcg_gen_andi_tl(hex_new_value[HEX_REG_USR],
hex_new_value[HEX_REG_USR],
~bit);
}
} else {
TCGv val = tcg_constant_tl(x);
gen_set_usr_field(field, val);
}
}

static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2)
{
TCGv one = tcg_constant_tl(0xff);
Expand Down Expand Up @@ -636,6 +663,63 @@ static void gen_cond_call(DisasContext *ctx, TCGv pred,
gen_set_label(skip);
}

/*
 * Emit TCG for the endloop0 instruction.
 *
 * Three steps are generated, in this order:
 *   1. if USR.LPCFG == 1, P3 is written with 0xff
 *   2. if USR.LPCFG != 0, USR.LPCFG is decremented
 *   3. unless ctx->is_tight_loop is set, the loop-back itself:
 *      when LC0 > 1, jump to SA0 and decrement LC0
 *
 * When ctx->is_tight_loop is set, step 3 is skipped here and handled at the
 * end of the TB so that direct block chaining can be used.
 */
static void gen_endloop0(DisasContext *ctx)
{
    TCGv cfg = tcg_temp_local_new();

    GET_USR_FIELD(USR_LPCFG, cfg);

    /* Step 1: LPCFG == 1 -> new P3 value is 0xff and P3 is marked written. */
    TCGLabel *p3_done = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_NE, cfg, 1, p3_done);
    tcg_gen_movi_tl(hex_new_pred_value[3], 0xff);
    tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3);
    gen_set_label(p3_done);

    /* Step 2: LPCFG != 0 -> store LPCFG - 1 back into USR. */
    TCGLabel *cfg_done = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cfg, 0, cfg_done);
    tcg_gen_subi_tl(cfg, cfg, 1);
    SET_USR_FIELD(USR_LPCFG, cfg);
    gen_set_label(cfg_done);

    /* Step 3: only emitted here when the TB is not a tight loop. */
    if (!ctx->is_tight_loop) {
        /* LC0 <= 1 (unsigned) falls through without looping back. */
        TCGLabel *no_loopback = gen_new_label();
        tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, no_loopback);
        gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
        tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0],
                        hex_gpr[HEX_REG_LC0], 1);
        gen_set_label(no_loopback);
    }

    tcg_temp_free(cfg);
}

static void gen_cmp_jumpnv(DisasContext *ctx,
TCGCond cond, TCGv val, TCGv src, int pc_off)
{
Expand Down
33 changes: 33 additions & 0 deletions target/hexagon/translate.c
Expand Up @@ -135,6 +135,8 @@ static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)

static void gen_end_tb(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;

gen_exec_counters(ctx);

if (ctx->branch_cond != TCG_COND_NEVER) {
Expand All @@ -147,6 +149,18 @@ static void gen_end_tb(DisasContext *ctx)
} else {
gen_goto_tb(ctx, 0, ctx->branch_dest);
}
} else if (ctx->is_tight_loop &&
pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
/*
* When we're in a tight loop, we defer the endloop0 processing
* to take advantage of direct block chaining
*/
TCGLabel *skip = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
gen_goto_tb(ctx, 0, ctx->base.tb->pc);
gen_set_label(skip);
gen_goto_tb(ctx, 1, ctx->next_PC);
} else {
tcg_gen_lookup_and_goto_ptr();
}
Expand Down Expand Up @@ -337,6 +351,15 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
*/
bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
rnum == HEX_REG_USR;

/* LC0/LC1 is conditionally written by endloop instructions */
if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
(opcode == J2_endloop0 ||
opcode == J2_endloop1 ||
opcode == J2_endloop01)) {
is_predicated = true;
}

if (is_predicated && !is_preloaded(ctx, rnum)) {
tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
}
Expand Down Expand Up @@ -420,6 +443,14 @@ static void gen_reg_writes(DisasContext *ctx)
int reg_num = ctx->reg_log[i];

tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);

/*
* ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
* If we write to SA0, we have to turn off tight loop handling.
*/
if (reg_num == HEX_REG_SA0) {
ctx->is_tight_loop = false;
}
}
}

Expand Down Expand Up @@ -833,12 +864,14 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
CPUState *cs)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
uint32_t hex_flags = dcbase->tb->flags;

ctx->mem_idx = MMU_USER_IDX;
ctx->num_packets = 0;
ctx->num_insns = 0;
ctx->num_hvx_insns = 0;
ctx->branch_cond = TCG_COND_NEVER;
ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
}

static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
Expand Down
1 change: 1 addition & 0 deletions target/hexagon/translate.h
Expand Up @@ -59,6 +59,7 @@ typedef struct DisasContext {
bool pre_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
} DisasContext;

static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
Expand Down

0 comments on commit 564b204

Please sign in to comment.