diff --git a/include/qemu/log.h b/include/qemu/log.h index 9ab8f511883b..00bf37fc0f9a 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void) #define CPU_LOG_TB_NOCHAIN (1 << 13) #define CPU_LOG_PAGE (1 << 14) #define LOG_TRACE (1 << 15) +#define CPU_LOG_TB_OP_IND (1 << 16) /* Returns true if a bit is set in the current loglevel mask */ diff --git a/tcg/optimize.c b/tcg/optimize.c index 8df7fc7f6918..cffe89b52500 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -82,33 +82,6 @@ static void init_temp_info(TCGArg temp) } } -static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) -{ - int oi = s->gen_next_op_idx; - int pi = s->gen_next_parm_idx; - int prev = old_op->prev; - int next = old_op - s->gen_op_buf; - TCGOp *new_op; - - tcg_debug_assert(oi < OPC_BUF_SIZE); - tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); - s->gen_next_op_idx = oi + 1; - s->gen_next_parm_idx = pi + nargs; - - new_op = &s->gen_op_buf[oi]; - *new_op = (TCGOp){ - .opc = opc, - .args = pi, - .prev = prev, - .next = next - }; - s->gen_op_buf[prev].next = oi; - old_op->prev = oi; - - return new_op; -} - static int op_bits(TCGOpcode op) { const TCGOpDef *def = &tcg_op_defs[op]; @@ -1116,7 +1089,7 @@ void tcg_optimize(TCGContext *s) uint64_t a = ((uint64_t)ah << 32) | al; uint64_t b = ((uint64_t)bh << 32) | bl; TCGArg rl, rh; - TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); TCGArg *args2 = &s->gen_opparam_buf[op2->args]; if (opc == INDEX_op_add2_i32) { @@ -1142,7 +1115,7 @@ void tcg_optimize(TCGContext *s) uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; TCGArg rl, rh; - TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); TCGArg *args2 = &s->gen_opparam_buf[op2->args]; rl = args[0]; diff --git a/tcg/tcg.c b/tcg/tcg.c index 
3c1f5262a84b..42417bdc9254 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -531,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, #endif if (!base_ts->fixed_reg) { - indirect_reg = 1; + /* We do not support double-indirect registers. */ + tcg_debug_assert(!base_ts->indirect_reg); base_ts->indirect_base = 1; + s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 + ? 2 : 1); + indirect_reg = 1; } if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { @@ -1336,9 +1340,66 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) #endif } +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, int nargs) +{ + int oi = s->gen_next_op_idx; + int pi = s->gen_next_parm_idx; + int prev = old_op->prev; + int next = old_op - s->gen_op_buf; + TCGOp *new_op; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); + s->gen_next_op_idx = oi + 1; + s->gen_next_parm_idx = pi + nargs; + + new_op = &s->gen_op_buf[oi]; + *new_op = (TCGOp){ + .opc = opc, + .args = pi, + .prev = prev, + .next = next + }; + s->gen_op_buf[prev].next = oi; + old_op->prev = oi; + + return new_op; +} + +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, int nargs) +{ + int oi = s->gen_next_op_idx; + int pi = s->gen_next_parm_idx; + int prev = old_op - s->gen_op_buf; + int next = old_op->next; + TCGOp *new_op; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); + s->gen_next_op_idx = oi + 1; + s->gen_next_parm_idx = pi + nargs; + + new_op = &s->gen_op_buf[oi]; + *new_op = (TCGOp){ + .opc = opc, + .args = pi, + .prev = prev, + .next = next + }; + s->gen_op_buf[next].prev = oi; + old_op->next = oi; + + return new_op; +} + #define TS_DEAD 1 #define TS_MEM 2 +#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) +#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) + /* liveness analysis: end of function: all temps are dead, and globals should be in memory. 
*/ static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state) @@ -1364,13 +1425,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state) /* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ -static void tcg_liveness_analysis(TCGContext *s) +static void liveness_pass_1(TCGContext *s, uint8_t *temp_state) { - uint8_t *temp_state; - int oi, oi_prev; int nb_globals = s->nb_globals; + int oi, oi_prev; - temp_state = tcg_malloc(s->nb_temps); tcg_la_func_end(s, temp_state); for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) { @@ -1593,6 +1652,165 @@ static void tcg_liveness_analysis(TCGContext *s) } } +/* Liveness analysis: Convert indirect regs to direct temporaries. */ +static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state) +{ + int nb_globals = s->nb_globals; + int16_t *dir_temps; + int i, oi, oi_next; + bool changes = false; + + dir_temps = tcg_malloc(nb_globals * sizeof(int16_t)); + memset(dir_temps, 0, nb_globals * sizeof(int16_t)); + + /* Create a temporary for each indirect global. */ + for (i = 0; i < nb_globals; ++i) { + TCGTemp *its = &s->temps[i]; + if (its->indirect_reg) { + TCGTemp *dts = tcg_temp_alloc(s); + dts->type = its->type; + dts->base_type = its->base_type; + dir_temps[i] = temp_idx(s, dts); + } + } + + memset(temp_state, TS_DEAD, nb_globals); + + for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) { + TCGOp *op = &s->gen_op_buf[oi]; + TCGArg *args = &s->gen_opparam_buf[op->args]; + TCGOpcode opc = op->opc; + const TCGOpDef *def = &tcg_op_defs[opc]; + TCGLifeData arg_life = op->life; + int nb_iargs, nb_oargs, call_flags; + TCGArg arg, dir; + + oi_next = op->next; + + if (opc == INDEX_op_call) { + nb_oargs = op->callo; + nb_iargs = op->calli; + call_flags = args[nb_oargs + nb_iargs + 1]; + } else { + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + /* Set flags similar to how calls require. 
*/ + if (def->flags & TCG_OPF_BB_END) { + /* Like writing globals: save_globals */ + call_flags = 0; + } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + /* Like reading globals: sync_globals */ + call_flags = TCG_CALL_NO_WRITE_GLOBALS; + } else { + /* No effect on globals. */ + call_flags = (TCG_CALL_NO_READ_GLOBALS | + TCG_CALL_NO_WRITE_GLOBALS); + } + } + + /* Make sure that input arguments are available. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; + /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */ + if (arg < nb_globals) { + dir = dir_temps[arg]; + if (dir != 0 && temp_state[arg] == TS_DEAD) { + TCGTemp *its = &s->temps[arg]; + TCGOpcode lopc = (its->type == TCG_TYPE_I32 + ? INDEX_op_ld_i32 + : INDEX_op_ld_i64); + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGArg *largs = &s->gen_opparam_buf[lop->args]; + + largs[0] = dir; + largs[1] = temp_idx(s, its->mem_base); + largs[2] = its->mem_offset; + + /* Loaded, but synced with memory. */ + temp_state[arg] = TS_MEM; + } + } + } + + /* Perform input replacement, and mark inputs that became dead. + No action is required except keeping temp_state up to date + so that we reload when needed. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; + if (arg < nb_globals) { + dir = dir_temps[arg]; + if (dir != 0) { + args[i] = dir; + changes = true; + if (IS_DEAD_ARG(i)) { + temp_state[arg] = TS_DEAD; + } + } + } + } + + /* Liveness analysis should ensure that the following are + all correct, for call sites and basic block end points. */ + if (call_flags & TCG_CALL_NO_READ_GLOBALS) { + /* Nothing to do */ + } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { + for (i = 0; i < nb_globals; ++i) { + /* Liveness should see that globals are synced back, + that is, either TS_DEAD or TS_MEM. 
*/ + tcg_debug_assert(dir_temps[i] == 0 + || temp_state[i] != 0); + } + } else { + for (i = 0; i < nb_globals; ++i) { + /* Liveness should see that globals are saved back, + that is, TS_DEAD, waiting to be reloaded. */ + tcg_debug_assert(dir_temps[i] == 0 + || temp_state[i] == TS_DEAD); + } + } + + /* Outputs become available. */ + for (i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (arg >= nb_globals) { + continue; + } + dir = dir_temps[arg]; + if (dir == 0) { + continue; + } + args[i] = dir; + changes = true; + + /* The output is now live and modified. */ + temp_state[arg] = 0; + + /* Sync outputs upon their last write. */ + if (NEED_SYNC_ARG(i)) { + TCGTemp *its = &s->temps[arg]; + TCGOpcode sopc = (its->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGArg *sargs = &s->gen_opparam_buf[sop->args]; + + sargs[0] = dir; + sargs[1] = temp_idx(s, its->mem_base); + sargs[2] = its->mem_offset; + + temp_state[arg] = TS_MEM; + } + /* Drop outputs that are dead. 
*/ + if (IS_DEAD_ARG(i)) { + temp_state[arg] = TS_DEAD; + } + } + } + + return changes; +} + #ifdef CONFIG_DEBUG_TCG static void dump_regs(TCGContext *s) { @@ -1723,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, if (!ts->mem_allocated) { temp_allocate_frame(s, temp_idx(s, ts)); } - if (ts->indirect_reg) { - if (ts->val_type == TEMP_VAL_REG) { - tcg_regset_set_reg(allocated_regs, ts->reg); - } - temp_load(s, ts->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], - allocated_regs); - } switch (ts->val_type) { case TEMP_VAL_CONST: /* If we're going to free the temp immediately, then we won't @@ -1821,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, break; case TEMP_VAL_MEM: reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); - if (ts->indirect_reg) { - tcg_regset_set_reg(allocated_regs, reg); - temp_load(s, ts->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], - allocated_regs); - } tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); ts->mem_coherent = 1; break; @@ -1843,14 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, temporary registers needs to be allocated to store a constant. */ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) { - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - /* The liveness analysis already ensures that globals are back - in memory. Keep an tcg_debug_assert for safety. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); - return; - } - temp_sync(s, ts, allocated_regs, 1); + /* The liveness analysis already ensures that globals are back + in memory. Keep a tcg_debug_assert for safety. 
*/ + tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg); } /* save globals to their canonical location and assume they can be @@ -1874,14 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) for (i = 0; i < s->nb_globals; i++) { TCGTemp *ts = &s->temps[i]; - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - tcg_debug_assert(ts->val_type != TEMP_VAL_REG - || ts->fixed_reg - || ts->mem_coherent); - continue; - } - temp_sync(s, ts, allocated_regs, 0); + tcg_debug_assert(ts->val_type != TEMP_VAL_REG + || ts->fixed_reg + || ts->mem_coherent); } } @@ -1896,23 +2090,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) if (ts->temp_local) { temp_save(s, ts, allocated_regs); } else { - /* ??? Liveness does not yet incorporate indirect bases. */ - if (!ts->indirect_base) { - /* The liveness analysis already ensures that temps are dead. - Keep an tcg_debug_assert for safety. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); - continue; - } - temp_dead(s, ts); + /* The liveness analysis already ensures that temps are dead. + Keep a tcg_debug_assert for safety. 
*/ + tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); } } save_globals(s, allocated_regs); } -#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) -#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) - static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, TCGLifeData arg_life) { @@ -1975,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, if (!ots->mem_allocated) { temp_allocate_frame(s, args[0]); } - if (ots->indirect_reg) { - tcg_regset_set_reg(allocated_regs, ts->reg); - temp_load(s, ots->mem_base, - tcg_target_available_regs[TCG_TYPE_PTR], - allocated_regs); - } tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); if (IS_DEAD_ARG(1)) { temp_dead(s, ts); @@ -2385,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) s->la_time -= profile_getclock(); #endif - tcg_liveness_analysis(s); + { + uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects); + + liveness_pass_1(s, temp_state); + + if (s->nb_indirects > 0) { +#ifdef DEBUG_DISAS + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) + && qemu_log_in_addr_range(tb->pc))) { + qemu_log("OP before indirect lowering:\n"); + tcg_dump_ops(s); + qemu_log("\n"); + } +#endif + /* Replace indirect temps with direct temps. */ + if (liveness_pass_2(s, temp_state)) { + /* If changes were made, re-run liveness. 
*/ + liveness_pass_1(s, temp_state); + } + } + } #ifdef CONFIG_PROFILER s->la_time += profile_getclock(); diff --git a/tcg/tcg.h b/tcg/tcg.h index ebf68670f684..1bcabcad9d58 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -617,6 +617,7 @@ struct TCGContext { int nb_labels; int nb_globals; int nb_temps; + int nb_indirects; /* goto_tb support */ tcg_insn_unit *code_buf; @@ -898,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, int nargs, TCGArg *args); void tcg_op_remove(TCGContext *s, TCGOp *op); +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); + void tcg_optimize(TCGContext *s); /* only used for debugging purposes */ diff --git a/util/log.c b/util/log.c index 9f0844481c4d..54b54e868aab 100644 --- a/util/log.c +++ b/util/log.c @@ -247,8 +247,9 @@ const QEMULogItem qemu_log_items[] = { { CPU_LOG_TB_OP, "op", "show micro ops for each compiled TB" }, { CPU_LOG_TB_OP_OPT, "op_opt", - "show micro ops (x86 only: before eflags optimization) and\n" - "after liveness analysis" }, + "show micro ops after optimization" }, + { CPU_LOG_TB_OP_IND, "op_ind", + "show micro ops before indirect lowering" }, { CPU_LOG_INT, "int", "show interrupts/exceptions in short format" }, { CPU_LOG_EXEC, "exec",