Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
tcg: remove the final vestiges of dstate
Now we no longer have dynamic state affecting things we can remove the
additional fields in cpu.h and simplify the TB hash calculation.

For the benchmark:

    hyperfine -w 2 -m 20 \
      "./arm-softmmu/qemu-system-arm -cpu cortex-a15 \
        -machine type=virt,highmem=off \
        -display none -m 2048 \
        -serial mon:stdio \
        -netdev user,id=unet,hostfwd=tcp::2222-:22 \
        -device virtio-net-pci,netdev=unet \
        -device virtio-scsi-pci \
        -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf \
        -device scsi-hd,drive=hd -smp 4 \
        -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage \
        -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' \
        -snapshot"

It has a marginal effect on runtime, before:

  Time (mean ± σ):     26.279 s ±  2.438 s    [User: 41.113 s, System: 1.843 s]
  Range (min … max):   24.420 s … 32.565 s    20 runs

after:

  Time (mean ± σ):     24.440 s ±  2.885 s    [User: 34.474 s, System: 2.028 s]
  Range (min … max):   21.663 s … 29.937 s    20 runs

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1358
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20230526165401.574474-10-alex.bennee@linaro.org
Message-Id: <20230524133952.3971948-9-alex.bennee@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
stsquad authored and Stefan Hajnoczi committed May 31, 2023
1 parent dd109ea commit fbc69de
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 26 deletions.
7 changes: 1 addition & 6 deletions accel/tcg/cpu-exec.c
Expand Up @@ -175,7 +175,6 @@ struct tb_desc {
tb_page_addr_t page_addr0;
uint32_t flags;
uint32_t cflags;
uint32_t trace_vcpu_dstate;
};

static bool tb_lookup_cmp(const void *p, const void *d)
Expand All @@ -187,7 +186,6 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb_page_addr0(tb) == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
Expand Down Expand Up @@ -228,15 +226,14 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
if (phys_pc == -1) {
return NULL;
}
desc.page_addr0 = phys_pc;
h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
flags, cflags, *cpu->trace_dstate);
flags, cflags);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

Expand All @@ -263,7 +260,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
jc->array[hash].pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
Expand All @@ -282,7 +278,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
Expand Down
6 changes: 3 additions & 3 deletions accel/tcg/tb-hash.h
Expand Up @@ -61,10 +61,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
#endif /* CONFIG_SOFTMMU */

static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
uint32_t flags, uint32_t cf_mask)
{
return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
}

#endif
5 changes: 2 additions & 3 deletions accel/tcg/tb-maint.c
Expand Up @@ -50,7 +50,6 @@ static bool tb_cmp(const void *ap, const void *bp)
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
tb_page_addr0(a) == tb_page_addr0(b) &&
tb_page_addr1(a) == tb_page_addr1(b));
}
Expand Down Expand Up @@ -888,7 +887,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb_page_addr0(tb);
h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, orig_cflags, tb->trace_vcpu_dstate);
tb->flags, orig_cflags);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
Expand Down Expand Up @@ -969,7 +968,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,

/* add in the hash table */
h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, tb->cflags, tb->trace_vcpu_dstate);
tb->flags, tb->cflags);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);

/* remove TB from the page(s) if we couldn't insert it */
Expand Down
6 changes: 0 additions & 6 deletions accel/tcg/translate-all.c
Expand Up @@ -65,11 +65,6 @@
#include "internal.h"
#include "perf.h"

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
sizeof_field(TranslationBlock, trace_vcpu_dstate)
* BITS_PER_BYTE);

TBContext tb_ctx;

/*
Expand Down Expand Up @@ -352,7 +347,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
tcg_ctx->gen_tb = tb;
Expand Down
3 changes: 0 additions & 3 deletions include/exec/exec-all.h
Expand Up @@ -545,9 +545,6 @@ struct TranslationBlock {
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24

/* Per-vCPU dynamic tracing state used to generate this TB */
uint32_t trace_vcpu_dstate;

/*
* Above fields used for comparing
*/
Expand Down
5 changes: 0 additions & 5 deletions include/hw/core/cpu.h
Expand Up @@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
struct qemu_work_item;

#define CPU_UNSET_NUMA_NODE_ID -1
#define CPU_TRACE_DSTATE_MAX_EVENTS 32

/**
* CPUState:
Expand Down Expand Up @@ -407,10 +406,6 @@ struct CPUState {
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;

/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);

DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);

#ifdef CONFIG_PLUGIN
Expand Down

0 comments on commit fbc69de

Please sign in to comment.