
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160611' into staging

TB hashing improvements

# gpg: Signature made Sun 12 Jun 2016 01:12:50 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160611:
  translate-all: add tb hash bucket info to 'info jit' dump
  tb hash: track translated blocks with qht
  qht: add test-qht-par to invoke qht-bench from 'check' target
  qht: add qht-bench, a performance benchmark
  qht: add test program
  qht: QEMU's fast, resizable and scalable Hash Table
  qdist: add test program
  qdist: add module to represent frequency distributions of data
  tb hash: hash phys_pc, pc, and flags with xxhash
  exec: add tb_hash_func5, derived from xxhash
  qemu-thread: add simple test-and-set spinlock
  include/processor.h: define cpu_relax()
  seqlock: rename write_lock/unlock to write_begin/end
  seqlock: remove optional mutex
  compiler.h: add QEMU_ALIGNED() to enforce struct alignment

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
pm215 committed Jun 13, 2016
2 parents a93c1bd + 329844d commit da2fdd0bd1514a44309dd5be162ebfb6c166a716
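Two of the commits above ("qemu-thread: add simple test-and-set spinlock" and "include/processor.h: define cpu_relax()") have no hunks in this view. For orientation, here is a minimal sketch of a test-and-set spinlock built on cpu_relax(); the GCC __sync builtins and the x86 fallback are assumptions for illustration, not the verbatim QEMU code:

    /* Sketch only: a test-and-set spinlock in the spirit of the
     * "qemu-thread: add simple test-and-set spinlock" commit. */
    #ifndef cpu_relax
    #define cpu_relax() __asm__ __volatile__("rep; nop" ::: "memory") /* x86 PAUSE */
    #endif

    typedef struct QemuSpin {
        int value;
    } QemuSpin;

    static inline void qemu_spin_lock(QemuSpin *spin)
    {
        /* Spin on a plain read first so the cache line stays shared,
         * and only retry the atomic test-and-set when it looks free. */
        while (__sync_lock_test_and_set(&spin->value, 1)) {
            while (spin->value) {
                cpu_relax();
            }
        }
    }

    static inline void qemu_spin_unlock(QemuSpin *spin)
    {
        __sync_lock_release(&spin->value);   /* release-store of 0 */
    }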
cpu-exec.c
@@ -225,57 +225,57 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
 }
 #endif
 
-static TranslationBlock *tb_find_physical(CPUState *cpu,
-                                          target_ulong pc,
-                                          target_ulong cs_base,
-                                          uint32_t flags)
+struct tb_desc {
+    target_ulong pc;
+    target_ulong cs_base;
+    CPUArchState *env;
+    tb_page_addr_t phys_page1;
+    uint32_t flags;
+};
+
+static bool tb_cmp(const void *p, const void *d)
 {
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
-    TranslationBlock *tb, **tb_hash_head, **ptb1;
-    unsigned int h;
-    tb_page_addr_t phys_pc, phys_page1;
-
-    /* find translated block using physical mappings */
-    phys_pc = get_page_addr_code(env, pc);
-    phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_phys_hash_func(phys_pc);
-
-    /* Start at head of the hash entry */
-    ptb1 = tb_hash_head = &tcg_ctx.tb_ctx.tb_phys_hash[h];
-    tb = *ptb1;
-
-    while (tb) {
-        if (tb->pc == pc &&
-            tb->page_addr[0] == phys_page1 &&
-            tb->cs_base == cs_base &&
-            tb->flags == flags) {
-            if (tb->page_addr[1] == -1) {
-                /* done, we have a match */
-                break;
-            } else {
-                /* check next page if needed */
-                target_ulong virt_page2 = (pc & TARGET_PAGE_MASK) +
-                                          TARGET_PAGE_SIZE;
-                tb_page_addr_t phys_page2 = get_page_addr_code(env, virt_page2);
-                if (tb->page_addr[1] == phys_page2) {
-                    break;
-                }
+    const TranslationBlock *tb = p;
+    const struct tb_desc *desc = d;
+
+    if (tb->pc == desc->pc &&
+        tb->page_addr[0] == desc->phys_page1 &&
+        tb->cs_base == desc->cs_base &&
+        tb->flags == desc->flags) {
+        /* check next page if needed */
+        if (tb->page_addr[1] == -1) {
+            return true;
+        } else {
+            tb_page_addr_t phys_page2;
+            target_ulong virt_page2;
+
+            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+            phys_page2 = get_page_addr_code(desc->env, virt_page2);
+            if (tb->page_addr[1] == phys_page2) {
+                return true;
+            }
             }
         }
-        ptb1 = &tb->phys_hash_next;
-        tb = *ptb1;
     }
+    return false;
+}
 
-    if (tb) {
-        /* Move the TB to the head of the list */
-        *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = *tb_hash_head;
-        *tb_hash_head = tb;
-    }
-    return tb;
+static TranslationBlock *tb_find_physical(CPUState *cpu,
+                                          target_ulong pc,
+                                          target_ulong cs_base,
+                                          uint32_t flags)
+{
+    tb_page_addr_t phys_pc;
+    struct tb_desc desc;
+    uint32_t h;
+
+    desc.env = (CPUArchState *)cpu->env_ptr;
+    desc.cs_base = cs_base;
+    desc.flags = flags;
+    desc.pc = pc;
+    phys_pc = get_page_addr_code(desc.env, pc);
+    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+    h = tb_hash_func(phys_pc, pc, flags);
+    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }
 
 static TranslationBlock *tb_find_slow(CPUState *cpu,
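The lookup above is one half of the pattern; the matching insertion happens in translate-all.c, which is not part of this commit view. A hedged sketch of that call sequence, using only the functions visible in this diff:

    /* Sketch: inserting a freshly translated TB into the QHT, mirroring
     * the qht_lookup() above; the surrounding translate-all.c code is
     * not shown here. */
    uint32_t h = tb_hash_func(phys_pc, tb->pc, tb->flags);
    qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);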
cpus.c
@@ -249,13 +249,13 @@ int64_t cpu_get_clock(void)
 void cpu_enable_ticks(void)
 {
     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    seqlock_write_begin(&timers_state.vm_clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+    seqlock_write_end(&timers_state.vm_clock_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -265,13 +265,13 @@ void cpu_enable_ticks(void)
 void cpu_disable_ticks(void)
 {
     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    seqlock_write_begin(&timers_state.vm_clock_seqlock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
         timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+    seqlock_write_end(&timers_state.vm_clock_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -294,7 +294,7 @@ static void icount_adjust(void)
         return;
     }
 
-    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    seqlock_write_begin(&timers_state.vm_clock_seqlock);
     cur_time = cpu_get_clock_locked();
     cur_icount = cpu_get_icount_locked();
@@ -315,7 +315,7 @@ static void icount_adjust(void)
     last_delta = delta;
     timers_state.qemu_icount_bias = cur_icount
                               - (timers_state.qemu_icount << icount_time_shift);
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+    seqlock_write_end(&timers_state.vm_clock_seqlock);
 }
 
 static void icount_adjust_rt(void *opaque)
@@ -355,7 +355,7 @@ static void icount_warp_rt(void)
         return;
     }
 
-    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    seqlock_write_begin(&timers_state.vm_clock_seqlock);
     if (runstate_is_running()) {
         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                      cpu_get_clock_locked());
@@ -374,7 +374,7 @@ static void icount_warp_rt(void)
         timers_state.qemu_icount_bias += warp_delta;
     }
     vm_clock_warp_start = -1;
-    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+    seqlock_write_end(&timers_state.vm_clock_seqlock);
 
     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -399,9 +399,9 @@ void qtest_clock_warp(int64_t dest)
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 
-        seqlock_write_lock(&timers_state.vm_clock_seqlock);
+        seqlock_write_begin(&timers_state.vm_clock_seqlock);
         timers_state.qemu_icount_bias += warp;
-        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+        seqlock_write_end(&timers_state.vm_clock_seqlock);
 
         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
@@ -468,9 +468,9 @@ void qemu_start_warp_timer(void)
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock);
+            seqlock_write_begin(&timers_state.vm_clock_seqlock);
             timers_state.qemu_icount_bias += deadline;
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+            seqlock_write_end(&timers_state.vm_clock_seqlock);
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
         } else {
             /*
@@ -481,11 +481,11 @@ void qemu_start_warp_timer(void)
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock);
+            seqlock_write_begin(&timers_state.vm_clock_seqlock);
             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                 vm_clock_warp_start = clock;
             }
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+            seqlock_write_end(&timers_state.vm_clock_seqlock);
             timer_mod_anticipate(icount_warp_timer, clock + deadline);
         }
     } else if (deadline == 0) {
@@ -621,7 +621,7 @@ int cpu_throttle_get_percentage(void)
 void cpu_ticks_init(void)
 {
-    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
+    seqlock_init(&timers_state.vm_clock_seqlock);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                   cpu_throttle_timer_tick, NULL);
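The rename makes the asymmetry of a seqlock explicit: writers mark a critical section, while readers never block and instead retry. For contrast with the write side above, a reader-side sketch based on the seqlock_read_begin()/seqlock_read_retry() API in qemu/seqlock.h (the field read here is illustrative):

    /* Sketch: snapshot seqlock-protected data, retrying if a writer
     * ran concurrently. */
    static int64_t cpu_clock_offset_snapshot(void)
    {
        int64_t off;
        unsigned start;

        do {
            start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
            off = timers_state.cpu_clock_offset;   /* protected data */
        } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

        return off;
    }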
include/exec/exec-all.h
@@ -215,8 +215,6 @@ struct TranslationBlock {
     void *tc_ptr;    /* pointer to the translated code */
     uint8_t *tc_search;  /* pointer to search data */
-    /* next matching tb for physical address. */
-    struct TranslationBlock *phys_hash_next;
     /* original tb when cflags has CF_NOCACHE */
     struct TranslationBlock *orig_tb;
     /* first and second physical page containing code. The lower bit
include/exec/tb-context.h
@@ -21,17 +21,18 @@
 #define QEMU_TB_CONTEXT_H_
 
 #include "qemu/thread.h"
+#include "qemu/qht.h"
 
-#define CODE_GEN_PHYS_HASH_BITS     15
-#define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
+#define CODE_GEN_HTABLE_BITS     15
+#define CODE_GEN_HTABLE_SIZE     (1 << CODE_GEN_HTABLE_BITS)
 
 typedef struct TranslationBlock TranslationBlock;
 typedef struct TBContext TBContext;
 
 struct TBContext {
 
     TranslationBlock *tbs;
-    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+    struct qht htable;
     int nb_tbs;
 
     /* any access to the tbs or the page table must use this lock */
     QemuMutex tb_lock;
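The new htable member needs one-time initialization, which this series does in translate-all.c. A sketch, assuming the QHT_MODE_AUTO_RESIZE flag from the qht commit in this pull:

    /* Sketch: one-time setup of the TB hash table, sized by the
     * CODE_GEN_HTABLE_SIZE define above; auto-resize keeps bucket
     * chains short as the number of TBs grows. */
    qht_init(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);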
include/exec/tb-hash-xx.h
@@ -0,0 +1,94 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* + Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* + Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at :
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*/
#ifndef EXEC_TB_HASH_XX
#define EXEC_TB_HASH_XX

#include "qemu/bitops.h"

#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U

#define TB_HASH_XX_SEED 1

/*
 * xxhash32, customized for input variables that are not guaranteed to be
 * contiguous in memory.
 */
static inline
uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
{
    uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
    uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
    uint32_t v3 = TB_HASH_XX_SEED + 0;
    uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
    uint32_t a = a0 >> 32;
    uint32_t b = a0;
    uint32_t c = b0 >> 32;
    uint32_t d = b0;
    uint32_t h32;

    v1 += a * PRIME32_2;
    v1 = rol32(v1, 13);
    v1 *= PRIME32_1;

    v2 += b * PRIME32_2;
    v2 = rol32(v2, 13);
    v2 *= PRIME32_1;

    v3 += c * PRIME32_2;
    v3 = rol32(v3, 13);
    v3 *= PRIME32_1;

    v4 += d * PRIME32_2;
    v4 = rol32(v4, 13);
    v4 *= PRIME32_1;

    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
    h32 += 20;

    h32 += e * PRIME32_3;
    h32  = rol32(h32, 17) * PRIME32_4;

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}

#endif /* EXEC_TB_HASH_XX */
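tb_hash_func5() hashes five 32-bit words (two 64-bit inputs plus one 32-bit input), the equivalent of a 20-byte xxh32 message; that is why the avalanche step adds 20 where upstream xxh32 adds the total input length in bytes. A usage sketch with arbitrary example values:

    /* Sketch: hashing a (phys_pc, pc, flags) triple; values are arbitrary. */
    uint32_t h = tb_hash_func5(0x12345678ULL,   /* phys_pc */
                               0x87654321ULL,   /* pc */
                               0x11);           /* flags */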
include/exec/tb-hash.h
@@ -20,6 +20,8 @@
 #ifndef EXEC_TB_HASH
 #define EXEC_TB_HASH
 
+#include "exec/tb-hash-xx.h"
+
 /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
    addresses on the same page. The top bits are the same. This allows
    TLB invalidation to quickly clear a subset of the hash table. */
@@ -43,9 +45,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
-static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc)
+static inline
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
 {
-    return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1);
+    return tb_hash_func5(phys_pc, pc, flags);
 }
 
 #endif
include/qemu/compiler.h
@@ -41,6 +41,8 @@
 # define QEMU_PACKED __attribute__((packed))
 #endif
 
+#define QEMU_ALIGNED(X) __attribute__((aligned(X)))
+
 #ifndef glue
 #define xglue(x, y) x ## y
 #define glue(x, y) xglue(x, y)
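QEMU_ALIGNED() lets later patches in this series pad and align hash-table bucket structures. A usage sketch; the struct and the 64-byte cache-line figure are illustrative assumptions:

    /* Sketch: cache-line-align a structure so two instances never share
     * a line, avoiding false sharing between CPUs. */
    struct per_cpu_counter {
        unsigned long count;
    } QEMU_ALIGNED(64);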