Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into…
Browse files Browse the repository at this point in the history
… staging

* New KVM PV features (Marcelo, Wanpeng)
* valgrind fixes (Andrey)
* Remove clock reset notifiers (David)
* KConfig and Makefile cleanups (Paolo)
* Replay and icount improvements (Pavel)
* x86 FP fixes (Peter M.)
* TCG locking assertions (Roman)
* x86 support for mmap-ed -kernel/-initrd (Stefano)
* Other cleanups (Wei Yang, Yan Zhao, Tony)
* LSI fix for infinite loop (Prasad)
* ARM migration fix (Catherine)
* AVX512_BF16 feature (Jing)

# gpg: Signature made Tue 20 Aug 2019 19:00:54 BST
# gpg:                using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (33 commits)
  x86: Intel AVX512_BF16 feature enabling
  scsi: lsi: exit infinite loop while executing script (CVE-2019-12068)
  test-bitmap: test set 1 bit case for bitmap_set
  migration: do not rom_reset() during incoming migration
  HACKING: Document 'struct' keyword usage
  kvm: vmxcap: Enhance with latest features
  cpus-common: nuke finish_safe_work
  icount: remove unnecessary gen_io_end calls
  icount: clean up cpu_can_io at the entry to the block
  replay: rename step-related variables and functions
  replay: refine replay-time module
  replay: fix replay shutdown
  util/qemu-timer: refactor deadline calculation for external timers
  replay: document development rules
  replay: add missing fix for internal function
  timer: last, remove last bits of last
  replay: Remove host_clock_last
  timer: Remove reset notifiers
  mc146818rtc: Remove reset notifiers
  memory: fix race between TCG and accesses to dirty bitmap
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Aug 21, 2019
2 parents 17dc579 + 80db491 commit f2cfa12
Show file tree
Hide file tree
Showing 70 changed files with 661 additions and 502 deletions.
14 changes: 13 additions & 1 deletion HACKING
Expand Up @@ -100,7 +100,19 @@ pointer, you're guaranteed that it is used to modify the storage
it points to, or it is aliased to another pointer that is.

2.3. Typedefs
Typedefs are used to eliminate the redundant 'struct' keyword.

Typedefs are used to eliminate the redundant 'struct' keyword, since type
names have a different style than other identifiers ("CamelCase" versus
"snake_case"). Each named struct type should have a CamelCase name and a
corresponding typedef.

Since certain C compilers choke on duplicated typedefs, you should avoid
them and declare a typedef only in one header file. For common types,
you can use "include/qemu/typedefs.h" for example. However, as a matter
of convenience it is also perfectly fine to use forward struct
definitions instead of typedefs in headers and function prototypes; this
avoids problems with duplicated typedefs and reduces the need to include
headers from other headers.

2.4. Reserved namespaces in C and POSIX
Underscore capital, double underscore, and underscore 't' suffixes should be
Expand Down
1 change: 1 addition & 0 deletions Kconfig.host
Expand Up @@ -28,6 +28,7 @@ config VHOST_USER

config XEN
bool
select FSDEV_9P if VIRTFS

config VIRTFS
bool
Expand Down
1 change: 0 additions & 1 deletion accel/tcg/cpu-exec.c
Expand Up @@ -169,7 +169,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
}
#endif /* DEBUG_DISAS */

cpu->can_do_io = !use_icount;
ret = tcg_qemu_tb_exec(env, tb_ptr);
cpu->can_do_io = 1;
last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
Expand Down
1 change: 0 additions & 1 deletion accel/tcg/translator.c
Expand Up @@ -90,7 +90,6 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
/* Accept I/O on the last instruction. */
gen_io_start();
ops->translate_insn(db, cpu);
gen_io_end();
} else {
ops->translate_insn(db, cpu);
}
Expand Down
12 changes: 10 additions & 2 deletions configure
Expand Up @@ -7431,11 +7431,16 @@ for target in $target_list; do
target_dir="$target"
config_target_mak=$target_dir/config-target.mak
target_name=$(echo $target | cut -d '-' -f 1)
target_aligned_only="no"
case "$target_name" in
alpha|hppa|mips64el|mips64|mipsel|mips|mipsn32|mipsn32el|sh4|sh4eb|sparc|sparc64|sparc32plus|xtensa|xtensaeb)
target_aligned_only="yes"
;;
esac
target_bigendian="no"

case "$target_name" in
armeb|aarch64_be|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
target_bigendian=yes
target_bigendian="yes"
;;
esac
target_softmmu="no"
Expand Down Expand Up @@ -7717,6 +7722,9 @@ fi
if supported_whpx_target $target; then
echo "CONFIG_WHPX=y" >> $config_target_mak
fi
if test "$target_aligned_only" = "yes" ; then
echo "TARGET_ALIGNED_ONLY=y" >> $config_target_mak
fi
if test "$target_bigendian" = "yes" ; then
echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
fi
Expand Down
8 changes: 0 additions & 8 deletions cpus-common.c
Expand Up @@ -69,12 +69,6 @@ static int cpu_get_free_index(void)
return cpu_index;
}

/*
 * Call cpu_exec_start() immediately followed by cpu_exec_end() on @cpu,
 * with nothing executed in between.
 *
 * NOTE(review): presumably this pair exists only to synchronize with any
 * pending exclusive ("safe") work -- confirm against cpu_exec_start().
 */
static void finish_safe_work(CPUState *cpu)
{
    cpu_exec_start(cpu);
    cpu_exec_end(cpu);
}

void cpu_list_add(CPUState *cpu)
{
qemu_mutex_lock(&qemu_cpu_list_lock);
Expand All @@ -86,8 +80,6 @@ void cpu_list_add(CPUState *cpu)
}
QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node);
qemu_mutex_unlock(&qemu_cpu_list_lock);

finish_safe_work(cpu);
}

void cpu_list_remove(CPUState *cpu)
Expand Down
17 changes: 12 additions & 5 deletions cpus.c
Expand Up @@ -556,7 +556,8 @@ void qtest_clock_warp(int64_t dest)
assert(qtest_enabled());
aio_context = qemu_get_aio_context();
while (clock < dest) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
QEMU_TIMER_ATTR_ALL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

seqlock_write_lock(&timers_state.vm_clock_seqlock,
Expand Down Expand Up @@ -616,7 +617,8 @@ void qemu_start_warp_timer(void)

/* We want to use the earliest deadline from ALL vm_clocks */
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
~QEMU_TIMER_ATTR_EXTERNAL);
if (deadline < 0) {
static bool notified;
if (!icount_sleep && !notified) {
Expand Down Expand Up @@ -1352,7 +1354,12 @@ static int64_t tcg_get_icount_limit(void)
int64_t deadline;

if (replay_mode != REPLAY_MODE_PLAY) {
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
/*
* Include all the timers, because they may need attention.
* Running the CPU for too long may create unnecessary delays in the UI.
*/
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
QEMU_TIMER_ATTR_ALL);

/* Maintain prior (possibly buggy) behaviour where if no deadline
* was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
Expand All @@ -1373,8 +1380,8 @@ static void handle_icount_deadline(void)
{
assert(qemu_in_vcpu_thread());
if (use_icount) {
int64_t deadline =
qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
QEMU_TIMER_ATTR_ALL);

if (deadline == 0) {
/* Wake up other AioContexts. */
Expand Down
46 changes: 46 additions & 0 deletions docs/devel/replay.txt
@@ -0,0 +1,46 @@
The record/replay mechanism, which can be enabled through icount mode, expects
virtual devices to satisfy the following requirements.

The main idea behind this document is that everything that affects
the guest state during execution in icount mode should be deterministic.

Timers
======

All virtual devices should use the virtual clock for timers that change the
guest state. The virtual clock is deterministic, and therefore such timers are
deterministic too.

Virtual devices can also use the realtime clock for events that do not change
the guest state directly. When the clock ticking should depend on VM execution
speed, use the virtual clock with the EXTERNAL attribute
(QEMU_TIMER_ATTR_EXTERNAL). Such a timer is not deterministic, but its speed
depends on the guest execution. It is used by virtual devices (e.g., the slirp
routing device) that lie outside the replayed guest.

Bottom halves
=============

Bottom half callbacks that affect the guest state should be invoked through
the replay_bh_schedule_event or replay_bh_schedule_oneshot_event functions.
Their invocations are saved in record mode and synchronized with the existing
log in replay mode.

Saving/restoring the VM state
=============================

All fields in the device state structure (including virtual timers)
should be restored by loadvm to the same values they had before savevm.

Avoid accessing other devices' state, because the order of saving/restoring
is not defined. This means that you should not call functions like
'update_irq' in a post_load callback. Save everything explicitly to avoid
dependencies that may make restoring the VM state non-deterministic.

Stopping the VM
===============

Stopping the guest should not interfere with its state (with the exception
of network connections, which could be broken by remote timeouts).
The VM can be stopped by the user at any moment of replay. Restarting the VM
after such a stop should not break the replay through unneeded guest state
changes.
31 changes: 31 additions & 0 deletions exec.c
Expand Up @@ -197,6 +197,7 @@ typedef struct subpage_t {

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
Expand Down Expand Up @@ -905,6 +906,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
newas->cpu = cpu;
newas->as = as;
if (tcg_enabled()) {
newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
newas->tcg_as_listener.commit = tcg_commit;
memory_listener_register(&newas->tcg_as_listener, as);
}
Expand Down Expand Up @@ -3142,6 +3144,35 @@ void address_space_dispatch_free(AddressSpaceDispatch *d)
g_free(d);
}

/*
 * Empty work item: ignores both @cpu and @d and has no effect of its own.
 * It is passed to run_on_cpu() when only the act of running a work item on
 * the target CPU matters, not the work itself.
 */
static void do_nothing(CPUState *cpu, run_on_cpu_data d)
{
    /* Intentionally empty. */
}

/*
 * log_global_after_sync hook of the per-CPU TCG memory listener.
 *
 * @listener: the tcg_as_listener member embedded in a CPUAddressSpace; the
 *            owning CPUAddressSpace (and through it the CPU) is recovered
 *            with container_of().
 *
 * Runs an empty work item on that CPU via run_on_cpu().
 */
static void tcg_log_global_after_sync(MemoryListener *listener)
{
    CPUAddressSpace *cpuas =
        container_of(listener, CPUAddressSpace, tcg_as_listener);

    /*
     * Wait for the CPU to end the current TB.  This avoids the following
     * incorrect race:
     *
     *      vCPU                         migration
     *      ----------------------       -------------------------
     *      TLB check -> slow path
     *        notdirty_mem_write
     *          write to RAM
     *          mark dirty
     *                                   clear dirty flag
     *      TLB check -> fast path
     *                                   read memory
     *        write to RAM
     *
     * by pushing the migration thread's memory read after the vCPU thread
     * has written the memory.
     */
    run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
}

static void tcg_commit(MemoryListener *listener)
{
CPUAddressSpace *cpuas;
Expand Down
2 changes: 1 addition & 1 deletion fsdev/Makefile.objs
@@ -1,6 +1,6 @@
# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
# only pull in the actual 9p backend if we also enabled virtio or xen.
ifeq ($(call land,$(CONFIG_VIRTFS),$(call lor,$(CONFIG_VIRTIO_9P),$(CONFIG_XEN))),y)
ifeq ($(CONFIG_FSDEV_9P),y)
common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o
else
common-obj-y = qemu-fsdev-dummy.o
Expand Down
5 changes: 5 additions & 0 deletions hw/9pfs/Kconfig
@@ -1,4 +1,9 @@
config FSDEV_9P
bool
depends on VIRTFS

config VIRTIO_9P
bool
default y
depends on VIRTFS && VIRTIO
select FSDEV_9P
48 changes: 40 additions & 8 deletions hw/core/loader.c
Expand Up @@ -58,6 +58,7 @@
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "qemu/cutils.h"
#include "sysemu/runstate.h"

#include <zlib.h>

Expand Down Expand Up @@ -838,6 +839,7 @@ struct Rom {
int isrom;
char *fw_dir;
char *fw_file;
GMappedFile *mapped_file;

bool committed;

Expand All @@ -848,10 +850,25 @@ struct Rom {
static FWCfgState *fw_cfg;
static QTAILQ_HEAD(, Rom) roms = QTAILQ_HEAD_INITIALIZER(roms);

/* rom->data must be heap-allocated (do not use with rom_add_elf_program()) */
/*
* rom->data can be heap-allocated or memory-mapped (e.g. when added with
* rom_add_elf_program())
*/
static void rom_free_data(Rom *rom)
{
    if (!rom->mapped_file) {
        /* No mapping is recorded: release the buffer with g_free(). */
        g_free(rom->data);
    } else {
        /* Drop the reference held on the mapped file and forget it. */
        g_mapped_file_unref(rom->mapped_file);
        rom->mapped_file = NULL;
    }

    /* In both cases the ROM no longer has any data attached. */
    rom->data = NULL;
}

static void rom_free(Rom *rom)
{
g_free(rom->data);
rom_free_data(rom);
g_free(rom->path);
g_free(rom->name);
g_free(rom->fw_dir);
Expand Down Expand Up @@ -1058,11 +1075,12 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,

/* This function is specific for elf program because we don't need to allocate
* all the rom. We just allocate the first part and the rest is just zeros. This
* is why romsize and datasize are different. Also, this function seize the
* memory ownership of "data", so we don't have to allocate and copy the buffer.
* is why romsize and datasize are different. Also, this function takes its own
* reference to "mapped_file", so we don't have to allocate and copy the buffer.
*/
int rom_add_elf_program(const char *name, void *data, size_t datasize,
size_t romsize, hwaddr addr, AddressSpace *as)
int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data,
size_t datasize, size_t romsize, hwaddr addr,
AddressSpace *as)
{
Rom *rom;

Expand All @@ -1073,6 +1091,12 @@ int rom_add_elf_program(const char *name, void *data, size_t datasize,
rom->romsize = romsize;
rom->data = data;
rom->as = as;

if (mapped_file && data) {
g_mapped_file_ref(mapped_file);
rom->mapped_file = mapped_file;
}

rom_insert(rom);
return 0;
}
Expand All @@ -1091,6 +1115,15 @@ static void rom_reset(void *unused)
{
Rom *rom;

/*
* We don't need to fill in the RAM with ROM data because we'll fill
* the data in during the next incoming migration in all cases. Note
* that some of those RAMs can actually be modified by the guest on ARM
* so this is probably the only right thing to do here.
*/
if (runstate_check(RUN_STATE_INMIGRATE))
return;

QTAILQ_FOREACH(rom, &roms, next) {
if (rom->fw_file) {
continue;
Expand All @@ -1107,8 +1140,7 @@ static void rom_reset(void *unused)
}
if (rom->isrom) {
/* rom needs to be written only once */
g_free(rom->data);
rom->data = NULL;
rom_free_data(rom);
}
/*
* The rom loader is really on the same level as firmware in the guest
Expand Down

0 comments on commit f2cfa12

Please sign in to comment.