diff --git a/MAINTAINERS b/MAINTAINERS index 8cbc1fac2bfc..831d4b014aea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -396,6 +396,8 @@ F: target/s390x/machine.c F: target/s390x/sigp.c F: target/s390x/cpu_features*.[ch] F: target/s390x/cpu_models.[ch] +F: hw/s390x/pv.c +F: include/hw/s390x/pv.h F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: include/hw/s390x/s390_flic.h diff --git a/docs/system/s390x/protvirt.rst b/docs/system/s390x/protvirt.rst new file mode 100644 index 000000000000..712974ad87b9 --- /dev/null +++ b/docs/system/s390x/protvirt.rst @@ -0,0 +1,60 @@ +Protected Virtualization on s390x +================================= + +The memory and most of the registers of Protected Virtual Machines +(PVMs) are encrypted or inaccessible to the hypervisor, effectively +prohibiting VM introspection when the VM is running. At rest, PVMs are +encrypted and can only be decrypted by the firmware, represented by an +entity called Ultravisor, of specific IBM Z machines. + + +Prerequisites +------------- + +To run PVMs, a machine with the Protected Virtualization feature, as +indicated by the Ultravisor Call facility (stfle bit 158), is +required. The Ultravisor needs to be initialized at boot by setting +`prot_virt=1` on the host's kernel command line. + +Running PVMs requires using the KVM hypervisor. + +If those requirements are met, the capability `KVM_CAP_S390_PROTECTED` +will indicate that KVM can support PVMs on that LPAR. + + +QEMU Settings +------------- + +To indicate to the VM that it can transition into protected mode, the +`Unpack facility` (stfle bit 161 represented by the feature +`unpack`/`S390_FEAT_UNPACK`) needs to be part of the cpu model of +the VM. + +All I/O devices need to use the IOMMU. +Passthrough (vfio) devices are currently not supported. + +Host huge page backings are not supported. However guests can use huge +pages as indicated by its facilities. + + +Boot Process +------------ + +A secure guest image can either be loaded from disk or supplied on the +QEMU command line. Booting from disk is done by the unmodified +s390-ccw BIOS. I.e., the bootmap is interpreted, multiple components +are read into memory and control is transferred to one of the +components (zipl stage3). Stage3 does some fixups and then transfers +control to some program residing in guest memory, which is normally +the OS kernel. The secure image has another component prepended +(stage3a) that uses the new diag308 subcodes 8 and 10 to trigger the +transition into secure mode. + +Booting from the image supplied on the QEMU command line requires that +the file passed via -kernel has the same memory layout as would result +from the disk boot. This memory layout includes the encrypted +components (kernel, initrd, cmdline), the stage3a loader and +metadata. In case this boot method is used, the command line +options -initrd and -cmdline are ineffective. The preparation of a PVM +image is done via the `genprotimg` tool from the s390-tools +collection. diff --git a/docs/system/target-s390x.rst b/docs/system/target-s390x.rst index 4c8b7cdd6615..7d76ae97b401 100644 --- a/docs/system/target-s390x.rst +++ b/docs/system/target-s390x.rst @@ -24,3 +24,8 @@ or vfio-ap is also available. .. toctree:: s390x/vfio-ap +Architectural features +====================== + +.. toctree:: + s390x/protvirt diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 65bb6886c750..3301869d4f90 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -83,6 +83,8 @@ #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */ #define VIRTIO_NET_F_RSC_EXT 61 +#endif + static inline __virtio16 *virtio_net_rsc_ext_num_packets( struct virtio_net_hdr *hdr) { @@ -95,8 +97,6 @@ static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( return &hdr->csum_offset; } -#endif - static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index e02ed80b6829..a46a1c7894e0 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -31,6 +31,7 @@ obj-y += tod-qemu.o obj-$(CONFIG_KVM) += tod-kvm.o obj-$(CONFIG_KVM) += s390-skeys-kvm.o obj-$(CONFIG_KVM) += s390-stattrib-kvm.o +obj-$(CONFIG_KVM) += pv.o obj-y += s390-ccw.o obj-y += ap-device.o obj-y += ap-bridge.o diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 8c3e01957176..ce21494c08f7 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -1,10 +1,11 @@ /* * bootloader support * - * Copyright IBM, Corp. 2012 + * Copyright IBM, Corp. 2012, 2020 * * Authors: * Christian Borntraeger + * Janosch Frank * * This work is licensed under the terms of the GNU GPL, version 2 or (at your * option) any later version. See the COPYING file in the top-level directory. @@ -27,6 +28,7 @@ #include "hw/s390x/vfio-ccw.h" #include "hw/s390x/css.h" #include "hw/s390x/ebcdic.h" +#include "hw/s390x/pv.h" #include "ipl.h" #include "qemu/error-report.h" #include "qemu/config-file.h" @@ -566,12 +568,31 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); - ipl->iplb = *iplb; - ipl->iplb_valid = true; + /* + * The IPLB set and retrieved by subcodes 8/9 is completely + * separate from the one managed via subcodes 5/6. + */ + if (iplb->pbt == S390_IPL_TYPE_PV) { + ipl->iplb_pv = *iplb; + ipl->iplb_valid_pv = true; + } else { + ipl->iplb = *iplb; + ipl->iplb_valid = true; + } ipl->netboot = is_virtio_net_device(iplb); update_machine_ipl_properties(iplb); } +IplParameterBlock *s390_ipl_get_iplb_pv(void) +{ + S390IPLState *ipl = get_ipl_device(); + + if (!ipl->iplb_valid_pv) { + return NULL; + } + return &ipl->iplb_pv; +} + IplParameterBlock *s390_ipl_get_iplb(void) { S390IPLState *ipl = get_ipl_device(); @@ -660,6 +681,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) cpu_physical_memory_unmap(addr, len, 1, len); } +int s390_ipl_prepare_pv_header(void) +{ + IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); + IPLBlockPV *ipib_pv = &ipib->pv; + void *hdr = g_malloc(ipib_pv->pv_header_len); + int rc; + + cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, + ipib_pv->pv_header_len); + rc = s390_pv_set_sec_parms((uintptr_t)hdr, + ipib_pv->pv_header_len); + g_free(hdr); + return rc; +} + +int s390_ipl_pv_unpack(void) +{ + IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); + IPLBlockPV *ipib_pv = &ipib->pv; + int i, rc = 0; + + for (i = 0; i < ipib_pv->num_comp; i++) { + rc = s390_pv_unpack(ipib_pv->components[i].addr, + TARGET_PAGE_ALIGN(ipib_pv->components[i].size), + ipib_pv->components[i].tweak_pref); + if (rc) { + break; + } + } + return rc; +} + void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index 3e44abe1c651..53cc9eb5ac4d 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -1,8 +1,9 @@ /* * s390 IPL device * - * Copyright 2015 IBM Corp. + * Copyright 2015, 2020 IBM Corp. * Author(s): Zhang Fan + * Janosch Frank * * This work is licensed under the terms of the GNU GPL, version 2 or (at * your option) any later version. See the COPYING file in the top-level @@ -13,8 +14,27 @@ #define HW_S390_IPL_H #include "cpu.h" +#include "exec/address-spaces.h" #include "hw/qdev-core.h" +struct IPLBlockPVComp { + uint64_t tweak_pref; + uint64_t addr; + uint64_t size; +} QEMU_PACKED; +typedef struct IPLBlockPVComp IPLBlockPVComp; + +struct IPLBlockPV { + uint8_t reserved18[87]; /* 0x18 */ + uint8_t version; /* 0x6f */ + uint32_t reserved70; /* 0x70 */ + uint32_t num_comp; /* 0x74 */ + uint64_t pv_header_addr; /* 0x78 */ + uint64_t pv_header_len; /* 0x80 */ + struct IPLBlockPVComp components[]; +} QEMU_PACKED; +typedef struct IPLBlockPV IPLBlockPV; + struct IplBlockCcw { uint8_t reserved0[85]; uint8_t ssid; @@ -71,6 +91,7 @@ union IplParameterBlock { union { IplBlockCcw ccw; IplBlockFcp fcp; + IPLBlockPV pv; IplBlockQemuScsi scsi; }; } QEMU_PACKED; @@ -85,8 +106,11 @@ typedef union IplParameterBlock IplParameterBlock; int s390_ipl_set_loadparm(uint8_t *loadparm); void s390_ipl_update_diag308(IplParameterBlock *iplb); +int s390_ipl_prepare_pv_header(void); +int s390_ipl_pv_unpack(void); void s390_ipl_prepare_cpu(S390CPU *cpu); IplParameterBlock *s390_ipl_get_iplb(void); +IplParameterBlock *s390_ipl_get_iplb_pv(void); enum s390_reset { /* default is a reset not triggered by a CPU e.g. issued by QMP */ @@ -94,6 +118,7 @@ enum s390_reset { S390_RESET_REIPL, S390_RESET_MODIFIED_CLEAR, S390_RESET_LOAD_NORMAL, + S390_RESET_PV, }; void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type); void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type); @@ -133,6 +158,7 @@ struct S390IPLState { /*< private >*/ DeviceState parent_obj; IplParameterBlock iplb; + IplParameterBlock iplb_pv; QemuIplParameters qipl; uint64_t start_addr; uint64_t compat_start_addr; @@ -140,6 +166,7 @@ struct S390IPLState { uint64_t compat_bios_start_addr; bool enforce_bios; bool iplb_valid; + bool iplb_valid_pv; bool netboot; /* reset related properties don't have to be migrated or reset */ enum s390_reset reset_type; @@ -159,11 +186,29 @@ struct S390IPLState { typedef struct S390IPLState S390IPLState; QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); +#define DIAG_308_RC_OK 0x0001 +#define DIAG_308_RC_NO_CONF 0x0102 +#define DIAG_308_RC_INVALID 0x0402 +#define DIAG_308_RC_NO_PV_CONF 0x0902 +#define DIAG_308_RC_INVAL_FOR_PV 0x0a02 + +#define DIAG308_RESET_MOD_CLR 0 +#define DIAG308_RESET_LOAD_NORM 1 +#define DIAG308_LOAD_CLEAR 3 +#define DIAG308_LOAD_NORMAL_DUMP 4 +#define DIAG308_SET 5 +#define DIAG308_STORE 6 +#define DIAG308_PV_SET 8 +#define DIAG308_PV_STORE 9 +#define DIAG308_PV_START 10 + #define S390_IPL_TYPE_FCP 0x00 #define S390_IPL_TYPE_CCW 0x02 +#define S390_IPL_TYPE_PV 0x05 #define S390_IPL_TYPE_QEMU_SCSI 0xff #define S390_IPLB_HEADER_LEN 8 +#define S390_IPLB_MIN_PV_LEN 148 #define S390_IPLB_MIN_CCW_LEN 200 #define S390_IPLB_MIN_FCP_LEN 384 #define S390_IPLB_MIN_QEMU_SCSI_LEN 200 @@ -173,6 +218,62 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb) return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); } +static inline bool ipl_valid_pv_components(IplParameterBlock *iplb) +{ + IPLBlockPV *ipib_pv = &iplb->pv; + int i; + + if (ipib_pv->num_comp == 0) { + return false; + } + + for (i = 0; i < ipib_pv->num_comp; i++) { + /* Addr must be 4k aligned */ + if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { + return false; + } + + /* Tweak prefix is monotonically increasing with each component */ + if (i < ipib_pv->num_comp - 1 && + ipib_pv->components[i].tweak_pref >= + ipib_pv->components[i + 1].tweak_pref) { + return false; + } + } + return true; +} + +static inline bool ipl_valid_pv_header(IplParameterBlock *iplb) +{ + IPLBlockPV *ipib_pv = &iplb->pv; + + if (ipib_pv->pv_header_len > 2 * TARGET_PAGE_SIZE) { + return false; + } + + if (!address_space_access_valid(&address_space_memory, + ipib_pv->pv_header_addr, + ipib_pv->pv_header_len, + false, + MEMTXATTRS_UNSPECIFIED)) { + return false; + } + + return true; +} + +static inline bool iplb_valid_pv(IplParameterBlock *iplb) +{ + if (iplb->pbt != S390_IPL_TYPE_PV || + be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN) { + return false; + } + if (!ipl_valid_pv_header(iplb)) { + return false; + } + return ipl_valid_pv_components(iplb); +} + static inline bool iplb_valid(IplParameterBlock *iplb) { switch (iplb->pbt) { diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c new file mode 100644 index 000000000000..f11868e865fa --- /dev/null +++ b/hw/s390x/pv.c @@ -0,0 +1,113 @@ +/* + * Protected Virtualization functions + * + * Copyright IBM Corp. 2020 + * Author(s): + * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#include "qemu/osdep.h" + +#include + +#include "cpu.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "hw/s390x/ipl.h" +#include "hw/s390x/pv.h" + +static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) +{ + struct kvm_pv_cmd pv_cmd = { + .cmd = cmd, + .data = (uint64_t)data, + }; + int rc; + + do { + rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); + } while (rc == -EINTR); + + if (rc) { + error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " + "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, + rc); + } + return rc; +} + +/* + * This macro lets us pass the command as a string to the function so + * we can print it on an error. + */ +#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data); +#define s390_pv_cmd_exit(cmd, data) \ +{ \ + int rc; \ + \ + rc = __s390_pv_cmd(cmd, #cmd, data);\ + if (rc) { \ + exit(1); \ + } \ +} + +int s390_pv_vm_enable(void) +{ + return s390_pv_cmd(KVM_PV_ENABLE, NULL); +} + +void s390_pv_vm_disable(void) +{ + s390_pv_cmd_exit(KVM_PV_DISABLE, NULL); +} + +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) +{ + struct kvm_s390_pv_sec_parm args = { + .origin = origin, + .length = length, + }; + + return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); +} + +/* + * Called for each component in the SE type IPL parameter block 0. + */ +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) +{ + struct kvm_s390_pv_unp args = { + .addr = addr, + .size = size, + .tweak = tweak, + }; + + return s390_pv_cmd(KVM_PV_UNPACK, &args); +} + +void s390_pv_perf_clear_reset(void) +{ + s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); +} + +int s390_pv_verify(void) +{ + return s390_pv_cmd(KVM_PV_VERIFY, NULL); +} + +void s390_pv_unshare(void) +{ + s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); +} + +void s390_pv_inject_reset_error(CPUState *cs) +{ + int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; + CPUS390XState *env = &S390_CPU(cs)->env; + + /* Report that we are unable to enter protected mode */ + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; +} diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 0fa00a9fff3d..45292fb5a8b6 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -1,9 +1,10 @@ /* * virtio ccw machine * - * Copyright 2012 IBM Corp. + * Copyright 2012, 2020 IBM Corp. * Copyright (c) 2009 Alexander Graf * Author(s): Cornelia Huck + * Janosch Frank * * This work is licensed under the terms of the GNU GPL, version 2 or (at * your option) any later version. See the COPYING file in the top-level @@ -42,6 +43,11 @@ #include "hw/qdev-properties.h" #include "hw/s390x/tod.h" #include "sysemu/sysemu.h" +#include "sysemu/balloon.h" +#include "hw/s390x/pv.h" +#include "migration/blocker.h" + +static Error *pv_mig_blocker; S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) { @@ -317,10 +323,93 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); } +static void s390_machine_unprotect(S390CcwMachineState *ms) +{ + s390_pv_vm_disable(); + ms->pv = false; + migrate_del_blocker(pv_mig_blocker); + error_free_or_abort(&pv_mig_blocker); + qemu_balloon_inhibit(false); +} + +static int s390_machine_protect(S390CcwMachineState *ms) +{ + Error *local_err = NULL; + int rc; + + /* + * Ballooning on protected VMs needs support in the guest for + * sharing and unsharing balloon pages. Block ballooning for + * now, until we have a solution to make at least Linux guests + * either support it or fail gracefully. + */ + qemu_balloon_inhibit(true); + error_setg(&pv_mig_blocker, + "protected VMs are currently not migrateable."); + rc = migrate_add_blocker(pv_mig_blocker, &local_err); + if (rc) { + qemu_balloon_inhibit(false); + error_report_err(local_err); + error_free_or_abort(&pv_mig_blocker); + return rc; + } + + /* Create SE VM */ + rc = s390_pv_vm_enable(); + if (rc) { + qemu_balloon_inhibit(false); + migrate_del_blocker(pv_mig_blocker); + error_free_or_abort(&pv_mig_blocker); + return rc; + } + + ms->pv = true; + + /* Set SE header and unpack */ + rc = s390_ipl_prepare_pv_header(); + if (rc) { + goto out_err; + } + + /* Decrypt image */ + rc = s390_ipl_pv_unpack(); + if (rc) { + goto out_err; + } + + /* Verify integrity */ + rc = s390_pv_verify(); + if (rc) { + goto out_err; + } + return rc; + +out_err: + s390_machine_unprotect(ms); + return rc; +} + +static void s390_pv_prepare_reset(S390CcwMachineState *ms) +{ + CPUState *cs; + + if (!s390_is_pv()) { + return; + } + /* Unsharing requires all cpus to be stopped */ + CPU_FOREACH(cs) { + s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); + } + s390_pv_unshare(); + s390_pv_perf_clear_reset(); +} + static void s390_machine_reset(MachineState *machine) { + S390CcwMachineState *ms = S390_CCW_MACHINE(machine); enum s390_reset reset_type; CPUState *cs, *t; + S390CPU *cpu; /* get the reset parameters, reset them once done */ s390_ipl_get_reset_request(&cs, &reset_type); @@ -328,9 +417,15 @@ static void s390_machine_reset(MachineState *machine) /* all CPUs are paused and synchronized at this point */ s390_cmma_reset(); + cpu = S390_CPU(cs); + switch (reset_type) { case S390_RESET_EXTERNAL: case S390_RESET_REIPL: + if (s390_is_pv()) { + s390_machine_unprotect(ms); + } + qemu_devices_reset(); s390_crypto_reset(); @@ -338,22 +433,56 @@ static void s390_machine_reset(MachineState *machine) run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); break; case S390_RESET_MODIFIED_CLEAR: + /* + * Susbsystem reset needs to be done before we unshare memory + * and lose access to VIRTIO structures in guest memory. + */ + subsystem_reset(); + s390_crypto_reset(); + s390_pv_prepare_reset(ms); CPU_FOREACH(t) { run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); } - subsystem_reset(); - s390_crypto_reset(); run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); break; case S390_RESET_LOAD_NORMAL: + /* + * Susbsystem reset needs to be done before we unshare memory + * and lose access to VIRTIO structures in guest memory. + */ + subsystem_reset(); + s390_pv_prepare_reset(ms); CPU_FOREACH(t) { if (t == cs) { continue; } run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); } - subsystem_reset(); run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); + break; + case S390_RESET_PV: /* Subcode 10 */ + subsystem_reset(); + s390_crypto_reset(); + + CPU_FOREACH(t) { + if (t == cs) { + continue; + } + run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); + } + run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); + + if (s390_machine_protect(ms)) { + s390_pv_inject_reset_error(cs); + /* + * Continue after the diag308 so the guest knows something + * went wrong. + */ + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); + return; + } + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); break; default: diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index f0c35aa57afd..ede056b3efd7 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -33,6 +33,22 @@ static inline SCLPDevice *get_sclp_device(void) return sclp; } +static inline bool sclp_command_code_valid(uint32_t code) +{ + switch (code & SCLP_CMD_CODE_MASK) { + case SCLP_CMDW_READ_SCP_INFO: + case SCLP_CMDW_READ_SCP_INFO_FORCED: + case SCLP_CMDW_READ_CPU_INFO: + case SCLP_CMDW_CONFIGURE_IOA: + case SCLP_CMDW_DECONFIGURE_IOA: + case SCLP_CMD_READ_EVENT_DATA: + case SCLP_CMD_WRITE_EVENT_DATA: + case SCLP_CMD_WRITE_EVENT_MASK: + return true; + } + return false; +} + static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int *count) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -193,6 +209,34 @@ static void sclp_execute(SCLPDevice *sclp, SCCB *sccb, uint32_t code) } } +/* + * We only need the address to have something valid for the + * service_interrupt call. + */ +#define SCLP_PV_DUMMY_ADDR 0x4000 +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + uint32_t code) +{ + SCLPDevice *sclp = get_sclp_device(); + SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); + SCCB work_sccb; + hwaddr sccb_len = sizeof(SCCB); + + s390_cpu_pv_mem_read(env_archcpu(env), 0, &work_sccb, sccb_len); + + if (!sclp_command_code_valid(code)) { + work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); + goto out_write; + } + + sclp_c->execute(sclp, &work_sccb, code); +out_write: + s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, + be16_to_cpu(work_sccb.h.length)); + sclp_c->service_interrupt(sclp, SCLP_PV_DUMMY_ADDR); + return 0; +} + int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) { SCLPDevice *sclp = get_sclp_device(); @@ -225,17 +269,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) return -PGM_SPECIFICATION; } - switch (code & SCLP_CMD_CODE_MASK) { - case SCLP_CMDW_READ_SCP_INFO: - case SCLP_CMDW_READ_SCP_INFO_FORCED: - case SCLP_CMDW_READ_CPU_INFO: - case SCLP_CMDW_CONFIGURE_IOA: - case SCLP_CMDW_DECONFIGURE_IOA: - case SCLP_CMD_READ_EVENT_DATA: - case SCLP_CMD_WRITE_EVENT_DATA: - case SCLP_CMD_WRITE_EVENT_MASK: - break; - default: + if (!sclp_command_code_valid(code)) { work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); goto out_write; } diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h new file mode 100644 index 000000000000..522ca6a04ee8 --- /dev/null +++ b/include/hw/s390x/pv.h @@ -0,0 +1,58 @@ +/* + * Protected Virtualization header + * + * Copyright IBM Corp. 2020 + * Author(s): + * Janosch Frank + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390_PV_H +#define HW_S390_PV_H + +#ifdef CONFIG_KVM +#include "cpu.h" +#include "hw/s390x/s390-virtio-ccw.h" + +static inline bool s390_is_pv(void) +{ + static S390CcwMachineState *ccw; + Object *obj; + + if (ccw) { + return ccw->pv; + } + + /* we have to bail out for the "none" machine */ + obj = object_dynamic_cast(qdev_get_machine(), + TYPE_S390_CCW_MACHINE); + if (!obj) { + return false; + } + ccw = S390_CCW_MACHINE(obj); + return ccw->pv; +} + +int s390_pv_vm_enable(void); +void s390_pv_vm_disable(void); +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); +void s390_pv_perf_clear_reset(void); +int s390_pv_verify(void); +void s390_pv_unshare(void); +void s390_pv_inject_reset_error(CPUState *cs); +#else /* CONFIG_KVM */ +static inline bool s390_is_pv(void) { return false; } +static inline int s390_pv_vm_enable(void) { return 0; } +static inline void s390_pv_vm_disable(void) {} +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } +static inline void s390_pv_perf_clear_reset(void) {} +static inline int s390_pv_verify(void) { return 0; } +static inline void s390_pv_unshare(void) {} +static inline void s390_pv_inject_reset_error(CPUState *cs) {}; +#endif /* CONFIG_KVM */ + +#endif /* HW_S390_PV_H */ diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 8aa27199c912..cd1dccc6e3ba 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -28,6 +28,7 @@ typedef struct S390CcwMachineState { /*< public >*/ bool aes_key_wrap; bool dea_key_wrap; + bool pv; uint8_t loadparm[8]; } S390CcwMachineState; diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index cd7b24359f28..822eff4396ff 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -217,5 +217,7 @@ void s390_sclp_init(void); void sclp_service_interrupt(uint32_t sccb); void raise_irq_cpu_hotplug(void); int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); +int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + uint32_t code); #endif diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 8adf3b018b95..1200890c8608 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -596,6 +596,9 @@ struct ethtool_pauseparam { * @ETH_SS_LINK_MODES: link mode names * @ETH_SS_MSG_CLASSES: debug message class names * @ETH_SS_WOL_MODES: wake-on-lan modes + * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags + * @ETH_SS_TS_TX_TYPES: timestamping Tx types + * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -610,6 +613,9 @@ enum ethtool_stringset { ETH_SS_LINK_MODES, ETH_SS_MSG_CLASSES, ETH_SS_WOL_MODES, + ETH_SS_SOF_TIMESTAMPING, + ETH_SS_TS_TX_TYPES, + ETH_SS_TS_RX_FILTERS, /* add new constants above here */ ETH_SS_COUNT @@ -1330,6 +1336,7 @@ enum ethtool_fec_config_bits { ETHTOOL_FEC_OFF_BIT, ETHTOOL_FEC_RS_BIT, ETHTOOL_FEC_BASER_BIT, + ETHTOOL_FEC_LLRS_BIT, }; #define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) @@ -1337,6 +1344,7 @@ enum ethtool_fec_config_bits { #define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) #define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) #define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) +#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT) /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. @@ -1521,7 +1529,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, - + ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index b484c252897f..ebf72c10317b 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * Input event codes * @@ -652,6 +652,9 @@ /* Electronic privacy screen control */ #define KEY_PRIVACY_SCREEN_TOGGLE 0x279 +/* Select an area of screen to be copied */ +#define KEY_SELECTIVE_SCREENSHOT 0x27a + /* * Some keyboards have keys which do not have a defined meaning, these keys * are intended to be programmed / bound to macros by the user. For most diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 5437690483cd..f9701410d3b5 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -605,6 +605,7 @@ #define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ @@ -680,6 +681,7 @@ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ #define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h index 5351fe172d7e..a678d8fbaa92 100644 --- a/include/standard-headers/linux/vhost_types.h +++ b/include/standard-headers/linux/vhost_types.h @@ -119,6 +119,14 @@ struct vhost_scsi_target { unsigned short reserved; }; +/* VHOST_VDPA specific definitions */ + +struct vhost_vdpa_config { + uint32_t off; + uint32_t len; + uint8_t buf[0]; +}; + /* Feature bits */ /* Log all write descriptors. Can be changed while device is active. */ #define VHOST_F_LOG_ALL 26 diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h index 9375ca2a70de..f343bfefd82c 100644 --- a/include/standard-headers/linux/virtio_balloon.h +++ b/include/standard-headers/linux/virtio_balloon.h @@ -36,6 +36,7 @@ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ #define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ +#define VIRTIO_BALLOON_F_REPORTING 5 /* Page reporting virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -47,8 +48,15 @@ struct virtio_balloon_config { uint32_t num_pages; /* Number of pages we've actually got in balloon. */ uint32_t actual; - /* Free page report command id, readonly by guest */ - uint32_t free_page_report_cmd_id; + /* + * Free page hint command id, readonly by guest. + * Was previously named free_page_report_cmd_id so we + * need to carry that name for legacy support. + */ + union { + uint32_t free_page_hint_cmd_id; + uint32_t free_page_report_cmd_id; /* deprecated */ + }; /* Stores PAGE_POISON if page poisoning is in use */ uint32_t poison_val; }; diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index 585e07b27333..ecc27a17401a 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -46,5 +46,6 @@ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index 260c3681d70d..a90f79e1b17a 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -57,6 +57,9 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ +#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device * with the same MAC. */ @@ -69,6 +72,17 @@ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ +/* supported/enabled hash types */ +#define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2) +#define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5) +#define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6) +#define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7) +#define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8) + struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ uint8_t mac[ETH_ALEN]; @@ -92,6 +106,12 @@ struct virtio_net_config { * Any other value stands for unknown. */ uint8_t duplex; + /* maximum size of RSS key */ + uint8_t rss_max_key_size; + /* maximum number of indirection table entries */ + uint16_t rss_max_indirection_table_length; + /* bitmask of supported VIRTIO_NET_RSS_HASH_ types */ + uint32_t supported_hash_types; } QEMU_PACKED; /* @@ -104,6 +124,7 @@ struct virtio_net_config { struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ uint8_t flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ @@ -113,11 +134,46 @@ struct virtio_net_hdr_v1 { uint8_t gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ - __virtio16 csum_start; /* Position to start checksumming from */ - __virtio16 csum_offset; /* Offset after that to place checksum */ + union { + struct { + __virtio16 csum_start; + __virtio16 csum_offset; + }; + /* Checksum calculation */ + struct { + /* Position to start checksumming from */ + __virtio16 start; + /* Offset after that to place checksum */ + __virtio16 offset; + } csum; + /* Receive Segment Coalescing */ + struct { + /* Number of coalesced segments */ + uint16_t segments; + /* Number of duplicated acks */ + uint16_t dup_acks; + } rsc; + }; __virtio16 num_buffers; /* Number of merged rx buffers */ }; +struct virtio_net_hdr_v1_hash { + struct virtio_net_hdr_v1 hdr; + uint32_t hash_value; +#define VIRTIO_NET_HASH_REPORT_NONE 0 +#define VIRTIO_NET_HASH_REPORT_IPv4 1 +#define VIRTIO_NET_HASH_REPORT_TCPv4 2 +#define VIRTIO_NET_HASH_REPORT_UDPv4 3 +#define VIRTIO_NET_HASH_REPORT_IPv6 4 +#define VIRTIO_NET_HASH_REPORT_TCPv6 5 +#define VIRTIO_NET_HASH_REPORT_UDPv6 6 +#define VIRTIO_NET_HASH_REPORT_IPv6_EX 7 +#define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8 +#define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9 + uint16_t hash_report; + uint16_t padding; +}; + #ifndef VIRTIO_NET_NO_LEGACY /* This header comes first in the scatter-gather list. * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must @@ -228,7 +284,9 @@ struct virtio_net_ctrl_mac { /* * Control Receive Flow Steering - * + */ +#define VIRTIO_NET_CTRL_MQ 4 +/* * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET * enables Receive Flow Steering, specifying the number of the transmit and * receive queues that will be used. After the command is consumed and acked by @@ -241,11 +299,47 @@ struct virtio_net_ctrl_mq { __virtio16 virtqueue_pairs; }; -#define VIRTIO_NET_CTRL_MQ 4 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +/* + * The command VIRTIO_NET_CTRL_MQ_RSS_CONFIG has the same effect as + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET does and additionally configures + * the receive steering to use a hash calculated for incoming packet + * to decide on receive virtqueue to place the packet. The command + * also provides parameters to calculate a hash and receive virtqueue. + */ +struct virtio_net_rss_config { + uint32_t hash_types; + uint16_t indirection_table_mask; + uint16_t unclassified_queue; + uint16_t indirection_table[1/* + indirection_table_mask */]; + uint16_t max_tx_vq; + uint8_t hash_key_length; + uint8_t hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 + +/* + * The command VIRTIO_NET_CTRL_MQ_HASH_CONFIG requests the device + * to include in the virtio header of the packet the value of the + * calculated hash and the report type of hash. It also provides + * parameters for hash calculation. The command requires feature + * VIRTIO_NET_F_HASH_REPORT to be negotiated to extend the + * layout of virtio header as defined in virtio_net_hdr_v1_hash. + */ +struct virtio_net_hash_config { + uint32_t hash_types; + /* for compatibility with virtio_net_rss_config */ + uint16_t reserved[4]; + uint8_t hash_key_length; + uint8_t hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 + /* * Control network offloads * diff --git a/linux-headers/COPYING b/linux-headers/COPYING index da4cb28febe6..a635a38ef940 100644 --- a/linux-headers/COPYING +++ b/linux-headers/COPYING @@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see: Documentation/process/license-rules.rst for more details. + +All contributions to the Linux Kernel are subject to this COPYING file. diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 503d3f42da16..3f3f780c8c65 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -390,6 +390,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index f6e06fcfbdcf..1e6c1a586776 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -429,4 +429,5 @@ #define __NR_openat2 437 #define __NR_pidfd_getfd 438 + #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 924f826d2d48..6daf0aecb298 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -351,4 +351,5 @@ #define __NR_openat2 437 #define __NR_pidfd_getfd 438 + #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 010307757b1b..e3f17ef370fc 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -340,4 +340,5 @@ #define __NR_preadv2 (__X32_SYSCALL_BIT + 546) #define __NR_pwritev2 (__X32_SYSCALL_BIT + 547) + #endif /* _ASM_X86_UNISTD_X32_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 265099100e65..9804495a46c5 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -474,12 +474,17 @@ struct kvm_s390_mem_op { __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ - __u8 ar; /* the access register number */ - __u8 reserved[31]; /* should be set to 0 */ + union { + __u8 ar; /* the access register number */ + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* should be set to 0 */ + }; }; /* types for kvm_s390_mem_op->op */ #define KVM_S390_MEMOP_LOGICAL_READ 0 #define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) @@ -1010,6 +1015,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_NISV_TO_USER 177 #define KVM_CAP_ARM_INJECT_EXT_DABT 178 #define KVM_CAP_S390_VCPU_RESETS 179 +#define KVM_CAP_S390_PROTECTED 180 +#define KVM_CAP_PPC_SECURE_GUEST 181 #ifdef KVM_CAP_IRQ_ROUTING @@ -1478,6 +1485,39 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +/* Available with KVM_CAP_S390_PROTECTED */ +#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1628,4 +1668,7 @@ struct kvm_hyperv_eventfd { #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) +#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h index 1f6e2cd89ccb..51ea363759f0 100644 --- a/linux-headers/linux/mman.h +++ b/linux-headers/linux/mman.h @@ -5,8 +5,9 @@ #include #include -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index ce78878d127e..8d3996eb8285 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -19,7 +19,8 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -34,7 +35,8 @@ #define UFFD_API_RANGE_IOCTLS \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_ZEROPAGE) + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY) @@ -52,6 +54,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -68,6 +71,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) /* read() structure */ struct uffd_msg { @@ -203,13 +208,14 @@ struct uffdio_copy { __u64 dst; __u64 src; __u64 len; +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) /* - * There will be a wrprotection flag later that allows to map - * pages wrprotected on the fly. And such a flag will be - * available if the wrprotection ioctl are implemented for the - * range according to the uffdio_register.ioctls. + * UFFDIO_COPY_MODE_WP will map the page write protected on + * the fly. UFFDIO_COPY_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. */ -#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_COPY_MODE_WP ((__u64)1<<1) __u64 mode; /* @@ -231,4 +237,24 @@ struct uffdio_zeropage { __s64 zeropage; }; +struct uffdio_writeprotect { + struct uffdio_range range; +/* + * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, + * unset the flag to undo protection of a range which was previously + * write protected. + * + * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up + * any wait thread after the operation succeeds. + * + * NOTE: Write protecting a region (WP=1) is unrelated to page faults, + * therefore DONTWAKE flag is meaningless with WP=1. Removing write + * protection (WP=0) in response to a page fault wakes the faulting + * task unless DONTWAKE is set. + */ +#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) +#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) + __u64 mode; +}; + #endif /* _LINUX_USERFAULTFD_H */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index fb10370d2928..a41c45286511 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -707,6 +707,43 @@ struct vfio_device_ioeventfd { #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) +/** + * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_device_feature) + * + * Get, set, or probe feature data of the device. The feature is selected + * using the FEATURE_MASK portion of the flags field. Support for a feature + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe + * may optionally include the GET and/or SET bits to determine read vs write + * access of the feature respectively. Probing a feature will return success + * if the feature is supported and all of the optionally indicated GET/SET + * methods are supported. The format of the data portion of the structure is + * specific to the given feature. The data portion is not required for + * probing. GET and SET are mutually exclusive, except for use with PROBE. + * + * Return 0 on success, -errno on failure. + */ +struct vfio_device_feature { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ +#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ +#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ +#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ + __u8 data[]; +}; + +#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing + * open VFs. Data provided when setting this feature is a 16-byte array + * (__u8 b[16]), representing a UUID. + */ +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + /* -------- API for Type1 VFIO IOMMU -------- */ /** diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 40d028eed645..9fe72e4b1373 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -116,4 +116,28 @@ #define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) #define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) +/* VHOST_VDPA specific defines */ + +/* Get the device id. The device ids follow the same definition of + * the device id defined in virtio-spec. + */ +#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32) +/* Get and set the status. The status bits follow the same definition + * of the device status defined in virtio-spec. + */ +#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8) +#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) +/* Get and set the device config. The device config follows the same + * definition of the device config defined in virtio-spec. + */ +#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \ + struct vhost_vdpa_config) +#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \ + struct vhost_vdpa_config) +/* Enable/disable the ring. */ +#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ + struct vhost_vring_state) +/* Get the max ring size. */ +#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) + #endif diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 427a46e3e1b6..f2ccf0a06a0c 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -37,6 +37,7 @@ #include "sysemu/hw_accel.h" #include "hw/qdev-properties.h" #ifndef CONFIG_USER_ONLY +#include "hw/s390x/pv.h" #include "hw/boards.h" #include "sysemu/arch_init.h" #include "sysemu/sysemu.h" @@ -76,16 +77,24 @@ static bool s390_cpu_has_work(CPUState *cs) static void s390_cpu_load_normal(CPUState *s) { S390CPU *cpu = S390_CPU(s); - uint64_t spsw = ldq_phys(s->as, 0); - - cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; - /* - * Invert short psw indication, so SIE will report a specification - * exception if it was not set. - */ - cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; - cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; + uint64_t spsw; + if (!s390_is_pv()) { + spsw = ldq_phys(s->as, 0); + cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; + /* + * Invert short psw indication, so SIE will report a specification + * exception if it was not set. + */ + cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; + cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; + } else { + /* + * Firmware requires us to set the load state before we set + * the cpu to operating on protected guests. + */ + s390_cpu_set_state(S390_CPU_STATE_LOAD, cpu); + } s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); } #endif diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 1d17709d6e10..035427521cec 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -823,7 +823,12 @@ int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, #define s390_cpu_virt_mem_check_write(cpu, laddr, ar, len) \ s390_cpu_virt_mem_rw(cpu, laddr, ar, NULL, len, true) void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra); - +int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, + int len, bool is_write); +#define s390_cpu_pv_mem_read(cpu, offset, dest, len) \ + s390_cpu_pv_mem_rw(cpu, offset, dest, len, false) +#define s390_cpu_pv_mem_write(cpu, offset, dest, len) \ + s390_cpu_pv_mem_rw(cpu, offset, dest, len, true) /* sigp.c */ int s390_cpu_restart(S390CPU *cpu); diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h index 31dff0d84e97..60db28351d05 100644 --- a/target/s390x/cpu_features_def.inc.h +++ b/target/s390x/cpu_features_def.inc.h @@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") +DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") diff --git a/target/s390x/diag.c b/target/s390x/diag.c index 54e5670b3fd6..1a4842956402 100644 --- a/target/s390x/diag.c +++ b/target/s390x/diag.c @@ -20,6 +20,8 @@ #include "sysemu/cpus.h" #include "hw/s390x/ipl.h" #include "hw/s390x/s390-virtio-ccw.h" +#include "hw/s390x/pv.h" +#include "kvm_s390x.h" int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) { @@ -49,20 +51,13 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) return diag288_class->handle_timer(diag288, func, timeout); } -#define DIAG_308_RC_OK 0x0001 -#define DIAG_308_RC_NO_CONF 0x0102 -#define DIAG_308_RC_INVALID 0x0402 - -#define DIAG308_RESET_MOD_CLR 0 -#define DIAG308_RESET_LOAD_NORM 1 -#define DIAG308_LOAD_CLEAR 3 -#define DIAG308_LOAD_NORMAL_DUMP 4 -#define DIAG308_SET 5 -#define DIAG308_STORE 6 - static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, uintptr_t ra, bool write) { + /* Handled by the Ultravisor */ + if (s390_is_pv()) { + return 0; + } if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return -1; @@ -78,7 +73,9 @@ static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) { + bool valid; CPUState *cs = env_cpu(env); + S390CPU *cpu = S390_CPU(cs); uint64_t addr = env->regs[r1]; uint64_t subcode = env->regs[r3]; IplParameterBlock *iplb; @@ -93,6 +90,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) return; } + if (subcode >= DIAG308_PV_SET && !s390_has_feat(S390_FEAT_UNPACK)) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } + switch (subcode) { case DIAG308_RESET_MOD_CLR: s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); @@ -105,19 +107,30 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) s390_ipl_reset_request(cs, S390_RESET_REIPL); break; case DIAG308_SET: + case DIAG308_PV_SET: if (diag308_parm_check(env, r1, addr, ra, false)) { return; } iplb = g_new0(IplParameterBlock, 1); - cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); + if (!s390_is_pv()) { + cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); + } else { + s390_cpu_pv_mem_read(cpu, 0, iplb, sizeof(iplb->len)); + } + if (!iplb_valid_len(iplb)) { env->regs[r1 + 1] = DIAG_308_RC_INVALID; goto out; } - cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); + if (!s390_is_pv()) { + cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); + } else { + s390_cpu_pv_mem_read(cpu, 0, iplb, be32_to_cpu(iplb->len)); + } - if (!iplb_valid(iplb)) { + valid = subcode == DIAG308_PV_SET ? iplb_valid_pv(iplb) : iplb_valid(iplb); + if (!valid) { env->regs[r1 + 1] = DIAG_308_RC_INVALID; goto out; } @@ -128,17 +141,43 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) g_free(iplb); return; case DIAG308_STORE: + case DIAG308_PV_STORE: if (diag308_parm_check(env, r1, addr, ra, true)) { return; } - iplb = s390_ipl_get_iplb(); - if (iplb) { - cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); - env->regs[r1 + 1] = DIAG_308_RC_OK; + if (subcode == DIAG308_PV_STORE) { + iplb = s390_ipl_get_iplb_pv(); } else { + iplb = s390_ipl_get_iplb(); + } + if (!iplb) { env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; + return; } + + if (!s390_is_pv()) { + cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); + } else { + s390_cpu_pv_mem_write(cpu, 0, iplb, be32_to_cpu(iplb->len)); + } + env->regs[r1 + 1] = DIAG_308_RC_OK; return; + case DIAG308_PV_START: + iplb = s390_ipl_get_iplb_pv(); + if (!iplb) { + env->regs[r1 + 1] = DIAG_308_RC_NO_PV_CONF; + return; + } + + if (kvm_s390_get_hpage_1m()) { + error_report("Protected VMs can currently not be backed with " + "huge pages"); + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; + return; + } + + s390_ipl_reset_request(cs, S390_RESET_PV); + break; default: s390_program_interrupt(env, PGM_SPECIFICATION, ra); break; diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 6278845b12b8..8ddeebc54419 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -562,6 +562,7 @@ static uint16_t full_GEN15_GA1[] = { S390_FEAT_GROUP_MSA_EXT_9, S390_FEAT_GROUP_MSA_EXT_9_PCKMO, S390_FEAT_ETOKEN, + S390_FEAT_UNPACK, }; /* Default features (in order of release) diff --git a/target/s390x/helper.c b/target/s390x/helper.c index ed726849114f..09f60406aa33 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -25,6 +25,7 @@ #include "qemu/timer.h" #include "qemu/qemu-print.h" #include "hw/s390x/ioinst.h" +#include "hw/s390x/pv.h" #include "sysemu/hw_accel.h" #include "sysemu/runstate.h" #ifndef CONFIG_USER_ONLY @@ -246,6 +247,11 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) hwaddr len = sizeof(*sa); int i; + /* For PVMs storing will occur when this cpu enters SIE again */ + if (s390_is_pv()) { + return 0; + } + sa = cpu_physical_memory_map(addr, &len, true); if (!sa) { return -EFAULT; diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index 0e840cc5792a..7a14c52c123b 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -16,6 +16,25 @@ #include "hw/s390x/ioinst.h" #include "trace.h" #include "hw/s390x/s390-pci-bus.h" +#include "hw/s390x/pv.h" + +/* All I/O instructions but chsc use the s format */ +static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, + uint8_t *ar) +{ + /* + * Addresses for protected guests are all offsets into the + * satellite block which holds the IO control structures. Those + * control structures are always starting at offset 0 and are + * always aligned and accessible. So we can return 0 here which + * will pass the following address checks. + */ + if (s390_is_pv()) { + *ar = 0; + return 0; + } + return decode_basedisp_s(env, ipb, ar); +} int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, int *schid) @@ -114,12 +133,14 @@ void ioinst_handle_msch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) CPUS390XState *env = &cpu->env; uint8_t ar; - addr = decode_basedisp_s(env, ipb, &ar); + addr = get_address_from_regs(env, ipb, &ar); if (addr & 3) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return; } - if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { + if (s390_is_pv()) { + s390_cpu_pv_mem_read(cpu, addr, &schib, sizeof(schib)); + } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { s390_cpu_virt_mem_handle_exc(cpu, ra); return; } @@ -171,12 +192,14 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) CPUS390XState *env = &cpu->env; uint8_t ar; - addr = decode_basedisp_s(env, ipb, &ar); + addr = get_address_from_regs(env, ipb, &ar); if (addr & 3) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return; } - if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { + if (s390_is_pv()) { + s390_cpu_pv_mem_read(cpu, addr, &orig_orb, sizeof(orb)); + } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { s390_cpu_virt_mem_handle_exc(cpu, ra); return; } @@ -203,7 +226,7 @@ void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) CPUS390XState *env = &cpu->env; uint8_t ar; - addr = decode_basedisp_s(env, ipb, &ar); + addr = get_address_from_regs(env, ipb, &ar); if (addr & 3) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return; @@ -212,14 +235,19 @@ void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) cc = css_do_stcrw(&crw); /* 0 - crw stored, 1 - zeroes stored */ - if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { + if (s390_is_pv()) { + s390_cpu_pv_mem_write(cpu, addr, &crw, sizeof(crw)); setcc(cpu, cc); } else { - if (cc == 0) { - /* Write failed: requeue CRW since STCRW is suppressing */ - css_undo_stcrw(&crw); + if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { + setcc(cpu, cc); + } else { + if (cc == 0) { + /* Write failed: requeue CRW since STCRW is suppressing */ + css_undo_stcrw(&crw); + } + s390_cpu_virt_mem_handle_exc(cpu, ra); } - s390_cpu_virt_mem_handle_exc(cpu, ra); } } @@ -234,13 +262,20 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, CPUS390XState *env = &cpu->env; uint8_t ar; - addr = decode_basedisp_s(env, ipb, &ar); + addr = get_address_from_regs(env, ipb, &ar); if (addr & 3) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return; } if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { + /* + * The Ultravisor checks schid bit 16 to be one and bits 0-12 + * to be 0 and injects a operand exception itself. + * + * Hence we should never end up here. + */ + g_assert(!s390_is_pv()); /* * As operand exceptions have a lower priority than access exceptions, * we check whether the memory area is writeable (injecting the @@ -273,14 +308,17 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, } } if (cc != 3) { - if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, - sizeof(schib)) != 0) { + if (s390_is_pv()) { + s390_cpu_pv_mem_write(cpu, addr, &schib, sizeof(schib)); + } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, + sizeof(schib)) != 0) { s390_cpu_virt_mem_handle_exc(cpu, ra); return; } } else { /* Access exceptions have a higher priority than cc3 */ - if (s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { + if (!s390_is_pv() && + s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { s390_cpu_virt_mem_handle_exc(cpu, ra); return; } @@ -303,7 +341,7 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) return -EIO; } trace_ioinst_sch_id("tsch", cssid, ssid, schid); - addr = decode_basedisp_s(env, ipb, &ar); + addr = get_address_from_regs(env, ipb, &ar); if (addr & 3) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); return -EIO; @@ -317,7 +355,9 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) } /* 0 - status pending, 1 - not status pending, 3 - not operational */ if (cc != 3) { - if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { + if (s390_is_pv()) { + s390_cpu_pv_mem_write(cpu, addr, &irb, irb_len); + } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { s390_cpu_virt_mem_handle_exc(cpu, ra); return -EFAULT; } @@ -325,7 +365,8 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) } else { irb_len = sizeof(irb) - sizeof(irb.emw); /* Access exceptions have a higher priority than cc3 */ - if (s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { + if (!s390_is_pv() && + s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { s390_cpu_virt_mem_handle_exc(cpu, ra); return -EFAULT; } @@ -601,7 +642,7 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) { ChscReq *req; ChscResp *res; - uint64_t addr; + uint64_t addr = 0; int reg; uint16_t len; uint16_t command; @@ -610,7 +651,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) trace_ioinst("chsc"); reg = (ipb >> 20) & 0x00f; - addr = env->regs[reg]; + if (!s390_is_pv()) { + addr = env->regs[reg]; + } /* Page boundary? */ if (addr & 0xfff) { s390_program_interrupt(env, PGM_SPECIFICATION, ra); @@ -621,7 +664,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) * present CHSC sub-handlers ... if we ever need more, we should take * care of req->len here first. */ - if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { + if (s390_is_pv()) { + s390_cpu_pv_mem_read(cpu, addr, buf, sizeof(ChscReq)); + } else if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { s390_cpu_virt_mem_handle_exc(cpu, ra); return; } @@ -654,11 +699,16 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) break; } - if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, - be16_to_cpu(res->len))) { + if (s390_is_pv()) { + s390_cpu_pv_mem_write(cpu, addr + len, res, be16_to_cpu(res->len)); setcc(cpu, 0); /* Command execution complete */ } else { - s390_cpu_virt_mem_handle_exc(cpu, ra); + if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, + be16_to_cpu(res->len))) { + setcc(cpu, 0); /* Command execution complete */ + } else { + s390_cpu_virt_mem_handle_exc(cpu, ra); + } } } diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c index c4cd497f850e..aa185017a2a8 100644 --- a/target/s390x/kvm-stub.c +++ b/target/s390x/kvm-stub.c @@ -39,6 +39,11 @@ int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu) return 0; } +int kvm_s390_get_hpage_1m(void) +{ + return 0; +} + int kvm_s390_get_ri(void) { return 0; diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 7f7ebab84279..69881a0da0b3 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -50,6 +50,7 @@ #include "exec/memattrs.h" #include "hw/s390x/s390-virtio-ccw.h" #include "hw/s390x/s390-virtio-hcall.h" +#include "hw/s390x/pv.h" #ifndef DEBUG_KVM #define DEBUG_KVM 0 @@ -115,6 +116,8 @@ #define ICPT_CPU_STOP 0x28 #define ICPT_OPEREXC 0x2c #define ICPT_IO 0x40 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_INSTR_NOTIFICATION 0x6c #define NR_LOCAL_IRQS 32 /* @@ -152,6 +155,7 @@ static int cap_ri; static int cap_gs; static int cap_hpage_1m; static int cap_vcpu_resets; +static int cap_protected; static int active_cmma; @@ -321,6 +325,11 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) cap_hpage_1m = 1; } +int kvm_s390_get_hpage_1m(void) +{ + return cap_hpage_1m; +} + static void ccw_machine_class_foreach(ObjectClass *oc, void *opaque) { MachineClass *mc = MACHINE_CLASS(oc); @@ -344,6 +353,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) || !kvm_check_extension(s, KVM_CAP_S390_COW)) { @@ -844,6 +854,30 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, return ret; } +int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf, + int len, bool is_write) +{ + struct kvm_s390_mem_op mem_op = { + .sida_offset = offset, + .size = len, + .op = is_write ? KVM_S390_MEMOP_SIDA_WRITE + : KVM_S390_MEMOP_SIDA_READ, + .buf = (uint64_t)hostbuf, + }; + int ret; + + if (!cap_mem_op || !cap_protected) { + return -ENOSYS; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { + error_report("KVM_S390_MEM_OP failed: %s", strerror(-ret)); + abort(); + } + return ret; +} + /* * Legacy layout for s390: * Older S390 KVM requires the topmost vma of the RAM to be @@ -1199,12 +1233,27 @@ static void kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, sccb = env->regs[ipbh0 & 0xf]; code = env->regs[(ipbh0 & 0xf0) >> 4]; - r = sclp_service_call(env, sccb, code); - if (r < 0) { - kvm_s390_program_interrupt(cpu, -r); - return; + switch (run->s390_sieic.icptcode) { + case ICPT_PV_INSTR_NOTIFICATION: + g_assert(s390_is_pv()); + /* The notification intercepts are currently handled by KVM */ + error_report("unexpected SCLP PV notification"); + exit(1); + break; + case ICPT_PV_INSTR: + g_assert(s390_is_pv()); + sclp_service_call_protected(env, sccb, code); + /* Setting the CC is done by the Ultravisor. */ + break; + case ICPT_INSTRUCTION: + g_assert(!s390_is_pv()); + r = sclp_service_call(env, sccb, code); + if (r < 0) { + kvm_s390_program_interrupt(cpu, -r); + return; + } + setcc(cpu, r); } - setcc(cpu, r); } static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) @@ -1693,6 +1742,8 @@ static int handle_intercept(S390CPU *cpu) (long)cs->kvm_run->psw_addr); switch (icpt_code) { case ICPT_INSTRUCTION: + case ICPT_PV_INSTR: + case ICPT_PV_INSTR_NOTIFICATION: r = handle_instruction(cpu, run); break; case ICPT_PROGRAM: @@ -1773,7 +1824,9 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) SysIB_322 sysib; int del, i; - if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { + if (s390_is_pv()) { + s390_cpu_pv_mem_read(cpu, 0, &sysib, sizeof(sysib)); + } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { return; } /* Shift the stack of Extended Names to prepare for our own data */ @@ -1826,7 +1879,11 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) /* Insert UUID */ memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); - s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); + if (s390_is_pv()) { + s390_cpu_pv_mem_write(cpu, 0, &sysib, sizeof(sysib)); + } else { + s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); + } } static int handle_stsi(S390CPU *cpu) @@ -2368,6 +2425,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) clear_bit(S390_FEAT_BPB, model->features); } + /* + * If we have support for protected virtualization, indicate + * the protected virtualization IPL unpack facility. + */ + if (cap_protected) { + set_bit(S390_FEAT_UNPACK, model->features); + } + /* We emulate a zPCI bus and AEN, therefore we don't need HW support */ set_bit(S390_FEAT_ZPCI, model->features); set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features); diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h index 0b21789796d7..6ab17c81b73a 100644 --- a/target/s390x/kvm_s390x.h +++ b/target/s390x/kvm_s390x.h @@ -19,10 +19,13 @@ void kvm_s390_vcpu_interrupt(S390CPU *cpu, struct kvm_s390_irq *irq); void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code); int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, int len, bool is_write); +int kvm_s390_mem_op_pv(S390CPU *cpu, vaddr addr, void *hostbuf, int len, + bool is_write); void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code); int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); +int kvm_s390_get_hpage_1m(void); int kvm_s390_get_ri(void); int kvm_s390_get_gs(void); int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c index 0be2f300bbe4..7d9f3059cd50 100644 --- a/target/s390x/mmu_helper.c +++ b/target/s390x/mmu_helper.c @@ -474,6 +474,20 @@ static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages, return 0; } +int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, + int len, bool is_write) +{ + int ret; + + if (kvm_enabled()) { + ret = kvm_s390_mem_op_pv(cpu, offset, hostbuf, len, is_write); + } else { + /* Protected Virtualization is a KVM/Hardware only feature */ + g_assert_not_reached(); + } + return ret; +} + /** * s390_cpu_virt_mem_rw: * @laddr: the logical start address