diff --git a/MAINTAINERS b/MAINTAINERS index 3275cc6bbed1..fb81b3a8eb06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -88,6 +88,7 @@ F: hw/char/terminal3270.c F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: hw/s390x/ +F: hw/vfio/ap.c F: hw/vfio/ccw.c F: hw/watchdog/wdt_diag288.c F: include/hw/s390x/ @@ -95,6 +96,7 @@ F: include/hw/watchdog/wdt_diag288.h F: pc-bios/s390-ccw/ F: pc-bios/s390-ccw.img F: target/s390x/ +F: docs/vfio-ap.txt K: ^Subject:.*(?i)s390x? T: git git://github.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -1207,6 +1209,20 @@ F: include/hw/s390x/s390-ccw.h T: git git://github.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org +vfio-ap +M: Christian Borntraeger +M: Tony Krowiak +M: Halil Pasic +M: Pierre Morel +S: Supported +F: hw/s390x/ap-device.c +F: hw/s390x/ap-bridge.c +F: include/hw/s390x/ap-device.h +F: include/hw/s390x/ap-bridge.h +F: hw/vfio/ap.c +F: docs/vfio-ap.txt +L: qemu-s390x@nongnu.org + vhost M: Michael S. Tsirkin S: Supported diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index d6b67d50f0e4..5eef37592451 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -7,3 +7,4 @@ CONFIG_S390_FLIC=y CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) CONFIG_VFIO_CCW=$(CONFIG_LINUX) CONFIG_WDT_DIAG288=y +CONFIG_VFIO_AP=$(CONFIG_LINUX) diff --git a/docs/vfio-ap.txt b/docs/vfio-ap.txt new file mode 100644 index 000000000000..12339684cd52 --- /dev/null +++ b/docs/vfio-ap.txt @@ -0,0 +1,825 @@ +Adjunct Processor (AP) Device +============================= + +Contents: +========= +* Introduction +* AP Architectural Overview +* Start Interpretive Execution (SIE) Instruction +* AP Matrix Configuration on Linux Host +* Starting a Linux Guest Configured with an AP Matrix +* Example: Configure AP Matrices for Three Linux Guests + +Introduction: +============ +The IBM Adjunct Processor (AP) Cryptographic Facility is comprised +of three AP instructions and from 1 to 256 PCIe cryptographic adapter cards. +These AP devices provide cryptographic functions to all CPUs assigned to a +linux system running in an IBM Z system LPAR. + +On s390x, AP adapter cards are exposed via the AP bus. This document +describes how those cards may be made available to KVM guests using the +VFIO mediated device framework. + +AP Architectural Overview: +========================= +In order understand the terminology used in the rest of this document, let's +start with some definitions: + +* AP adapter + + An AP adapter is an IBM Z adapter card that can perform cryptographic + functions. There can be from 0 to 256 adapters assigned to an LPAR depending + on the machine model. Adapters assigned to the LPAR in which a linux host is + running will be available to the linux host. Each adapter is identified by a + number from 0 to 255; however, the maximum adapter number allowed is + determined by machine model. When installed, an AP adapter is accessed by + AP instructions executed by any CPU. + +* AP domain + + An adapter is partitioned into domains. Each domain can be thought of as + a set of hardware registers for processing AP instructions. An adapter can + hold up to 256 domains; however, the maximum domain number allowed is + determined by machine model. Each domain is identified by a number from 0 to + 255. Domains can be further classified into two types: + + * Usage domains are domains that can be accessed directly to process AP + commands + + * Control domains are domains that are accessed indirectly by AP + commands sent to a usage domain to control or change the domain; for + example, to set a secure private key for the domain. + +* AP Queue + + An AP queue is the means by which an AP command-request message is sent to an + AP usage domain inside a specific AP. An AP queue is identified by a tuple + comprised of an AP adapter ID (APID) and an AP queue index (APQI). The + APQI corresponds to a given usage domain number within the adapter. This tuple + forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP + instructions include a field containing the APQN to identify the AP queue to + which the AP command-request message is to be sent for processing. + +* AP Instructions: + + There are three AP instructions: + + * NQAP: to enqueue an AP command-request message to a queue + * DQAP: to dequeue an AP command-reply message from a queue + * PQAP: to administer the queues + + AP instructions identify the domain that is targeted to process the AP + command; this must be one of the usage domains. An AP command may modify a + domain that is not one of the usage domains, but the modified domain + must be one of the control domains. + +Start Interpretive Execution (SIE) Instruction +============================================== +A KVM guest is started by executing the Start Interpretive Execution (SIE) +instruction. The SIE state description is a control block that contains the +state information for a KVM guest and is supplied as input to the SIE +instruction. The SIE state description contains a satellite control block called +the Crypto Control Block (CRYCB). The CRYCB contains three fields to identify +the adapters, usage domains and control domains assigned to the KVM guest: + +* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned + to the KVM guest. Each bit in the mask, from left to right, corresponds to + an APID from 0-255. If a bit is set, the corresponding adapter is valid for + use by the KVM guest. + +* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains + assigned to the KVM guest. Each bit in the mask, from left to right, + corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the + corresponding queue is valid for use by the KVM guest. + +* The AP Domain Mask field is a bit mask that identifies the AP control domains + assigned to the KVM guest. The ADM bit mask controls which domains can be + changed by an AP command-request message sent to a usage domain from the + guest. Each bit in the mask, from left to right, corresponds to a domain from + 0-255. If a bit is set, the corresponding domain can be modified by an AP + command-request message sent to a usage domain. + +If you recall from the description of an AP Queue, AP instructions include +an APQN to identify the AP adapter and AP queue to which an AP command-request +message is to be sent (NQAP and PQAP instructions), or from which a +command-reply message is to be received (DQAP instruction). The validity of an +APQN is defined by the matrix calculated from the APM and AQM; it is the +cross product of all assigned adapter numbers (APM) with all assigned queue +indexes (AQM). For example, if adapters 1 and 2 and usage domains 5 and 6 are +assigned to a guest, the APQNs (1,5), (1,6), (2,5) and (2,6) will be valid for +the guest. + +The APQNs can provide secure key functionality - i.e., a private key is stored +on the adapter card for each of its domains - so each APQN must be assigned to +at most one guest or the linux host. + + Example 1: Valid configuration: + ------------------------------ + Guest1: adapters 1,2 domains 5,6 + Guest2: adapter 1,2 domain 7 + + This is valid because both guests have a unique set of APQNs: Guest1 has + APQNs (1,5), (1,6), (2,5) and (2,6); Guest2 has APQNs (1,7) and (2,7). + + Example 2: Valid configuration: + ------------------------------ + Guest1: adapters 1,2 domains 5,6 + Guest2: adapters 3,4 domains 5,6 + + This is also valid because both guests have a unique set of APQNs: + Guest1 has APQNs (1,5), (1,6), (2,5), (2,6); + Guest2 has APQNs (3,5), (3,6), (4,5), (4,6) + + Example 3: Invalid configuration: + -------------------------------- + Guest1: adapters 1,2 domains 5,6 + Guest2: adapter 1 domains 6,7 + + This is an invalid configuration because both guests have access to + APQN (1,6). + +AP Matrix Configuration on Linux Host: +===================================== +A linux system is a guest of the LPAR in which it is running and has access to +the AP resources configured for the LPAR. The LPAR's AP matrix is +configured via its Activation Profile which can be edited on the HMC. When the +linux system is started, the AP bus will detect the AP devices assigned to the +LPAR and create the following in sysfs: + +/sys/bus/ap +... [devices] +...... xx.yyyy +...... ... +...... cardxx +...... ... + +Where: + cardxx is AP adapter number xx (in hex) +....xx.yyyy is an APQN with xx specifying the APID and yyyy specifying the + APQI + +For example, if AP adapters 5 and 6 and domains 4, 71 (0x47), 171 (0xab) and +255 (0xff) are configured for the LPAR, the sysfs representation on the linux +host system would look like this: + +/sys/bus/ap +... [devices] +...... 05.0004 +...... 05.0047 +...... 05.00ab +...... 05.00ff +...... 06.0004 +...... 06.0047 +...... 06.00ab +...... 06.00ff +...... card05 +...... card06 + +A set of default device drivers are also created to control each type of AP +device that can be assigned to the LPAR on which a linux host is running: + +/sys/bus/ap +... [drivers] +...... [cex2acard] for Crypto Express 2/3 accelerator cards +...... [cex2aqueue] for AP queues served by Crypto Express 2/3 + accelerator cards +...... [cex4card] for Crypto Express 4/5/6 accelerator and coprocessor + cards +...... [cex4queue] for AP queues served by Crypto Express 4/5/6 + accelerator and coprocessor cards +...... [pcixcccard] for Crypto Express 2/3 coprocessor cards +...... [pcixccqueue] for AP queues served by Crypto Express 2/3 + coprocessor cards + +Binding AP devices to device drivers +------------------------------------ +There are two sysfs files that specify bitmasks marking a subset of the APQN +range as 'usable by the default AP queue device drivers' or 'not usable by the +default device drivers' and thus available for use by the alternate device +driver(s). The sysfs locations of the masks are: + + /sys/bus/ap/apmask + /sys/bus/ap/aqmask + + The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs + (APID). Each bit in the mask, from left to right (i.e., from most significant + to least significant bit in big endian order), corresponds to an APID from + 0-255. If a bit is set, the APID is marked as usable only by the default AP + queue device drivers; otherwise, the APID is usable by the vfio_ap + device driver. + + The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes + (APQI). Each bit in the mask, from left to right (i.e., from most significant + to least significant bit in big endian order), corresponds to an APQI from + 0-255. If a bit is set, the APQI is marked as usable only by the default AP + queue device drivers; otherwise, the APQI is usable by the vfio_ap device + driver. + + Take, for example, the following mask: + + 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + + It indicates: + + 1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6 + belong to the vfio_ap device driver's pool. + + The APQN of each AP queue device assigned to the linux host is checked by the + AP bus against the set of APQNs derived from the cross product of APIDs + and APQIs marked as usable only by the default AP queue device drivers. If a + match is detected, only the default AP queue device drivers will be probed; + otherwise, the vfio_ap device driver will be probed. + + By default, the two masks are set to reserve all APQNs for use by the default + AP queue device drivers. There are two ways the default masks can be changed: + + 1. The sysfs mask files can be edited by echoing a string into the + respective sysfs mask file in one of two formats: + + * An absolute hex string starting with 0x - like "0x12345678" - sets + the mask. If the given string is shorter than the mask, it is padded + with 0s on the right; for example, specifying a mask value of 0x41 is + the same as specifying: + + 0x4100000000000000000000000000000000000000000000000000000000000000 + + Keep in mind that the mask reads from left to right (i.e., most + significant to least significant bit in big endian order), so the mask + above identifies device numbers 1 and 7 (01000001). + + If the string is longer than the mask, the operation is terminated with + an error (EINVAL). + + * Individual bits in the mask can be switched on and off by specifying + each bit number to be switched in a comma separated list. Each bit + number string must be prepended with a ('+') or minus ('-') to indicate + the corresponding bit is to be switched on ('+') or off ('-'). Some + valid values are: + + "+0" switches bit 0 on + "-13" switches bit 13 off + "+0x41" switches bit 65 on + "-0xff" switches bit 255 off + + The following example: + +0,-6,+0x47,-0xf0 + + Switches bits 0 and 71 (0x47) on + Switches bits 6 and 240 (0xf0) off + + Note that the bits not specified in the list remain as they were before + the operation. + + 2. The masks can also be changed at boot time via parameters on the kernel + command line like this: + + ap.apmask=0xffff ap.aqmask=0x40 + + This would create the following masks: + + apmask: + 0xffff000000000000000000000000000000000000000000000000000000000000 + + aqmask: + 0x4000000000000000000000000000000000000000000000000000000000000000 + + Resulting in these two pools: + + default drivers pool: adapter 0-15, domain 1 + alternate drivers pool: adapter 16-255, domains 0, 2-255 + +Configuring an AP matrix for a linux guest. +------------------------------------------ +The sysfs interfaces for configuring an AP matrix for a guest are built on the +VFIO mediated device framework. To configure an AP matrix for a guest, a +mediated matrix device must first be created for the /sys/devices/vfio_ap/matrix +device. When the vfio_ap device driver is loaded, it registers with the VFIO +mediated device framework. When the driver registers, the sysfs interfaces for +creating mediated matrix devices is created: + +/sys/devices +... [vfio_ap] +......[matrix] +......... [mdev_supported_types] +............ [vfio_ap-passthrough] +............... create +............... [devices] + +A mediated AP matrix device is created by writing a UUID to the attribute file +named 'create', for example: + + uuidgen > create + + or + + echo $uuid > create + +When a mediated AP matrix device is created, a sysfs directory named after +the UUID is created in the 'devices' subdirectory: + +/sys/devices +... [vfio_ap] +......[matrix] +......... [mdev_supported_types] +............ [vfio_ap-passthrough] +............... create +............... [devices] +.................. [$uuid] + +There will also be three sets of attribute files created in the mediated +matrix device's sysfs directory to configure an AP matrix for the +KVM guest: + +/sys/devices +... [vfio_ap] +......[matrix] +......... [mdev_supported_types] +............ [vfio_ap-passthrough] +............... create +............... [devices] +.................. [$uuid] +..................... assign_adapter +..................... assign_control_domain +..................... assign_domain +..................... matrix +..................... unassign_adapter +..................... unassign_control_domain +..................... unassign_domain + +assign_adapter + To assign an AP adapter to the mediated matrix device, its APID is written + to the 'assign_adapter' file. This may be done multiple times to assign more + than one adapter. The APID may be specified using conventional semantics + as a decimal, hexadecimal, or octal number. For example, to assign adapters + 4, 5 and 16 to a mediated matrix device in decimal, hexadecimal and octal + respectively: + + echo 4 > assign_adapter + echo 0x5 > assign_adapter + echo 020 > assign_adapter + + In order to successfully assign an adapter: + + * The adapter number specified must represent a value from 0 up to the + maximum adapter number allowed by the machine model. If an adapter number + higher than the maximum is specified, the operation will terminate with + an error (ENODEV). + + * All APQNs that can be derived from the adapter ID being assigned and the + IDs of the previously assigned domains must be bound to the vfio_ap device + driver. If no domains have yet been assigned, then there must be at least + one APQN with the specified APID bound to the vfio_ap driver. If no such + APQNs are bound to the driver, the operation will terminate with an + error (EADDRNOTAVAIL). + + No APQN that can be derived from the adapter ID and the IDs of the + previously assigned domains can be assigned to another mediated matrix + device. If an APQN is assigned to another mediated matrix device, the + operation will terminate with an error (EADDRINUSE). + +unassign_adapter + To unassign an AP adapter, its APID is written to the 'unassign_adapter' + file. This may also be done multiple times to unassign more than one adapter. + +assign_domain + To assign a usage domain, the domain number is written into the + 'assign_domain' file. This may be done multiple times to assign more than one + usage domain. The domain number is specified using conventional semantics as + a decimal, hexadecimal, or octal number. For example, to assign usage domains + 4, 8, and 71 to a mediated matrix device in decimal, hexadecimal and octal + respectively: + + echo 4 > assign_domain + echo 0x8 > assign_domain + echo 0107 > assign_domain + + In order to successfully assign a domain: + + * The domain number specified must represent a value from 0 up to the + maximum domain number allowed by the machine model. If a domain number + higher than the maximum is specified, the operation will terminate with + an error (ENODEV). + + * All APQNs that can be derived from the domain ID being assigned and the IDs + of the previously assigned adapters must be bound to the vfio_ap device + driver. If no domains have yet been assigned, then there must be at least + one APQN with the specified APQI bound to the vfio_ap driver. If no such + APQNs are bound to the driver, the operation will terminate with an + error (EADDRNOTAVAIL). + + No APQN that can be derived from the domain ID being assigned and the IDs + of the previously assigned adapters can be assigned to another mediated + matrix device. If an APQN is assigned to another mediated matrix device, + the operation will terminate with an error (EADDRINUSE). + +unassign_domain + To unassign a usage domain, the domain number is written into the + 'unassign_domain' file. This may be done multiple times to unassign more than + one usage domain. + +assign_control_domain + To assign a control domain, the domain number is written into the + 'assign_control_domain' file. This may be done multiple times to + assign more than one control domain. The domain number may be specified using + conventional semantics as a decimal, hexadecimal, or octal number. For + example, to assign control domains 4, 8, and 71 to a mediated matrix device + in decimal, hexadecimal and octal respectively: + + echo 4 > assign_domain + echo 0x8 > assign_domain + echo 0107 > assign_domain + + In order to successfully assign a control domain, the domain number + specified must represent a value from 0 up to the maximum domain number + allowed by the machine model. If a control domain number higher than the + maximum is specified, the operation will terminate with an error (ENODEV). + +unassign_control_domain + To unassign a control domain, the domain number is written into the + 'unassign_domain' file. This may be done multiple times to unassign more than + one control domain. + +Notes: Hot plug/unplug is not currently supported for mediated AP matrix +devices, so no changes to the AP matrix will be allowed while a guest using +the mediated matrix device is running. Attempts to assign an adapter, +domain or control domain will be rejected and an error (EBUSY) returned. + +Starting a Linux Guest Configured with an AP Matrix: +=================================================== +To provide a mediated matrix device for use by a guest, the following option +must be specified on the QEMU command line: + + -device vfio_ap,sysfsdev=$path-to-mdev + +The sysfsdev parameter specifies the path to the mediated matrix device. +There are a number of ways to specify this path: + +/sys/devices/vfio_ap/matrix/$uuid +/sys/bus/mdev/devices/$uuid +/sys/bus/mdev/drivers/vfio_mdev/$uuid +/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid + +When the linux guest is started, the guest will open the mediated +matrix device's file descriptor to get information about the mediated matrix +device. The vfio_ap device driver will update the APM, AQM, and ADM fields in +the guest's CRYCB with the adapter, usage domain and control domains assigned +via the mediated matrix device's sysfs attribute files. Programs running on the +linux guest will then: + +1. Have direct access to the APQNs derived from the cross product of the AP + adapter numbers (APID) and queue indexes (APQI) specified in the APM and AQM + fields of the guests's CRYCB respectively. These APQNs identify the AP queues + that are valid for use by the guest; meaning, AP commands can be sent by the + guest to any of these queues for processing. + +2. Have authorization to process AP commands to change a control domain + identified in the ADM field of the guest's CRYCB. The AP command must be sent + to a valid APQN (see 1 above). + +CPU model features: + +Three CPU model features are available for controlling guest access to AP +facilities: + +1. AP facilities feature + + The AP facilities feature indicates that AP facilities are installed on the + guest. This feature will be exposed for use only if the AP facilities + are installed on the host system. The feature is s390-specific and is + represented as a parameter of the -cpu option on the QEMU command line: + + qemu-system-s390x -cpu $model,ap=on|off + + Where: + + $model is the CPU model defined for the guest (defaults to the model of + the host system if not specified). + + ap=on|off indicates whether AP facilities are installed (on) or not + (off). The default for CPU models zEC12 or newer + is ap=on. AP facilities must be installed on the guest if a + vfio-ap device (-device vfio-ap,sysfsdev=$path) is configured + for the guest, or the guest will fail to start. + +2. Query Configuration Information (QCI) facility + + The QCI facility is used by the AP bus running on the guest to query the + configuration of the AP facilities. This facility will be available + only if the QCI facility is installed on the host system. The feature is + s390-specific and is represented as a parameter of the -cpu option on the + QEMU command line: + + qemu-system-s390x -cpu $model,apqci=on|off + + Where: + + $model is the CPU model defined for the guest + + apqci=on|off indicates whether the QCI facility is installed (on) or + not (off). The default for CPU models zEC12 or newer + is apqci=on; for older models, QCI will not be installed. + + If QCI is installed (apqci=on) but AP facilities are not + (ap=off), an error message will be logged, but the guest + will be allowed to start. It makes no sense to have QCI + installed if the AP facilities are not; this is considered + an invalid configuration. + + If the QCI facility is not installed, APQNs with an APQI + greater than 15 will not be detected by the AP bus + running on the guest. + +3. Adjunct Process Facility Test (APFT) facility + + The APFT facility is used by the AP bus running on the guest to test the + AP facilities available for a given AP queue. This facility will be available + only if the APFT facility is installed on the host system. The feature is + s390-specific and is represented as a parameter of the -cpu option on the + QEMU command line: + + qemu-system-s390x -cpu $model,apft=on|off + + Where: + + $model is the CPU model defined for the guest (defaults to the model of + the host system if not specified). + + apft=on|off indicates whether the APFT facility is installed (on) or + not (off). The default for CPU models zEC12 and + newer is apft=on for older models, APFT will not be + installed. + + If APFT is installed (apft=on) but AP facilities are not + (ap=off), an error message will be logged, but the guest + will be allowed to start. It makes no sense to have APFT + installed if the AP facilities are not; this is considered + an invalid configuration. + + It also makes no sense to turn APFT off because the AP bus + running on the guest will not detect CEX4 and newer devices + without it. Since only CEX4 and newer devices are supported + for guest usage, no AP devices can be made accessible to a + guest started without APFT installed. + +Example: Configure AP Matrixes for Three Linux Guests: +===================================================== +Let's now provide an example to illustrate how KVM guests may be given +access to AP facilities. For this example, we will show how to configure +three guests such that executing the lszcrypt command on the guests would +look like this: + +Guest1 +------ +CARD.DOMAIN TYPE MODE +------------------------------ +05 CEX5C CCA-Coproc +05.0004 CEX5C CCA-Coproc +05.00ab CEX5C CCA-Coproc +06 CEX5A Accelerator +06.0004 CEX5A Accelerator +06.00ab CEX5C CCA-Coproc + +Guest2 +------ +CARD.DOMAIN TYPE MODE +------------------------------ +05 CEX5A Accelerator +05.0047 CEX5A Accelerator +05.00ff CEX5A Accelerator (5,4), (5,171), (6,4), (6,171), + +Guest3 +------ +CARD.DOMAIN TYPE MODE +------------------------------ +06 CEX5A Accelerator +06.0047 CEX5A Accelerator +06.00ff CEX5A Accelerator + +These are the steps: + +1. Install the vfio_ap module on the linux host. The dependency chain for the + vfio_ap module is: + * iommu + * s390 + * zcrypt + * vfio + * vfio_mdev + * vfio_mdev_device + * KVM + + To build the vfio_ap module, the kernel build must be configured with the + following Kconfig elements selected: + * IOMMU_SUPPORT + * S390 + * ZCRYPT + * S390_AP_IOMMU + * VFIO + * VFIO_MDEV + * VFIO_MDEV_DEVICE + * KVM + + If using make menuconfig select the following to build the vfio_ap module: + -> Device Drivers + -> IOMMU Hardware Support + select S390 AP IOMMU Support + -> VFIO Non-Privileged userspace driver framework + -> Mediated device driver frramework + -> VFIO driver for Mediated devices + -> I/O subsystem + -> VFIO support for AP devices + +2. Secure the AP queues to be used by the three guests so that the host can not + access them. To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, + 06.0004, 06.0047, 06.00ab, and 06.00ff for use by the vfio_ap device driver, + the corresponding APQNs must be removed from the default queue drivers pool + as follows: + + echo -5,-6 > /sys/bus/ap/apmask + + echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask + + This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, + 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The + sysfs directory for the vfio_ap device driver will now contain symbolic links + to the AP queue devices bound to it: + + /sys/bus/ap + ... [drivers] + ...... [vfio_ap] + ......... [05.0004] + ......... [05.0047] + ......... [05.00ab] + ......... [05.00ff] + ......... [06.0004] + ......... [06.0047] + ......... [06.00ab] + ......... [06.00ff] + + Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) + can be bound to the vfio_ap device driver. The reason for this is to + simplify the implementation by not needlessly complicating the design by + supporting older devices that will go out of service in the relatively near + future, and for which there are few older systems on which to test. + + The administrator, therefore, must take care to secure only AP queues that + can be bound to the vfio_ap device driver. The device type for a given AP + queue device can be read from the parent card's sysfs directory. For example, + to see the hardware type of the queue 05.0004: + + cat /sys/bus/ap/devices/card05/hwtype + + The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the + vfio_ap device driver. + +3. Create the mediated devices needed to configure the AP matrixes for the + three guests and to provide an interface to the vfio_ap driver for + use by the guests: + + /sys/devices/vfio_ap/matrix/ + --- [mdev_supported_types] + ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) + --------- create + --------- [devices] + + To create the mediated devices for the three guests: + + uuidgen > create + uuidgen > create + uuidgen > create + + or + + echo $uuid1 > create + echo $uuid2 > create + echo $uuid3 > create + + This will create three mediated devices in the [devices] subdirectory named + after the UUID used to create the mediated device. We'll call them $uuid1, + $uuid2 and $uuid3 and this is the sysfs directory structure after creation: + + /sys/devices/vfio_ap/matrix/ + --- [mdev_supported_types] + ------ [vfio_ap-passthrough] + --------- [devices] + ------------ [$uuid1] + --------------- assign_adapter + --------------- assign_control_domain + --------------- assign_domain + --------------- matrix + --------------- unassign_adapter + --------------- unassign_control_domain + --------------- unassign_domain + + ------------ [$uuid2] + --------------- assign_adapter + --------------- assign_control_domain + --------------- assign_domain + --------------- matrix + --------------- unassign_adapter + ----------------unassign_control_domain + ----------------unassign_domain + + ------------ [$uuid3] + --------------- assign_adapter + --------------- assign_control_domain + --------------- assign_domain + --------------- matrix + --------------- unassign_adapter + ----------------unassign_control_domain + ----------------unassign_domain + +4. The administrator now needs to configure the matrixes for the mediated + devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). + + This is how the matrix is configured for Guest1: + + echo 5 > assign_adapter + echo 6 > assign_adapter + echo 4 > assign_domain + echo 0xab > assign_domain + + Control domains can similarly be assigned using the assign_control_domain + sysfs file. + + If a mistake is made configuring an adapter, domain or control domain, + you can use the unassign_xxx interfaces to unassign the adapter, domain or + control domain. + + To display the matrix configuration for Guest1: + + cat matrix + + The output will display the APQNs in the format xx.yyyy, where xx is + the adapter number and yyyy is the domain number. The output for Guest1 + will look like this: + + 05.0004 + 05.00ab + 06.0004 + 06.00ab + + This is how the matrix is configured for Guest2: + + echo 5 > assign_adapter + echo 0x47 > assign_domain + echo 0xff > assign_domain + + This is how the matrix is configured for Guest3: + + echo 6 > assign_adapter + echo 0x47 > assign_domain + echo 0xff > assign_domain + +5. Start Guest1: + + /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ + -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... + +7. Start Guest2: + + /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ + -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... + +7. Start Guest3: + + /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ + -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... + +When the guest is shut down, the mediated matrix devices may be removed. + +Using our example again, to remove the mediated matrix device $uuid1: + + /sys/devices/vfio_ap/matrix/ + --- [mdev_supported_types] + ------ [vfio_ap-passthrough] + --------- [devices] + ------------ [$uuid1] + --------------- remove + + + echo 1 > remove + + This will remove all of the mdev matrix device's sysfs structures including + the mdev device itself. To recreate and reconfigure the mdev matrix device, + all of the steps starting with step 3 will have to be performed again. Note + that the remove will fail if a guest using the mdev is still running. + + It is not necessary to remove an mdev matrix device, but one may want to + remove it if no guest will use it during the remaining lifetime of the linux + host. If the mdev matrix device is removed, one may want to also reconfigure + the pool of adapters and queues reserved for use by the default drivers. + +Limitations +=========== +* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN + to the default drivers pool of a queue that is still assigned to a mediated + device in use by a guest. It is incumbent upon the administrator to + ensure there is no mediated device in use by a guest to which the APQN is + assigned lest the host be given access to the private data of the AP queue + device, such as a private key configured specifically for the guest. + +* Dynamically modifying the AP matrix for a running guest (which would amount to + hot(un)plug of AP devices for the guest) is currently not supported + +* Live guest migration is not supported for guests using AP devices. diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 5dbc00ce9b45..ca68806e4418 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -26,8 +26,10 @@ obj-$(call lnot,$(CONFIG_PCI)) += s390-pci-stub.o obj-y += s390-skeys.o obj-y += s390-stattrib.o obj-y += tod.o +obj-y += tod-qemu.o obj-$(CONFIG_KVM) += tod-kvm.o -obj-$(CONFIG_TCG) += tod-qemu.o obj-$(CONFIG_KVM) += s390-skeys-kvm.o obj-$(CONFIG_KVM) += s390-stattrib-kvm.o obj-y += s390-ccw.o +obj-y += ap-device.o +obj-y += ap-bridge.o diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c new file mode 100644 index 000000000000..3795d30dd7c9 --- /dev/null +++ b/hw/s390x/ap-bridge.c @@ -0,0 +1,78 @@ +/* + * ap bridge + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "qemu/bitops.h" +#include "hw/s390x/ap-bridge.h" +#include "cpu.h" + +static char *ap_bus_get_dev_path(DeviceState *dev) +{ + /* at most one */ + return g_strdup_printf("/1"); +} + +static void ap_bus_class_init(ObjectClass *oc, void *data) +{ + BusClass *k = BUS_CLASS(oc); + + k->get_dev_path = ap_bus_get_dev_path; + /* More than one ap device does not make sense */ + k->max_dev = 1; +} + +static const TypeInfo ap_bus_info = { + .name = TYPE_AP_BUS, + .parent = TYPE_BUS, + .instance_size = 0, + .class_init = ap_bus_class_init, +}; + +void s390_init_ap(void) +{ + DeviceState *dev; + + /* If no AP instructions then no need for AP bridge */ + if (!s390_has_feat(S390_FEAT_AP)) { + return; + } + + /* Create bridge device */ + dev = qdev_create(NULL, TYPE_AP_BRIDGE); + object_property_add_child(qdev_get_machine(), TYPE_AP_BRIDGE, + OBJECT(dev), NULL); + qdev_init_nofail(dev); + + /* Create bus on bridge device */ + qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS); + } + +static void ap_bridge_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); +} + +static const TypeInfo ap_bridge_info = { + .name = TYPE_AP_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = 0, + .class_init = ap_bridge_class_init, +}; + +static void ap_register(void) +{ + type_register_static(&ap_bridge_info); + type_register_static(&ap_bus_info); +} + +type_init(ap_register) diff --git a/hw/s390x/ap-device.c b/hw/s390x/ap-device.c new file mode 100644 index 000000000000..f5ac8db96826 --- /dev/null +++ b/hw/s390x/ap-device.c @@ -0,0 +1,38 @@ +/* + * Adjunct Processor (AP) matrix device + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qapi/error.h" +#include "hw/qdev.h" +#include "hw/s390x/ap-device.h" + +static void ap_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "AP device class"; + dc->hotpluggable = false; +} + +static const TypeInfo ap_device_info = { + .name = AP_DEVICE_TYPE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(APDevice), + .class_size = sizeof(DeviceClass), + .class_init = ap_class_init, + .abstract = true, +}; + +static void ap_device_register(void) +{ + type_register_static(&ap_device_info); +} + +type_init(ap_device_register) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 53fd7c975f64..a0615a8b35f5 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -32,6 +32,7 @@ #include "ipl.h" #include "hw/s390x/s390-virtio-ccw.h" #include "hw/s390x/css-bridge.h" +#include "hw/s390x/ap-bridge.h" #include "migration/register.h" #include "cpu_models.h" #include "hw/nmi.h" @@ -263,6 +264,9 @@ static void ccw_init(MachineState *machine) /* init the SIGP facility */ s390_init_sigp(); + /* create AP bridge and bus(es) */ + s390_init_ap(); + /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index a2e7a0a7cf02..8b3f664d85f7 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -6,4 +6,5 @@ obj-$(CONFIG_SOFTMMU) += platform.o obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o obj-$(CONFIG_SOFTMMU) += spapr.o +obj-$(CONFIG_VFIO_AP) += ap.o endif diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c new file mode 100644 index 000000000000..3962bb74e578 --- /dev/null +++ b/hw/vfio/ap.c @@ -0,0 +1,181 @@ +/* + * VFIO based AP matrix device assignment + * + * Copyright 2018 IBM Corp. + * Author(s): Tony Krowiak + * Halil Pasic + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include +#include +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" +#include "hw/s390x/ap-device.h" +#include "qemu/error-report.h" +#include "qemu/queue.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "cpu.h" +#include "kvm_s390x.h" +#include "sysemu/sysemu.h" +#include "hw/s390x/ap-bridge.h" +#include "exec/address-spaces.h" + +#define VFIO_AP_DEVICE_TYPE "vfio-ap" + +typedef struct VFIOAPDevice { + APDevice apdev; + VFIODevice vdev; +} VFIOAPDevice; + +#define VFIO_AP_DEVICE(obj) \ + OBJECT_CHECK(VFIOAPDevice, (obj), VFIO_AP_DEVICE_TYPE) + +static void vfio_ap_compute_needs_reset(VFIODevice *vdev) +{ + vdev->needs_reset = false; +} + +/* + * We don't need vfio_hot_reset_multi and vfio_eoi operations for + * vfio-ap device now. + */ +struct VFIODeviceOps vfio_ap_ops = { + .vfio_compute_needs_reset = vfio_ap_compute_needs_reset, +}; + +static void vfio_ap_put_device(VFIOAPDevice *vapdev) +{ + g_free(vapdev->vdev.name); + vfio_put_base_device(&vapdev->vdev); +} + +static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) +{ + GError *gerror = NULL; + char *symlink, *group_path; + int groupid; + + symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev); + group_path = g_file_read_link(symlink, &gerror); + g_free(symlink); + + if (!group_path) { + error_setg(errp, "%s: no iommu_group found for %s: %s", + VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev, gerror->message); + return NULL; + } + + if (sscanf(basename(group_path), "%d", &groupid) != 1) { + error_setg(errp, "vfio: failed to read %s", group_path); + g_free(group_path); + return NULL; + } + + g_free(group_path); + + return vfio_get_group(groupid, &address_space_memory, errp); +} + +static void vfio_ap_realize(DeviceState *dev, Error **errp) +{ + int ret; + char *mdevid; + Error *local_err = NULL; + VFIOGroup *vfio_group; + APDevice *apdev = AP_DEVICE(dev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + + vfio_group = vfio_ap_get_group(vapdev, &local_err); + if (!vfio_group) { + goto out_err; + } + + vapdev->vdev.ops = &vfio_ap_ops; + vapdev->vdev.type = VFIO_DEVICE_TYPE_AP; + mdevid = basename(vapdev->vdev.sysfsdev); + vapdev->vdev.name = g_strdup_printf("%s", mdevid); + vapdev->vdev.dev = dev; + + ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err); + if (ret) { + goto out_get_dev_err; + } + + return; + +out_get_dev_err: + vfio_ap_put_device(vapdev); + vfio_put_group(vfio_group); +out_err: + error_propagate(errp, local_err); +} + +static void vfio_ap_unrealize(DeviceState *dev, Error **errp) +{ + APDevice *apdev = AP_DEVICE(dev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + VFIOGroup *group = vapdev->vdev.group; + + vfio_ap_put_device(vapdev); + vfio_put_group(group); +} + +static Property vfio_ap_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vfio_ap_reset(DeviceState *dev) +{ + int ret; + APDevice *apdev = AP_DEVICE(dev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + + ret = ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET); + if (ret) { + error_report("%s: failed to reset %s device: %s", __func__, + vapdev->vdev.name, strerror(ret)); + } +} + +static const VMStateDescription vfio_ap_vmstate = { + .name = VFIO_AP_DEVICE_TYPE, + .unmigratable = 1, +}; + +static void vfio_ap_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = vfio_ap_properties; + dc->vmsd = &vfio_ap_vmstate; + dc->desc = "VFIO-based AP device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->realize = vfio_ap_realize; + dc->unrealize = vfio_ap_unrealize; + dc->hotpluggable = false; + dc->reset = vfio_ap_reset; + dc->bus_type = TYPE_AP_BUS; +} + +static const TypeInfo vfio_ap_info = { + .name = VFIO_AP_DEVICE_TYPE, + .parent = AP_DEVICE_TYPE, + .instance_size = sizeof(VFIOAPDevice), + .class_init = vfio_ap_class_init, +}; + +static void vfio_ap_type_init(void) +{ + type_register_static(&vfio_ap_info); +} + +type_init(vfio_ap_type_init) diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h new file mode 100644 index 000000000000..470e439a98ed --- /dev/null +++ b/include/hw/s390x/ap-bridge.h @@ -0,0 +1,19 @@ +/* + * ap bridge + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390X_AP_BRIDGE_H +#define HW_S390X_AP_BRIDGE_H + +#define TYPE_AP_BRIDGE "ap-bridge" +#define TYPE_AP_BUS "ap-bus" + +void s390_init_ap(void); + +#endif diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h new file mode 100644 index 000000000000..765e9082a3db --- /dev/null +++ b/include/hw/s390x/ap-device.h @@ -0,0 +1,22 @@ +/* + * Adjunct Processor (AP) matrix device interfaces + * + * Copyright 2018 IBM Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390X_AP_DEVICE_H +#define HW_S390X_AP_DEVICE_H + +#define AP_DEVICE_TYPE "ap-device" + +typedef struct APDevice { + DeviceState parent_obj; +} APDevice; + +#define AP_DEVICE(obj) \ + OBJECT_CHECK(APDevice, (obj), AP_DEVICE_TYPE) + +#endif /* HW_S390X_AP_DEVICE_H */ diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 821def05658f..6be9a93f611b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -37,6 +37,7 @@ enum { VFIO_DEVICE_TYPE_PCI = 0, VFIO_DEVICE_TYPE_PLATFORM = 1, VFIO_DEVICE_TYPE_CCW = 2, + VFIO_DEVICE_TYPE_AP = 3, }; typedef struct VFIOMmap { diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index 6d6128c081b1..c0ad9fc2c343 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -267,10 +267,11 @@ struct input_mask { /* * MT_TOOL types */ -#define MT_TOOL_FINGER 0 -#define MT_TOOL_PEN 1 -#define MT_TOOL_PALM 2 -#define MT_TOOL_MAX 2 +#define MT_TOOL_FINGER 0x00 +#define MT_TOOL_PEN 0x01 +#define MT_TOOL_PALM 0x02 +#define MT_TOOL_DIAL 0x0a +#define MT_TOOL_MAX 0x0f /* * Values describing the status of a force-feedback effect diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 72aa226e6c1b..e1f8b745582f 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 99cb9ad14a23..e6a98c14c848 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 1ab9901911bf..0265482f8fdf 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 +#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 /* kvm attributes for migration mode */ #define KVM_S390_VM_MIGRATION_STOP 0 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 66790724f1f9..83ba4eb57167 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -951,6 +951,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_TLBFLUSH 155 #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 3615a269d378..ceb645339440 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -200,6 +200,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; @@ -215,6 +216,7 @@ struct vfio_device_info { #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 94726cb2c519..c8a8fbeb817a 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -176,7 +176,7 @@ struct vhost_memory { #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) -#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) /* VHOST_NET specific defines */ diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c index 172fb18df718..60cfeba48f4e 100644 --- a/target/s390x/cpu_features.c +++ b/target/s390x/cpu_features.c @@ -39,8 +39,10 @@ static const S390FeatDef s390_features[] = { FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"), FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE facility"), FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"), + FEAT_INIT("apqci", S390_FEAT_TYPE_STFL, 12, "Query AP Configuration Information facility"), FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"), FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"), + FEAT_INIT("apft", S390_FEAT_TYPE_STFL, 15, "AP Facilities Test facility"), FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"), FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"), FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"), @@ -129,6 +131,7 @@ static const S390FeatDef s390_features[] = { FEAT_INIT_MISC("dateh2", "DAT-enhancement facility 2"), FEAT_INIT_MISC("cmm", "Collaborative-memory-management facility"), + FEAT_INIT_MISC("ap", "AP instructions installed"), FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"), FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"), diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h index ac2c947f30a8..5fc7e7bf0116 100644 --- a/target/s390x/cpu_features_def.h +++ b/target/s390x/cpu_features_def.h @@ -27,8 +27,10 @@ typedef enum { S390_FEAT_SENSE_RUNNING_STATUS, S390_FEAT_CONDITIONAL_SSKE, S390_FEAT_CONFIGURATION_TOPOLOGY, + S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_IPTE_RANGE, S390_FEAT_NONQ_KEY_SETTING, + S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_EXTENDED_TRANSLATION_2, S390_FEAT_MSA, S390_FEAT_LONG_DISPLACEMENT, @@ -119,6 +121,7 @@ typedef enum { /* Misc */ S390_FEAT_DAT_ENH_2, S390_FEAT_CMM, + S390_FEAT_AP, /* PLO */ S390_FEAT_PLO_CL, diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 265d25c937bb..7c253ff308c5 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -786,6 +786,8 @@ static void check_consistency(const S390CPUModel *model) { S390_FEAT_PRNO_TRNG_QRTCR, S390_FEAT_MSA_EXT_5 }, { S390_FEAT_PRNO_TRNG, S390_FEAT_MSA_EXT_5 }, { S390_FEAT_SIE_KSS, S390_FEAT_SIE_F2 }, + { S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_AP }, + { S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_AP }, }; int i; diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c index cd76c3163a75..2a33222f7e36 100644 --- a/target/s390x/excp_helper.c +++ b/target/s390x/excp_helper.c @@ -33,23 +33,6 @@ #include "hw/s390x/s390_flic.h" #endif -/* #define DEBUG_S390 */ -/* #define DEBUG_S390_STDOUT */ - -#ifdef DEBUG_S390 -#ifdef DEBUG_S390_STDOUT -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); \ - if (qemu_log_separate()) { qemu_log(fmt, ##__VA_ARGS__); } } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { qemu_log(fmt, ## __VA_ARGS__); } while (0) -#endif -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, int ilen, uintptr_t ra) { @@ -128,8 +111,8 @@ int s390_cpu_handle_mmu_fault(CPUState *cs, vaddr orig_vaddr, int size, uint64_t asc; int prot; - DPRINTF("%s: address 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", - __func__, orig_vaddr, rw, mmu_idx); + qemu_log_mask(CPU_LOG_MMU, "%s: addr 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", + __func__, orig_vaddr, rw, mmu_idx); vaddr = orig_vaddr; @@ -158,8 +141,9 @@ int s390_cpu_handle_mmu_fault(CPUState *cs, vaddr orig_vaddr, int size, if (!address_space_access_valid(&address_space_memory, raddr, TARGET_PAGE_SIZE, rw, MEMTXATTRS_UNSPECIFIED)) { - DPRINTF("%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", __func__, - (uint64_t)raddr, (uint64_t)ram_size); + qemu_log_mask(CPU_LOG_MMU, + "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", + __func__, (uint64_t)raddr, (uint64_t)ram_size); trigger_pgm_exception(env, PGM_ADDRESSING, ILEN_AUTO); return 1; } @@ -217,8 +201,10 @@ static void do_program_interrupt(CPUS390XState *env) break; } - qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n", - __func__, env->int_pgm_code, ilen); + qemu_log_mask(CPU_LOG_INT, + "%s: code=0x%x ilen=%d psw: %" PRIx64 " %" PRIx64 "\n", + __func__, env->int_pgm_code, ilen, env->psw.mask, + env->psw.addr); lowcore = cpu_map_lowcore(env); @@ -240,10 +226,6 @@ static void do_program_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__, - env->int_pgm_code, ilen, env->psw.mask, - env->psw.addr); - load_psw(env, mask, addr); } @@ -334,9 +316,6 @@ static void do_ext_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, - env->psw.mask, env->psw.addr); - load_psw(env, mask, addr); } @@ -365,8 +344,6 @@ static void do_io_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); g_free(io); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, env->psw.mask, - env->psw.addr); load_psw(env, mask, addr); } @@ -408,9 +385,6 @@ static void do_mchk_interrupt(CPUS390XState *env) cpu_unmap_lowcore(lowcore); - DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, - env->psw.mask, env->psw.addr); - load_psw(env, mask, addr); } @@ -421,8 +395,8 @@ void s390_cpu_do_interrupt(CPUState *cs) CPUS390XState *env = &cpu->env; bool stopped = false; - qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n", - __func__, cs->exception_index, env->psw.addr); + qemu_log_mask(CPU_LOG_INT, "%s: %d at psw=%" PRIx64 ":%" PRIx64 "\n", + __func__, cs->exception_index, env->psw.mask, env->psw.addr); try_deliver: /* handle machine checks */ diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 384b61cd67b9..70015eaaf5df 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -447,6 +447,9 @@ static uint16_t full_GEN12_GA1[] = { S390_FEAT_ADAPTER_INT_SUPPRESSION, S390_FEAT_EDAT_2, S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, + S390_FEAT_AP_QUERY_CONFIG_INFO, + S390_FEAT_AP_FACILITIES_TEST, + S390_FEAT_AP, }; static uint16_t full_GEN12_GA2[] = { diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 78d39b34d0b8..2ebf26adfe24 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2299,11 +2299,26 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) error_setg(errp, "KVM: host CPU model could not be identified"); return; } + /* for now, we can only provide the AP feature with HW support */ + if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, + KVM_S390_VM_CRYPTO_ENABLE_APIE)) { + set_bit(S390_FEAT_AP, model->features); + } /* strip of features that are not part of the maximum model */ bitmap_and(model->features, model->features, model->def->full_feat, S390_FEAT_MAX); } +static void kvm_s390_configure_apie(bool interpret) +{ + uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE : + KVM_S390_VM_CRYPTO_DISABLE_APIE; + + if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) { + kvm_s390_set_attr(attr); + } +} + void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) { struct kvm_s390_vm_cpu_processor prop = { @@ -2353,6 +2368,10 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) if (test_bit(S390_FEAT_CMM, model->features)) { kvm_s390_enable_cmma(); } + + if (test_bit(S390_FEAT_AP, model->features)) { + kvm_s390_configure_apie(true); + } } void kvm_s390_restart_interrupt(S390CPU *cpu)