Skip to content

Commit

Permalink
util: Use enhanced s390 PCI instructions
Browse files Browse the repository at this point in the history
In the existing MMIO implementation s390 relies on special syscalls to
access PCI memory. This was necessary as s390 originally only had
special privileged instructions for accessing PCI memory. With z15
however comes support for 4 new PCI memory I/O (MIO) instructions which
operate on virtually mapped PCI memory spaces.

While these are still special PCI access instructions instead of real
MMIO they behave much more like standard MMIO access. There is a load
like instruction pcilgi, a store like instruction pcistgi a block store
variant for efficient memcpy pcistbi and a write barrier instruction
pciwb. The load and store variants always operate on a 64 bit register
but only load/store the right most bytes of the register controlled by
a length value in a paired register (even numbered register rN + odd
numbered register r(N+1)).

As the previous PCI instructions did not operate on virtual memory
mappings at all a kernel using them does not setup virtual memory
mappings and thus can't support user-space using the new instructions.

Also as use of PCI MIO instructions can be disabled via the pci=nomio
kernel parameter we can't solely rely on hardware support and kernel
version. Instead Linux exposes whether PCI MIO instructions are enabled
via an ELF hardware capability. With this patch we check for this
capability and if enabled use the newly introduced PCI MIO instructions
for MMIO access and barriers.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
  • Loading branch information
niklas88 committed Feb 16, 2022
1 parent 3ff453e commit fe43e33
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 34 deletions.
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,11 @@ check_type_size("long" SIZEOF_LONG BUILTIN_TYPES_ONLY LANGUAGE C)
# Determine if this arch supports cache coherent DMA. This isn't really an
# arch specific property, but for our purposes arches that do not support it
# also do not define wmb/etc which breaks our compile.
# As a special case s390x always has coherent DMA but needs linking for its wmb
CHECK_C_SOURCE_COMPILES("
#if !defined(__s390x__)
#include \"${CMAKE_CURRENT_SOURCE_DIR}/util/udma_barrier.h\"
#endif
int main(int argc,const char *argv[]) {return 0;}"
HAVE_COHERENT_DMA)

Expand Down Expand Up @@ -609,6 +612,21 @@ RDMA_Check_C_Compiles(HAVE_GLIBC_FXSTAT "
struct stat stat = {}; __fxstat(0, 0, &stat); return 0;}")
RDMA_DoFixup("${HAVE_GLIBC_FXSTAT}" "sys/stat.h")


# glibc before 2.35 does not necesarily define the HWCAP_S390_PCI_MIO hardware
# capability bit constant. Check for it and if necessary shim it in such that
# kernel support for PCI MIO instructions can always be checked.
RDMA_Check_C_Compiles(HAVE_GLIBC_HWCAP_S390_PCI_MIO "
#if defined(__s390x__)
#include <sys/auxv.h>
int main(int argc, const char *argv[]) {
return !!(getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);}
#else
int main(int argc, const char *argv[]) {return 0;}
#endif
")
RDMA_DoFixup("${HAVE_GLIBC_HWCAP_S390_PCI_MIO}" "sys/auxv.h")

#-------------------------
# Build Prep
# Write out a git ignore file to the build directory if it isn't the source
Expand Down Expand Up @@ -806,3 +824,6 @@ endif()
if (NOT DRM_INCLUDE_DIRS)
message(STATUS " DMABUF NOT supported (disabling some tests)")
endif()
if (NOT HAVE_GLIBC_HWCAP_S390_PCI_MIO )
message(STATUS " Glibc version does not contain the HWCAP_S390_PCI_MIO bit, using shim version")
endif()
10 changes: 10 additions & 0 deletions buildlib/fixup-include/sys-auxv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef _FIXUP_SYS_AUXV_H
#define _FIXUP_SYS_AUXV_H
#if defined(__s390x__)

#include_next <sys/auxv.h>

#define HWCAP_S390_PCI_MIO 2097152

#endif
#endif
1 change: 1 addition & 0 deletions util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ set(C_FILES
if (HAVE_COHERENT_DMA)
publish_internal_headers(util
mmio.h
s390_mmio_insn.h
udma_barrier.h
)

Expand Down
67 changes: 67 additions & 0 deletions util/mmio.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,73 @@
#include <pthread.h>
#include <stdbool.h>

#ifdef __s390x__
#include <sys/auxv.h>
#include <ccan/minmax.h>

bool s390_is_mio_supported;

static __attribute__((constructor)) void check_mio_supported(void)
{
s390_is_mio_supported = !!(getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);
}

typedef void (*mmio_memcpy_x64_fn_t)(void *, const void *, size_t);
/* This uses the STT_GNU_IFUNC extension to have the dynamic linker select the
best above implementations at runtime. */
#if HAVE_FUNC_ATTRIBUTE_IFUNC
void mmio_memcpy_x64(void *, const void *, size_t)
__attribute__((ifunc("resolve_mmio_memcpy_x64")));
static mmio_memcpy_x64_fn_t resolve_mmio_memcpy_x64(uint64_t);
#else
__asm__(".type mmio_memcpy_x64, %gnu_indirect_function");
write64_fn_t resolve_mmio_memcpy_64(uint64_t)
__asm__("mmio_memcpy_x64");
#endif

#define S390_MAX_WRITE_SIZE 128
#define S390_BOUNDARY_SIZE (1 << 12)
#define S390_BOUNDARY_MASK (S390_BOUNDARY_SIZE - 1)

static uint8_t get_max_write_size(void *dst, size_t len)
{
size_t offset = ((uint64_t __force)dst) & S390_BOUNDARY_MASK;
size_t size = min_t(int, len, S390_MAX_WRITE_SIZE);

if (likely(offset + size <= S390_BOUNDARY_SIZE))
return size;

return S390_BOUNDARY_SIZE - offset;
}

static void mmio_memcpy_x64_mio(void *dst, const void *src, size_t bytecnt)
{
size_t size;

/* Input is 8 byte aligned 64 byte chunks. The alignment matches the
* requirements of pcistbi but we must not cross a 4K byte boundary.
*/
while (bytecnt > 0) {
size = get_max_write_size(dst, bytecnt);
if (size > 8)
s390_pcistbi(dst, src, size);
else
s390_pcistgi(dst, *(uint64_t *)src, 8);
src += size;
dst += size;
bytecnt -= size;
}
}

mmio_memcpy_x64_fn_t resolve_mmio_memcpy_x64(uint64_t hwcap)
{
if (hwcap & HWCAP_S390_PCI_MIO)
return &mmio_memcpy_x64_mio;
else
return &s390_mmio_write_syscall;
}
#endif /* __s390x__ */

#if SIZEOF_LONG != 8

static pthread_spinlock_t mmio_spinlock;
Expand Down
63 changes: 30 additions & 33 deletions util/mmio.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,58 +67,60 @@
code is always identical.
*/
#ifdef __s390x__
#include <unistd.h>
#include <sys/syscall.h>

/* s390 requires a privileged instruction to access IO memory, these syscalls
perform that instruction using a memory buffer copy semantic.
*/
static inline void s390_mmio_write(void *mmio_addr, const void *val,
size_t length)
{
// FIXME: Check for error and call abort?
syscall(__NR_s390_pci_mmio_write, mmio_addr, val, length);
}

static inline void s390_mmio_read(const void *mmio_addr, void *val,
size_t length)
{
// FIXME: Check for error and call abort?
syscall(__NR_s390_pci_mmio_read, mmio_addr, val, length);
}
#include <util/s390_mmio_insn.h>

#define MAKE_WRITE(_NAME_, _SZ_) \
static inline void _NAME_##_be(void *addr, __be##_SZ_ value) \
{ \
s390_mmio_write(addr, &value, sizeof(value)); \
if (s390_is_mio_supported) \
s390_pcistgi(addr, value, sizeof(value)); \
else \
s390_mmio_write_syscall(addr, &value, sizeof(value)); \
} \
static inline void _NAME_##_le(void *addr, __le##_SZ_ value) \
{ \
s390_mmio_write(addr, &value, sizeof(value)); \
if (s390_is_mio_supported) \
s390_pcistgi(addr, value, sizeof(value)); \
else \
s390_mmio_write_syscall(addr, &value, sizeof(value)); \
}
#define MAKE_READ(_NAME_, _SZ_) \
static inline __be##_SZ_ _NAME_##_be(const void *addr) \
{ \
__be##_SZ_ res; \
s390_mmio_read(addr, &res, sizeof(res)); \
if (s390_is_mio_supported) \
res = s390_pcilgi(addr, sizeof(res)); \
else \
s390_mmio_read_syscall(addr, &res, sizeof(res)); \
return res; \
} \
static inline __le##_SZ_ _NAME_##_le(const void *addr) \
{ \
__le##_SZ_ res; \
s390_mmio_read(addr, &res, sizeof(res)); \
if (s390_is_mio_supported) \
res = s390_pcilgi(addr, sizeof(res)); \
else \
s390_mmio_read_syscall(addr, &res, sizeof(res)); \
return res; \
}

static inline void mmio_write8(void *addr, uint8_t value)
{
s390_mmio_write(addr, &value, sizeof(value));
if (s390_is_mio_supported)
s390_pcistgi(addr, value, sizeof(value));
else
s390_mmio_write_syscall(addr, &value, sizeof(value));
}

static inline uint8_t mmio_read8(const void *addr)
{
uint8_t res;
s390_mmio_read(addr, &res, sizeof(res));

if (s390_is_mio_supported)
res = s390_pcilgi(addr, sizeof(res));
else
s390_mmio_read_syscall(addr, &res, sizeof(res));

return res;
}

Expand Down Expand Up @@ -205,13 +207,7 @@ __le64 mmio_read64_le(const void *addr);
/* This strictly guarantees the order of TLP generation for the memory copy to
be in ascending address order.
*/
#ifdef __s390x__
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
{
s390_mmio_write(dest, src, bytecnt);
}

#elif defined(__aarch64__) || defined(__arm__)
#if defined(__aarch64__) || defined(__arm__)
#include <arm_neon.h>

static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
Expand All @@ -236,7 +232,8 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
else \
_mmio_memcpy_x64((dest), (src), (bytecount)); \
})

#elif defined(__s390x__)
void mmio_memcpy_x64(void *dst, const void *src, size_t bytecnt);
#else
/* Transfer is some multiple of 64 bytes */
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
Expand Down
100 changes: 100 additions & 0 deletions util/s390_mmio_insn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/* GPLv2 or OpenIB.org BSD (MIT) See COPYING file */
#ifndef __S390_UTIL_MMIO_H
#define __S390_UTIL_MMIO_H
#ifdef __s390x__
#include <stdbool.h>
#include <stdint.h>
#include <endian.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/auxv.h>

#include <util/compiler.h>

/* s390 requires special instructions to access IO memory. Originally there
were only privileged IO instructions that are exposed via special syscalls.
Starting with z15 there are also non-privileged memory IO (MIO) instructions
we can execute in user-space. Despite the hardware support this requires
support in the kernel. If MIO instructions are available is indicated in an
ELF hardware capability.
*/
extern bool s390_is_mio_supported;

union register_pair {
unsigned __int128 pair;
struct {
uint64_t even;
uint64_t odd;
};
};

/* The following pcilgi and pcistgi instructions allow IO memory access from
user-space but are only available on z15 and newer.
*/
static inline uint64_t s390_pcilgi(const void *ioaddr, size_t len)
{
union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};
uint64_t val;
int cc;

asm volatile (
/* pcilgi */
".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
"ipm %[cc]\n"
"srl %[cc],28\n"
: [cc] "=d" (cc), [val] "=d" (val),
[ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
if (unlikely(cc))
val = -1ULL;

return val;
}

static inline void s390_pcistgi(void *ioaddr, uint64_t val, size_t len)
{
union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};

asm volatile (
/* pcistgi */
".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
: [ioaddr_len] "+&d" (ioaddr_len.pair)
: [val] "d" (val)
: "cc", "memory");
}

/* This is the block store variant of unprivileged IO access instructions */
static inline void s390_pcistbi(void *ioaddr, const void *data, size_t len)
{
const uint8_t *src = data;

asm volatile (
/* pcistbi */
".insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
: [len] "+d" (len)
: [ioaddr] "d" ((uint64_t *)ioaddr),
[src] "Q" (*src)
: "cc");
}

static inline void s390_pciwb(void)
{
if (s390_is_mio_supported)
asm volatile (".insn rre,0xb9d50000,0,0\n"); /* pciwb */
else
asm volatile("" ::: "memory");
}

static inline void s390_mmio_write_syscall(void *mmio_addr, const void *val,
size_t length)
{
syscall(__NR_s390_pci_mmio_write, mmio_addr, val, length);
}

static inline void s390_mmio_read_syscall(const void *mmio_addr, void *val,
size_t length)
{
syscall(__NR_s390_pci_mmio_read, mmio_addr, val, length);
}

#endif /* __s390x__ */
#endif /* __S390_UTIL_MMIO_H */
5 changes: 4 additions & 1 deletion util/udma_barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,15 @@
#define mmio_flush_writes() asm volatile("membar #StoreStore" ::: "memory")
#elif defined(__aarch64__)
#define mmio_flush_writes() asm volatile("dsb st" ::: "memory");
#elif defined(__sparc__) || defined(__s390x__)
#elif defined(__sparc__)
#define mmio_flush_writes() asm volatile("" ::: "memory")
#elif defined(__loongarch__)
#define mmio_flush_writes() asm volatile("dbar 0" ::: "memory")
#elif defined(__riscv)
#define mmio_flush_writes() asm volatile("fence ow,ow" ::: "memory")
#elif defined(__s390x__)
#include "s390_mmio_insn.h"
#define mmio_flush_writes() s390_pciwb()
#else
#error No architecture specific memory barrier defines found!
#endif
Expand Down

0 comments on commit fe43e33

Please sign in to comment.