Skip to content

Commit

Permalink
Merge pull request #1122 from niklas88/s390_pci_mio
Browse files Browse the repository at this point in the history
Exploit s390's new and enhanced PCI memory I/O (MIO) instructions
  • Loading branch information
jgunthorpe committed Feb 28, 2022
2 parents c0c2467 + fe43e33 commit b23c91b
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 34 deletions.
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,11 @@ check_type_size("long" SIZEOF_LONG BUILTIN_TYPES_ONLY LANGUAGE C)
# Determine if this arch supports cache coherent DMA. This isn't really an
# arch specific property, but for our purposes arches that do not support it
# also do not define wmb/etc which breaks our compile.
# As a special case s390x always has coherent DMA but needs linking for its wmb
CHECK_C_SOURCE_COMPILES("
#if !defined(__s390x__)
#include \"${CMAKE_CURRENT_SOURCE_DIR}/util/udma_barrier.h\"
#endif
int main(int argc,const char *argv[]) {return 0;}"
HAVE_COHERENT_DMA)

Expand Down Expand Up @@ -611,6 +614,21 @@ RDMA_Check_C_Compiles(HAVE_GLIBC_FXSTAT "
struct stat stat = {}; __fxstat(0, 0, &stat); return 0;}")
RDMA_DoFixup("${HAVE_GLIBC_FXSTAT}" "sys/stat.h")


# glibc before 2.35 does not necesarily define the HWCAP_S390_PCI_MIO hardware
# capability bit constant. Check for it and if necessary shim it in such that
# kernel support for PCI MIO instructions can always be checked.
RDMA_Check_C_Compiles(HAVE_GLIBC_HWCAP_S390_PCI_MIO "
#if defined(__s390x__)
#include <sys/auxv.h>
int main(int argc, const char *argv[]) {
return !!(getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);}
#else
int main(int argc, const char *argv[]) {return 0;}
#endif
")
RDMA_DoFixup("${HAVE_GLIBC_HWCAP_S390_PCI_MIO}" "sys/auxv.h")

#-------------------------
# Build Prep
# Write out a git ignore file to the build directory if it isn't the source
Expand Down Expand Up @@ -808,3 +826,6 @@ endif()
if (NOT DRM_INCLUDE_DIRS)
message(STATUS " DMABUF NOT supported (disabling some tests)")
endif()
if (NOT HAVE_GLIBC_HWCAP_S390_PCI_MIO )
message(STATUS " Glibc version does not contain the HWCAP_S390_PCI_MIO bit, using shim version")
endif()
10 changes: 10 additions & 0 deletions buildlib/fixup-include/sys-auxv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef _FIXUP_SYS_AUXV_H
#define _FIXUP_SYS_AUXV_H
#if defined(__s390x__)

#include_next <sys/auxv.h>

#define HWCAP_S390_PCI_MIO 2097152

#endif
#endif
1 change: 1 addition & 0 deletions util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ set(C_FILES
if (HAVE_COHERENT_DMA)
publish_internal_headers(util
mmio.h
s390_mmio_insn.h
udma_barrier.h
)

Expand Down
67 changes: 67 additions & 0 deletions util/mmio.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,73 @@
#include <pthread.h>
#include <stdbool.h>

#ifdef __s390x__
#include <sys/auxv.h>
#include <ccan/minmax.h>

bool s390_is_mio_supported;

static __attribute__((constructor)) void check_mio_supported(void)
{
s390_is_mio_supported = !!(getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO);
}

typedef void (*mmio_memcpy_x64_fn_t)(void *, const void *, size_t);
/* This uses the STT_GNU_IFUNC extension to have the dynamic linker select the
best above implementations at runtime. */
#if HAVE_FUNC_ATTRIBUTE_IFUNC
void mmio_memcpy_x64(void *, const void *, size_t)
__attribute__((ifunc("resolve_mmio_memcpy_x64")));
static mmio_memcpy_x64_fn_t resolve_mmio_memcpy_x64(uint64_t);
#else
__asm__(".type mmio_memcpy_x64, %gnu_indirect_function");
write64_fn_t resolve_mmio_memcpy_64(uint64_t)
__asm__("mmio_memcpy_x64");
#endif

#define S390_MAX_WRITE_SIZE 128
#define S390_BOUNDARY_SIZE (1 << 12)
#define S390_BOUNDARY_MASK (S390_BOUNDARY_SIZE - 1)

static uint8_t get_max_write_size(void *dst, size_t len)
{
size_t offset = ((uint64_t __force)dst) & S390_BOUNDARY_MASK;
size_t size = min_t(int, len, S390_MAX_WRITE_SIZE);

if (likely(offset + size <= S390_BOUNDARY_SIZE))
return size;

return S390_BOUNDARY_SIZE - offset;
}

static void mmio_memcpy_x64_mio(void *dst, const void *src, size_t bytecnt)
{
size_t size;

/* Input is 8 byte aligned 64 byte chunks. The alignment matches the
* requirements of pcistbi but we must not cross a 4K byte boundary.
*/
while (bytecnt > 0) {
size = get_max_write_size(dst, bytecnt);
if (size > 8)
s390_pcistbi(dst, src, size);
else
s390_pcistgi(dst, *(uint64_t *)src, 8);
src += size;
dst += size;
bytecnt -= size;
}
}

mmio_memcpy_x64_fn_t resolve_mmio_memcpy_x64(uint64_t hwcap)
{
if (hwcap & HWCAP_S390_PCI_MIO)
return &mmio_memcpy_x64_mio;
else
return &s390_mmio_write_syscall;
}
#endif /* __s390x__ */

#if SIZEOF_LONG != 8

static pthread_spinlock_t mmio_spinlock;
Expand Down
63 changes: 30 additions & 33 deletions util/mmio.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,58 +67,60 @@
code is always identical.
*/
#ifdef __s390x__
#include <unistd.h>
#include <sys/syscall.h>

/* s390 requires a privileged instruction to access IO memory, these syscalls
perform that instruction using a memory buffer copy semantic.
*/
static inline void s390_mmio_write(void *mmio_addr, const void *val,
size_t length)
{
// FIXME: Check for error and call abort?
syscall(__NR_s390_pci_mmio_write, mmio_addr, val, length);
}

static inline void s390_mmio_read(const void *mmio_addr, void *val,
size_t length)
{
// FIXME: Check for error and call abort?
syscall(__NR_s390_pci_mmio_read, mmio_addr, val, length);
}
#include <util/s390_mmio_insn.h>

#define MAKE_WRITE(_NAME_, _SZ_) \
static inline void _NAME_##_be(void *addr, __be##_SZ_ value) \
{ \
s390_mmio_write(addr, &value, sizeof(value)); \
if (s390_is_mio_supported) \
s390_pcistgi(addr, value, sizeof(value)); \
else \
s390_mmio_write_syscall(addr, &value, sizeof(value)); \
} \
static inline void _NAME_##_le(void *addr, __le##_SZ_ value) \
{ \
s390_mmio_write(addr, &value, sizeof(value)); \
if (s390_is_mio_supported) \
s390_pcistgi(addr, value, sizeof(value)); \
else \
s390_mmio_write_syscall(addr, &value, sizeof(value)); \
}
#define MAKE_READ(_NAME_, _SZ_) \
static inline __be##_SZ_ _NAME_##_be(const void *addr) \
{ \
__be##_SZ_ res; \
s390_mmio_read(addr, &res, sizeof(res)); \
if (s390_is_mio_supported) \
res = s390_pcilgi(addr, sizeof(res)); \
else \
s390_mmio_read_syscall(addr, &res, sizeof(res)); \
return res; \
} \
static inline __le##_SZ_ _NAME_##_le(const void *addr) \
{ \
__le##_SZ_ res; \
s390_mmio_read(addr, &res, sizeof(res)); \
if (s390_is_mio_supported) \
res = s390_pcilgi(addr, sizeof(res)); \
else \
s390_mmio_read_syscall(addr, &res, sizeof(res)); \
return res; \
}

static inline void mmio_write8(void *addr, uint8_t value)
{
s390_mmio_write(addr, &value, sizeof(value));
if (s390_is_mio_supported)
s390_pcistgi(addr, value, sizeof(value));
else
s390_mmio_write_syscall(addr, &value, sizeof(value));
}

static inline uint8_t mmio_read8(const void *addr)
{
uint8_t res;
s390_mmio_read(addr, &res, sizeof(res));

if (s390_is_mio_supported)
res = s390_pcilgi(addr, sizeof(res));
else
s390_mmio_read_syscall(addr, &res, sizeof(res));

return res;
}

Expand Down Expand Up @@ -205,13 +207,7 @@ __le64 mmio_read64_le(const void *addr);
/* This strictly guarantees the order of TLP generation for the memory copy to
be in ascending address order.
*/
#ifdef __s390x__
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
{
s390_mmio_write(dest, src, bytecnt);
}

#elif defined(__aarch64__) || defined(__arm__)
#if defined(__aarch64__) || defined(__arm__)
#include <arm_neon.h>

static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
Expand All @@ -236,7 +232,8 @@ static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
else \
_mmio_memcpy_x64((dest), (src), (bytecount)); \
})

#elif defined(__s390x__)
void mmio_memcpy_x64(void *dst, const void *src, size_t bytecnt);
#else
/* Transfer is some multiple of 64 bytes */
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
Expand Down
100 changes: 100 additions & 0 deletions util/s390_mmio_insn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/* GPLv2 or OpenIB.org BSD (MIT) See COPYING file */
#ifndef __S390_UTIL_MMIO_H
#define __S390_UTIL_MMIO_H
#ifdef __s390x__
#include <stdbool.h>
#include <stdint.h>
#include <endian.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/auxv.h>

#include <util/compiler.h>

/* s390 requires special instructions to access IO memory. Originally there
were only privileged IO instructions that are exposed via special syscalls.
Starting with z15 there are also non-privileged memory IO (MIO) instructions
we can execute in user-space. Despite the hardware support this requires
support in the kernel. If MIO instructions are available is indicated in an
ELF hardware capability.
*/
extern bool s390_is_mio_supported;

union register_pair {
unsigned __int128 pair;
struct {
uint64_t even;
uint64_t odd;
};
};

/* The following pcilgi and pcistgi instructions allow IO memory access from
user-space but are only available on z15 and newer.
*/
static inline uint64_t s390_pcilgi(const void *ioaddr, size_t len)
{
union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};
uint64_t val;
int cc;

asm volatile (
/* pcilgi */
".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
"ipm %[cc]\n"
"srl %[cc],28\n"
: [cc] "=d" (cc), [val] "=d" (val),
[ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
if (unlikely(cc))
val = -1ULL;

return val;
}

static inline void s390_pcistgi(void *ioaddr, uint64_t val, size_t len)
{
union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len};

asm volatile (
/* pcistgi */
".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
: [ioaddr_len] "+&d" (ioaddr_len.pair)
: [val] "d" (val)
: "cc", "memory");
}

/* This is the block store variant of unprivileged IO access instructions */
static inline void s390_pcistbi(void *ioaddr, const void *data, size_t len)
{
const uint8_t *src = data;

asm volatile (
/* pcistbi */
".insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
: [len] "+d" (len)
: [ioaddr] "d" ((uint64_t *)ioaddr),
[src] "Q" (*src)
: "cc");
}

static inline void s390_pciwb(void)
{
if (s390_is_mio_supported)
asm volatile (".insn rre,0xb9d50000,0,0\n"); /* pciwb */
else
asm volatile("" ::: "memory");
}

static inline void s390_mmio_write_syscall(void *mmio_addr, const void *val,
size_t length)
{
syscall(__NR_s390_pci_mmio_write, mmio_addr, val, length);
}

static inline void s390_mmio_read_syscall(const void *mmio_addr, void *val,
size_t length)
{
syscall(__NR_s390_pci_mmio_read, mmio_addr, val, length);
}

#endif /* __s390x__ */
#endif /* __S390_UTIL_MMIO_H */
5 changes: 4 additions & 1 deletion util/udma_barrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,15 @@
#define mmio_flush_writes() asm volatile("membar #StoreStore" ::: "memory")
#elif defined(__aarch64__)
#define mmio_flush_writes() asm volatile("dsb st" ::: "memory");
#elif defined(__sparc__) || defined(__s390x__)
#elif defined(__sparc__)
#define mmio_flush_writes() asm volatile("" ::: "memory")
#elif defined(__loongarch__)
#define mmio_flush_writes() asm volatile("dbar 0" ::: "memory")
#elif defined(__riscv)
#define mmio_flush_writes() asm volatile("fence ow,ow" ::: "memory")
#elif defined(__s390x__)
#include "s390_mmio_insn.h"
#define mmio_flush_writes() s390_pciwb()
#else
#error No architecture specific memory barrier defines found!
#endif
Expand Down

0 comments on commit b23c91b

Please sign in to comment.