Skip to content

Commit

Permalink
pmem2: arm64: fix data loss on ARMv8.2+ (improper flushing)
Browse files Browse the repository at this point in the history
ARMv8.2 separates flush-for-coherency from flush-for-persistency as two
instructions: DC CVAC and DC CVAP.  It's the latter that is of interest
to us, and it is up to the processor to know where the Point of
Persistency happens to be.

The documentation is very clear, and this PIRL talk dispels all doubt:
https://www.youtube.com/watch?v=8QAuN8CL5Zg

On the other hand, I found no clear answer whether DC CVAC might be
enough on some pre-8.2 hardware.  There's no other option though (CVAC
is the heaviest flush available).  This question is probably moot as
pre-8.2 CPUs, while ubiquitous in dev boards, are gone in datacenter
gear.
  • Loading branch information
kilobyte committed Jun 24, 2021
1 parent 2c68fd4 commit d42dbd5
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 11 deletions.
12 changes: 9 additions & 3 deletions src/libpmem2/aarch64/arm_cacheops.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright 2014-2020, Intel Corporation */
/* Copyright 2014-2021, Intel Corporation */
/*
* ARM inline assembly to flush and invalidate caches
* clwb => dc cvac
Expand All @@ -11,8 +11,8 @@
/*
* Cache instructions on ARM:
* ARMv8.0-a DC CVAC - cache clean to Point of Coherency
* Meant for thread synchronization, usually implies
* real memory flush but may mean less.
* Meant for thread synchronization, before 8.2 may or
* may not flush memory, on 8.2+ the following is needed:
* ARMv8.2-a DC CVAP - cache clean to Point of Persistency
* Meant exactly for our use.
* ARMv8.5-a DC CVADP - cache clean to Point of Deep Persistency
Expand Down Expand Up @@ -54,6 +54,12 @@ arm_clean_va_to_poc(void const *p __attribute__((unused)))
asm volatile("dc cvac, %0" : : "r" (p) : "memory");
}

/*
 * arm_clean_va_to_pop -- clean the data cache for virtual address p to the
 *                        Point of Persistency (DC CVAP, ARMv8.2-a and later)
 *
 * The instruction is emitted through its generic SYS encoding rather than
 * the "dc cvap" mnemonic.  The mnemonic is only accepted when the assembler
 * targets ARMv8.2-a or newer, and forcing that with an ".arch" directive in
 * inline asm would change the assembler's target architecture for everything
 * that follows in the translation unit (dropping any newer extensions chosen
 * with -march).  The SYS form assembles for any ARMv8 target.
 *
 * The caller must make sure the CPU implements DC CVAP before calling this.
 */
static inline void
arm_clean_va_to_pop(void const *p __attribute__((unused)))
{
	/* SYS #3, C7, C12, #1, Xt is the architectural encoding of DC CVAP */
	asm volatile("sys #3, c7, c12, #1, %0" : : "r" (p) : "memory");
}

static inline void
arm_store_memory_barrier(void)
{
Expand Down
22 changes: 20 additions & 2 deletions src/libpmem2/aarch64/flush.h
Expand Up @@ -11,10 +11,10 @@
#define FLUSH_ALIGN ((uintptr_t)64)

/*
* flush_dcache_nolog -- flush the CPU cache, using DC CVAC
* flush_poc_nolog -- flush the CPU cache, using DC CVAC
*/
static force_inline void
flush_dcache_nolog(const void *addr, size_t len)
flush_poc_nolog(const void *addr, size_t len)
{
uintptr_t uptr;

Expand All @@ -28,4 +28,22 @@ flush_dcache_nolog(const void *addr, size_t len)
}
}

/*
 * flush_pop_nolog -- clean every cache line overlapping [addr, addr + len)
 *                    to the Point of Persistency, using DC CVAP
 */
static force_inline void
flush_pop_nolog(const void *addr, size_t len)
{
	/* one past the last byte of the requested range */
	uintptr_t range_end = (uintptr_t)addr + len;

	/* round the start down to a FLUSH_ALIGN (cache line) boundary */
	uintptr_t line = (uintptr_t)addr & ~(FLUSH_ALIGN - 1);

	/* walk the range one cache line at a time */
	while (line < range_end) {
		arm_clean_va_to_pop((char *)line);
		line += FLUSH_ALIGN;
	}
}

#endif
50 changes: 44 additions & 6 deletions src/libpmem2/aarch64/init.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright 2014-2019, Intel Corporation */
/* Copyright 2014-2021, Intel Corporation */

#include <errno.h>
#include <string.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

#include "auto_flush.h"
#include "flush.h"
Expand All @@ -19,14 +21,45 @@ memory_barrier(void)
}

/*
* flush_dcache -- (internal) flush the CPU cache
* flush_poc -- (internal) flush the dcache to Point of Coherency,
* available on all ARMv8+. It does _not_ flush to dimms
* on new CPUs, and is ill-specified earlier.
*/
static void
flush_dcache(const void *addr, size_t len)
flush_poc(const void *addr, size_t len)
{
LOG(15, "addr %p len %zu", addr, len);

flush_dcache_nolog(addr, len);
flush_poc_nolog(addr, len);
}

/*
 * flush_pop -- (internal) flush the dcache to Point of Persistency,
 *              available on ARMv8.2+
 *
 * addr -- start of the range to flush
 * len  -- length of the range in bytes
 *
 * Logging wrapper around flush_pop_nolog(); pmem2_arch_init() installs it
 * as info->flush when DC CVAP is available.
 */
static void
flush_pop(const void *addr, size_t len)
{
LOG(15, "addr %p len %zu", addr, len);

/* the _nolog variant issues DC CVAP for each cache line in the range */
flush_pop_nolog(addr, len);
}

/*
 * is_dc_pop_available -- (internal) determine whether PoP flushes (DC CVAP)
 *                        should be used, based on the dcpop hwcap flag
 *
 * Returns 1 when the kernel reports HWCAP_DCPOP, or when the hwcaps cannot
 * be read at all; returns 0 when the hwcaps were read and the flag is clear.
 * The caller's errno is preserved.
 */
static int
is_dc_pop_available(void)
{
	unsigned long hwcap;
	int oerrno = errno;

	LOG(15, NULL);

	/*
	 * Shouldn't ever fail, but if it does, getauxval() returns 0 (and,
	 * on glibc 2.19+, sets errno to ENOENT) -- it does NOT return -1.
	 * A plain bit test would then silently select the PoC flush, so the
	 * failure is detected explicitly.  We then assume PoP flushes are
	 * required -- safer on any hardware suspected of actually being
	 * capable of pmem, cleanly crashing with SIGILL on old gear.
	 */
	errno = 0;
	hwcap = getauxval(AT_HWCAP);
	if (hwcap == 0 && errno == ENOENT) {
		errno = oerrno;
		return 1;
	}
	errno = oerrno;

	return (hwcap & HWCAP_DCPOP) != 0;
}

/*
Expand All @@ -38,10 +71,15 @@ pmem2_arch_init(struct pmem2_arch_info *info)
LOG(3, NULL);

info->fence = memory_barrier;
info->flush = flush_dcache;
if (is_dc_pop_available())
info->flush = flush_pop;
else
info->flush = flush_poc;

if (info->flush == flush_dcache)
if (info->flush == flush_poc)
LOG(3, "Synchronize VA to poc for ARM");
else if (info->flush == flush_pop)
LOG(3, "Synchronize VA to pop for ARM");
else
FATAL("invalid deep flush function address");
}

0 comments on commit d42dbd5

Please sign in to comment.