Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix insufficient flushing on ARMv8.2+ #5257

Merged
merged 1 commit into from Jul 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/libpmem2/aarch64/arm_cacheops.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright 2014-2020, Intel Corporation */
/* Copyright 2014-2021, Intel Corporation */
/*
* ARM inline assembly to flush and invalidate caches
* clwb => dc cvac
Expand All @@ -11,8 +11,9 @@
/*
* Cache instructions on ARM:
* ARMv8.0-a DC CVAC - cache clean to Point of Coherency
* Meant for thread synchronization, usually implies
* real memory flush but may mean less.
* Meant for thread synchronization, before 8.2 may or
* may not flush memory, on 8.2+ the following is
* needed:
* ARMv8.2-a DC CVAP - cache clean to Point of Persistency
* Meant exactly for our use.
* ARMv8.5-a DC CVADP - cache clean to Point of Deep Persistency
Expand Down Expand Up @@ -54,6 +55,12 @@ arm_clean_va_to_poc(void const *p __attribute__((unused)))
asm volatile("dc cvac, %0" : : "r" (p) : "memory");
}

/*
 * arm_clean_va_to_pop -- issue a DC CVAP (cache clean by virtual address to
 * the Point of Persistency, an ARMv8.2-a instruction) for the address p
 *
 * NOTE(review): the ".arch armv8.2-a" directive presumably lets the assembler
 * accept DC CVAP even when the build targets baseline ARMv8.0 -- confirm.
 * Nothing here checks that the running CPU implements the instruction.
 */
static inline void
arm_clean_va_to_pop(void const *p __attribute__((unused)))
{
/* the "memory" clobber stops the compiler moving memory accesses across it */
asm volatile(".arch armv8.2-a\n dc cvap, %0" : : "r" (p) : "memory");
}

static inline void
arm_store_memory_barrier(void)
{
Expand Down
22 changes: 20 additions & 2 deletions src/libpmem2/aarch64/flush.h
Expand Up @@ -11,10 +11,10 @@
#define FLUSH_ALIGN ((uintptr_t)64)

/*
* flush_dcache_nolog -- flush the CPU cache, using DC CVAC
* flush_poc_nolog -- flush the CPU cache, using DC CVAC
*/
static force_inline void
flush_dcache_nolog(const void *addr, size_t len)
flush_poc_nolog(const void *addr, size_t len)
{
uintptr_t uptr;

Expand All @@ -28,4 +28,22 @@ flush_dcache_nolog(const void *addr, size_t len)
}
}

/*
 * flush_pop_nolog -- clean every cache line overlapping [addr, addr + len)
 *	with DC CVAP, without emitting any log message
 */
static force_inline void
flush_pop_nolog(const void *addr, size_t len)
{
	const uintptr_t line_mask = ~(FLUSH_ALIGN - 1);
	const uintptr_t range_end = (uintptr_t)addr + len;
	uintptr_t line = (uintptr_t)addr & line_mask;

	/*
	 * Start at the FLUSH_ALIGN-aligned line containing addr and step one
	 * line at a time until the end of the requested range is passed.
	 */
	while (line < range_end) {
		arm_clean_va_to_pop((char *)line);
		line += FLUSH_ALIGN;
	}
}

#endif
50 changes: 44 additions & 6 deletions src/libpmem2/aarch64/init.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright 2014-2019, Intel Corporation */
/* Copyright 2014-2021, Intel Corporation */

#include <errno.h>
#include <string.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

#include "auto_flush.h"
#include "flush.h"
Expand All @@ -19,14 +21,45 @@ memory_barrier(void)
}

/*
* flush_dcache -- (internal) flush the CPU cache
* flush_poc -- (internal) flush the dcache to the Point of Coherency,
* available on all ARMv8+. It does _not_ flush to DIMMs on ARMv8.2+
* CPUs, and whether it does so on earlier CPUs is ill-specified.
*/
static void
flush_dcache(const void *addr, size_t len)
flush_poc(const void *addr, size_t len)
{
LOG(15, "addr %p len %zu", addr, len);

flush_dcache_nolog(addr, len);
flush_poc_nolog(addr, len);
}

/*
 * flush_pop -- (internal) flush the dcache to the Point of Persistency,
 * available on ARMv8.2+
 *
 * Logs the request at level 15, then delegates the actual cache-line walk
 * to flush_pop_nolog().
 *
 * addr - start of the range to flush
 * len  - length of the range in bytes
 */
static void
flush_pop(const void *addr, size_t len)
{
LOG(15, "addr %p len %zu", addr, len);

flush_pop_nolog(addr, len);
}

/*
 * is_dc_pop_available -- (internal) determine whether the CPU advertises
 *	the DC CVAP instruction, using the HWCAP_DCPOP bit of AT_HWCAP
 *
 * Returns 1 when HWCAP_DCPOP is set or when the hwcaps cannot be read,
 * 0 otherwise. The caller's errno is left untouched.
 */
static int
is_dc_pop_available(void)
{
	unsigned long hwcap;
	int oerrno;

	LOG(15, NULL);

	/*
	 * Shouldn't ever fail, but if it does, getauxval() returns 0 (not -1)
	 * and sets errno to ENOENT, which would look exactly like "no DCPOP".
	 * In that case assume PoP flushes are required -- safer on any
	 * hardware suspected of actually being capable of pmem, cleanly
	 * crashing with SIGILL on old gear.
	 */
	oerrno = errno;
	errno = 0;
	hwcap = getauxval(AT_HWCAP);
	if (hwcap == 0 && errno == ENOENT) {
		errno = oerrno;
		return 1;
	}
	errno = oerrno;

	/* normalize to 0/1 rather than narrowing the unsigned long mask */
	return (hwcap & HWCAP_DCPOP) != 0;
}

/*
Expand All @@ -38,10 +71,15 @@ pmem2_arch_init(struct pmem2_arch_info *info)
LOG(3, NULL);

info->fence = memory_barrier;
info->flush = flush_dcache;
if (is_dc_pop_available())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, this should also include a check for ARM's eADR equivalent. But for now, it's fine.

info->flush = flush_pop;
else
info->flush = flush_poc;

if (info->flush == flush_dcache)
if (info->flush == flush_poc)
LOG(3, "Synchronize VA to poc for ARM");
else if (info->flush == flush_pop)
LOG(3, "Synchronize VA to pop for ARM");
else
FATAL("invalid deep flush function address");
}