diff --git a/redis.conf b/redis.conf index 891bb1663043..a5062fda9d86 100644 --- a/redis.conf +++ b/redis.conf @@ -1976,3 +1976,10 @@ jemalloc-bg-thread yes # # Set bgsave child process to cpu affinity 1,10,11 # bgsave_cpulist 1,10-11 + +# In some cases Redis will emit warnings, and even refuse to start, if it detects +# that the system is in a bad state. It is possible to suppress these warnings +# by setting the following config, which takes a space-delimited list of warnings +# to suppress. +# +# ignore-warnings ARM64-COW-BUG diff --git a/src/config.c b/src/config.c index d804ff0efb85..2e109dbaec7d 100644 --- a/src/config.c +++ b/src/config.c @@ -2424,6 +2424,7 @@ standardConfig configs[] = { createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bio_cpulist, NULL, NULL, NULL), createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL), createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL), + createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL), /* SDS Configs */ createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL), diff --git a/src/server.c b/src/server.c index 7d2688b665e9..faa67fccb887 100644 --- a/src/server.c +++ b/src/server.c @@ -59,6 +59,10 @@ #include #include +#ifdef __linux__ +#include +#endif + /* Our shared "common" objects */ struct sharedObjectsStruct shared; @@ -5090,6 +5094,21 @@ void monitorCommand(client *c) { /* =================================== Main! 
================================ */ /* Return 1 if warning matches, case-insensitively, one of the tokens that sdssplitargs() extracts from server.ignore_warnings, and 0 otherwise (including when sdssplitargs() returns NULL). A non-NULL split result is released before returning. */ +int checkIgnoreWarning(const char *warning) { + int argc, j; + sds *argv = sdssplitargs(server.ignore_warnings, &argc); + if (argv == NULL) + return 0; + + for (j = 0; j < argc; j++) { + char *flag = argv[j]; + if (!strcasecmp(flag, warning)) + break; + } + sdsfreesplitres(argv,argc); + return j < argc; +} + #ifdef __linux__ int linuxOvercommitMemoryValue(void) { FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r"); @@ -5113,6 +5132,113 @@ void linuxMemoryWarnings(void) { serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled (set to 'madvise' or 'never')."); } } + +#ifdef __arm64__ + +/* Get size in kilobytes of the Shared_Dirty pages of the calling process for the + * memory map corresponding to the provided address. + * Returns -1 if no Shared_Dirty entry is found for a mapping containing addr. + * Failing to open /proc/self/smaps, or to parse the Shared_Dirty value, trips + * serverAssert() rather than returning an error. + * NOTE(review): the file is read at most 63 characters at a time, so the tail + * of a longer smaps line is scanned as if it were a separate line - confirm + * that such a tail can never look like a from-to range or a Shared_Dirty field. */ +static int smapsGetSharedDirty(unsigned long addr) { + int ret, in_mapping = 0, val = -1; + unsigned long from, to; + char buf[64]; + FILE *f; + + f = fopen("/proc/self/smaps", "r"); + serverAssert(f); + + while (1) { + if (!fgets(buf, sizeof(buf), f)) + break; + + /* A line starting with a from-to hex range opens a new mapping; track whether addr lies in [from, to). */ ret = sscanf(buf, "%lx-%lx", &from, &to); + if (ret == 2) + in_mapping = from <= addr && addr < to; + + /* Only the Shared_Dirty field of the mapping that contains addr is of interest; stop at the first one. */ if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) { + ret = sscanf(buf, "%*s %d", &val); + serverAssert(ret == 1); + break; + } + } + + fclose(f); + return val; +} + +/* Older arm64 Linux kernels have a bug that could lead to data corruption + * during background save in certain scenarios. This function checks if the + * kernel is affected. 
+ * The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b + * titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()" + * Return 1 if the kernel seems to be affected, and 0 otherwise. */ /* The probe maps 3 * 4096 bytes, forks, and blocks until the child reports its result through a pipe; any failing call along the way trips serverAssert(). */ +int linuxMadvFreeForkBugCheck(void) { + int ret, pipefd[2]; + pid_t pid; + char *p, *q, bug_found = 0; + const long map_size = 3 * 4096; + + /* Create a memory map that's in our full control (not one used by the allocator). + * NOTE(review): the map size and the offsets below hard-code a 4096 byte page. + * mprotect() needs a page-aligned address, so this looks like it would trip the + * assert on kernels configured with larger pages - confirm, and consider + * sysconf(_SC_PAGESIZE). */ + p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + serverAssert(p != MAP_FAILED); + + q = p + 4096; + + /* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent + * Linux from merging this memory map with adjacent VMAs. */ + ret = mprotect(q, 4096, PROT_READ | PROT_WRITE); + serverAssert(!ret); + + /* Write to the page once to make it resident */ + *(volatile char*)q = 0; + + /* Tell the kernel that this page is free to be reclaimed. + * MADV_FREE is defined locally as 8 when the system headers do not provide it. */ +#ifndef MADV_FREE +#define MADV_FREE 8 +#endif + ret = madvise(q, 4096, MADV_FREE); + serverAssert(!ret); + + /* Write to the page after being marked for freeing, this is supposed to take + * ownership of that page again. */ + *(volatile char*)q = 0; + + /* Create a pipe for the child to return the info to the parent. */ + ret = pipe(pipefd); + serverAssert(!ret); + + /* Fork the process. */ + pid = fork(); + serverAssert(pid >= 0); + if (!pid) { + /* Child: check if the page is marked as dirty, expecting 4 (kB). + * A value of 0 means the kernel is affected by the bug. A result of -1 + * (no Shared_Dirty entry found) is nonzero and is therefore reported as + * not affected. */ + if (!smapsGetSharedDirty((unsigned long)q)) + bug_found = 1; + + ret = write(pipefd[1], &bug_found, 1); + serverAssert(ret == 1); + + /* NOTE(review): exit() runs atexit handlers and flushes stdio buffers inherited from the parent; confirm whether _exit() is wanted in this short-lived child. */ exit(0); + } else { + /* Read the result from the child: a single byte holding bug_found. */ + ret = read(pipefd[0], &bug_found, 1); + serverAssert(ret == 1); + + /* Reap the child pid. 
*/ + serverAssert(waitpid(pid, NULL, 0) == pid); + } + + /* Cleanup: close both pipe ends and release the probe mapping. */ + ret = close(pipefd[0]); + serverAssert(!ret); + ret = close(pipefd[1]); + serverAssert(!ret); + ret = munmap(p, map_size); + serverAssert(!ret); + + return bug_found; +} +#endif /* __arm64__ */ #endif /* __linux__ */ void createPidFile(void) { @@ -5711,7 +5837,16 @@ int main(int argc, char **argv) { serverLog(LL_WARNING,"Server initialized"); #ifdef __linux__ linuxMemoryWarnings(); - #endif + #if defined (__arm64__) + if (linuxMadvFreeForkBugCheck()) { + serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. Please upgrade to the latest stable kernel."); + if (!checkIgnoreWarning("ARM64-COW-BUG")) { + serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG"); + exit(1); + } + } + #endif /* __arm64__ */ + #endif /* __linux__ */ moduleInitModulesSystemLast(); moduleLoadFromQueue(); ACLLoadUsersAtStartup(); diff --git a/src/server.h b/src/server.h index 62281a63cfa2..fb6d71c71cb4 100644 --- a/src/server.h +++ b/src/server.h @@ -1136,6 +1136,7 @@ struct redisServer { int in_eval; /* Are we inside EVAL? */ int in_exec; /* Are we inside EXEC? */ int propagate_in_transaction; /* Make sure we don't propagate nested MULTI/EXEC */ + char *ignore_warnings; /* Config: warnings that should be ignored. */ /* Modules */ dict *moduleapi; /* Exported core APIs dictionary for modules. */ dict *sharedapi; /* Like moduleapi but containing the APIs that