Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmake/os/Linux.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,5 @@ IF(NOT WITH_ASAN AND NOT WITH_MSAN AND NOT WITH_UBSAN AND NOT WITH_TSAN)
ENDIF()

# Linux specific HUGETLB /large page support
CHECK_SYMBOL_EXISTS(SHM_HUGETLB sys/shm.h HAVE_LINUX_LARGE_PAGES)
CHECK_SYMBOL_EXISTS(MAP_HUGETLB sys/mman.h HAVE_LINUX_LARGE_PAGES)
CHECK_SYMBOL_EXISTS(MAP_HUGE_SHIFT sys/mman.h HAVE_LINUX_MULTIPLE_LARGE_PAGES)
1 change: 1 addition & 0 deletions config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@
#cmakedefine LINUX_ALPINE 1
#cmakedefine LINUX_SUSE
#cmakedefine HAVE_LINUX_LARGE_PAGES 1
#cmakedefine HAVE_LINUX_MULTIPLE_LARGE_PAGES 1
#cmakedefine HAVE_SOLARIS_LARGE_PAGES 1
#cmakedefine HAVE_SOLARIS_ATOMIC 1
#define SYSTEM_TYPE "@SYSTEM_TYPE@"
Expand Down
6 changes: 3 additions & 3 deletions share/errmsg-utf8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16799,9 +16799,6 @@ ER_IB_MSG_851
ER_IB_MSG_852
eng "%s"

ER_IB_MSG_853
eng "%s"

ER_IB_MSG_854
eng "%s"

Expand Down Expand Up @@ -18515,6 +18512,9 @@ ER_IB_MSG_MADV_DONTDUMP_UNSUPPORTED
ER_IB_MSG_MADVISE_FAILED
eng "Disabling @@core_file because @@innodb_buffer_pool_in_core_file is disabled, yet madvise(%p,%zu,%s) failed with %s"

ER_IB_OS_LARGE_PAGE_SIZE
eng "Unexpected OS large page size %zu, not a power of 2, skipping"

ER_COLUMN_CHANGE_SIZE
eng "Could not change column '%s' of table '%s'. The resulting size of index '%s' would exceed the max key length of %d bytes."

Expand Down
5 changes: 0 additions & 5 deletions storage/innobase/include/os0proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ this program; if not, write to the Free Software Foundation, Inc.,

#include "univ.i"

#ifdef UNIV_LINUX
#include <sys/ipc.h>
#include <sys/shm.h>
#endif

typedef void *os_process_t;
typedef unsigned long int os_process_id_t;

Expand Down
4 changes: 4 additions & 0 deletions storage/innobase/include/srv0srv.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,10 @@ even if they are marked as "corrupted". Mostly it is for DBA to process
corrupted index and table */
extern bool srv_load_corrupted;

/** Capacity of srv_large_page_sizes: the maximum number of distinct OS
large page sizes that can be recorded. */
#define srv_large_page_sizes_length 8
/** Large page sizes offered by the operating system, in bytes. */
extern size_t srv_large_page_sizes[srv_large_page_sizes_length];

/** Dedicated server setting */
extern bool srv_dedicated_server;
/** Requested size in bytes */
Expand Down
129 changes: 96 additions & 33 deletions storage/innobase/os/os0proc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <errno.h>
#include <stddef.h>
#include <sys/types.h>

#if defined HAVE_LINUX_LARGE_PAGES && defined HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif

#include "ha_prototypes.h"
#include "os0proc.h"
#include "srv0srv.h"
Expand Down Expand Up @@ -70,49 +75,114 @@ ulint os_proc_get_number(void) {
#endif
}

/** Finds the next OS large page size that is no bigger than a given size.

Scans srv_large_page_sizes[] forward from index *start and stops at the end
of the array or at the first zero entry. Every entry that is examined
advances *start by one, so the caller can call again with the same pointer
to continue with the following candidates; it must not increment *start
itself. Use *start == 0 for the first call.

When HAVE_LINUX_MULTIPLE_LARGE_PAGES is not defined nothing is scanned, 0 is
returned and *start is left unchanged.

@param[in]      sz     upper bound for the page size being looked for
@param[in,out]  start  index in srv_large_page_sizes[] to resume scanning from
@return the first entry at or after *start that is <= sz
@retval 0 if no remaining entry is <= sz */
static size_t os_next_large_page_size(size_t sz, int *start)
{
#if defined HAVE_LINUX_MULTIPLE_LARGE_PAGES
  for (;;)
  {
    const int idx = *start;
    if (idx >= srv_large_page_sizes_length)
    {
      break;
    }

    const size_t candidate = srv_large_page_sizes[idx];
    if (candidate == 0)
    {
      /* A zero entry ends the list of known sizes. */
      break;
    }

    /* Consume this slot before deciding, so a later call resumes after it. */
    *start = idx + 1;
    if (candidate <= sz)
    {
      return candidate;
    }
  }
#endif
  return 0;
}

/** Computes floor(log2(value)) by counting how many right shifts are needed
to bring the value down to 1.
@param[in]  value  number to take the logarithm of
@return position of the highest set bit; 0 when value is 0 or 1 */
static inline uint os_bit_size_t_log2(size_t value)
{
  uint shifts = 0;
  while (value > 1)
  {
    value >>= 1;
    ++shifts;
  }
  return shifts;
}

/** Allocates large pages memory.
@param[in,out] n Number of bytes to allocate
@return allocated memory */
void *os_mem_alloc_large(ulint *n) {
void *ptr;
void *ptr = NULL;
ulint size;
#if defined HAVE_LINUX_LARGE_PAGES && defined UNIV_LINUX
int shmid;
struct shmid_ds buf;
int mapflag, i= 0;
size_t adjusted_size, large_page_size;

if (!os_use_large_pages || !os_large_page_size) {
if (!os_use_large_pages) {
goto skip;
}
#ifdef HAVE_LINUX_MULTIPLE_LARGE_PAGES
if (!os_large_page_size) {
/* advance i to be a smaller or equal to os_large_page_size */
os_next_large_page_size(os_large_page_size, &i);
}
large_page_size = os_next_large_page_size(*n, &i);
#else
large_page_size = os_large_page_size;
#endif
if (!large_page_size)
goto skip;

/* Align block size to os_large_page_size */
ut_ad(ut_is_2pow(os_large_page_size));
size = ut_2pow_round(*n + (os_large_page_size - 1), os_large_page_size);
ut_ad(ut_is_2pow(large_page_size));

shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
if (shmid < 0) {
ib::warn(ER_IB_MSG_852)
<< "Failed to allocate " << size << " bytes. errno " << errno;
ptr = NULL;
} else {
ptr = shmat(shmid, NULL, 0);
if (ptr == (void *)-1) {
ib::warn(ER_IB_MSG_853) << "Failed to attach shared memory segment,"
" errno "
<< errno;
#if defined HAVE_LINUX_MULTIPLE_LARGE_PAGES
do
#endif
{
mapflag = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#if defined HAVE_LINUX_MULTIPLE_LARGE_PAGES
/* MAP_HUGE_SHIFT added linux-3.8. Take largest HUGEPAGE size */
mapflag |= os_bit_size_t_log2(large_page_size) << MAP_HUGE_SHIFT;
#endif
/* Align block size to large_page_size */
adjusted_size = ut_2pow_round(*n + (large_page_size - 1), large_page_size);
ptr = mmap(NULL, adjusted_size, PROT_READ | PROT_WRITE, mapflag, -1, 0);
if (ptr != (void*)-1) {
#if defined HAVE_LINUX_MULTIPLE_LARGE_PAGES
break;
} else {
ptr = NULL;
if (errno == ENOMEM) {
/* no memory at this size, try next size */
continue;
}
#else
} else {
#endif
ptr = NULL;
ib::warn(ER_IB_MSG_852)
<< "Failed to allocate " << adjusted_size << " bytes. pagesize " << large_page_size
<< " bytes. errno " << errno;
}

/* Remove the shared memory segment so that it will be
automatically freed after memory is detached or
process exits */
shmctl(shmid, IPC_RMID, &buf);
}
#if defined HAVE_LINUX_MULTIPLE_LARGE_PAGES
while ((large_page_size = os_next_large_page_size(*n, &i)));
#endif

if (ptr) {
*n = size;
os_atomic_increment_ulint(&os_total_large_mem_allocated, size);
*n = adjusted_size;
os_atomic_increment_ulint(&os_total_large_mem_allocated, adjusted_size);

UNIV_MEM_ALLOC(ptr, size);
UNIV_MEM_ALLOC(ptr, adjusted_size);
return (ptr);
}

Expand Down Expand Up @@ -167,13 +237,6 @@ void *os_mem_alloc_large(ulint *n) {
void os_mem_free_large(void *ptr, ulint size) {
ut_a(os_total_large_mem_allocated >= size);

#if defined HAVE_LINUX_LARGE_PAGES && defined UNIV_LINUX
if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
os_atomic_decrement_ulint(&os_total_large_mem_allocated, size);
UNIV_MEM_FREE(ptr, size);
return;
}
#endif /* HAVE_LINUX_LARGE_PAGES && UNIV_LINUX */
#ifdef _WIN32
/* When RELEASE memory, the size parameter must be 0.
Do not use MEM_RELEASE with MEM_DECOMMIT. */
Expand Down
2 changes: 2 additions & 0 deletions storage/innobase/srv/srv0srv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ with mutex_enter(), which will wait until it gets the mutex. */

/** Dedicated server setting */
bool srv_dedicated_server = true;
/** Large page sizes offered by the operating system, in bytes */
size_t srv_large_page_sizes[srv_large_page_sizes_length];
/** Requested size in bytes */
ulint srv_buf_pool_size = ULINT_MAX;
/** Minimum pool size in bytes */
Expand Down
54 changes: 54 additions & 0 deletions storage/innobase/srv/srv0start.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>

#include <zlib.h>
#include "btr0btr.h"
Expand Down Expand Up @@ -318,6 +319,55 @@ static MY_ATTRIBUTE((warn_unused_result)) dberr_t
return (DB_SUCCESS);
}

/** qsort() comparator that orders size_t values from largest to smallest.
@param[in]  a  pointer to the first size_t
@param[in]  b  pointer to the second size_t
@return 1 if *b > *a, -1 if *b < *a, 0 if they are equal */
static int size_t_cmp(const void *a, const void *b)
{
  const size_t lhs = *(const size_t *)a;
  const size_t rhs = *(const size_t *)b;

  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
  return (rhs > lhs) - (rhs < lhs);
}

/** Fetch the large page sizes available from Linux.

Reads the entries of /sys/kernel/mm/hugepages, takes the number that follows
the "hugepages-" prefix of each entry name, multiplies it by 1024 and stores
the result in sizes[]. At most srv_large_page_sizes_length entries are
stored, and the stored entries are sorted from largest to smallest.

Entries whose size is 0 or not a power of 2 are reported with
ER_IB_OS_LARGE_PAGE_SIZE and skipped. If the directory cannot be opened a
message is printed with perror() and sizes[] is left untouched.

@param[out]  sizes  array receiving the sizes; slots beyond the stored
                    entries are not written, except that the slot of a
                    rejected entry is reset to 0 */
static void srv_get_large_page_sizes(size_t sizes[srv_large_page_sizes_length])
{
  static const char prefix[] = "hugepages-";
  const size_t prefix_len = sizeof(prefix) - 1;

  DIR *dirp = opendir("/sys/kernel/mm/hugepages");
  if (dirp == NULL)
  {
    perror("Warning: failed to open /sys/kernel/mm/hugepages");
    return;
  }

  int i = 0;
  struct dirent *r;
  while (i < srv_large_page_sizes_length && (r = readdir(dirp)) != NULL)
  {
    if (strncmp(prefix, r->d_name, prefix_len) != 0)
    {
      continue;
    }

    /* NOTE(review): the multiplication assumes the number in the entry name
    is expressed in kB (e.g. "hugepages-2048kB") - confirm. */
    const size_t sz = strtoull(r->d_name + prefix_len, NULL, 10) * 1024ULL;

    /* A name without digits parses to 0. Reject it explicitly rather than
    relying on ut_is_2pow(), which is defined elsewhere and may accept 0;
    a zero entry would only waste a slot, since 0 marks the end of the list
    for readers of the array. */
    if (sz == 0 || !ut_is_2pow(sz))
    {
      ib::warn(ER_IB_OS_LARGE_PAGE_SIZE, sz);
      sizes[i] = 0;
      continue;
    }

    sizes[i++] = sz;
  }

  /* Release the directory stream; it was previously leaked. */
  closedir(dirp);

  qsort(sizes, i, sizeof(size_t), size_t_cmp);
}

/** Initial number of the first redo log file */
#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)

Expand Down Expand Up @@ -2025,6 +2075,10 @@ dberr_t srv_start(bool create_new_db, const std::string &scan_directories) {
return (srv_init_abort(DB_ERROR));
}

#if defined(HAVE_LINUX_MULTIPLE_LARGE_PAGES)
srv_get_large_page_sizes(srv_large_page_sizes);
#endif /* HAVE_LINUX_MULTIPLE_LARGE_PAGES */

double size;
char unit;

Expand Down