Skip to content

Commit

Permalink
8286030: Avoid JVM crash when containers share the same /tmp dir
Browse files Browse the repository at this point in the history
Reviewed-by: stuefe
Backport-of: 84f23149e22561173feb0e34bca31a7345b43c89
  • Loading branch information
GoeLin committed Feb 10, 2023
1 parent b023d5c commit d52e18c
Show file tree
Hide file tree
Showing 3 changed files with 297 additions and 57 deletions.
179 changes: 122 additions & 57 deletions src/hotspot/os/posix/perfMemory_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
# include <signal.h>
# include <pwd.h>

#if defined(LINUX)
# include <sys/file.h>
#endif

static char* backing_store_file_name = NULL; // name of the backing store
// file, if successfully created.

Expand Down Expand Up @@ -75,18 +79,6 @@ static char* create_standard_memory(size_t size) {
return mapAddress;
}

// delete the PerfData memory region
//
static void delete_standard_memory(char* addr, size_t size) {

// there are no persistent external resources to cleanup for standard
// memory. since DestroyJavaVM does not support unloading of the JVM,
// cleanup of the memory resource is not performed. The memory will be
// reclaimed by the OS upon termination of the process.
//
return;
}

// save the specified memory region to the given file
//
// Note: this function might be called from signal handler (by os::abort()),
Expand Down Expand Up @@ -707,17 +699,17 @@ static void remove_file(const char* path) {
}
}


// cleanup stale shared memory resources
// cleanup stale shared memory files
//
// This method attempts to remove all stale shared memory files in
// the named user temporary directory. It scans the named directory
// for files matching the pattern ^$[0-9]*$. For each file found, the
// process id is extracted from the file name and a test is run to
// determine if the process is alive. If the process is not alive,
// any stale file resources are removed.
// for files matching the pattern ^$[0-9]*$.
//
// This directory should be used only by JVM processes owned by the
// current user to store PerfMemory files. Any other files found
// in this directory may be removed.
//
static void cleanup_sharedmem_resources(const char* dirname) {
static void cleanup_sharedmem_files(const char* dirname) {

int saved_cwd_fd;
// open the directory and set the current working directory to it
Expand All @@ -727,48 +719,95 @@ static void cleanup_sharedmem_resources(const char* dirname) {
return;
}

// for each entry in the directory that matches the expected file
// name pattern, determine if the file resources are stale and if
// so, remove the file resources. Note, instrumented HotSpot processes
// for this user may start and/or terminate during this search and
// remove or create new files in this directory. The behavior of this
// loop under these conditions is dependent upon the implementation of
// opendir/readdir.
// For each entry in the directory that matches the expected file
// name pattern, remove the file if it's determine to be stale
// Note, instrumented HotSpot processes for this user may start and/or
// terminate during this search and remove or create new files in this
// directory. The behavior of this loop under these conditions is dependent
// upon the implementation of opendir/readdir.
//
struct dirent* entry;
errno = 0;
while ((entry = os::readdir(dirp)) != NULL) {

pid_t pid = filename_to_pid(entry->d_name);
const char* filename = entry->d_name;
pid_t pid = filename_to_pid(filename);

if (pid == 0) {

if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
if (strcmp(filename, ".") != 0 && strcmp(filename, "..") != 0) {
// attempt to remove all unexpected files, except "." and ".."
unlink(entry->d_name);
unlink(filename);
}

errno = 0;
continue;
}

// we now have a file name that converts to a valid integer
// that could represent a process id . if this process id
// matches the current process id or the process is not running,
// then remove the stale file resources.
//
// process liveness is detected by sending signal number 0 to
// the process id (see kill(2)). if kill determines that the
// process does not exist, then the file resources are removed.
// if kill determines that that we don't have permission to
// signal the process, then the file resources are assumed to
// be stale and are removed because the resources for such a
// process should be in a different user specific directory.
#if defined(LINUX)
// Special case on Linux, if multiple containers share the
// same /tmp directory:
//
if ((pid == os::current_process_id()) ||
(kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
unlink(entry->d_name);
// - All the JVMs must have the JDK-8286030 fix, or the behavior
// is undefined.
// - We cannot rely on the values of the pid, because it could
// be a process in a different namespace. We must use the flock
// protocol to determine if a live process is using this file.
// See create_sharedmem_file().
int fd;
RESTARTABLE(os::open(filename, O_RDONLY, 0), fd);
if (fd == OS_ERR) {
// Something wrong happened. Ignore the error and don't try to remove the
// file.
log_debug(perf, memops)("os::open() for stale file check failed for %s/%s", dirname, filename);
errno = 0;
continue;
}

int n;
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
if (n != 0) {
// Either another process holds the exclusive lock on this file, or
// something wrong happened. Ignore the error and don't try to remove the
// file.
log_debug(perf, memops)("flock for stale file check failed for %s/%s", dirname, filename);
::close(fd);
errno = 0;
continue;
}
// We are able to lock the file, but this file might have been created
// by an older JVM that doesn't use the flock prototol, so we must do
// the folowing checks (which are also done by older JVMs).
#endif

// The following code assumes that pid must be in the same
// namespace as the current process.
bool stale = false;

if (pid == os::current_process_id()) {
// The file was created by a terminated process that happened
// to have the same pid as the current process.
stale = true;
} else if (kill(pid, 0) == OS_ERR) {
if (errno == ESRCH) {
// The target process does not exist.
stale = true;
} else if (errno == EPERM) {
// The file was created by a terminated process that happened
// to have the same pid as a process not owned by the current user.
stale = true;
}
}

if (stale) {
log_info(perf, memops)("Remove stale file %s/%s", dirname, filename);
unlink(filename);
}

#if defined(LINUX)
// Hold the lock until here to prevent other JVMs from using this file
// while we were in the middle of deleting it.
::close(fd);
#endif

errno = 0;
}

Expand Down Expand Up @@ -814,13 +853,13 @@ static bool make_user_tmp_dir(const char* dirname) {
return true;
}

// create the shared memory file resources
// create the shared memory file
//
// This method creates the shared memory file with the given size
// This method also creates the user specific temporary directory, if
// it does not yet exist.
//
static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {
static int create_sharedmem_file(const char* dirname, const char* filename, size_t size) {

// make the user temporary directory
if (!make_user_tmp_dir(dirname)) {
Expand Down Expand Up @@ -868,6 +907,32 @@ static int create_sharedmem_resources(const char* dirname, const char* filename,
return -1;
}

#if defined(LINUX)
// On Linux, different containerized processes that share the same /tmp
// directory (e.g., with "docker --volume ...") may have the same pid and
// try to use the same file. To avoid conflicts among such
// processes, we allow only one of them (the winner of the flock() call)
// to write to the file. All the other processes will give up and will
// have perfdata disabled.
//
// Note that the flock will be automatically given up when the winner
// process exits.
//
// The locking protocol works only with other JVMs that have the JDK-8286030
// fix. If you are sharing the /tmp difrectory among different containers,
// do not use older JVMs that don't have this fix, or the behavior is undefined.
int n;
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
if (n != 0) {
log_warning(perf, memops)("Cannot use file %s/%s because %s (errno = %d)", dirname, filename,
(errno == EWOULDBLOCK) ?
"it is locked by another process" :
"flock() failed", errno);
::close(fd);
return -1;
}
#endif

// truncate the file to get rid of any existing data
RESTARTABLE(::ftruncate(fd, (off_t)0), result);
if (result == OS_ERR) {
Expand Down Expand Up @@ -982,12 +1047,13 @@ static char* mmap_create_shared(size_t size) {
}

// cleanup any stale shared memory files
cleanup_sharedmem_resources(dirname);
cleanup_sharedmem_files(dirname);

assert(((size > 0) && (size % os::vm_page_size() == 0)),
"unexpected PerfMemory region size");

fd = create_sharedmem_resources(dirname, short_filename, size);
log_info(perf, memops)("Trying to open %s/%s", dirname, short_filename);
fd = create_sharedmem_file(dirname, short_filename, size);

FREE_C_HEAP_ARRAY(char, user_name);
FREE_C_HEAP_ARRAY(char, dirname);
Expand Down Expand Up @@ -1020,6 +1086,8 @@ static char* mmap_create_shared(size_t size) {
// it does not go through os api, the operation has to record from here
MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);

log_info(perf, memops)("Successfully opened");

return mapAddress;
}

Expand Down Expand Up @@ -1049,10 +1117,10 @@ static char* create_shared_memory(size_t size) {
//
static void delete_shared_memory(char* addr, size_t size) {

// cleanup the persistent shared memory resources. since DestroyJavaVM does
// not support unloading of the JVM, unmapping of the memory resource is
// Remove the shared memory file. Since DestroyJavaVM does
// not support unloading of the JVM, unmapping of the memory region is
// not performed. The memory will be reclaimed by the OS upon termination of
// the process. The backing store file is deleted from the file system.
// the process.

assert(!PerfDisableSharedMem, "shouldn't be here");

Expand Down Expand Up @@ -1261,10 +1329,7 @@ void PerfMemory::delete_memory_region() {
save_memory_to_file(start(), capacity());
}

if (PerfDisableSharedMem) {
delete_standard_memory(start(), capacity());
}
else {
if (!PerfDisableSharedMem) {
delete_shared_memory(start(), capacity());
}
}
Expand Down
Loading

1 comment on commit d52e18c

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.