Skip to content

Commit d52e18c

Browse files
committed
8286030: Avoid JVM crash when containers share the same /tmp dir
Reviewed-by: stuefe Backport-of: 84f23149e22561173feb0e34bca31a7345b43c89
1 parent b023d5c commit d52e18c

File tree

3 files changed

+297
-57
lines changed

3 files changed

+297
-57
lines changed

src/hotspot/os/posix/perfMemory_posix.cpp

+122-57
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@
4747
# include <signal.h>
4848
# include <pwd.h>
4949

50+
#if defined(LINUX)
51+
# include <sys/file.h>
52+
#endif
53+
5054
static char* backing_store_file_name = NULL; // name of the backing store
5155
// file, if successfully created.
5256

@@ -75,18 +79,6 @@ static char* create_standard_memory(size_t size) {
7579
return mapAddress;
7680
}
7781

78-
// delete the PerfData memory region
79-
//
80-
static void delete_standard_memory(char* addr, size_t size) {
81-
82-
// there are no persistent external resources to cleanup for standard
83-
// memory. since DestroyJavaVM does not support unloading of the JVM,
84-
// cleanup of the memory resource is not performed. The memory will be
85-
// reclaimed by the OS upon termination of the process.
86-
//
87-
return;
88-
}
89-
9082
// save the specified memory region to the given file
9183
//
9284
// Note: this function might be called from signal handler (by os::abort()),
@@ -707,17 +699,17 @@ static void remove_file(const char* path) {
707699
}
708700
}
709701

710-
711-
// cleanup stale shared memory resources
702+
// cleanup stale shared memory files
712703
//
713704
// This method attempts to remove all stale shared memory files in
714705
// the named user temporary directory. It scans the named directory
715-
// for files matching the pattern ^$[0-9]*$. For each file found, the
716-
// process id is extracted from the file name and a test is run to
717-
// determine if the process is alive. If the process is not alive,
718-
// any stale file resources are removed.
706+
// for files matching the pattern ^$[0-9]*$.
707+
//
708+
// This directory should be used only by JVM processes owned by the
709+
// current user to store PerfMemory files. Any other files found
710+
// in this directory may be removed.
719711
//
720-
static void cleanup_sharedmem_resources(const char* dirname) {
712+
static void cleanup_sharedmem_files(const char* dirname) {
721713

722714
int saved_cwd_fd;
723715
// open the directory and set the current working directory to it
@@ -727,48 +719,95 @@ static void cleanup_sharedmem_resources(const char* dirname) {
727719
return;
728720
}
729721

730-
// for each entry in the directory that matches the expected file
731-
// name pattern, determine if the file resources are stale and if
732-
// so, remove the file resources. Note, instrumented HotSpot processes
733-
// for this user may start and/or terminate during this search and
734-
// remove or create new files in this directory. The behavior of this
735-
// loop under these conditions is dependent upon the implementation of
736-
// opendir/readdir.
722+
// For each entry in the directory that matches the expected file
723+
// name pattern, remove the file if it's determine to be stale
724+
// Note, instrumented HotSpot processes for this user may start and/or
725+
// terminate during this search and remove or create new files in this
726+
// directory. The behavior of this loop under these conditions is dependent
727+
// upon the implementation of opendir/readdir.
737728
//
738729
struct dirent* entry;
739730
errno = 0;
740731
while ((entry = os::readdir(dirp)) != NULL) {
741-
742-
pid_t pid = filename_to_pid(entry->d_name);
732+
const char* filename = entry->d_name;
733+
pid_t pid = filename_to_pid(filename);
743734

744735
if (pid == 0) {
745-
746-
if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
736+
if (strcmp(filename, ".") != 0 && strcmp(filename, "..") != 0) {
747737
// attempt to remove all unexpected files, except "." and ".."
748-
unlink(entry->d_name);
738+
unlink(filename);
749739
}
750740

751741
errno = 0;
752742
continue;
753743
}
754744

755-
// we now have a file name that converts to a valid integer
756-
// that could represent a process id . if this process id
757-
// matches the current process id or the process is not running,
758-
// then remove the stale file resources.
759-
//
760-
// process liveness is detected by sending signal number 0 to
761-
// the process id (see kill(2)). if kill determines that the
762-
// process does not exist, then the file resources are removed.
763-
// if kill determines that that we don't have permission to
764-
// signal the process, then the file resources are assumed to
765-
// be stale and are removed because the resources for such a
766-
// process should be in a different user specific directory.
745+
#if defined(LINUX)
746+
// Special case on Linux, if multiple containers share the
747+
// same /tmp directory:
767748
//
768-
if ((pid == os::current_process_id()) ||
769-
(kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
770-
unlink(entry->d_name);
749+
// - All the JVMs must have the JDK-8286030 fix, or the behavior
750+
// is undefined.
751+
// - We cannot rely on the values of the pid, because it could
752+
// be a process in a different namespace. We must use the flock
753+
// protocol to determine if a live process is using this file.
754+
// See create_sharedmem_file().
755+
int fd;
756+
RESTARTABLE(os::open(filename, O_RDONLY, 0), fd);
757+
if (fd == OS_ERR) {
758+
// Something wrong happened. Ignore the error and don't try to remove the
759+
// file.
760+
log_debug(perf, memops)("os::open() for stale file check failed for %s/%s", dirname, filename);
761+
errno = 0;
762+
continue;
763+
}
764+
765+
int n;
766+
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
767+
if (n != 0) {
768+
// Either another process holds the exclusive lock on this file, or
769+
// something wrong happened. Ignore the error and don't try to remove the
770+
// file.
771+
log_debug(perf, memops)("flock for stale file check failed for %s/%s", dirname, filename);
772+
::close(fd);
773+
errno = 0;
774+
continue;
775+
}
776+
// We are able to lock the file, but this file might have been created
777+
// by an older JVM that doesn't use the flock prototol, so we must do
778+
// the folowing checks (which are also done by older JVMs).
779+
#endif
780+
781+
// The following code assumes that pid must be in the same
782+
// namespace as the current process.
783+
bool stale = false;
784+
785+
if (pid == os::current_process_id()) {
786+
// The file was created by a terminated process that happened
787+
// to have the same pid as the current process.
788+
stale = true;
789+
} else if (kill(pid, 0) == OS_ERR) {
790+
if (errno == ESRCH) {
791+
// The target process does not exist.
792+
stale = true;
793+
} else if (errno == EPERM) {
794+
// The file was created by a terminated process that happened
795+
// to have the same pid as a process not owned by the current user.
796+
stale = true;
797+
}
771798
}
799+
800+
if (stale) {
801+
log_info(perf, memops)("Remove stale file %s/%s", dirname, filename);
802+
unlink(filename);
803+
}
804+
805+
#if defined(LINUX)
806+
// Hold the lock until here to prevent other JVMs from using this file
807+
// while we were in the middle of deleting it.
808+
::close(fd);
809+
#endif
810+
772811
errno = 0;
773812
}
774813

@@ -814,13 +853,13 @@ static bool make_user_tmp_dir(const char* dirname) {
814853
return true;
815854
}
816855

817-
// create the shared memory file resources
856+
// create the shared memory file
818857
//
819858
// This method creates the shared memory file with the given size
820859
// This method also creates the user specific temporary directory, if
821860
// it does not yet exist.
822861
//
823-
static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {
862+
static int create_sharedmem_file(const char* dirname, const char* filename, size_t size) {
824863

825864
// make the user temporary directory
826865
if (!make_user_tmp_dir(dirname)) {
@@ -868,6 +907,32 @@ static int create_sharedmem_resources(const char* dirname, const char* filename,
868907
return -1;
869908
}
870909

910+
#if defined(LINUX)
911+
// On Linux, different containerized processes that share the same /tmp
912+
// directory (e.g., with "docker --volume ...") may have the same pid and
913+
// try to use the same file. To avoid conflicts among such
914+
// processes, we allow only one of them (the winner of the flock() call)
915+
// to write to the file. All the other processes will give up and will
916+
// have perfdata disabled.
917+
//
918+
// Note that the flock will be automatically given up when the winner
919+
// process exits.
920+
//
921+
// The locking protocol works only with other JVMs that have the JDK-8286030
922+
// fix. If you are sharing the /tmp difrectory among different containers,
923+
// do not use older JVMs that don't have this fix, or the behavior is undefined.
924+
int n;
925+
RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
926+
if (n != 0) {
927+
log_warning(perf, memops)("Cannot use file %s/%s because %s (errno = %d)", dirname, filename,
928+
(errno == EWOULDBLOCK) ?
929+
"it is locked by another process" :
930+
"flock() failed", errno);
931+
::close(fd);
932+
return -1;
933+
}
934+
#endif
935+
871936
// truncate the file to get rid of any existing data
872937
RESTARTABLE(::ftruncate(fd, (off_t)0), result);
873938
if (result == OS_ERR) {
@@ -982,12 +1047,13 @@ static char* mmap_create_shared(size_t size) {
9821047
}
9831048

9841049
// cleanup any stale shared memory files
985-
cleanup_sharedmem_resources(dirname);
1050+
cleanup_sharedmem_files(dirname);
9861051

9871052
assert(((size > 0) && (size % os::vm_page_size() == 0)),
9881053
"unexpected PerfMemory region size");
9891054

990-
fd = create_sharedmem_resources(dirname, short_filename, size);
1055+
log_info(perf, memops)("Trying to open %s/%s", dirname, short_filename);
1056+
fd = create_sharedmem_file(dirname, short_filename, size);
9911057

9921058
FREE_C_HEAP_ARRAY(char, user_name);
9931059
FREE_C_HEAP_ARRAY(char, dirname);
@@ -1020,6 +1086,8 @@ static char* mmap_create_shared(size_t size) {
10201086
// it does not go through os api, the operation has to record from here
10211087
MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);
10221088

1089+
log_info(perf, memops)("Successfully opened");
1090+
10231091
return mapAddress;
10241092
}
10251093

@@ -1049,10 +1117,10 @@ static char* create_shared_memory(size_t size) {
10491117
//
10501118
static void delete_shared_memory(char* addr, size_t size) {
10511119

1052-
// cleanup the persistent shared memory resources. since DestroyJavaVM does
1053-
// not support unloading of the JVM, unmapping of the memory resource is
1120+
// Remove the shared memory file. Since DestroyJavaVM does
1121+
// not support unloading of the JVM, unmapping of the memory region is
10541122
// not performed. The memory will be reclaimed by the OS upon termination of
1055-
// the process. The backing store file is deleted from the file system.
1123+
// the process.
10561124

10571125
assert(!PerfDisableSharedMem, "shouldn't be here");
10581126

@@ -1261,10 +1329,7 @@ void PerfMemory::delete_memory_region() {
12611329
save_memory_to_file(start(), capacity());
12621330
}
12631331

1264-
if (PerfDisableSharedMem) {
1265-
delete_standard_memory(start(), capacity());
1266-
}
1267-
else {
1332+
if (!PerfDisableSharedMem) {
12681333
delete_shared_memory(start(), capacity());
12691334
}
12701335
}

0 commit comments

Comments
 (0)