diff --git a/Lib/shutil.py b/Lib/shutil.py index a4aa0dfdd10b09..aaed61a6b932fd 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -5,6 +5,7 @@ """ import os +import fcntl import sys import stat import fnmatch @@ -211,6 +212,11 @@ def _samefile(src, dst): return (os.path.normcase(os.path.abspath(src)) == os.path.normcase(os.path.abspath(dst))) +def _samefilef(fsrc, fdst): + if os.path.abspath(fsrc.name) == os.path.abspath(fdst.name): + return True + return False + def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst in the most efficient way possible. @@ -218,50 +224,109 @@ def copyfile(src, dst, *, follow_symlinks=True): symlink will be created instead of copying the file it points to. """ + if _samefile(src, dst): raise SameFileError("{!r} and {!r} are the same file".format(src, dst)) - file_size = 0 - for i, fn in enumerate([src, dst]): - try: - st = os.stat(fn) - except OSError: - # File most likely does not exist - pass - else: - # XXX What about other special files? (sockets, devices...) - if stat.S_ISFIFO(st.st_mode): - raise SpecialFileError("`%s` is a named pipe" % fn) - if _WINDOWS and i == 0: - file_size = st.st_size - + # XXX Is there a race issue here with os.path.islink() operating on a + # path rather than a file descriptor if not follow_symlinks and os.path.islink(src): os.symlink(os.readlink(src), dst) + return dst + + # In Unix, we must open the files non-blocking initially as opening a + # FIFO without data present in it will block. + if os.name == 'posix': + src_flags = os.O_RDONLY | os.O_NONBLOCK + dst_flags = os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK else: - with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: - # macOS - if _HAS_FCOPYFILE: - try: - _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) - return dst - except _GiveupOnFastCopy: - pass - # Linux / Solaris - elif _HAS_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass - # Windows, see: - # https://github.com/python/cpython/pull/7160#discussion_r195405230 - elif _WINDOWS and file_size > 0: - _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) + src_flags = os.O_RDONLY + dst_flags = os.O_WRONLY + + def _src_opener(path, flags): + return os.open(path, src_flags) + + # dst's opener is more complex. We need to atomically check and open the + # destination if it doesn't exist. If it does exist, we need to detect so + # we don't destroy it later should we have to fail out for some reason. + dst_was_created = False + def _dst_opener(path, flags): + nonlocal dst_was_created + try: + dst_was_created = True + return os.open(path, dst_flags | os.O_EXCL) + except FileExistsError: + dst_was_created = False + try : + # Open the already existing file in as non-blocking + fd = os.open(path, dst_flags) + # If the destination is already a FIFO for some reason, we'll get an + # OSError here as we've opened a FIFO for a non-blocking write with + # no reader on the other end. + except OSError as e: + # Use errno to decide whether the OSError we caught is due to + # the above reason or some other reason. For the above reason, + # raise the appropriate special file error. Otherwise, we need + # to re-raise the OSError + if e.errno == errno.ENXIO: + raise SpecialFileError("`%s` is a named pipe" % path) + else: + raise e + # This check is required to catch the case where the destination + # file is a FIFO that's we successfully opened non-blocking because + # for some reason there was a reader listening preventing an ENXIO + # situation. + st = os.fstat(fd) + if stat.S_ISFIFO(st.st_mode): + raise SpecialFileError("`%s` is a named pipe" % path) + return fd + + with open(src, 'r', opener=_src_opener) as fsrc, open(dst, 'w', opener=_dst_opener) as fdst: + file_size = 0 + inodes_pre = [] + # file.name is not populated when using os.fdopen() + st = os.fstat(fsrc.fileno()) + if stat.S_ISFIFO(st.st_mode): + # Don't unlink dst unless we created it above + if dst_was_created: + os.unlink(dst) + raise SpecialFileError("`%s` is a named pipe" % src) + if _WINDOWS and i == 0: + file_size = st.st_size + + # In Unix, we must set the file descriptors back to blocking as that's + # what the rest of this function expects. + # Additonally, fsrc, and fdst must be turned into file objects + if os.name == 'posix': + srcfl = fcntl.fcntl(fsrc.fileno(), fcntl.F_GETFL) + dstfl = fcntl.fcntl(fdst.fileno(), fcntl.F_GETFL) + + # Unset the non-blocking flag + fcntl.fcntl(fsrc.fileno(), fcntl.F_SETFL, srcfl & ~os.O_NONBLOCK) + fcntl.fcntl(fdst.fileno(), fcntl.F_SETFL, dstfl & ~os.O_NONBLOCK) + + + # macOS + if _HAS_FCOPYFILE: + try: + _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA) return dst - - copyfileobj(fsrc, fdst) - - return dst + except _GiveupOnFastCopy: + pass + # Linux / Solaris + elif _HAS_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + # Windows, see: + # https://github.com/python/cpython/pull/7160#discussion_r195405230 + elif _WINDOWS and file_size > 0: + _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE)) + return dst + copyfileobj(fsrc, fdst) + return dst def copymode(src, dst, *, follow_symlinks=True): """Copy mode bits from src to dst. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 7e0a3292e0f8a4..ba3fa4f2977aca 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -1461,6 +1461,21 @@ def test_copyfile_same_file(self): # Make sure file is not corrupted. self.assertEqual(read_file(src_file), 'foo') + def test_copyfile_dst_exists_as_fifo(self): + src_dir = self.mkdtemp() + src_file = os.path.join(src_dir, 'foo') + write_file(src_file, 'foo') + dst_dir = src_dir + dst_file = os.path.join(dst_dir, 'bar_pipe') + try: + os.mkfifo(dst_file) + except PermissionError as e: + self.skipTest('os.mkfifo(): %s' % e) + self.assertRaises(shutil.SpecialFileError, + shutil.copyfile, + src_file, + dst_file) + def test_copytree_return_value(self): # copytree returns its destination path. src_dir = self.mkdtemp() @@ -1799,7 +1814,7 @@ def _set_shutil_open(self, func): self._delete = True def test_w_source_open_fails(self): - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': raise OSError('Cannot open "srcfile"') assert 0 # shouldn't reach here. @@ -1813,7 +1828,7 @@ def test_w_dest_open_fails(self): srcfile = self.Faux() - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': @@ -1834,7 +1849,7 @@ def test_w_dest_close_fails(self): srcfile = self.Faux() destfile = self.Faux(True) - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': @@ -1857,7 +1872,7 @@ def test_w_source_close_fails(self): srcfile = self.Faux(True) destfile = self.Faux() - def _open(filename, mode='r'): + def _open(filename, mode='r', opener=None): if filename == 'srcfile': return srcfile if filename == 'destfile': diff --git a/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst b/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst new file mode 100644 index 00000000000000..ae50b56fcb4211 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-07.bpo-30400.Pjhd8.rst @@ -0,0 +1,2 @@ +Fix race condition in shutil.copyfile() by ensuring the inode of the source +file does not change while it is being copied. Patch by Preston Moore.