From 8573da52d3a80aff95b705436efe0dbb93811529 Mon Sep 17 00:00:00 2001 From: mrbean-bremen Date: Mon, 15 May 2017 21:28:17 +0200 Subject: [PATCH 1/2] Added lazy evaluation of real directories - real directory contents are only added to the fake file system on demand (makes adding large directory trees faster) - refactored real file/directory access to use own classes --- fake_filesystem_test.py | 7 ++ pyfakefs/fake_filesystem.py | 155 ++++++++++++++++++++++++++++-------- 2 files changed, 129 insertions(+), 33 deletions(-) diff --git a/fake_filesystem_test.py b/fake_filesystem_test.py index d5576e3b..2978d129 100755 --- a/fake_filesystem_test.py +++ b/fake_filesystem_test.py @@ -4623,6 +4623,13 @@ def testAddExistingRealDirectoryTree(self): self.assertTrue(self.filesystem.Exists(os.path.join(real_dir_path, 'pyfakefs', 'fake_filesystem.py'))) self.assertTrue(self.filesystem.Exists(os.path.join(real_dir_path, 'pyfakefs', '__init__.py'))) + def testGetObjectFromLazilyAddedRealDirectory(self): + self.filesystem.is_case_sensitive = True + real_dir_path = os.path.dirname(__file__) + self.filesystem.add_real_directory(real_dir_path) + self.assertTrue(self.filesystem.GetObject(os.path.join(real_dir_path, 'pyfakefs', 'fake_filesystem.py'))) + self.assertTrue(self.filesystem.GetObject(os.path.join(real_dir_path, 'pyfakefs', '__init__.py'))) + def testAddExistingRealDirectoryReadWrite(self): real_dir_path = os.path.join(os.path.dirname(__file__), 'pyfakefs') self.filesystem.add_real_directory(real_dir_path, read_only=False) diff --git a/pyfakefs/fake_filesystem.py b/pyfakefs/fake_filesystem.py index e999781d..a3f97b81 100644 --- a/pyfakefs/fake_filesystem.py +++ b/pyfakefs/fake_filesystem.py @@ -214,15 +214,8 @@ def __init__(self, name, st_mode=stat.S_IFREG | PERM_DEF_FILE, self.st_uid = None self.st_gid = None - # members changed only by _CreateFile() to implement add_real_file() - self.read_from_real_fs = False - self.file_path = None - @property def byte_contents(self): - 
if self._byte_contents is None and self.read_from_real_fs: - with io.open(self.file_path, 'rb') as f: - self._byte_contents = f.read() return self._byte_contents @property @@ -297,7 +290,7 @@ def SetLargeFileSize(self, st_size): def IsLargeFile(self): """Return True if this file was initialized with size but no contents.""" - return self._byte_contents is None and not self.read_from_real_fs + return self._byte_contents is None def _encode_contents(self, contents): # pylint: disable=undefined-variable @@ -425,6 +418,53 @@ def SetIno(self, st_ino): self.st_ino = st_ino +class FakeFileFromRealFile(FakeFile): + """Represents a fake file copied from the real file system. + + The contents of the file are read on demand only. + New in pyfakefs 3.2. + """ + + def __init__(self, file_path, filesystem, read_only=True): + """init. + + Args: + file_path: path to the existing file. + filesystem: the fake filesystem where the file is created. + read_only: if set, the file is treated as read-only, e.g. a write access raises an exception; + otherwise, writing to the file changes the fake file only as usually. + + Raises: + OSError: if the file does not exist in the real file system. 
+ """ + real_stat = os.stat(file_path) + # for read-only mode, remove the write/executable permission bits + mode = real_stat.st_mode & 0o777444 if read_only else real_stat.st_mode + super(FakeFileFromRealFile, self).__init__(name=os.path.basename(file_path), + st_mode=mode, + filesystem=filesystem) + self.st_ctime = real_stat.st_ctime + self.st_atime = real_stat.st_atime + self.st_mtime = real_stat.st_mtime + self.st_gid = real_stat.st_gid + self.st_uid = real_stat.st_uid + self.st_size = real_stat.st_size + self.file_path = file_path + self.contents_read = False + + @property + def byte_contents(self): + if not self.contents_read: + self.contents_read = True + with io.open(self.file_path, 'rb') as f: + self._byte_contents = f.read() + return self._byte_contents + + def IsLargeFile(self): + """The contents are never faked.""" + return False + + class FakeDirectory(FakeFile): """Provides the appearance of a real directory.""" @@ -520,6 +560,58 @@ def __str__(self): return description +class FakeDirectoryFromRealDirectory(FakeDirectory): + """Represents a fake directory copied from the real file system. + + The contents of the directory are read on demand only. + New in pyfakefs 3.2. + """ + + def __init__(self, dir_path, filesystem, read_only): + """init. + + Args: + dir_path: full directory path + filesystem: the fake filesystem where the directory is created + read_only: if set, all files under the directory are treated as read-only, + e.g. a write access raises an exception; + otherwise, writing to the files changes the fake files only as usually. 
+ + Raises: + OSError if the directory does not exist in the real file system + """ + real_stat = os.stat(dir_path) + super(FakeDirectoryFromRealDirectory, self).__init__( + name=os.path.split(dir_path)[1], + perm_bits=real_stat.st_mode, + filesystem=filesystem) + + self.st_ctime = real_stat.st_ctime + self.st_atime = real_stat.st_atime + self.st_mtime = real_stat.st_mtime + self.st_gid = real_stat.st_gid + self.st_uid = real_stat.st_uid + self.dir_path = dir_path + self.read_only = read_only + self.contents_read = False + + @property + def contents(self): + """Return the list of contained directory entries, loading them if not already loaded.""" + if not self.contents_read: + self.contents_read = True + self.filesystem.add_real_paths( + [os.path.join(self.dir_path, entry) for entry in os.listdir(self.dir_path)], + read_only=self.read_only) + return self.byte_contents + + def GetSize(self): + # we cannot get the size until the contents are loaded + if not self.contents_read: + return 0 + return FakeDirectory.GetSize() + + class FakeFilesystem(object): """Provides the appearance of a real directory tree for unit testing.""" @@ -1167,6 +1259,7 @@ def _DirectoryContent(self, directory, component): if subdir.lower() == component.lower()] if matching_content: return matching_content[0] + return None, None def Exists(self, file_path): @@ -1642,11 +1735,9 @@ def add_real_file(self, file_path, read_only=True): OSError: if the file does not exist in the real file system. IOError: if the file already exists in the fake file system. 
""" - real_stat = os.stat(file_path) - # for read-only mode, remove the write/executable permission bits - mode = real_stat.st_mode & 0o777444 if read_only else real_stat.st_mode - return self._CreateFile(file_path, contents=None, read_from_real_fs=True, - st_mode=mode, real_stat=real_stat) + return self._CreateFile(file_path, + read_from_real_fs=True, + read_only=read_only) def add_real_directory(self, dir_path, read_only=True): """Create fake directory for the existing directory at path, and entries for all contained @@ -1668,10 +1759,16 @@ def add_real_directory(self, dir_path, read_only=True): """ if not os.path.exists(dir_path): raise IOError(errno.ENOENT, 'No such directory', dir_path) - self.CreateDirectory(dir_path) - for base, _, files in os.walk(dir_path): - for fileEntry in files: - self.add_real_file(os.path.join(base, fileEntry), read_only) + parent_path = os.path.split(dir_path)[0] + if self.Exists(parent_path): + parent_dir = self.GetObject(parent_path) + else: + parent_dir = self.CreateDirectory(parent_path) + new_dir = FakeDirectoryFromRealDirectory(dir_path, filesystem=self, read_only=read_only) + parent_dir.AddEntry(new_dir) + self.last_ino += 1 + new_dir.SetIno(self.last_ino) + return new_dir def add_real_paths(self, path_list, read_only=True): """Convenience method to add several files and directories from the real file system @@ -1697,8 +1794,8 @@ def add_real_paths(self, path_list, read_only=True): def _CreateFile(self, file_path, st_mode=stat.S_IFREG | PERM_DEF_FILE, contents='', st_size=None, create_missing_dirs=True, apply_umask=False, encoding=None, errors=None, - read_from_real_fs=False, real_stat=None): - """Create file_path, including all the parent directories along the way. + read_from_real_fs=False, read_only=True): + """Internal fake file creation, supports both normal fake files and fake files from real files. Args: file_path: path to the file to create. 
@@ -1708,12 +1805,10 @@ def _CreateFile(self, file_path, st_mode=stat.S_IFREG | PERM_DEF_FILE, create_missing_dirs: if True, auto create missing directories. apply_umask: whether or not the current umask must be applied on st_mode. encoding: if contents is a unicode string, the encoding used for serialization. - New in pyfakefs 2.9. errors: the error mode used for encoding/decoding errors - New in pyfakefs 3.2. read_from_real_fs: if True, the contents are reaf from the real file system on demand. - New in pyfakefs 3.2. - real_stat: used in combination with read_from_real_fs; stat result of the real file + read_only: if set, the file is treated as read-only, e.g. a write access raises an exception; + otherwise, writing to the file changes the fake file only as usually. """ file_path = self.NormalizePath(file_path) if self.Exists(file_path): @@ -1732,22 +1827,16 @@ def _CreateFile(self, file_path, st_mode=stat.S_IFREG | PERM_DEF_FILE, parent_directory = self.NormalizeCase(parent_directory) if apply_umask: st_mode &= ~self.umask - file_object = FakeFile(new_file, st_mode, filesystem=self, encoding=encoding, errors=errors) if read_from_real_fs: - file_object.st_ctime = real_stat.st_ctime - file_object.st_atime = real_stat.st_atime - file_object.st_mtime = real_stat.st_mtime - file_object.st_gid = real_stat.st_gid - file_object.st_uid = real_stat.st_uid - file_object.st_size = real_stat.st_size - file_object.read_from_real_fs = True - file_object.file_path = file_path + file_object = FakeFileFromRealFile(file_path, filesystem=self, read_only=read_only) + else: + file_object = FakeFile(new_file, st_mode, filesystem=self, encoding=encoding, errors=errors) self.last_ino += 1 file_object.SetIno(self.last_ino) self.AddObject(parent_directory, file_object) - if contents is not None or st_size is not None: + if not read_from_real_fs and (contents is not None or st_size is not None): try: if st_size is not None: file_object.SetLargeFileSize(st_size) From 
f6cc4f3421224fefd140e4d03bd2d3d4ae0d3f41 Mon Sep 17 00:00:00 2001 From: mrbean-bremen Date: Thu, 18 May 2017 20:10:41 +0200 Subject: [PATCH 2/2] Added possibility to switch off lazy directory reading - may be needed for tests that check the disk usage to avoid the side effect of changing disk usage during delayed directory content access --- fake_filesystem_test.py | 22 ++++++++++++++++++++++ pyfakefs/fake_filesystem.py | 37 +++++++++++++++++++++++++------------ 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/fake_filesystem_test.py b/fake_filesystem_test.py index 2978d129..344bf2c7 100755 --- a/fake_filesystem_test.py +++ b/fake_filesystem_test.py @@ -4630,6 +4630,28 @@ def testGetObjectFromLazilyAddedRealDirectory(self): self.assertTrue(self.filesystem.GetObject(os.path.join(real_dir_path, 'pyfakefs', 'fake_filesystem.py'))) self.assertTrue(self.filesystem.GetObject(os.path.join(real_dir_path, 'pyfakefs', '__init__.py'))) + def testAddExistingRealDirectoryLazily(self): + disk_size = 1024*1024*1024 + real_dir_path = os.path.join(os.path.dirname(__file__), 'pyfakefs') + self.filesystem.SetDiskUsage(disk_size, real_dir_path) + self.filesystem.add_real_directory(real_dir_path) + + # the directory contents have not been read, so the disk usage has not changed + self.assertEqual(disk_size, self.filesystem.GetDiskUsage(real_dir_path).free) + # checking for existence shall read the directory contents + self.assertTrue(self.filesystem.GetObject(os.path.join(real_dir_path, 'fake_filesystem.py'))) + # so now the free disk space shall have decreased + self.assertGreater(disk_size, self.filesystem.GetDiskUsage(real_dir_path).free) + + def testAddExistingRealDirectoryNotLazily(self): + disk_size = 1024*1024*1024 + real_dir_path = os.path.join(os.path.dirname(__file__), 'pyfakefs') + self.filesystem.SetDiskUsage(disk_size, real_dir_path) + self.filesystem.add_real_directory(real_dir_path, lazy_read=False) + + # the directory has been read, so the file sizes 
have been subtracted from the free space + self.assertGreater(disk_size, self.filesystem.GetDiskUsage(real_dir_path).free) + def testAddExistingRealDirectoryReadWrite(self): real_dir_path = os.path.join(os.path.dirname(__file__), 'pyfakefs') self.filesystem.add_real_directory(real_dir_path, read_only=False) diff --git a/pyfakefs/fake_filesystem.py b/pyfakefs/fake_filesystem.py index a3f97b81..f5cd09ec 100644 --- a/pyfakefs/fake_filesystem.py +++ b/pyfakefs/fake_filesystem.py @@ -609,7 +609,7 @@ def GetSize(self): # we cannot get the size until the contents are loaded if not self.contents_read: return 0 - return FakeDirectory.GetSize() + return super(FakeDirectoryFromRealDirectory, self).GetSize() class FakeFilesystem(object): @@ -1739,7 +1739,7 @@ def add_real_file(self, file_path, read_only=True): read_from_real_fs=True, read_only=read_only) - def add_real_directory(self, dir_path, read_only=True): + def add_real_directory(self, dir_path, read_only=True, lazy_read=True): """Create fake directory for the existing directory at path, and entries for all contained files in the real file system. New in pyfakefs 3.2. @@ -1749,6 +1749,11 @@ def add_real_directory(self, dir_path, read_only=True): read_only: if set, all files under the directory are treated as read-only, e.g. a write access raises an exception; otherwise, writing to the files changes the fake files only as usually. + lazy_read: if set (default), directory contents are only read when accessed, + and only until the needed subdirectory level + Note: this means that the file system size is only updated at the time + the directory contents are read; set this to False only if you + are dependent on accurate file system size in your test Returns: the newly created FakeDirectory object. 
@@ -1759,18 +1764,24 @@ def add_real_directory(self, dir_path, read_only=True): """ if not os.path.exists(dir_path): raise IOError(errno.ENOENT, 'No such directory', dir_path) - parent_path = os.path.split(dir_path)[0] - if self.Exists(parent_path): - parent_dir = self.GetObject(parent_path) + if lazy_read: + parent_path = os.path.split(dir_path)[0] + if self.Exists(parent_path): + parent_dir = self.GetObject(parent_path) + else: + parent_dir = self.CreateDirectory(parent_path) + new_dir = FakeDirectoryFromRealDirectory(dir_path, filesystem=self, read_only=read_only) + parent_dir.AddEntry(new_dir) + self.last_ino += 1 + new_dir.SetIno(self.last_ino) else: - parent_dir = self.CreateDirectory(parent_path) - new_dir = FakeDirectoryFromRealDirectory(dir_path, filesystem=self, read_only=read_only) - parent_dir.AddEntry(new_dir) - self.last_ino += 1 - new_dir.SetIno(self.last_ino) + new_dir = self.CreateDirectory(dir_path) + for base, _, files in os.walk(dir_path): + for fileEntry in files: + self.add_real_file(os.path.join(base, fileEntry), read_only) return new_dir - def add_real_paths(self, path_list, read_only=True): + def add_real_paths(self, path_list, read_only=True, lazy_dir_read=True): """Convenience method to add several files and directories from the real file system in the fake file system. See `add_real_file()` and `add_real_directory()`. New in pyfakefs 3.2. @@ -1780,6 +1791,8 @@ def add_real_paths(self, path_list, read_only=True): read_only: if set, all files and files under under the directories are treated as read-only, e.g. a write access raises an exception; otherwise, writing to the files changes the fake files only as usually. + lazy_dir_read: uses lazy reading of directory contents if set + (see `add_real_directory`) Raises: OSError: if any of the files and directories in the list does not exist in the real file system. 
@@ -1787,7 +1800,7 @@ def add_real_paths(self, path_list, read_only=True): """ for path in path_list: if os.path.isdir(path): - self.add_real_directory(path, read_only) + self.add_real_directory(path, read_only, lazy_dir_read) else: self.add_real_file(path, read_only)