Skip to content

Commit

Permalink
BaseTools: Improve the cache hit in the edk2 build cache
Browse files Browse the repository at this point in the history
BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1927

Current cache hash algorithm does not parse and generate
the makefile to get the accurate dependency files for a
module. It instead use the platform and package meta files
to get the module depenedency in a quick but over approximate
way. These meta files are monolithic and involve many redundant
dependency for the module, which cause the module build
cache miss easily.
This patch introduces one more cache checkpoint and a new
hash algorithm besides the current quick one. The new hash
algorithm leverages the module makefile to achieve more
accurate and precise dependency info for a module. When
the build cache miss with the first quick hash, the
Basetool will caculate new one after makefile is generated
and then check again.

Cc: Liming Gao <liming.gao@intel.com>
Cc: Bob Feng <bob.c.feng@intel.com>
Signed-off-by: Steven Shi <steven.shi@intel.com>
Reviewed-by: Bob Feng <bob.c.feng@intel.com>
  • Loading branch information
shijunjing authored and BobCF committed Aug 20, 2019
1 parent 8113281 commit 0e7e7a2
Show file tree
Hide file tree
Showing 7 changed files with 865 additions and 196 deletions.
21 changes: 21 additions & 0 deletions BaseTools/Source/Python/AutoGen/AutoGenWorker.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ def run(self):
GlobalData.gDisableIncludePathCheck = False
GlobalData.gFdfParser = self.data_pipe.Get("FdfParser")
GlobalData.gDatabasePath = self.data_pipe.Get("DatabasePath")
GlobalData.gBinCacheSource = self.data_pipe.Get("BinCacheSource")
GlobalData.gBinCacheDest = self.data_pipe.Get("BinCacheDest")
GlobalData.gCacheIR = self.data_pipe.Get("CacheIR")
GlobalData.gEnableGenfdsMultiThread = self.data_pipe.Get("EnableGenfdsMultiThread")
GlobalData.file_lock = self.file_lock
CommandTarget = self.data_pipe.Get("CommandTarget")
pcd_from_build_option = []
for pcd_tuple in self.data_pipe.Get("BuildOptPcd"):
pcd_id = ".".join((pcd_tuple[0],pcd_tuple[1]))
Expand All @@ -193,10 +199,13 @@ def run(self):
FfsCmd = self.data_pipe.Get("FfsCommand")
if FfsCmd is None:
FfsCmd = {}
GlobalData.FfsCmd = FfsCmd
PlatformMetaFile = self.GetPlatformMetaFile(self.data_pipe.Get("P_Info").get("ActivePlatform"),
self.data_pipe.Get("P_Info").get("WorkspaceDir"))
libConstPcd = self.data_pipe.Get("LibConstPcd")
Refes = self.data_pipe.Get("REFS")
GlobalData.libConstPcd = libConstPcd
GlobalData.Refes = Refes
while True:
if self.module_queue.empty():
break
Expand All @@ -223,8 +232,20 @@ def run(self):
Ma.ConstPcd = libConstPcd[(Ma.MetaFile.File,Ma.MetaFile.Root,Ma.Arch,Ma.MetaFile.Path)]
if (Ma.MetaFile.File,Ma.MetaFile.Root,Ma.Arch,Ma.MetaFile.Path) in Refes:
Ma.ReferenceModules = Refes[(Ma.MetaFile.File,Ma.MetaFile.Root,Ma.Arch,Ma.MetaFile.Path)]
if GlobalData.gBinCacheSource and CommandTarget in [None, "", "all"]:
Ma.GenModuleFilesHash(GlobalData.gCacheIR)
Ma.GenPreMakefileHash(GlobalData.gCacheIR)
if Ma.CanSkipbyPreMakefileCache(GlobalData.gCacheIR):
continue

Ma.CreateCodeFile(False)
Ma.CreateMakeFile(False,GenFfsList=FfsCmd.get((Ma.MetaFile.File, Ma.Arch),[]))

if GlobalData.gBinCacheSource and CommandTarget in [None, "", "all"]:
Ma.GenMakeHeaderFilesHash(GlobalData.gCacheIR)
Ma.GenMakeHash(GlobalData.gCacheIR)
if Ma.CanSkipbyMakeCache(GlobalData.gCacheIR):
continue
except Empty:
pass
except:
Expand Down
28 changes: 28 additions & 0 deletions BaseTools/Source/Python/AutoGen/CacheIR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
## @file
# Build cache intermediate result and state
#
# Copyright (c) 2019, Intel Corporation. All rights reserved.<BR>
# SPDX-License-Identifier: BSD-2-Clause-Patent
#

class ModuleBuildCacheIR():
def __init__(self, Path, Arch):
self.ModulePath = Path
self.ModuleArch = Arch
self.ModuleFilesHashDigest = None
self.ModuleFilesHashHexDigest = None
self.ModuleFilesChain = []
self.PreMakefileHashHexDigest = None
self.CreateCodeFileDone = False
self.CreateMakeFileDone = False
self.MakefilePath = None
self.AutoGenFileList = None
self.DependencyHeaderFileSet = None
self.MakeHeaderFilesHashChain = None
self.MakeHeaderFilesHashDigest = None
self.MakeHeaderFilesHashChain = []
self.MakeHashDigest = None
self.MakeHashHexDigest = None
self.MakeHashChain = []
self.PreMakeCacheHit = False
self.MakeCacheHit = False
8 changes: 8 additions & 0 deletions BaseTools/Source/Python/AutoGen/DataPipe.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,11 @@ def FillData(self,PlatformInfo):
self.DataContainer = {"FdfParser": True if GlobalData.gFdfParser else False}

self.DataContainer = {"LogLevel": EdkLogger.GetLevel()}

self.DataContainer = {"BinCacheSource":GlobalData.gBinCacheSource}

self.DataContainer = {"BinCacheDest":GlobalData.gBinCacheDest}

self.DataContainer = {"CacheIR":GlobalData.gCacheIR}

self.DataContainer = {"EnableGenfdsMultiThread":GlobalData.gEnableGenfdsMultiThread}
227 changes: 116 additions & 111 deletions BaseTools/Source/Python/AutoGen/GenMake.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,11 @@ def ProcessBuildTargetList(self):
self._AutoGenObject.IncludePathList + self._AutoGenObject.BuildOptionIncPathList
)

self.DependencyHeaderFileSet = set()
if FileDependencyDict:
for Dependency in FileDependencyDict.values():
self.DependencyHeaderFileSet.update(set(Dependency))

# Get a set of unique package includes from MetaFile
parentMetaFileIncludes = set()
for aInclude in self._AutoGenObject.PackageIncludePathList:
Expand Down Expand Up @@ -1115,7 +1120,7 @@ def CheckCCCmd(self, CommandList):
## For creating makefile targets for dependent libraries
def ProcessDependentLibrary(self):
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
self.LibraryBuildDirectoryList.append(self.PlaceMacro(LibraryAutoGen.BuildDir, self.Macros))

## Return a list containing source file's dependencies
Expand All @@ -1129,114 +1134,9 @@ def ProcessDependentLibrary(self):
def GetFileDependency(self, FileList, ForceInculeList, SearchPathList):
Dependency = {}
for F in FileList:
Dependency[F] = self.GetDependencyList(F, ForceInculeList, SearchPathList)
Dependency[F] = GetDependencyList(self._AutoGenObject, self.FileCache, F, ForceInculeList, SearchPathList)
return Dependency

## Find dependencies for one source file
#
# By searching recursively "#include" directive in file, find out all the
# files needed by given source file. The dependencies will be only searched
# in given search path list.
#
# @param File The source file
# @param ForceInculeList The list of files which will be included forcely
# @param SearchPathList The list of search path
#
# @retval list The list of files the given source file depends on
#
def GetDependencyList(self, File, ForceList, SearchPathList):
EdkLogger.debug(EdkLogger.DEBUG_1, "Try to get dependency files for %s" % File)
FileStack = [File] + ForceList
DependencySet = set()

if self._AutoGenObject.Arch not in gDependencyDatabase:
gDependencyDatabase[self._AutoGenObject.Arch] = {}
DepDb = gDependencyDatabase[self._AutoGenObject.Arch]

while len(FileStack) > 0:
F = FileStack.pop()

FullPathDependList = []
if F in self.FileCache:
for CacheFile in self.FileCache[F]:
FullPathDependList.append(CacheFile)
if CacheFile not in DependencySet:
FileStack.append(CacheFile)
DependencySet.update(FullPathDependList)
continue

CurrentFileDependencyList = []
if F in DepDb:
CurrentFileDependencyList = DepDb[F]
else:
try:
Fd = open(F.Path, 'rb')
FileContent = Fd.read()
Fd.close()
except BaseException as X:
EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=F.Path + "\n\t" + str(X))
if len(FileContent) == 0:
continue
try:
if FileContent[0] == 0xff or FileContent[0] == 0xfe:
FileContent = FileContent.decode('utf-16')
else:
FileContent = FileContent.decode()
except:
# The file is not txt file. for example .mcb file
continue
IncludedFileList = gIncludePattern.findall(FileContent)

for Inc in IncludedFileList:
Inc = Inc.strip()
# if there's macro used to reference header file, expand it
HeaderList = gMacroPattern.findall(Inc)
if len(HeaderList) == 1 and len(HeaderList[0]) == 2:
HeaderType = HeaderList[0][0]
HeaderKey = HeaderList[0][1]
if HeaderType in gIncludeMacroConversion:
Inc = gIncludeMacroConversion[HeaderType] % {"HeaderKey" : HeaderKey}
else:
# not known macro used in #include, always build the file by
# returning a empty dependency
self.FileCache[File] = []
return []
Inc = os.path.normpath(Inc)
CurrentFileDependencyList.append(Inc)
DepDb[F] = CurrentFileDependencyList

CurrentFilePath = F.Dir
PathList = [CurrentFilePath] + SearchPathList
for Inc in CurrentFileDependencyList:
for SearchPath in PathList:
FilePath = os.path.join(SearchPath, Inc)
if FilePath in gIsFileMap:
if not gIsFileMap[FilePath]:
continue
# If isfile is called too many times, the performance is slow down.
elif not os.path.isfile(FilePath):
gIsFileMap[FilePath] = False
continue
else:
gIsFileMap[FilePath] = True
FilePath = PathClass(FilePath)
FullPathDependList.append(FilePath)
if FilePath not in DependencySet:
FileStack.append(FilePath)
break
else:
EdkLogger.debug(EdkLogger.DEBUG_9, "%s included by %s was not found "\
"in any given path:\n\t%s" % (Inc, F, "\n\t".join(SearchPathList)))

self.FileCache[F] = FullPathDependList
DependencySet.update(FullPathDependList)

DependencySet.update(ForceList)
if File in DependencySet:
DependencySet.remove(File)
DependencyList = list(DependencySet) # remove duplicate ones

return DependencyList

## CustomMakefile class
#
Expand Down Expand Up @@ -1618,7 +1518,7 @@ def GetModuleBuildDirectoryList(self):
def GetLibraryBuildDirectoryList(self):
DirList = []
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
DirList.append(os.path.join(self._AutoGenObject.BuildDir, LibraryAutoGen.BuildDir))
return DirList

Expand Down Expand Up @@ -1754,11 +1654,116 @@ def GetModuleBuildDirectoryList(self):
def GetLibraryBuildDirectoryList(self):
DirList = []
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
DirList.append(os.path.join(self._AutoGenObject.BuildDir, LibraryAutoGen.BuildDir))
return DirList

## Find dependencies for one source file
#
# By searching recursively "#include" directive in file, find out all the
# files needed by given source file. The dependencies will be only searched
# in given search path list.
#
# @param File The source file
# @param ForceInculeList The list of files which will be included forcely
# @param SearchPathList The list of search path
#
# @retval list The list of files the given source file depends on
#
def GetDependencyList(AutoGenObject, FileCache, File, ForceList, SearchPathList):
EdkLogger.debug(EdkLogger.DEBUG_1, "Try to get dependency files for %s" % File)
FileStack = [File] + ForceList
DependencySet = set()

if AutoGenObject.Arch not in gDependencyDatabase:
gDependencyDatabase[AutoGenObject.Arch] = {}
DepDb = gDependencyDatabase[AutoGenObject.Arch]

while len(FileStack) > 0:
F = FileStack.pop()

FullPathDependList = []
if F in FileCache:
for CacheFile in FileCache[F]:
FullPathDependList.append(CacheFile)
if CacheFile not in DependencySet:
FileStack.append(CacheFile)
DependencySet.update(FullPathDependList)
continue

CurrentFileDependencyList = []
if F in DepDb:
CurrentFileDependencyList = DepDb[F]
else:
try:
Fd = open(F.Path, 'rb')
FileContent = Fd.read()
Fd.close()
except BaseException as X:
EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=F.Path + "\n\t" + str(X))
if len(FileContent) == 0:
continue
try:
if FileContent[0] == 0xff or FileContent[0] == 0xfe:
FileContent = FileContent.decode('utf-16')
else:
FileContent = FileContent.decode()
except:
# The file is not txt file. for example .mcb file
continue
IncludedFileList = gIncludePattern.findall(FileContent)

for Inc in IncludedFileList:
Inc = Inc.strip()
# if there's macro used to reference header file, expand it
HeaderList = gMacroPattern.findall(Inc)
if len(HeaderList) == 1 and len(HeaderList[0]) == 2:
HeaderType = HeaderList[0][0]
HeaderKey = HeaderList[0][1]
if HeaderType in gIncludeMacroConversion:
Inc = gIncludeMacroConversion[HeaderType] % {"HeaderKey" : HeaderKey}
else:
# not known macro used in #include, always build the file by
# returning a empty dependency
FileCache[File] = []
return []
Inc = os.path.normpath(Inc)
CurrentFileDependencyList.append(Inc)
DepDb[F] = CurrentFileDependencyList

CurrentFilePath = F.Dir
PathList = [CurrentFilePath] + SearchPathList
for Inc in CurrentFileDependencyList:
for SearchPath in PathList:
FilePath = os.path.join(SearchPath, Inc)
if FilePath in gIsFileMap:
if not gIsFileMap[FilePath]:
continue
# If isfile is called too many times, the performance is slow down.
elif not os.path.isfile(FilePath):
gIsFileMap[FilePath] = False
continue
else:
gIsFileMap[FilePath] = True
FilePath = PathClass(FilePath)
FullPathDependList.append(FilePath)
if FilePath not in DependencySet:
FileStack.append(FilePath)
break
else:
EdkLogger.debug(EdkLogger.DEBUG_9, "%s included by %s was not found "\
"in any given path:\n\t%s" % (Inc, F, "\n\t".join(SearchPathList)))

FileCache[F] = FullPathDependList
DependencySet.update(FullPathDependList)

DependencySet.update(ForceList)
if File in DependencySet:
DependencySet.remove(File)
DependencyList = list(DependencySet) # remove duplicate ones

return DependencyList

# This acts like the main() function for the script, unless it is 'import'ed into another script.
if __name__ == '__main__':
pass

pass

0 comments on commit 0e7e7a2

Please sign in to comment.