From bc7e4e6821639ee766ada83483975668af98f367 Mon Sep 17 00:00:00 2001
From: Sam Kleinman
Date: Thu, 28 Jul 2016 15:09:00 -0400
Subject: [PATCH] SERVER-25171: add scons cache support and enable on relevant non-push variants

---
Review notes (this section is ignored by `git am`; it is not part of the
commit message):

 * The line structure of this patch was restored from a whitespace-collapsed
   copy. Line boundaries follow the +/- markers and the hunk counts;
   indentation was reconstructed and should be checked against the tree
   before applying.
 * generate_compile_expansions.py: strip the newline returned by
   f.readline() so it does not end up inside scons_cache_path.
 * scons_cache_prune.py: valid shebang; the two size/quota log messages now
   print both values ({0}/{1} instead of {0}/{0}); the bare `except:` around
   os.rename is narrowed to OSError; "currupt" -> "corrupt".
 * evergreen.yml: comment grammar only.
 * All fixes are one-for-one line replacements, so the hunk headers and the
   diffstat below are unchanged.

 buildscripts/generate_compile_expansions.py   |  20 +++
 buildscripts/scons_cache_prune.py             | 131 ++++++++++++++++++
 etc/evergreen.yml                             |  41 +++++-
 .../scons-local-2.5.0/SCons/CacheDir.py       |   5 +-
 4 files changed, 189 insertions(+), 8 deletions(-)
 create mode 100644 buildscripts/scons_cache_prune.py

diff --git a/buildscripts/generate_compile_expansions.py b/buildscripts/generate_compile_expansions.py
index 016a8c379a25a..8e5b4e774b22f 100644
--- a/buildscripts/generate_compile_expansions.py
+++ b/buildscripts/generate_compile_expansions.py
@@ -12,6 +12,8 @@
 import fileinput
 import json
 import re
+import os
+import sys
 
 # This function matches a version string and captures the "extra" part
 # If the version is a release like "2.3.4" or "2.3.4-rc0", this will return
@@ -57,3 +59,21 @@ def match_verstr(verstr):
     print "src_suffix: r{0}".format(version_line)
 
 print "version: {0}".format(version_line)
+
+# configuration for scons cache.
+#
+if sys.platform.startswith("win"):
+    system_id_path = r"c:\mongodb-build-system-id"
+    default_cache_path_base = r"z:\data\scons-cache"
+else:
+    system_id_path = "/etc/mongodb-build-system-id"
+    default_cache_path_base = "/data/scons-cache"
+
+if os.path.isfile(system_id_path):
+    with open(system_id_path, "r") as f:
+        default_cache_path = os.path.join(default_cache_path_base, f.readline().strip())
+
+    print "scons_cache_path: {0}".format(default_cache_path)
+
+    if os.getenv("USE_SCONS_CACHE") not in (None, False, "false", ""):
+        print "scons_cache_args: --cache --cache-dir={0}".format(default_cache_path)
diff --git a/buildscripts/scons_cache_prune.py b/buildscripts/scons_cache_prune.py
new file mode 100644
index 0000000000000..d617dbcf57be9
--- /dev/null
+++ b/buildscripts/scons_cache_prune.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+This script, borrowed from some waf code, with a stand alone interface, provides a way to
+remove files from the cache on an LRU (least recently used) basis to prevent the scons cache
+from outgrowing the storage capacity.
+"""
+
+# Inspired by: https://github.com/krig/waf/blob/master/waflib/extras/lru_cache.py
+# Thomas Nagy 2011
+
+import argparse
+import collections
+import logging
+import os
+import shutil
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("scons.cache.prune.lru")
+
+GIGBYTES = 1024*1024*1024
+
+cache_item = collections.namedtuple("CacheContents", ["path", "time", "size"])
+
+
+def collect_cache_contents(cache_path):
+    # map folder names to timestamps
+    contents = []
+    total = 0
+
+    # collect names of directories and creation times
+    for name in os.listdir(cache_path):
+        path = os.path.join(cache_path, name)
+
+        if os.path.isdir(path):
+            for file_name in os.listdir(path):
+                file_path = os.path.join(path, file_name)
+                if os.path.isdir(file_path):
+                    logger.warning("cache item {0} is a directory and not a file. "
+                                   "The cache may be corrupt.".format(file_path))
+                    continue
+
+                item = cache_item(path=file_path,
+                                  time=os.stat(file_path).st_atime,
+                                  size=os.stat(file_path).st_size)
+
+                total += item.size
+
+                contents.append(item)
+
+    return (total, contents)
+
+
+def prune_cache(cache_path, cache_size_gb, clean_ratio):
+    # This function is taken as is from waf, with the interface cleaned up and some minor
+    # stylistic changes.
+
+    cache_size = cache_size_gb * GIGBYTES
+
+    (total_size, contents) = collect_cache_contents(cache_path)
+
+    logger.info("cache size {0}, quota {1}".format(total_size, cache_size))
+
+    if total_size >= cache_size:
+        logger.info("trimming the cache since {0} > {1}".format(total_size, cache_size))
+
+        # make a list to sort the folders' by timestamp
+        contents.sort(key=lambda x: x.time, reverse=True)  # sort by timestamp
+
+        # now that the contents of things to delete is sorted by timestamp in reverse order, we
+        # just delete things until the total_size falls below the target cache size ratio.
+        while total_size >= cache_size * clean_ratio:
+            if len(contents) == 0:
+                shutil.rmtree(cache_path)
+                logger.error("cache size is over quota, and there are no files in "
+                             "the queue to delete. Removed the entire cache.")
+                return False
+
+            # (file_name, _, size) = contents.pop()
+            cache_item = contents.pop()
+            to_remove = cache_item.path + ".del"
+            try:
+                os.rename(cache_item.path, to_remove)
+            except OSError:
+                # another process may have already cleared the file.
+                pass
+            else:
+                try:
+                    os.remove(to_remove)
+                    logger.info("removed file from cache: {0}".format(cache_item.path))
+                    total_size -= cache_item.size
+                except Exception as e:
+                    # this should not happen, but who knows?
+                    logger.error("error [{0}, {1}] removing file '{2}', "
+                                 "please report this error".format(e, type(e), to_remove))
+
+        logger.info("total cache size at the end of pruning: {0}".format(total_size))
+        return True
+    else:
+        logger.info("cache size ({0}) is currently within boundaries".format(total_size))
+        return True
+
+
+def main():
+    parser = argparse.ArgumentParser(description="SCons cache pruning tool")
+
+    parser.add_argument("--cache-dir", "-d", default=None,
+                        help="path to the cache directory.")
+    parser.add_argument("--cache-size", "-s", default=200, type=int,
+                        help="maximum size of cache in GB.")
+    parser.add_argument("--prune-ratio", "-p", default=0.8, type=float,
+                        help=("ratio (as 1.0 > x > 0) of total cache size to prune "
+                              "to when cache exceeds quota."))
+    parser.add_argument("--print-cache-dir", default=False, action="store_true")
+
+    args = parser.parse_args()
+
+    if args.cache_dir is None or not os.path.isdir(args.cache_dir):
+        logger.error("must specify a valid cache path, [{0}]".format(args.cache_dir))
+        exit(1)
+
+    ok = prune_cache(cache_path=args.cache_dir,
+                     cache_size_gb=args.cache_size,
+                     clean_ratio=args.prune_ratio)
+
+    if not ok:
+        logger.error("encountered error cleaning the cache. exiting.")
+        exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 1cdc38a156916..181da157395f3 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -718,6 +718,15 @@ post:
             ${tar|tar} cvzf diagnostic-data.tgz -C /data/db $file_list
           fi
         fi
+  - command: shell.exec
+    params:
+      working_dir: src
+      script: |
+        # removes files from the (local) scons cache when it's over a
+        # threshold, to the $prune_ratio percentage. Ideally override
+        # these default values in the distro config in evergreen.
+
+        ${python|python} buildscripts/scons_cache_prune.py --cache-dir ${scons_cache_path} --cache-size ${scons_cache_size|200} --prune-ratio ${scons_prune_ratio|0.8}
   - command: archive.targz_pack
     params:
       target: "diskstats.tgz"
@@ -838,9 +847,8 @@ tasks:
           fi
 
           # This script converts the generated version string into a sanitized version string for
-          # use by scons and uploading artifacts.
-          echo $MONGO_VERSION | ${python|python} buildscripts/generate_compile_expansions.py | tee compile_expansions.yml
-
+          # use by scons and uploading artifacts as well as information for the scons cache.
+          echo $MONGO_VERSION | USE_SCONS_CACHE=${use_scons_cache|false} ${python|python} buildscripts/generate_compile_expansions.py | tee compile_expansions.yml
     # Then we load the generated version data into the agent so we can use it in task definitions
     - command: expansions.update
       params:
@@ -854,8 +862,10 @@ tasks:
           set -o errexit
           set -o verbose
 
+          mkdir -p ${scons_cache_path}
           rm -rf ${install_directory|/data/mongo-install-directory}
-          ${scons|scons} ${compile_flags|} --use-new-tools all dist dist-debugsymbols distsrc-${ext|tgz} ${msi_target|} MONGO_VERSION=${version}
+
+          ${python|python} ./buildscripts/scons.py ${compile_flags|} ${scons_cache_args|} --use-new-tools all dist dist-debugsymbols distsrc-${ext|tgz} ${msi_target|} MONGO_VERSION=${version}
 
           mv mongodb-src-*.${ext|tgz} distsrc.${ext|tgz}
           mv mongodb-*-debugsymbols.${ext|tgz} mongo-debugsymbols.tgz || true
@@ -983,7 +993,8 @@ tasks:
         script: |
           set -o errexit
           set -o verbose
-          ${scons|scons} ${compile_flags|} --stack-size=1024 lint
+
+          ${python|python} ./buildscripts/scons.py ${compile_flags|} --stack-size=1024 lint
 
 - <<: *task_template
   name: burn_in_tests_WT
@@ -2933,9 +2944,10 @@ buildvariants:
   batchtime: 1440 # 1 day
   expansions:
     gorootvars: PATH=/opt/mongodbtoolchain/v2/bin:$PATH
-    compile_flags: -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy --release
+    compile_flags: -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: --repeat=10 --shuffle
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -2970,6 +2982,7 @@ buildvariants:
     push_arch: x86_64
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     compile_flags: --dbg=on --opt=on -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -3079,6 +3092,7 @@ buildvariants:
     compile_flags: -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: --nojournal --excludeWithAnyTags=requires_journaling
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -3142,9 +3156,10 @@ buildvariants:
   batchtime: 1440 # 1 day
   expansions:
     gorootvars: PATH=/opt/mongodbtoolchain/v2/bin:$PATH
-    compile_flags: -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy --release
+    compile_flags: -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: --wiredTigerCollectionConfig=type=lsm --wiredTigerIndexConfig=type=lsm
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -4278,6 +4293,7 @@ buildvariants:
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     compile_flags: --dbg=on --opt=on --win-version-min=ws08r2 -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) MONGO_DISTMOD=2008plus
     ext: zip
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -4473,6 +4489,7 @@ buildvariants:
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: --storageEngine=inMemory --excludeWithAnyTags=requires_persistence,requires_journaling
     ext: zip
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -5817,6 +5834,7 @@ buildvariants:
     gorootvars: GOROOT=/opt/go PATH="/opt/go/bin:$PATH"
     tooltags: "-tags 'ssl sasl'"
     compile_flags: --ssl CC=/usr/bin/clang CXX=/usr/bin/clang++ --libc++ -j$(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -5839,6 +5857,7 @@ buildvariants:
     compile_flags: --ssl MONGO_DISTMOD=ubuntu1604 --release -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: "--mongosSetParameters='{messagePortImpl: 'ASIO'}' --mongodSetParameters='{messagePortImpl: 'ASIO'}'"
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -5883,6 +5902,7 @@ buildvariants:
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     compile_flags: --dbg=on --opt=on --win-version-min=ws08r2 -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) MONGO_DISTMOD=2008plus
     ext: zip
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -6780,6 +6800,7 @@ buildvariants:
     packager_arch: x86_64
     packager_distro: rhel62
     repo_edition: enterprise
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -6860,6 +6881,7 @@ buildvariants:
     test_flags: --storageEngine=ephemeralForTest --excludeWithAnyTags=requires_persistence,requires_fsync,SERVER-21420,SERVER-21658,requires_journaling
     compile_flags: -j$(grep -c ^processor /proc/cpuinfo) --dbg=off --opt=on CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -7099,6 +7121,7 @@ buildvariants:
     compile_flags: -j$(grep -c ^processor /proc/cpuinfo) --dbg=off --opt=on CPPPATH=$(readlink -f ../rocksdb/include/) LIBPATH=$(readlink -f ../rocksdb/) LIBS=rocksdb CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy VARIANT_DIR=release --runtime-hardening=off
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
     test_flags: --storageEngine=rocksdb
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -7569,6 +7592,7 @@ buildvariants:
     tooltags: "-tags 'ssl sasl'"
     compile_flags: --ssl MONGO_DISTMOD=ubuntu1604 -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy --link-model=dynamic
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -7588,6 +7612,7 @@ buildvariants:
     rlp_environment: MONGOD_UNITTEST_RLP_LANGUAGE_TEST_BTROOT=/opt/basis
     compile_flags: --allocator=system --ssl -j$(grep -c ^processor /proc/cpuinfo) CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy CPPPATH="/opt/basis/rlp/rlp/include /opt/basis/rlp/utilities/include" --use-basis-tech-rosette-linguistics-platform=on --cxx-std=14
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -7720,6 +7745,7 @@ buildvariants:
   expansions:
     compile_flags: MONGO_DISTMOD=ubuntu1404 -j$(grep -c ^processor /proc/cpuinfo) --release CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
@@ -7745,6 +7771,7 @@ buildvariants:
     test_flags: "\"--mongodSetParameters={use3dot2InitialSync: true, initialSyncOplogBuffer: \"inMemoryBlockingQueue\"}\""
     compile_flags: --ssl MONGO_DISTMOD=rhel62 -j$(grep -c ^processor /proc/cpuinfo) --release CC=/opt/mongodbtoolchain/v2/bin/gcc CXX=/opt/mongodbtoolchain/v2/bin/g++ OBJCOPY=/opt/mongodbtoolchain/v2/bin/objcopy CPPPATH="/opt/basis/rlp/rlp/include /opt/basis/rlp/utilities/include" --use-basis-tech-rosette-linguistics-platform=on
     num_jobs_available: $(grep -c ^processor /proc/cpuinfo)
+    use_scons_cache: true
   tasks:
   - name: compile
     distros:
diff --git a/src/third_party/scons-2.5.0/scons-local-2.5.0/SCons/CacheDir.py b/src/third_party/scons-2.5.0/scons-local-2.5.0/SCons/CacheDir.py
index 05199d93144af..a5b85a01fb869 100644
--- a/src/third_party/scons-2.5.0/scons-local-2.5.0/SCons/CacheDir.py
+++ b/src/third_party/scons-2.5.0/scons-local-2.5.0/SCons/CacheDir.py
@@ -52,9 +52,12 @@ def CacheRetrieveFunc(target, source, env):
     cd.CacheDebug('CacheRetrieve(%s): retrieving from %s\n', t, cachefile)
     if SCons.Action.execute_actions:
         if fs.islink(cachefile):
-            fs.symlink(fs.readlink(cachefile), t.get_internal_path())
+            realpath = fs.readlink(cachefile)
+            fs.symlink(realpath, t.get_internal_path())
+            os.utime(realpath, None)
         else:
             env.copy_from_cache(cachefile, t.get_internal_path())
+            os.utime(cachefile, None)
         st = fs.stat(cachefile)
         fs.chmod(t.get_internal_path(), stat.S_IMODE(st[stat.ST_MODE]) | stat.S_IWRITE)
     return 0