From e5e5ebea20209cca0232e3f97736b7b113e9530f Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 13:24:25 +0300
Subject: [PATCH 01/11] package.py - Added missing default value for
 TF_RECREATE_MISSING_LAMBDA_PACKAGE env var

---
 package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.py b/package.py
index e58742e8..9cfae5b7 100644
--- a/package.py
+++ b/package.py
@@ -721,7 +721,7 @@ def args_parser():
 def main():
     ns = argparse.Namespace(
         recreate_missing_package=os.environ.get(
-            'TF_RECREATE_MISSING_LAMBDA_PACKAGE'),
+            'TF_RECREATE_MISSING_LAMBDA_PACKAGE', True),
         log_level=os.environ.get('TF_PACKAGE_LOG_LEVEL', 'INFO'),
         dump_input=bool(os.environ.get('TF_DUMP_INPUT')),
         dump_env=bool(os.environ.get('TF_DUMP_ENV')),

From 4bf2138d4bb0b6e0a8e4aafaeba7a911bef537ab Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 14:41:46 +0300
Subject: [PATCH 02/11] package.py - Borrowed ZipInfo.from_file from py38
 zipfile.py due to the need for strict_timestamps

---
 package.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/package.py b/package.py
index 9cfae5b7..b155a42e 100644
--- a/package.py
+++ b/package.py
@@ -6,6 +6,8 @@
     raise RuntimeError("A python version 3.7 or newer is required")
 
 import os
+import time
+import stat
 import json
 import shlex
 import shutil
@@ -22,6 +24,9 @@
 import logging
 
 
+PY38 = sys.version_info >= (3, 8)
+
+
 ################################################################################
 # Logging
 
@@ -180,6 +185,45 @@ def make_zipfile(zip_filename, *base_dirs, timestamp=None,
     name of the output zip file.
     """
 
+    # Borrowed from the python 3.8 zipfile.py library
+    # due to the need for strict_timestamps functionality.
+    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
+        """Construct an appropriate ZipInfo for a file on the filesystem.
+
+        filename should be the path to a file or directory on the filesystem.
+
+        arcname is the name which it will have within the archive (by default,
+        this will be the same as filename, but without a drive letter and with
+        leading path separators removed).
+        """
+        if isinstance(filename, os.PathLike):
+            filename = os.fspath(filename)
+        st = os.stat(filename)
+        isdir = stat.S_ISDIR(st.st_mode)
+        mtime = time.localtime(st.st_mtime)
+        date_time = mtime[0:6]
+        if not strict_timestamps and date_time[0] < 1980:
+            date_time = (1980, 1, 1, 0, 0, 0)
+        elif not strict_timestamps and date_time[0] > 2107:
+            date_time = (2107, 12, 31, 23, 59, 59)
+        # Create ZipInfo instance to store file information
+        if arcname is None:
+            arcname = filename
+        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
+        while arcname[0] in (os.sep, os.altsep):
+            arcname = arcname[1:]
+        if isdir:
+            arcname += '/'
+        zinfo = cls(arcname, date_time)
+        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
+        if isdir:
+            zinfo.file_size = 0
+            zinfo.external_attr |= 0x10  # MS-DOS directory flag
+        else:
+            zinfo.file_size = st.st_size
+
+        return zinfo
+
     # An extended version of a write method
     # from the original zipfile.py library module
     def write(self, filename, arcname=None,
@@ -195,8 +239,12 @@ def write(self, filename, arcname=None,
             "Can't write to ZIP archive while an open writing handle exists"
         )
 
-        zinfo = zipfile.ZipInfo.from_file(
-            filename, arcname, strict_timestamps=self._strict_timestamps)
+        if PY38:
+            zinfo = zipfile.ZipInfo.from_file(
+                filename, arcname, strict_timestamps=self._strict_timestamps)
+        else:
+            zinfo = from_file(
+                zipfile.ZipInfo, filename, arcname, strict_timestamps=True)
 
         if date_time:
             zinfo.date_time = date_time

From f81835dd525ab2ff877137c889d9a0a00795ff44 Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 15:26:31 +0300
Subject: [PATCH 03/11] package.py - Refactored list_files function, adapted
 file path referencing

---
 package.py | 69 +++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 42 deletions(-)

diff --git a/package.py b/package.py
index b155a42e..bd442e0f 100644
--- a/package.py
+++ b/package.py
@@ -110,6 +110,28 @@ def tempdir():
         shutil.rmtree(path)
 
 
+def list_files(top_path, logger=None):
+    """
+    Returns a sorted list of all files in a directory.
+    """
+
+    if logger:
+        logger = logger.getChild('ls')
+
+    results = []
+
+    for root, dirs, files in os.walk(top_path):
+        for file_name in files:
+            file_path = os.path.join(root, file_name)
+            relative_path = os.path.relpath(file_path, top_path)
+            results.append(relative_path)
+            if logger:
+                logger.debug(relative_path)
+
+    results.sort()
+    return results
+
+
 def dataclass(name, **fields):
     typ = type(name, (object,), {
         '__slots__': fields.keys(),
@@ -420,24 +442,6 @@ def prepare_command(args):
 
     logger = logging.getLogger('prepare')
 
-    def list_files(top_path):
-        """
-        Returns a sorted list of all files in a directory.
-        """
-
-        _logger = logger.getChild('ls')
-
-        results = []
-
-        for root, dirs, files in os.walk(top_path):
-            for file_name in files:
-                file_path = os.path.join(root, file_name)
-                results.append(file_path)
-                _logger.debug(file_path)
-
-        results.sort()
-        return results
-
     def generate_content_hash(source_paths, hash_func=hashlib.sha256):
         """
         Generate a content hash of the source paths.
@@ -448,11 +452,11 @@ def generate_content_hash(source_paths, hash_func=hashlib.sha256):
         for source_path in source_paths:
             if os.path.isdir(source_path):
                 source_dir = source_path
-                for source_file in list_files(source_dir):
+                for source_file in list_files(source_dir, logger=logger):
                     update_hash(hash_obj, source_dir, source_file)
             else:
                 source_dir = os.path.dirname(source_path)
-                source_file = source_path
+                source_file = os.path.relpath(source_path, source_dir)
                 update_hash(hash_obj, source_dir, source_file)
 
         return hash_obj
@@ -462,10 +466,10 @@ def update_hash(hash_obj, file_root, file_path):
         Update a hashlib object with the relative path and contents of a file.
         """
 
-        relative_path = os.path.relpath(file_path, file_root)
+        relative_path = os.path.join(file_root, file_path)
         hash_obj.update(relative_path.encode())
 
-        with open(file_path, 'rb') as open_file:
+        with open(relative_path, 'rb') as open_file:
             while True:
                 data = open_file.read(1024 * 8)
                 if not data:
@@ -563,25 +567,6 @@ def build_command(args):
 
     logger = logging.getLogger('build')
 
-    def list_files(top_path):
-        """
-        Returns a sorted list of all files in a directory.
-        """
-
-        _logger = logger.getChild('ls')
-
-        results = []
-
-        for root, dirs, files in os.walk(top_path):
-            for file_name in files:
-                file_path = os.path.join(root, file_name)
-                relative_path = os.path.relpath(file_path, top_path)
-                results.append(relative_path)
-                _logger.debug(relative_path)
-
-        results.sort()
-        return results
-
     def create_zip_file(source_dir, target_file, timestamp):
         """
         Creates a zip file from a directory.
@@ -629,7 +614,7 @@ def create_zip_file(source_dir, target_file, timestamp):
         # Find all source files.
         if os.path.isdir(source_path):
             source_dir = source_path
-            source_files = list_files(source_path)
+            source_files = list_files(source_path, logger=logger)
         else:
             source_dir = os.path.dirname(source_path)
             source_files = [os.path.basename(source_path)]

From 50cd59e3bd90a39b991651911f350d3ae8508b2b Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 15:27:29 +0300
Subject: [PATCH 04/11] package.py - Expand tf path only for hash_extra_paths

---
 package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.py b/package.py
index bd442e0f..80e56d86 100644
--- a/package.py
+++ b/package.py
@@ -502,8 +502,8 @@ def update_hash(hash_obj, file_root, file_path):
         abort('source_path must be set.')
 
     # Expand a Terraform path. references
+    hash_extra_paths = [p.format(path=tf_paths) for p in hash_extra_paths]
     content_hash_paths = [source_path] + hash_extra_paths
-    content_hash_paths = [p.format(path=tf_paths) for p in content_hash_paths]
 
     # Generate a hash based on file names and content. Also use the
     # runtime value, build command, and content of the build paths

From 81b4cd3f775bb811edf17b7e7bc17538da56c177 Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 16:00:31 +0300
Subject: [PATCH 05/11] package.py - Added logging to generate_content_hash
 function

---
 package.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/package.py b/package.py
index 80e56d86..b6f84bd9 100644
--- a/package.py
+++ b/package.py
@@ -442,22 +442,30 @@ def prepare_command(args):
 
     logger = logging.getLogger('prepare')
 
-    def generate_content_hash(source_paths, hash_func=hashlib.sha256):
+    def generate_content_hash(source_paths,
+                              hash_func=hashlib.sha256, logger=None):
         """
         Generate a content hash of the source paths.
         """
 
+        if logger:
+            logger = logger.getChild('hash')
+
         hash_obj = hash_func()
 
         for source_path in source_paths:
             if os.path.isdir(source_path):
                 source_dir = source_path
-                for source_file in list_files(source_dir, logger=logger):
+                for source_file in list_files(source_dir):
                     update_hash(hash_obj, source_dir, source_file)
+                    if logger:
+                        logger.debug(os.path.join(source_dir, source_file))
             else:
                 source_dir = os.path.dirname(source_path)
                 source_file = os.path.relpath(source_path, source_dir)
                 update_hash(hash_obj, source_dir, source_file)
+                if logger:
+                    logger.debug(source_path)
 
         return hash_obj
@@ -508,7 +516,8 @@ def update_hash(hash_obj, file_root, file_path):
     # Generate a hash based on file names and content. Also use the
     # runtime value, build command, and content of the build paths
     # because they can have an effect on the resulting archive.
-    content_hash = generate_content_hash(content_hash_paths)
+    logger.debug("Computing content hash on files...")
+    content_hash = generate_content_hash(content_hash_paths, logger=logger)
     content_hash.update(runtime.encode())
     content_hash.update(hash_extra.encode())
     content_hash = content_hash.hexdigest()

From 43cd1e0cc20a4650245fe17d722c16d925551965 Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 16:09:43 +0300
Subject: [PATCH 06/11] package.py - Connect to Terraform's remapped TTY if
 it's available

It seems that Terraform remaps all TTY FDs to the range {3..5}, and this
internal feature is used by Terraform plugins for I/O. So package.py
just tries to reuse Terraform's TTY if it's available.

package.py relies on this to emit logging output from the prepare
stage. During the prepare stage all logging happens at debug level only,
so to see anything, run Terraform with the environment variable set:

> export TF_LAMBDA_PACKAGE_LOG_LEVEL=DEBUG
> terraform apply
---
 package.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/package.py b/package.py
index b6f84bd9..d83ab750 100644
--- a/package.py
+++ b/package.py
@@ -45,7 +45,14 @@ def formatMessage(self, record):
         return super().formatMessage(record)
 
 
-log_handler = logging.StreamHandler()
+log_stream = sys.stderr
+try:
+    if os.isatty(3):
+        log_stream = os.fdopen(3, mode='w')
+except OSError:
+    pass
+
+log_handler = logging.StreamHandler(stream=log_stream)
 log_handler.setFormatter(LogFormatter())
 
 logger = logging.getLogger()
@@ -764,7 +771,7 @@ def main():
     ns = argparse.Namespace(
         recreate_missing_package=os.environ.get(
             'TF_RECREATE_MISSING_LAMBDA_PACKAGE', True),
-        log_level=os.environ.get('TF_PACKAGE_LOG_LEVEL', 'INFO'),
+        log_level=os.environ.get('TF_LAMBDA_PACKAGE_LOG_LEVEL', 'INFO'),
         dump_input=bool(os.environ.get('TF_DUMP_INPUT')),
         dump_env=bool(os.environ.get('TF_DUMP_ENV')),
     )

From bf37ecf03beb17e4529934eb134f2d34c1c612fc Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 16:36:55 +0300
Subject: [PATCH 07/11] package.py - The prepare stage always has a root
 logger prefix

---
 package.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/package.py b/package.py
index d83ab750..699b9c3c 100644
--- a/package.py
+++ b/package.py
@@ -35,7 +35,6 @@ class LogFormatter(logging.Formatter):
     formats = {
         'root': default_format,
         'build': default_format,
-        'prepare': default_format,
         'cmd': '> %(message)s',
         '': '%(name)s: %(message)s'
     }

From 1cc1219e2b85705a26093d5c00386387d213b33b Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 17:36:54 +0300
Subject: [PATCH 08/11] package.py - Converted dumb debugging helpers to
 useful logging

To see all
logged values, run Terraform as:

> export TF_LAMBDA_PACKAGE_LOG_LEVEL=DEBUG2
> terraform apply

With env output:

> export TF_LAMBDA_PACKAGE_LOG_LEVEL=DEBUG3
> terraform apply
---
 package.py | 43 +++++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/package.py b/package.py
index 699b9c3c..b7ea78e3 100644
--- a/package.py
+++ b/package.py
@@ -26,10 +26,15 @@
 
 PY38 = sys.version_info >= (3, 8)
 
-
 ################################################################################
 # Logging
 
+DEBUG2 = 9
+logging.addLevelName(DEBUG2, 'DEBUG2')
+DEBUG3 = 8
+logging.addLevelName(DEBUG3, 'DEBUG3')
+
+
 class LogFormatter(logging.Formatter):
     default_format = '%(message)s'
     formats = {
@@ -61,19 +66,6 @@ def formatMessage(self, record):
 cmd_logger = logging.getLogger('cmd')
 
 
-################################################################################
-# Debug helpers
-
-def dump_env(command_name):
-    with open('{}.env'.format(command_name), 'a') as f:
-        json.dump(dict(os.environ), f, indent=2)
-
-
-def dump_query(command_name, query):
-    with open('{}.query'.format(command_name), 'a') as f:
-        json.dump(query, f, indent=2)
-
-
 ################################################################################
 # Backports
 
@@ -490,11 +482,14 @@ def update_hash(hash_obj, file_root, file_path):
                 break
             hash_obj.update(data)
 
-    args.dump_env and dump_env('prepare_command')
-
     # Load the query.
     query_data = json.load(sys.stdin)
-    args.dump_input and dump_query('prepare_command', query_data)
+
+    if logger.isEnabledFor(DEBUG3):
+        logger.debug('ENV: %s', json.dumps(dict(os.environ), indent=2))
+    if logger.isEnabledFor(DEBUG2):
+        logger.debug('QUERY: %s', json.dumps(query_data, indent=2))
+
     query = datatree('prepare_query', **query_data)
 
     tf_paths = query.paths
@@ -592,13 +587,11 @@ def create_zip_file(source_dir, target_file, timestamp):
             os.makedirs(target_dir)
         make_zipfile(target_file, source_dir, timestamp=timestamp)
 
-    args.dump_env and dump_env('build_command')
-
-    args.dump_input and print(
-        args.filename,
-        args.runtime,
-        args.source_path,
-        file=open('build_command.args', 'a'))
+    if logger.isEnabledFor(DEBUG3):
+        logger.debug('ENV: %s', json.dumps(dict(os.environ), indent=2))
+    if logger.isEnabledFor(DEBUG2):
+        logger.debug('CMD: python3 %s %s %s',
+                     sys.argv[0], args.zip_file_timestamp, args.build_plan_file)
 
     with open(args.build_plan_file) as f:
         query_data = json.load(f)
@@ -771,8 +764,6 @@ def main():
     ns = argparse.Namespace(
         recreate_missing_package=os.environ.get(
             'TF_RECREATE_MISSING_LAMBDA_PACKAGE', True),
         log_level=os.environ.get('TF_LAMBDA_PACKAGE_LOG_LEVEL', 'INFO'),
-        dump_input=bool(os.environ.get('TF_DUMP_INPUT')),
-        dump_env=bool(os.environ.get('TF_DUMP_ENV')),
     )

From c07cb316971f08d164ae270326a8fb736ab843ce Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 17:50:16 +0300
Subject: [PATCH 09/11] package.py - Wrapped logging configuration in a
 configure_logging function

---
 package.py | 68 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/package.py b/package.py
index b7ea78e3..abec56a6 100644
--- a/package.py
+++ b/package.py
@@ -30,40 +30,44 @@
 # Logging
 
 DEBUG2 = 9
-logging.addLevelName(DEBUG2, 'DEBUG2')
 DEBUG3 = 8
-logging.addLevelName(DEBUG3, 'DEBUG3')
 
+log_handler = None
+logger = logging.getLogger()
+cmd_logger = logging.getLogger('cmd')
 
-class LogFormatter(logging.Formatter):
-    default_format = '%(message)s'
-    formats = {
-        'root': default_format,
-        'build': default_format,
-        'cmd': '> %(message)s',
-        '': '%(name)s: %(message)s'
-    }
 
-    def formatMessage(self, record):
-        self._style._fmt = self.formats.get(record.name, self.formats[''])
-        return super().formatMessage(record)
+def configure_logging():
+    global log_handler
 
+    logging.addLevelName(DEBUG2, 'DEBUG2')
+    logging.addLevelName(DEBUG3, 'DEBUG3')
 
-log_stream = sys.stderr
-try:
-    if os.isatty(3):
-        log_stream = os.fdopen(3, mode='w')
-except OSError:
-    pass
+    class LogFormatter(logging.Formatter):
+        default_format = '%(message)s'
+        formats = {
+            'root': default_format,
+            'build': default_format,
+            'cmd': '> %(message)s',
+            '': '%(name)s: %(message)s'
+        }
 
-log_handler = logging.StreamHandler(stream=log_stream)
-log_handler.setFormatter(LogFormatter())
+        def formatMessage(self, record):
+            self._style._fmt = self.formats.get(record.name, self.formats[''])
+            return super().formatMessage(record)
 
-logger = logging.getLogger()
-logger.addHandler(log_handler)
-logger.setLevel(logging.INFO)
+    log_stream = sys.stderr
+    try:
+        if os.isatty(3):
+            log_stream = os.fdopen(3, mode='w')
+    except OSError:
+        pass
 
-cmd_logger = logging.getLogger('cmd')
+    log_handler = logging.StreamHandler(stream=log_stream)
+    log_handler.setFormatter(LogFormatter())
+
+    logger.addHandler(log_handler)
+    logger.setLevel(logging.INFO)
@@ -366,7 +370,7 @@ def docker_build_command(build_root, docker_file=None, tag=None):
         docker_cmd.append(build_root)
 
     cmd_logger.info(shlex_join(docker_cmd))
-    log_handler.flush()
+    log_handler and log_handler.flush()
 
     return docker_cmd
@@ -423,7 +427,7 @@ def docker_run_command(build_root, command, runtime,
         docker_cmd.extend(command)
 
     cmd_logger.info(shlex_join(docker_cmd))
-    log_handler.flush()
+    log_handler and log_handler.flush()
 
     return docker_cmd
@@ -677,7 +681,7 @@ def create_zip_file(source_dir, target_file, timestamp):
             ))
         else:
             cmd_logger.info(shlex_join(pip_command))
-            log_handler.flush()
+            log_handler and log_handler.flush()
             check_call(pip_command)
 
     # Zip up the temporary directory and write it to the target filename.
@@ -768,8 +772,12 @@ def main():
     p = args_parser()
     args = p.parse_args(namespace=ns)
 
-    if args.log_level and logging._checkLevel(args.log_level):
-        logging.root.setLevel(args.log_level)
+    configure_logging()
+
+    if args.log_level:
+        ll = logging._nameToLevel.get(args.log_level)
+        if ll and logging._checkLevel(ll):
+            logging.root.setLevel(args.log_level)
 
     exit(args.command(args))

From 1f38c1ebc26919743b34a410d305b33cc62c877b Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 17:56:36 +0300
Subject: [PATCH 10/11] package.py - Use Terraform's stderr only for the
 prepare stage

---
 package.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/package.py b/package.py
index abec56a6..5465eb44 100644
--- a/package.py
+++ b/package.py
@@ -37,7 +37,7 @@
 cmd_logger = logging.getLogger('cmd')
 
 
-def configure_logging():
+def configure_logging(use_tf_stderr=False):
     global log_handler
 
     logging.addLevelName(DEBUG2, 'DEBUG2')
@@ -56,12 +56,14 @@ def formatMessage(self, record):
             self._style._fmt = self.formats.get(record.name, self.formats[''])
             return super().formatMessage(record)
 
+    tf_stderr_fd = 5
     log_stream = sys.stderr
-    try:
-        if os.isatty(3):
-            log_stream = os.fdopen(3, mode='w')
-    except OSError:
-        pass
+    if use_tf_stderr:
+        try:
+            if os.isatty(tf_stderr_fd):
+                log_stream = os.fdopen(tf_stderr_fd, mode='w')
+        except OSError:
+            pass
 
     log_handler = logging.StreamHandler(stream=log_stream)
     log_handler.setFormatter(LogFormatter())
@@ -772,7 +774,10 @@ def main():
     p = args_parser()
     args = p.parse_args(namespace=ns)
 
-    configure_logging()
+    if args.command is prepare_command:
+        configure_logging(use_tf_stderr=True)
+    else:
+        configure_logging()
 
     if args.log_level:
         ll = logging._nameToLevel.get(args.log_level)

From e4bf4b7a751c7f9e64e6149d6584ba172a314bae Mon Sep 17 00:00:00 2001
From: Andrew Hlynskyi
Date: Thu, 11 Jun 2020 19:47:30 +0300
Subject: [PATCH 11/11] package.py - Fixed CMD log record; Added --force
 option to the build command

---
 package.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/package.py b/package.py
index 5465eb44..660f3a00 100644
--- a/package.py
+++ b/package.py
@@ -23,7 +23,6 @@
 from base64 import b64encode
 import logging
 
-
 PY38 = sys.version_info >= (3, 8)
 
 ################################################################################
@@ -596,8 +595,7 @@ def create_zip_file(source_dir, target_file, timestamp):
     if logger.isEnabledFor(DEBUG3):
         logger.debug('ENV: %s', json.dumps(dict(os.environ), indent=2))
     if logger.isEnabledFor(DEBUG2):
-        logger.debug('CMD: python3 %s %s %s',
-                     sys.argv[0], args.zip_file_timestamp, args.build_plan_file)
+        logger.debug('CMD: python3 %s', shlex_join(sys.argv))
 
     with open(args.build_plan_file) as f:
         query_data = json.load(f)
@@ -616,7 +614,7 @@ def create_zip_file(source_dir, target_file, timestamp):
     else:
         timestamp = 0
 
-    if os.path.exists(filename):
+    if os.path.exists(filename) and not args.force:
         logger.info('Reused: %s', shlex.quote(filename))
         return
 
@@ -756,6 +754,8 @@ def args_parser():
 
     p = sp.add_parser('build', help='build and pack to a zip archive')
    p.set_defaults(command=build_command)
+    p.add_argument('--force', action='store_true',
+                   help='Force rebuilding even if a zip artifact exists')
    p.add_argument('-t', '--timestamp',
        dest='zip_file_timestamp', required=True,
        help='A zip file timestamp generated by the prepare command')
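
A minimal sketch of exercising the finished series by hand, in the same style
as the commit-message examples above. It assumes the build subcommand takes
the build plan as a positional argument (the diffs only ever reference it as
args.build_plan_file, so the file name and the timestamp value here are
illustrative, not taken from the patches):

> export TF_LAMBDA_PACKAGE_LOG_LEVEL=DEBUG2
> python3 package.py build --force -t 1591889250 build_plan.json

At DEBUG2 the script logs the CMD and QUERY records added in patch 08 (the CMD
record as fixed in patch 11); DEBUG3 additionally dumps the whole environment.
The --force flag from patch 11 bypasses the 'Reused:' short-circuit so the zip
archive is rebuilt even when it already exists.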