From 92a57e653d2e831eb0c78505683bbef536d68c79 Mon Sep 17 00:00:00 2001 From: Erik Olson Date: Tue, 7 Jan 2020 13:40:06 -0500 Subject: [PATCH] feat: add basic logging to stdout and statsd metrics for purge_ttl.py --- Dockerfile | 2 +- tools/spanner/purge_ttl.py | 40 +++++++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1ecc4e5f00..bf046f8370 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ RUN \ apt-get -q update && \ apt-get -q install -y --no-install-recommends default-libmysqlclient-dev libssl-dev ca-certificates libcurl4 python3-venv python3-pip && \ python3 -m pip install setuptools wheel && \ - python3 -m pip install google-cloud-spanner && \ + python3 -m pip install google-cloud-spanner statsd && \ rm -rf /var/lib/apt/lists/* COPY --from=builder /app/bin /app/bin diff --git a/tools/spanner/purge_ttl.py b/tools/spanner/purge_ttl.py index 60f85520cb..f3122f496d 100644 --- a/tools/spanner/purge_ttl.py +++ b/tools/spanner/purge_ttl.py @@ -5,12 +5,23 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. import os +import sys +import logging +from datetime import datetime +from statsd.defaults.env import statsd from urllib import parse from google.cloud import spanner -# Change these to match your install. +# set up logger +logger = logging.getLogger() +logger.setLevel(logging.INFO) +handler = logging.StreamHandler(sys.stdout) +formatter = logging.Formatter('{"datetime": "%(asctime)s", "message": "%(message)s"}') +handler.setFormatter(formatter) +logger.addHandler(handler) +# Change these to match your install. client = spanner.Client() @@ -36,21 +47,36 @@ def spanner_read_data(request=None): (instance_id, database_id) = from_env() instance = client.instance(instance_id) database = instance.database(database_id) - outputs = [] - outputs.append("For {}:{}".format(instance_id, database_id)) + logger.info("For {}:{}".format(instance_id, database_id)) + # Delete Batches. Also deletes child batch_bsos rows (INTERLEAVE # IN PARENT batches ON DELETE CASCADE) + batches_start = datetime.now() query = 'DELETE FROM batches WHERE expiry < CURRENT_TIMESTAMP()' result = database.execute_partitioned_dml(query) - outputs.append("batches: removed {} rows".format(result)) + batches_end = datetime.now() + batches_duration = batches_end - batches_start + logger.info("batches: removed {} rows, batches_duration: {}".format(result, batches_duration)) + statsd.timing("sync.purge_ttl.batches_duration", batches_duration) # Delete BSOs + bso_start = datetime.now() query = 'DELETE FROM bsos WHERE expiry < CURRENT_TIMESTAMP()' result = database.execute_partitioned_dml(query) - outputs.append("bso: removed {} rows".format(result)) - return '\n'.join(outputs) + bso_end = datetime.now() + bso_duration = bso_end - bso_start + logger.info("bso: removed {} rows, bso_duration: {}".format(result, bso_duration)) + statsd.timing("sync.purge_ttl.bso_duration", bso_duration) if __name__ == "__main__": - print(spanner_read_data()) + start_time = datetime.now() + logger.info('Starting purge_ttl.py') + + spanner_read_data() + + end_time = datetime.now() + duration = end_time - start_time + logger.info('Completed purge_ttl.py, total_duration: {}'.format(duration)) + statsd.timing("sync.purge_ttl.total_duration", duration)