Skip to content

Commit

Permalink
feat: build spanner python utils image (#661)
Browse files Browse the repository at this point in the history
feat: build spanner python utils image
  • Loading branch information
Erik committed Jun 11, 2020
1 parent b73e6ee commit 2060601
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 3 deletions.
44 changes: 41 additions & 3 deletions .circleci/config.yml
Expand Up @@ -130,8 +130,8 @@ jobs:
MYSQL_PASSWORD: test
MYSQL_DATABASE: syncstorage
steps:
- setup_remote_docker:
docker_layer_caching: true
- setup_remote_docker:
docker_layer_caching: true
- checkout
- setup-rust
- setup-gcp-grpc
Expand Down Expand Up @@ -185,7 +185,6 @@ jobs:
- run:
name: Restore Docker image cache
command: docker load -i /home/circleci/cache/docker.tar

- run:
name: Deploy to Dockerhub
command: |
Expand All @@ -211,6 +210,39 @@ jobs:
echo "Not pushing to dockerhub for tag=${CIRCLE_TAG} branch=${CIRCLE_BRANCH}"
fi
# Job: build the Docker image defined in tools/spanner and push it to the
# mozilla/sync-spanner-py-utils Dockerhub repository.
#
# The image tag is chosen by the shell script below:
#   - branch "master"       -> the commit SHA (CIRCLE_SHA1)
#   - branch "feature.*"    -> the branch name
#   - any tagged build      -> the git tag (CIRCLE_TAG); checked last, so it
#                              overrides the two branch-based choices
# When none of these apply, DOCKER_TAG stays empty and nothing is built or
# pushed. DOCKER_USER / DOCKER_PASS are read from the job environment.
deploy-python-utils:
docker:
- image: docker:18.02.0-ce
steps:
# A remote Docker engine is needed because the job runs `docker build`/`docker push`.
- setup_remote_docker
- checkout
- run:
name: Build and deploy to Dockerhub
command: |
export UTILS_DOCKERHUB_REPO=mozilla/sync-spanner-py-utils
if [ "${CIRCLE_BRANCH}" == "master" ]; then
DOCKER_TAG="${CIRCLE_SHA1}"
fi
if echo "${CIRCLE_BRANCH}" | grep '^feature\..*' > /dev/null; then
DOCKER_TAG="${CIRCLE_BRANCH}"
fi
if [ -n "${CIRCLE_TAG}" ]; then
DOCKER_TAG="$CIRCLE_TAG"
fi
if [ -n "${DOCKER_TAG}" ]; then
echo "$DOCKER_PASS" | docker login -u "$DOCKER_USER" --password-stdin
echo ${UTILS_DOCKERHUB_REPO}:${DOCKER_TAG}
cd tools/spanner
docker build -t ${UTILS_DOCKERHUB_REPO}:${DOCKER_TAG} .
docker images
docker push "${UTILS_DOCKERHUB_REPO}:${DOCKER_TAG}"
else
echo "Not building or pushing to dockerhub for tag=${CIRCLE_TAG} branch=${CIRCLE_BRANCH}"
fi
workflows:
version: 2
build-deploy:
Expand All @@ -235,3 +267,9 @@ workflows:
filters:
tags:
only: /.*/
- deploy-python-utils:
requires:
- e2e-tests
filters:
tags:
only: /.*/
10 changes: 10 additions & 0 deletions tools/spanner/Dockerfile
@@ -0,0 +1,10 @@
# Image bundling the Spanner utility scripts (purge_ttl.py,
# count_expired_rows.py, count_users.py) together with their Python
# dependencies.
FROM python:3.7.7-buster

# Scripts and the dependency list all live flat under /app.
COPY purge_ttl.py count_expired_rows.py count_users.py requirements.txt /app/

# --no-cache-dir keeps pip's download cache out of the image layer; the
# cache is never reused inside a built image and only adds size.
RUN pip install --no-cache-dir -r /app/requirements.txt

# Run the scripts as an unprivileged user rather than root.
USER nobody

# The interpreter is the entrypoint, so a different script can be selected
# by overriding CMD (e.g. `docker run <image> /app/count_users.py`).
ENTRYPOINT ["/usr/local/bin/python"]
CMD ["/app/purge_ttl.py"]
68 changes: 68 additions & 0 deletions tools/spanner/count_expired_rows.py
@@ -0,0 +1,68 @@
# Count the number of expired rows in the spanner database
# Specifically, the rows in the batches and bsos tables whose expiry has passed
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import os
import sys
import logging
from datetime import datetime
from statsd.defaults.env import statsd
from urllib import parse

from google.cloud import spanner

# Configure the root logger: every record at INFO or above is written to
# stdout as a single JSON-shaped line with "datetime" and "message" fields.
# NOTE(review): %(message)s is interpolated verbatim, so a message that
# contains a double quote would produce a line that is not valid JSON.
logging.basicConfig(
format='{"datetime": "%(asctime)s", "message": "%(message)s"}',
stream=sys.stdout,
level=logging.INFO)

# Module-level Spanner client, created at import time and used by
# spanner_read_data(). It is constructed with no arguments — presumably the
# project and credentials come from the environment; verify for your install.
client = spanner.Client()


def from_env():
    """Resolve the Spanner instance and database IDs from the environment.

    The IDs are taken from ``SYNC_DATABASE_URL`` when it is set to a
    ``spanner://`` URL: the instance ID is the third-from-last path segment
    and the database ID is the last one (e.g.
    ``spanner://projects/p/instances/<instance>/databases/<database>``).

    If the URL is missing, uses a different scheme, or has too few path
    segments, the reason is printed and the IDs fall back to the
    ``INSTANCE_ID`` / ``DATABASE_ID`` environment variables, defaulting to
    ``"spanner-test"`` / ``"sync_stage"``.

    Returns:
        tuple: ``(instance_id, database_id)``.
    """
    try:
        url = os.environ.get("SYNC_DATABASE_URL")
        if not url:
            raise Exception("no url")
        purl = parse.urlparse(url)
        if purl.scheme != "spanner":
            # Without this, a non-spanner URL left both IDs unassigned and
            # the return statement failed with UnboundLocalError.
            raise Exception("invalid url scheme {!r}".format(purl.scheme))
        path = purl.path.split("/")
        # A path that is too short raises IndexError, which is handled by the
        # same fallback below.
        instance_id = path[-3]
        database_id = path[-1]
    except Exception as e:
        # Change these to reflect your Spanner instance install
        print("Exception {}".format(e))
        instance_id = os.environ.get("INSTANCE_ID", "spanner-test")
        database_id = os.environ.get("DATABASE_ID", "sync_stage")
    return (instance_id, database_id)


def spanner_read_data(query, table):
    """Run a single-value count query and report the result.

    Args:
        query: SQL text whose result is one row; the first column of that
            row is taken as the count.
        table: Table name, used only to label the statsd metrics and the
            log line.

    The query duration is recorded under
    ``syncstorage.count_expired_<table>_rows.duration`` and the count is
    published as the gauge ``syncstorage.expired_<table>_rows``.
    """
    instance_id, database_id = from_env()
    database = client.instance(instance_id).database(database_id)

    logging.info("For {}:{}".format(instance_id, database_id))

    # Time the snapshot read and the reporting that follows it; the result
    # is consumed while the snapshot is still open.
    with statsd.timer(f"syncstorage.count_expired_{table}_rows.duration"), \
            database.snapshot() as snapshot:
        first_row = snapshot.execute_sql(query).one()
        row_count = first_row[0]
        statsd.gauge(f"syncstorage.expired_{table}_rows", row_count)
        logging.info(f"Found {row_count} expired rows in {table}")


if __name__ == "__main__":
    # Script entry point: report the expired-row count for each table that
    # is queried on its `expiry` column.
    logging.info('Starting count_expired_rows.py')

    expiring_tables = ('batches', 'bsos')
    for table in expiring_tables:
        spanner_read_data(
            f'SELECT COUNT(*) FROM {table} WHERE expiry < CURRENT_TIMESTAMP()',
            table,
        )

    logging.info('Completed count_expired_rows.py')
2 changes: 2 additions & 0 deletions tools/spanner/requirements.txt
@@ -0,0 +1,2 @@
google-cloud-spanner >=1.16.0
statsd

0 comments on commit 2060601

Please sign in to comment.