diff --git a/.gitignore b/.gitignore index 84aabbc..24b090b 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ target/ cover .eggs +.venv* \ No newline at end of file diff --git a/README.md b/README.md index 4c2786f..0677546 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ docker-rotate ============= -In a continuously deployed environment, old and unused docker images accumulate and use up space. -`docker-rotate` helps remove the K oldest images of each type. +In a continuously deployed environment, old and unused docker images and containers accumulate and use up space. +`docker-rotate` helps remove the K oldest images of each type and remove non-running containers. [![Build Status](https://travis-ci.org/locationlabs/docker-rotate.png)](https://travis-ci.org/locationlabs/docker-rotate) Usage: - # delete all but the three oldest images of each type - docker-rotate --clean-images --keep 3 + # delete all but the three most recent images of each type + docker-rotate images --keep 3 - # only target one type of image (by name) - docker-rotate --clean-images --keep 3 --only organization/image + # only target one type of image but don't remove latest + docker-rotate images --keep 3 --image "organization/image" "~:latest" # don't actualy delete anything - docker-rotate --clean-images --keep 3 --dry-run + docker-rotate --dry-run images --keep 3 - # also delete exited containers (except those with volumes) - docker-rotate --clean-images --clean-containers --keep 3 + # delete containers exited more than an hour ago + docker-rotate containers --exited 1h By default, `docker-rotate` connects to the local Unix socket; the usual environment variables will be respected if the `--use-env` flag is given. 
diff --git a/dockerrotate/containers.py b/dockerrotate/containers.py new file mode 100644 index 0000000..cd34c06 --- /dev/null +++ b/dockerrotate/containers.py @@ -0,0 +1,82 @@ +from datetime import datetime, timedelta +from dateutil import parser +from dateutil.tz import tzutc +import re + +from docker.errors import APIError + +from dockerrotate.filter import include_image + + +TIME_REGEX = re.compile(r'((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)m)?((?P<seconds>\d+?)s)?') # noqa + + +def parse_time(time_str): + """ + Parse a human readable time delta string. + """ + parts = TIME_REGEX.match(time_str) + if not parts: + raise Exception("Invalid time delta format '{}'".format(time_str)) + parts = parts.groupdict() + time_params = {} + for (name, param) in parts.iteritems(): + if param: + time_params[name] = int(param) + return timedelta(**time_params) + + +def include_container(container, args): + """ + Return truthy if container should be removed. + """ + inspect_data = args.client.inspect_container(container["Id"]) + status = inspect_data["State"]["Status"] + + if status == "exited": + finished_at = parser.parse(inspect_data["State"]["FinishedAt"]) + if (args.now - finished_at) < args.exited_ts: + return False + elif status == "created": + created_at = parser.parse(inspect_data["Created"]) + if (args.now - created_at) < args.created_ts: + return False + else: + return False + + return include_image([container["Image"]], args) + + +def clean_containers(args): + """ + Delete non-running containers. + + Images cannot be deleted if in use. Deleting dead containers allows + more images to be cleaned. 
+ """ + args.exited_ts = parse_time(args.exited) + args.created_ts = parse_time(args.created) + args.now = datetime.now(tzutc()) + + containers = [ + container for container in args.client.containers(all=True) + if include_container(container, args) + ] + + for container in containers: + print "Removing container ID: {}, Name: {}, Image: {}".format( + container["Id"], + (container.get("Names") or ["N/A"])[0], + container["Image"], + ) + + if args.dry_run: + continue + + try: + args.client.remove_container(container["Id"]) + except APIError as error: + print "Unable to remove container: {}: {}".format( + container["Id"], + error, + ) diff --git a/dockerrotate/filter.py b/dockerrotate/filter.py new file mode 100644 index 0000000..58038f3 --- /dev/null +++ b/dockerrotate/filter.py @@ -0,0 +1,22 @@ +import re + + +def include_image(image_tags, args): + """ + Return truthy if image should be considered for removal. + """ + if not args.images: + return True + + return all(regex_match(pattern, tag) + for pattern in args.images + for tag in image_tags) + + +def regex_match(pattern, tag): + """ + Perform a regex match on the tag. + """ + if pattern[0] == '~': + return not re.search(pattern[1:], tag) + return re.search(pattern, tag) diff --git a/dockerrotate/images.py b/dockerrotate/images.py new file mode 100644 index 0000000..852d541 --- /dev/null +++ b/dockerrotate/images.py @@ -0,0 +1,63 @@ +from collections import defaultdict + +from docker.errors import APIError + +from dockerrotate.filter import include_image + + +def clean_images(args): + """ + Delete old images keeping the most recent N images by tag. 
+ """ + # should not need to inspect all images; only intermediate images should appear + # when all is true; these should be deleted along with dependent images + images = [image + for image in args.client.images(all=False) + if include_image(image["RepoTags"], args)] + + # index by id + images_by_id = { + image["Id"]: image for image in images + } + + # group by name + images_by_name = defaultdict(set) + for image in images: + for tag in image["RepoTags"]: + image_name = normalize_tag_name(tag) + images_by_name[image_name].add(image["Id"]) + + for image_name, image_ids in images_by_name.items(): + # sort/keep + images_to_delete = sorted([ + images_by_id[image_id] for image_id in image_ids], + key=lambda image: -image["Created"], + )[args.keep:] + + # delete + for image in images_to_delete: + print "Removing image ID: {}, Tags: {}".format( + image["Id"], + ", ".join(image["RepoTags"]) + ) + + if args.dry_run: + continue + + try: + args.client.remove_image(image["Id"], force=True, noprune=False) + except APIError as error: + print error.message + + +def normalize_tag_name(tag): + """ + docker-py provides image names with tags as a single string. + + We want: + + some.domain.com/organization/image:tag -> organization/image + organization/image:tag -> organization/image + image:tag -> image + """ + return "/".join(tag.rsplit(":", 1)[0].split("/")[-2:]) diff --git a/dockerrotate/main.py b/dockerrotate/main.py index 2556b99..5ef29e3 100755 --- a/dockerrotate/main.py +++ b/dockerrotate/main.py @@ -1,71 +1,78 @@ """ -Free up space by rotating out old Docker images. +Free up space by rotating out old Docker images and containers. 
""" -from argparse import ArgumentParser -from datetime import timedelta -import re +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from dateutil import parser from docker import Client -from docker.errors import APIError from docker.errors import NotFound from docker.utils import kwargs_from_env +from dockerrotate.images import clean_images +from dockerrotate.containers import clean_containers -UNIX_SOC_ARGS = {"base_url": 'unix://var/run/docker.sock'} - -TEN_SECONDS = timedelta(seconds=10) +UNIX_SOC_ARGS = {"base_url": "unix://var/run/docker.sock"} def parse_args(): - parser = ArgumentParser() + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument( - "--use-env", - "-e", + "-e", "--use-env", action="store_true", help="Load docker connection information from standard environment variables.", ) parser.add_argument( - "--clean-containers", - "-c", + "--dry-run", action="store_true", - help="Clean out old containers", + help="Do not remove anything", ) parser.add_argument( - "--clean-images", - "-i", - action="store_true", + "--client-version", + help="Specify client version to use.", + ) + + subparsers = parser.add_subparsers() + + images_parser = subparsers.add_parser( + "images", help="Clean out old images", + formatter_class=ArgumentDefaultsHelpFormatter, ) - parser.add_argument( + images_parser.set_defaults(cmd=clean_images) + images_parser.add_argument( "--keep", "-k", type=int, default=3, help="Keep this many images of each kind", ) - parser.add_argument( - "--keep-regex", - type=str, + images_parser.add_argument( + "--images", nargs='*', - default=list(), - help="Python regex of tag names to keep.", + help="Python regex of image names to remove. 
Use a '~' prefix for negative match.", ) - parser.add_argument( - "--only", - "-o", - help="Only process this image", + + containers_parser = subparsers.add_parser( + "containers", + help="Clean out old containers", + formatter_class=ArgumentDefaultsHelpFormatter, ) - parser.add_argument( - "--dry-run", - action="store_true", - help="Do not remove anything", + containers_parser.set_defaults(cmd=clean_containers) + containers_parser.add_argument( + "--exited", + default="1h", + help="Remove only containers that exited that long ago", ) - parser.add_argument( - "--client-version", - default=None, - help="Specify client version to use.", + containers_parser.add_argument( + "--created", + default="1d", + help="Remove only containers that where created (but not running) that long ago", ) + containers_parser.add_argument( + "--images", + nargs='*', + help="Python regex of image names to remove. Use a '~' prefix for negative match.", + ) + return parser.parse_args() @@ -79,7 +86,7 @@ def make_client(args): """ kwargs = kwargs_from_env(assert_hostname=False) if args.use_env else UNIX_SOC_ARGS - if args.client_version is not None: + if args.client_version: kwargs["version"] = args.client_version client = Client(**kwargs) @@ -88,155 +95,16 @@ def make_client(args): try: client.version() except NotFound as error: - raise SystemExit(error) # noqa + raise SystemExit(error) return client -def normalize_tag_name(tag): - """ - docker-py provides image names as a single string. - - We want: - - some.domain.com/organization/image:tag -> organization/image - organization/image:tag -> organization/image - image:tag -> image - """ - return "/".join(tag.rsplit(":", 1)[0].split("/")[-2:]) - - -def might_be_data_volume(client, container): - """ - Data volumes are non-running containers that we do not want to delete. - - Detecting a data volume doesn't apear to be an exact science, but we - can use some of the information from `docker inspect` to make a good - guess. 
- """ - inspect_data = client.inspect_container(container["Id"]) - if not inspect_data["Config"]["Volumes"]: - # data volumes should have volumes... - return False - if inspect_data["State"]["ExitCode"] != 0: - # data volumes need to have exited cleanly - return False - started_at = parser.parse(inspect_data["State"]["StartedAt"]) - finished_at = parser.parse(inspect_data["State"]["FinishedAt"]) - if finished_at < started_at: - # not actually finished - return False - if (finished_at - started_at) > TEN_SECONDS: - # data volumes should terminate quickly - return False - # probably a data volume - return True - - -def clean_containers(client, args): - """ - Delete non-running containers. - - Skips over containers with volumes. - - Images cannot be deleted if in use. Deleting dead containers allows - more images to be cleaned. - """ - running_containers = { - container["Id"] for container in client.containers() - } - stopped_containers = [ - container for container in client.containers(all=True) - if container["Id"] not in running_containers - ] - for container in stopped_containers: - image_name = normalize_tag_name(container["Image"]) - if args.only and args.only != image_name: - continue - if might_be_data_volume(client, container): - print "Skipping data volume: {}".format(container["Names"][0]) # noqa - continue - print "Removing container ID: {}, Name: {}, Image: {}".format( # noqa - container["Id"], - (container.get("Names") or ["unnamed"])[0], - image_name - ) - if args.dry_run: - continue - try: - client.remove_container(container["Id"]) - except APIError as error: - print "Unable to remove container: {}: {}".format( # noqa - container["Id"], - error, - ) - pass - - -def clean_images(client, args): - """ - Delete old images keeping the most recent N images by tag. 
- """ - # should not need to inspect all images; only intermediate images should appear - # when all is true; these should be deleted along with dependent images - images = client.images(all=False) - - # index by id - images_by_id = { - image["Id"]: image for image in images - } - - # group by name - images_by_name = {} - for image in images: - if any( - any([re.match(pattern, tag) for pattern in args.keep_regex]) - for tag in image["RepoTags"] - ): - continue - - for tag in image["RepoTags"]: - image_name = normalize_tag_name(tag) - if args.only and args.only != image_name: - continue - - images_by_name.setdefault(image_name, set()).add(image["Id"]) - - for image_name, image_ids in images_by_name.items(): - # sort/keep - images = sorted([ - images_by_id[image_id] for image_id in image_ids], - key=lambda image: -image["Created"], - ) - images_to_delete = images[args.keep:] - - # delete - for image in images_to_delete: - print "Removing image ID: {}, Tags: {}".format( # noqa - image["Id"], - ", ".join(image["RepoTags"]) - ) - if args.dry_run: - continue - # In some scenarios, deleting an image with mutiple tags only removes one tag at a time - # (and all tags must be removed in order to fully delete the image). - # More investigation needed. - for tag in image["RepoTags"]: - try: - client.remove_image(image["Id"], force=True, noprune=False) - except APIError as error: - print error.message # noqa - - def main(): """ CLI entry point. """ args = parse_args() - client = make_client(args) - - if args.clean_containers: - clean_containers(client, args) + args.client = make_client(args) - if args.clean_images: - clean_images(client, args) + args.cmd(args) diff --git a/setup.py b/setup.py index 082122b..1780570 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages -__version__ = '1.2' +__version__ = '2.0' __build__ = ''