Fixes bug 913039 - Added a cron job to delete old elasticsearch indices. r=peterbe,phrawzty
commit 1bedc9b2d453c4e720c90382f1649ac2c717db57 (1 parent: a3c187c)
AdrianGaudebert authored
1  socorro/cron/crontabber_app.py
@@ -46,6 +46,7 @@
#socorro.cron.jobs.modulelist.ModulelistCronApp|1d
socorro.cron.jobs.matviews.GCCrashes|1d|10:00
#socorro.cron.jobs.symbolsunpack.SymbolsUnpackCronApp|1h
+ socorro.cron.jobs.elasticsearch_cleanup.ElasticsearchCleanupCronApp|30d
'''
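
For context, each entry in this jobs block follows crontabber's job-spec convention of a dotted class path, a frequency, and an optional time of day, separated by pipes, so '|30d' schedules the new cleanup job roughly once every 30 days (compare the existing '|1d|10:00' entry above). A minimal sketch of how such a spec line breaks apart; the split_job_spec helper is purely illustrative and is not part of crontabber, which has its own parser and validation:

    # Illustrative only: break a crontabber job spec of the form
    # "package.module.ClassName|frequency[|time]" into its parts.
    def split_job_spec(line):
        parts = line.strip().split('|')
        class_path = parts[0]
        frequency = parts[1]                                  # e.g. '30d'
        time_of_day = parts[2] if len(parts) > 2 else None    # e.g. '10:00'
        return class_path, frequency, time_of_day

    split_job_spec(
        'socorro.cron.jobs.elasticsearch_cleanup.'
        'ElasticsearchCleanupCronApp|30d'
    )
    # -> ('socorro.cron.jobs.elasticsearch_cleanup.ElasticsearchCleanupCronApp',
    #     '30d', None)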
83 socorro/cron/jobs/elasticsearch_cleanup.py
@@ -0,0 +1,83 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import datetime
+import pyelasticsearch
+import re
+
+from configman import Namespace
+from configman.converters import class_converter
+
+from socorro.cron.base import BaseCronApp
+from socorro.lib.datetimeutil import utc_now
+
+
+class ElasticsearchCleanupCronApp(BaseCronApp):
+ """Delete old elasticsearch indices from our databases. """
+
+ app_name = 'elasticsearch-cleanup'
+ app_version = '1.0'
+ app_description = 'Remove old indices from our elasticsearch database. '
+
+ required_config = Namespace()
+ required_config.add_option(
+ 'retention_policy',
+ default=26,
+ doc='Number of weeks to keep an index alive. ',
+ )
+ required_config.namespace('elasticsearch')
+ required_config.elasticsearch.add_option(
+ 'elasticsearch_class',
+ default='socorro.external.elasticsearch.connection_context.'
+ 'ConnectionContext',
+ from_string_converter=class_converter,
+ reference_value_from='resource.elasticsearch',
+ )
+ required_config.elasticsearch.add_option(
+ 'elasticsearch_index_regex',
+ default='socorro[0-9]{6}$',
+ reference_value_from='resource.elasticsearch',
+ )
+
+ def run(self):
+ now = utc_now()
+ policy_delay = datetime.timedelta(weeks=self.config.retention_policy)
+ time_limit = (now - policy_delay).replace(tzinfo=None)
+
+ es = pyelasticsearch.ElasticSearch(
+ self.config.elasticsearch.elasticsearch_urls,
+ timeout=self.config.elasticsearch.elasticsearch_timeout
+ )
+
+ state = es.cluster_state()
+ indices = state['metadata']['indices'].keys()
+
+ aliases = es.aliases()
+
+ for index in indices:
+ # Some indices look like 'socorro%Y%W_%Y%M%d', but they are
+ # aliased to the expected format of 'socorro%Y%W'. In such cases,
+ # replace the index with the alias.
+ if index in aliases:
+ index_aliases = aliases[index]['aliases'].keys()
+ if index_aliases:
+ index = index_aliases[0]
+
+ if not re.match(
+ self.config.elasticsearch.elasticsearch_index_regex,
+ index
+ ):
+ # This index doesn't look like a crash index, let's skip it.
+ continue
+
+ # This won't take the week part of our indices into account...
+ index_date = datetime.datetime.strptime(
+ index,
+ self.config.elasticsearch.elasticsearch_index
+ )
+ # So we need to get that differently, and then add it to the date.
+ index_date += datetime.timedelta(weeks=int(index[-2:]))
+
+ if index_date < time_limit:
+ es.delete_index(index) # Bad index! Go away!
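
The date handling in run() deserves a note: datetime.strptime parses the '%W' week number but ignores it unless a weekday is also supplied, which is why the code re-adds the week by slicing the last two characters of the index name. A standalone sketch of that calculation, assuming the index template is 'socorro%Y%W' (the test index names such as 'socorro200142' and the 'socorro[0-9]{6}$' regex suggest this, but the actual value comes from the elasticsearch_index config option):

    import datetime

    index = 'socorro200142'      # year 2001, week 42
    template = 'socorro%Y%W'     # assumed value of elasticsearch_index

    # Without a weekday directive, strptime drops the %W part and yields
    # 2001-01-01, so the week offset has to be re-added by hand.
    index_date = datetime.datetime.strptime(index, template)
    index_date += datetime.timedelta(weeks=int(index[-2:]))

    # With a 26-week retention policy measured from an arbitrary "now",
    # this index falls well past the cutoff and would be deleted.
    time_limit = datetime.datetime(2013, 9, 1) - datetime.timedelta(weeks=26)
    is_expired = index_date < time_limit     # True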
166 socorro/unittest/cron/jobs/test_elasticsearch_cleanup.py
@@ -0,0 +1,166 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import datetime
+import mock
+import os
+import pyelasticsearch
+
+from nose.plugins.attrib import attr
+from nose.tools import assert_raises
+
+from configman import ConfigurationManager
+
+from socorro.cron import crontabber
+from socorro.external.elasticsearch.crashstorage import \
+ ElasticSearchCrashStorage
+from socorro.lib.datetimeutil import utc_now
+from ..base import IntegrationTestCaseBase
+
+# Remove debugging noise during development
+import logging
+logging.getLogger('pyelasticsearch').setLevel(logging.ERROR)
+logging.getLogger('elasticutils').setLevel(logging.ERROR)
+logging.getLogger('requests.packages.urllib3.connectionpool')\
+ .setLevel(logging.ERROR)
+
+
+@attr(integration='elasticsearch')
+class IntegrationTestElasticsearchCleanup(IntegrationTestCaseBase):
+
+ def __init__(self, *args, **kwargs):
+ super(
+ IntegrationTestElasticsearchCleanup,
+ self
+ ).__init__(*args, **kwargs)
+
+ storage_config = self._setup_storage_config()
+ with storage_config.context() as config:
+ self.storage = ElasticSearchCrashStorage(config)
+
+ def _setup_storage_config(self):
+ mock_logging = mock.Mock()
+
+ storage_conf = ElasticSearchCrashStorage.get_required_config()
+ storage_conf.add_option('logger', default=mock_logging)
+
+ return ConfigurationManager(
+ [storage_conf],
+ values_source_list=[os.environ],
+ argv_source=[]
+ )
+
+ def test_right_indices_are_deleted(self):
+ config_manager = self._setup_config_manager(
+ 'socorro.cron.jobs.elasticsearch_cleanup.'
+ 'ElasticsearchCleanupCronApp|30d'
+ )
+ with config_manager.context() as config:
+ # clear the indices cache so the index is created on every test
+ self.storage.indices_cache = set()
+
+ es = self.storage.es
+
+ # Create old indices to be deleted.
+ self.storage.create_index('socorro200142', {})
+ self.storage.create_index('socorro200000', {})
+
+ # Create an old aliased index.
+ self.storage.create_index('socorro200201_20030101', {})
+ es.update_aliases({
+ 'actions': [{
+ 'add': {
+ 'index': 'socorro200201_20030101',
+ 'alias': 'socorro200201'
+ }
+ }]
+ })
+
+ # Create a recent aliased index.
+ last_week_index = self.storage.get_index_for_crash(
+ utc_now() - datetime.timedelta(weeks=1)
+ )
+ self.storage.create_index('socorro_some_aliased_index', {})
+ es.update_aliases({
+ 'actions': [{
+ 'add': {
+ 'index': 'socorro_some_aliased_index',
+ 'alias': last_week_index
+ }
+ }]
+ })
+
+ # Create a recent index that should not be deleted.
+ now_index = self.storage.get_index_for_crash(utc_now())
+ self.storage.create_index(now_index, {})
+
+ # These will raise an error if an index was not correctly created.
+ es.status('socorro200142')
+ es.status('socorro200000')
+ es.status('socorro200201')
+ es.status(now_index)
+ es.status(last_week_index)
+
+ tab = crontabber.CronTabber(config)
+ tab.run_all()
+
+ information = self._load_structure()
+ assert information['elasticsearch-cleanup']
+ assert not information['elasticsearch-cleanup']['last_error']
+ assert information['elasticsearch-cleanup']['last_success']
+
+ # Verify the recent index is still there.
+ es.status(now_index)
+ es.status(last_week_index)
+
+ # Verify the old indices are gone.
+ assert_raises(
+ pyelasticsearch.exceptions.ElasticHttpNotFoundError,
+ es.status,
+ 'socorro200142'
+ )
+
+ assert_raises(
+ pyelasticsearch.exceptions.ElasticHttpNotFoundError,
+ es.status,
+ 'socorro200000'
+ )
+
+ assert_raises(
+ pyelasticsearch.exceptions.ElasticHttpNotFoundError,
+ es.status,
+ 'socorro200201'
+ )
+
+ def test_other_indices_are_not_deleted(self):
+ """Verify that non-week-based indices are not removed. For example,
+ the socorro_email index should not be deleted by the cron job.
+ """
+ config_manager = self._setup_config_manager(
+ 'socorro.cron.jobs.elasticsearch_cleanup.'
+ 'ElasticsearchCleanupCronApp|30d'
+ )
+ with config_manager.context() as config:
+ # clear the indices cache so the index is created on every test
+ self.storage.indices_cache = set()
+
+ es = self.storage.es
+
+ # Create the socorro emails index.
+ self.storage.create_emails_index()
+
+ # This will raise an error if the index was not correctly created.
+ es.status('socorro_emails')
+
+ tab = crontabber.CronTabber(config)
+ tab.run_all()
+
+ information = self._load_structure()
+ assert information['elasticsearch-cleanup']
+ assert not information['elasticsearch-cleanup']['last_error']
+ assert information['elasticsearch-cleanup']['last_success']
+
+ # Verify the email index is still there. This will raise an error
+ # if the index does not exist.
+ es.status('socorro_emails')
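
A closing note on these tests: the recent indices that must survive (now_index and last_week_index) come from get_index_for_crash, which presumably maps a date onto the same week-based naming scheme. A rough, assumption-laden sketch of that mapping, again using the 'socorro%Y%W' template, shows why both names stay inside the 26-week retention window:

    import datetime

    # Illustrative only: build week-based index names for "now" and for one
    # week ago the way the crash storage presumably does.
    now = datetime.datetime.utcnow()
    now_index = now.strftime('socorro%Y%W')
    last_week_index = (now - datetime.timedelta(weeks=1)).strftime('socorro%Y%W')
    # Both dates are far newer than the 26-week cutoff computed by the cron
    # app, so its index_date < time_limit check leaves them untouched.

Since the tests are tagged with @attr(integration='elasticsearch'), they can be selected with nose's attrib plugin (for example, nosetests -a integration=elasticsearch) and run against a live elasticsearch instance.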