From 95e2f20a8895d443d594813a41174c751c128e3c Mon Sep 17 00:00:00 2001
From: Adam Kariv
Date: Mon, 28 Nov 2016 17:04:40 +0200
Subject: [PATCH] Maintenance mode

Add tools/reindexer.py, a maintenance script that copies the package
registry into a backup index and a fresh target index and reports which
'fdp*' database tables are referenced by a package model.

---
 requirements.txt   |   2 +-
 tools/reindexer.py | 100 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 tools/reindexer.py

diff --git a/requirements.txt b/requirements.txt
index 35673a4..2e4c776 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,4 @@ sqlalchemy
 psycopg2
 cryptography
 elasticsearch>=1.0.0,<2.0.0
-os-package-registry>=0.0.4
+os-package-registry>=0.0.7
diff --git a/tools/reindexer.py b/tools/reindexer.py
new file mode 100644
index 0000000..d4574b6
--- /dev/null
+++ b/tools/reindexer.py
@@ -0,0 +1,100 @@
+"""Maintenance script for the package registry.
+
+Copies every package from the current Elasticsearch index into a backup
+index and a fresh target index, removes the 'packages' alias from the
+source index, and then logs which 'fdp*' database tables are referenced
+by a package model. Database tables are only reported, never dropped.
+
+Reads OS_ELASTICSEARCH_ADDRESS and OS_CONDUCTOR_ENGINE from the environment.
+"""
+import os
+import logging
+
+from elasticsearch import Elasticsearch, NotFoundError
+from os_package_registry import PackageRegistry
+from sqlalchemy import MetaData, create_engine
+
+logging.root.setLevel(logging.INFO)
+
+
+if __name__ == "__main__":
+
+    # Reindex ES
+    es_host = os.environ['OS_ELASTICSEARCH_ADDRESS']
+    es = Elasticsearch(hosts=[es_host], use_ssl='https' in es_host)
+
+    # Of 'packages-0' / 'packages-1', the first one that exists is the
+    # source and the first one that does not exist is the target.
+    source_index = None
+    target_index = None
+    backup_index = 'packages-backup'
+    for i in range(2):
+        idx = 'packages-%d' % i
+        if es.indices.exists(idx):
+            if source_index is None:
+                source_index = idx
+        elif target_index is None:
+            target_index = idx
+
+    if source_index is None:
+        source_index = 'packages'
+    if target_index is None:
+        target_index = 'packages-1'
+
+    assert source_index != target_index
+
+    logging.info('SOURCE INDEX %s', source_index)
+    logging.info('TARGET INDEX %s', target_index)
+    logging.info('BACKUP INDEX %s', backup_index)
+
+    # Drop any previous backup and target indexes before copying.
+    try:
+        logging.info('DELETING BACKUP INDEX')
+        es.indices.delete(backup_index)
+    except NotFoundError:
+        logging.info('BACKUP INDEX NOT FOUND')
+
+    try:
+        logging.info('DELETING TARGET INDEX')
+        es.indices.delete(target_index)
+    except NotFoundError:
+        logging.info('TARGET INDEX NOT FOUND')
+
+    source_pr = PackageRegistry(es_instance=es, index_name=source_index)
+    backup_pr = PackageRegistry(es_instance=es, index_name=backup_index)
+    target_pr = PackageRegistry(es_instance=es, index_name=target_index)
+
+    for package in source_pr.list_models():
+        try:
+            params = source_pr.get_raw(package)
+            backup_pr.save_model(*params)
+            target_pr.save_model(*params)
+            logging.info('REINDEXING %s', package)
+        except KeyError:
+            logging.exception('FAILED TO READ DATA FOR %s', package)
+
+    if es.indices.exists_alias(source_index, 'packages'):
+        es.indices.delete_alias(source_index, 'packages')
+
+    # es.indices.delete(source_index)
+    # es.indices.create_alias(target_index, 'packages')
+
+    # Find orphan DB tables
+    used_tables = set()
+    for package in target_pr.list_models():
+        params = target_pr.get_raw(package)
+        fact_table = params[3].get('fact_table')  # model
+        if fact_table is not None:
+            used_tables.add(fact_table)
+
+    engine = create_engine(os.environ['OS_CONDUCTOR_ENGINE'])
+    meta = MetaData()
+    meta.reflect(bind=engine,
+                 only=lambda t, _: t.startswith('fdp'))
+    for table in reversed(meta.sorted_tables):
+        # used_tables holds table names (strings), so compare by name
+        # rather than by the reflected Table object.
+        if table.name in used_tables:
+            logging.info('SKIPPING %s', table)
+        else:
+            logging.info('NOT DELETING %s', table)