Skip to content

Commit

Permalink
Maintenance mode
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Nov 29, 2016
1 parent 9928b02 commit 95e2f20
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 1 deletion.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ sqlalchemy
psycopg2
cryptography
elasticsearch>=1.0.0,<2.0.0
-os-package-registry>=0.0.4
+os-package-registry>=0.0.7
89 changes: 89 additions & 0 deletions tools/reindexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import logging

from elasticsearch import Elasticsearch, NotFoundError
from os_package_registry import PackageRegistry
from sqlalchemy import MetaData, create_engine

logging.root.setLevel(logging.INFO)


def select_indices(index_exists):
    """Choose the index to copy from and the index to rebuild into.

    Probes ``packages-0`` and ``packages-1`` in that order. The first one
    that exists becomes the source; the first one that does not exist
    becomes the target.

    :param index_exists: callable taking an index name and returning a
        truthy value when that index exists.
    :returns: ``(source_index, target_index)``. The source falls back to
        ``'packages'`` when neither probed index exists, and the target
        falls back to ``'packages-1'`` when both of them exist.
    """
    source_index = None
    target_index = None
    for i in range(2):
        idx = 'packages-%d' % i
        if index_exists(idx):
            if source_index is None:
                source_index = idx
        elif target_index is None:
            target_index = idx

    if source_index is None:
        source_index = 'packages'
    if target_index is None:
        target_index = 'packages-1'
    return source_index, target_index


if __name__ == "__main__":

    # Reindex ES
    es_host = os.environ['OS_ELASTICSEARCH_ADDRESS']
    es = Elasticsearch(hosts=[es_host], use_ssl='https' in es_host)

    backup_index = 'packages-backup'
    source_index, target_index = select_indices(es.indices.exists)

    # The target index is deleted below, so it must never be the source.
    assert source_index != target_index

    logging.info('SOURCE INDEX %s', source_index)
    logging.info('TARGET INDEX %s', target_index)
    logging.info('BACKUP INDEX %s', backup_index)

    # Drop whatever is left of the backup and target indices so both are
    # rebuilt from scratch; a missing index is not an error.
    for label, index_name in (('BACKUP', backup_index),
                              ('TARGET', target_index)):
        logging.info('DELETING %s INDEX', label)
        try:
            es.indices.delete(index_name)
        except NotFoundError:
            logging.info('%s INDEX NOT FOUND', label)

    source_pr = PackageRegistry(es_instance=es, index_name=source_index)
    backup_pr = PackageRegistry(es_instance=es, index_name=backup_index)
    target_pr = PackageRegistry(es_instance=es, index_name=target_index)

    # Copy every package from the source into both the backup and the
    # target index. A package whose data raises KeyError is logged and
    # skipped so one bad record does not abort the whole run.
    for package in source_pr.list_models():
        try:
            params = source_pr.get_raw(package)
            backup_pr.save_model(*params)
            target_pr.save_model(*params)
            logging.info('REINDEXING %s', package)
        except KeyError:
            logging.exception('FAILED TO READ DATA FOR %s', package)

    if es.indices.exists_alias(source_index, 'packages'):
        es.indices.delete_alias(source_index, 'packages')

    # NOTE(review): the swap to the new index is left disabled, so after
    # this script the 'packages' alias is removed and not re-created --
    # presumably intentional for the maintenance mode; confirm.
    # es.indices.delete(source_index)
    # es.indices.create_alias(target_index, 'packages')

    # Find orphan DB tables
    used_tables = set()
    for package in target_pr.list_models():
        params = target_pr.get_raw(package)
        fact_table = params[3].get('fact_table')  # model
        if fact_table is not None:
            used_tables.add(fact_table)

    engine = create_engine(os.environ['OS_CONDUCTOR_ENGINE'])
    meta = MetaData()
    meta.reflect(bind=engine,
                 only=lambda t, _: t.startswith('fdp'))
    for table in reversed(meta.sorted_tables):
        # used_tables holds table *names*, so compare by name; testing the
        # Table object itself against a set of strings never matches.
        if table.name in used_tables:
            logging.info('SKIPPING %s', table)
        else:
            # Report only: nothing is dropped here.
            logging.info('NOT DELETING %s', table)

0 comments on commit 95e2f20

Please sign in to comment.