In [None]:
# TODO: Fix/document auth: auth failed when the script was run on the customer side.

In [None]:
%pip install boto3 aws_requests_auth 'elasticsearch<7'

In [None]:
import datetime
import json
import os
import warnings

import boto3
from aws_requests_auth.boto_utils import BotoAWSRequestsAuth
from elasticsearch import Elasticsearch, RequestsHttpConnection
import elasticsearch.helpers

# Deployment-specific settings: adjust these three values before running.
ES_REGION = 'us-west-2'
es_host = "search-aneesh-search-1mutuaqa458he-lpxijlld77ln5u4msy5otp4kra.us-west-2.es.amazonaws.com"
BUCKET_NAME = 'quilt-t4-staging'

# AWS request auth for the 'es' service on the host/region configured above.
auth = BotoAWSRequestsAuth(aws_host=es_host, aws_region=ES_REGION, aws_service='es')

# Elasticsearch client talking to es_host on port 443 with certificate
# verification enabled and a 60 second request timeout.
es_client = Elasticsearch(
    hosts=[dict(host=es_host, port=443)],
    connection_class=RequestsHttpConnection,
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    timeout=60,
)

# S3 client built from the default boto3 configuration.
s3_client = boto3.client('s3')

def store_debug_data(filename, data):
    """Write ``data`` to ``debug-data/<BUCKET_NAME>/<filename>``.

    The path is relative to the current working directory. Missing parent
    directories are created first, and an existing file is overwritten.

    :param filename: path of the file below the per-bucket debug directory.
    :param data: bytes-like payload written to the file as-is (binary mode).
    """
    target_path = f'debug-data/{BUCKET_NAME}/{filename}'
    parent_dir = os.path.dirname(target_path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(target_path, 'wb') as out_file:
        out_file.write(data)
    
def json_dump_default(obj):
    """``default`` hook for ``json.dumps`` that also serializes datetimes.

    :param obj: a value ``json.dumps`` could not serialize on its own.
    :returns: ``str(obj)`` when ``obj`` is a ``datetime.datetime``.
    :raises TypeError: for any other type; the message names the offending
        type so a failed dump can be diagnosed.
    """
    if isinstance(obj, datetime.datetime):
        return str(obj)
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

In [None]:
# Look up the two indices named after the bucket: "<bucket>" and "<bucket>_packages".
bucket_index_names = ','.join([BUCKET_NAME, BUCKET_NAME + '_packages'])
indices = es_client.indices.get(bucket_index_names)
if len(indices) != 2:
    warnings.warn('there should be 2 indices!')
bucket_indices_json = json.dumps(indices)
store_debug_data('es/bucket-related-indices.json', bucket_indices_json.encode())

# Also dump every index on the cluster for comparison.
all_indices_json = json.dumps(es_client.indices.get('*'))
store_debug_data('es/all-indices.json', all_indices_json.encode())

In [None]:
# Save the bucket's notification configuration as returned by S3.
bucket_notification_conf = s3_client.get_bucket_notification_configuration(
    Bucket=BUCKET_NAME,
)
notification_conf_json = json.dumps(bucket_notification_conf)
store_debug_data('s3/notification-conf.json', notification_conf_json.encode())

In [None]:
# Collect the SNS topics referenced by the bucket's notification configuration
# and dump every subscription attached to each of them.
sns_topics = {x['TopicArn'] for x in bucket_notification_conf.get('TopicConfigurations', ())}
subscriptions = {}
if sns_topics:
    # S3 reports no LocationConstraint for us-east-1 buckets.
    bucket_location = s3_client.get_bucket_location(Bucket=BUCKET_NAME)['LocationConstraint'] or 'us-east-1'
    if bucket_location == 'EU':
        # Legacy alias S3 may return for eu-west-1; it is not a valid region name.
        bucket_location = 'eu-west-1'
    # The SNS client is created in the bucket's region.
    sns_client = boto3.client('sns', region_name=bucket_location)
    # list_subscriptions_by_topic is paginated: walk all pages so topics with
    # many subscriptions are not truncated to the first page.
    subscriptions_paginator = sns_client.get_paginator('list_subscriptions_by_topic')
    subscriptions = {
        topic: [
            subscription
            for page in subscriptions_paginator.paginate(TopicArn=topic)
            for subscription in page.get('Subscriptions', ())
        ]
        for topic in sns_topics
    }
else:
    warnings.warn('no S3 notifications configured!')
store_debug_data('s3/notification-subscriptions.json', json.dumps(subscriptions).encode())

In [None]:
# Dump all object versions and delete markers under the '.quilt/' prefix as
# JSON lines, one file per kind.
versions_buf = bytearray()
delete_markers_buf = bytearray()
# Response key -> buffer that accumulates that kind of entry.
jsonl_targets = (
    ('Versions', versions_buf),
    ('DeleteMarkers', delete_markers_buf),
)
version_pages = s3_client.get_paginator('list_object_versions').paginate(
    Bucket=BUCKET_NAME,
    Prefix='.quilt/',
)
for page in version_pages:
    for response_key, target_buf in jsonl_targets:
        # A page may omit either key, hence the empty default.
        for entry in page.get(response_key, ()):
            # json_dump_default handles the datetime values json can't serialize.
            line = json.dumps(entry, default=json_dump_default).encode() + b'\n'
            target_buf.extend(line)
store_debug_data('s3/object-versions.jsonl', versions_buf)
store_debug_data('s3/delete-markers.jsonl', delete_markers_buf)

In [None]:
# Dump every document of the "<bucket>_packages" index as JSON lines.
buf = bytearray()
package_hits = elasticsearch.helpers.scan(
    es_client,
    index=f"{BUCKET_NAME}_packages",
    query={"query": {"match_all": {}}},
)
for hit in package_hits:
    buf.extend(json.dumps(hit).encode() + b'\n')
store_debug_data('es/packages-index-data.jsonl', buf)