# Tools for exploring and working with Elasticsearch

First, check which AWS account and username your credentials resolve to.

*THEN* install libs

### Probably only need to do this once

In [None]:
%%bash

# Account/username check: ask AWS which IAM user the current CLI credentials belong to.
aws iam get-user

In [4]:
%%bash

pip3 install boto3
pip3 install elasticsearch
pip3 install requests
pip3 install requests-aws4auth

Collecting boto3
  Downloading boto3-1.16.6-py2.py3-none-any.whl (129 kB)
Collecting botocore<1.20.0,>=1.19.6
  Downloading botocore-1.19.6-py2.py3-none-any.whl (6.7 MB)
Collecting s3transfer<0.4.0,>=0.3.0
  Using cached s3transfer-0.3.3-py2.py3-none-any.whl (69 kB)
Collecting jmespath<1.0.0,>=0.7.1
  Using cached jmespath-0.10.0-py2.py3-none-any.whl (24 kB)
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.16.6 botocore-1.19.6 jmespath-0.10.0 s3transfer-0.3.3
Collecting elasticsearch
  Downloading elasticsearch-7.9.1-py2.py3-none-any.whl (219 kB)
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.9.1
Collecting requests-aws4auth
  Downloading requests_aws4auth-1.0.1-py2.py3-none-any.whl (29 kB)
Installing collected packages: requests-aws4auth
Successfully installed requests-aws4auth-1.0.1


# Setup python -- import libs

_Generally start here_

In [5]:
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import boto3
import json
import requests

## Setup configuration -- endpoint, region, etc.

In [3]:
# Values handed to the AWS request signer in getClient().
region, service = 'us-east-1', 'es'

# Elasticsearch domain host name only -- no "https://" prefix,
# e.g. 'search-cms-es-development-5ucfts45z4wmntf3ed4cnuunpu.us-east-1.es.amazonaws.com'
endpoint = 'vpc-cms-es-development-oc7dsjk45rha2ulqfir2pbls4u.us-east-1.es.amazonaws.com'
port = 443

# Setup Port Forwarding if needed to access ES through a Bastion Host

Typically you will use a command like:

```
ssh -i myDemoKP.pem -L 4443:vpc-cms-es-development-5ucfts45z4wmntf3ed4cnuunpu.us-east-1.es.amazonaws.com:443 ubuntu@54.90.108.142
```

This forwards local port 4443 to port 443 on the ES endpoint. Then set `endpoint` and `port` to the local side of the tunnel, as in the next cell.

In [13]:
# ONLY RUN THIS CELL WHEN FORWARDING PORTS.
# It points the client at the local side of the SSH tunnel instead of the
# real Elasticsearch endpoint; the signing settings stay the same.
endpoint, port = 'localhost', 4443
region, service = 'us-east-1', 'es'

### Tokens time out, so you may need to call getClient() frequently -- perhaps in every cell

In [14]:
def getClient(endpoint, region, verify_certs=True):
    """Build an Elasticsearch client whose requests are signed with AWS credentials.

    Credentials come from the default boto3 session. Because session tokens
    expire, call this again whenever a fresh client is needed.

    Note: `port` and `service` are NOT parameters; they are read from the
    notebook-level variables set in the configuration cells.

    Args:
        endpoint: Host name of the Elasticsearch endpoint (no scheme).
        region: AWS region passed to the request signer.
        verify_certs: Whether to verify the server's TLS certificate.
            Defaults to True (the original behaviour). When connecting through
            a port-forward as 'localhost', certificate verification may fail
            because the host name differs from the real endpoint; pass False
            only for that case.

    Returns:
        A configured `Elasticsearch` client.

    Raises:
        RuntimeError: If boto3 cannot find any AWS credentials.
    """
    credentials = boto3.Session().get_credentials()
    if credentials is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise RuntimeError(
            "No AWS credentials found; configure them (e.g. `aws configure`) and retry."
        )

    # Take one consistent snapshot of key/secret/token so a credential refresh
    # between attribute reads cannot mix old and new values.
    frozen = credentials.get_frozen_credentials()
    awsauth = AWS4Auth(frozen.access_key, frozen.secret_key, region, service,
                       session_token=frozen.token)

    return Elasticsearch(
        hosts=[{'host': endpoint, 'port': port}],
        http_auth=awsauth,
        use_ssl=True,
        verify_certs=verify_certs,
        connection_class=RequestsHttpConnection,
    )

### Walk the indices

In [None]:
es = getClient(endpoint, region)

# `indices` is reused by the show/delete cells further down, so keep this name.
indices = es.cat.indices(format='json')
print("index - doc count")
# A plain loop (rather than a list comprehension around print) keeps the cell
# from echoing a list of None values as its output.
for position, index_info in enumerate(indices):
    print(f"{position}: {index_info['index']} - {index_info['docs.count']}")

#### Inspect indices

In [None]:
indexes = ['latest_telemetry']
es = getClient(endpoint, region)

# One match_all search response per index listed above.
data = [es.search(index=i, body={'query': {'match_all': {}}}) for i in indexes]

print(len(data))
first_hits = data[0]['hits']['hits']
print(len(first_hits))
# Guard against an empty index: indexing [0] on an empty hit list raises IndexError.
if first_hits:
    print(json.dumps(first_hits[0]['_source']))
else:
    print(f"no documents returned from {indexes[0]}")

## Delete data for a VIN

In [None]:
def getDeviceIdFromVin(es, vin):
  """Return the device id recorded for `vin`, or None if it cannot be found.

  The 'shared_cardata' index is searched first and 'cardata' second. For each,
  the `deviceid` of the first device on the first hit is returned. Any
  exception raised while searching or unpacking a response (missing index,
  no hits, unexpected document shape, ...) just moves on to the next index.
  """
  for candidate in ('shared_cardata', 'cardata'):
    try:
      response = es.search(index=candidate, body={'query': {'match': {'vin': vin}}})
      first_hit = response['hits']['hits'][0]
      return first_hit['_source']['devices'][0]['deviceid']
    except Exception:
      # Best-effort lookup: fall through to the next candidate index.
      continue
  return None

def deleteDocsFromIndex(es, index, key, val):
  """Delete the documents in `index` that a match query on `key` = `val` returns.

  Only the hits returned by a single `es.search` call are deleted. No `size`
  is passed, so the server's default page size applies (10 in stock
  Elasticsearch -- confirm for your cluster); re-run to remove further matches.

  This stays best-effort: errors never propagate to the caller. Unlike a
  silent `pass`, though, a failure is printed, because a destructive helper
  that fails quietly looks the same as a successful delete.

  Args:
    es: Client exposing `search(index=..., body=...)` and `delete(index=..., id=...)`.
    index: Name of the index to search and delete from.
    key: Field name used in the match query.
    val: Value to match. None means "nothing to delete" and issues no request.

  Returns:
    The number of documents deleted by this call.
  """
  if val is None:
    # e.g. a device id lookup that found nothing; a match on None is never useful.
    return 0

  deleted = 0
  try:
    res = es.search(index=index, body={'query': {'match': {key: val}}})
    for hit in res['hits']['hits']:
      es.delete(index=index, id=hit['_id'])
      deleted += 1
  except Exception as e:
    print(f"deleteDocsFromIndex: {index} ({key}={val!r}) failed after {deleted} deletion(s): {e!r}")
  return deleted

def showDocsFromIndex(es, index, key, val):
  """Print each hit that a match query on `key` = `val` returns from `index`.

  Best-effort: any exception raised while searching or printing is swallowed,
  so an index that cannot be queried simply produces no output.
  """
  try:
    response = es.search(index=index, body={'query': {'match': {key: val}}})
    for hit in response['hits']['hits']:
      print(hit)
  except Exception:
    pass

In [None]:
vin = '1AZZV88YXKA10000'

es = getClient(endpoint, region)

deviceid = getDeviceIdFromVin(es, vin)
print(deviceid)

# `indices` comes from the "Walk the indices" cell -- run that cell first.
# Plain loops are used (not list comprehensions) so the cell does not echo a
# list of None values after the printed documents.
if deviceid is not None:
    # No device id was found for this VIN otherwise, so there is nothing to look up.
    for index_info in indices:
        showDocsFromIndex(es, index_info['index'], 'deviceid', deviceid)
for index_info in indices:
    showDocsFromIndex(es, index_info['index'], 'vin', vin)


### and delete

In [None]:
es = getClient(endpoint, region)

# DESTRUCTIVE. Uses `vin` and `deviceid` from the previous cell and `indices`
# from the "Walk the indices" cell -- check the documents printed above before
# running this.
if deviceid is not None:
    # Skip the device id pass entirely when the lookup found nothing.
    for index_info in indices:
        deleteDocsFromIndex(es, index_info['index'], 'deviceid', deviceid)
for index_info in indices:
    deleteDocsFromIndex(es, index_info['index'], 'vin', vin)