# Tools for exploring and working with Elasticsearch

First, check which AWS account and username your current credentials resolve to.

*THEN* install libs

### Probably only need to do this once

In [None]:
%%bash

# Ask AWS which IAM user the currently configured credentials belong to.
aws iam get-user

In [None]:
%%bash

pip3 install boto3
pip3 install elasticsearch
pip3 install requests
pip3 install requests-aws4auth

# Setup python -- import libs

_Generally start here_

In [21]:
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import boto3
import json
import requests

## Setup configuration -- endpoint, region, etc.

In [None]:
# Hostname of the Elasticsearch domain (no scheme; the client connects on port 443).
# Alternate endpoint kept for reference (presumably an earlier domain -- confirm before reuse):
#   'search-cms-es-development-5ucfts45z4wmntf3ed4cnuunpu.us-east-1.es.amazonaws.com'
endpoint = 'search-cms-es-development-oc7dsjk45rha2ulqfir2pbls4u.us-east-1.es.amazonaws.com'
region = 'us-east-1'  # AWS region handed to the request signer
service = 'es'  # AWS service name handed to the request signer

### Tokens time out, so you may need to call getClient() frequently -- perhaps in every cell

In [None]:
def getClient(endpoint, region, service='es'):
    """Build an Elasticsearch client whose requests are signed with the current AWS credentials.

    Args:
        endpoint: Hostname of the Elasticsearch domain (connected to on port 443 over TLS).
        region: AWS region used when signing requests.
        service: AWS service name used when signing requests. Defaults to 'es' so the
            function no longer depends on a module-level `service` variable being defined.

    Returns:
        An `Elasticsearch` client using `RequestsHttpConnection` with certificate
        verification enabled.

    Raises:
        RuntimeError: If boto3 cannot find any AWS credentials.
    """
    credentials = boto3.Session().get_credentials()
    if credentials is None:
        # Fail with a clear message instead of an AttributeError on `None.access_key`.
        raise RuntimeError("No AWS credentials found; configure credentials before calling getClient()")

    awsauth = AWS4Auth(
        credentials.access_key,
        credentials.secret_key,
        region,
        service,
        session_token=credentials.token,
    )
    return Elasticsearch(
        hosts=[{'host': endpoint, 'port': 443}],
        http_auth=awsauth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )

### Walk the indices

In [19]:
es = getClient(endpoint, region)

# One dict per index; the fields read below are 'index' and 'docs.count'.
indices = es.cat.indices(format='json')
print("index - doc count")
# A plain loop instead of a list comprehension: the comprehension's value (a list of
# None, one per print call) would otherwise be echoed as the cell's output.
for n, i in enumerate(indices):
    print(f"{n}: {i['index']} - {i['docs.count']}")

index - doc count
0: latest_telemetry - 4
1: trip - 2
2: dtc - 2
3: .kibana_1 - 0
4: cardata - 880923
5: anomaly - 1
6: event - 0
7: devices - 0
8: shared_cardata - 4


[None, None, None, None, None, None, None, None, None]

#### Inspect indices

In [33]:
indexes = [ 'latest_telemetry']
es = getClient(endpoint, region)

# One match_all search response per requested index, in the same order as `indexes`.
data = [ es.search(index=i, body={'query':{'match_all':{}}}) for i in indexes ]

print(len(data))
# Report every requested index, not just the first, and tolerate empty indices
# (indexing [0] on an empty hit list would raise IndexError).
for name, res in zip(indexes, data):
    hits = res['hits']['hits']
    print(len(hits))  # number of hits returned in this response
    if hits:
        print(json.dumps(hits[0]['_source']))
    else:
        print(f"{name}: no documents")

1
1
{"messageid": "5AZSL56XXKB10000-2020-09-29T15:34:48.048Z", "simulationid": "vqRX_CnYn", "creationtimestamp": "2020-09-29T15:34:48.048Z", "sendtimestamp": "2020-09-29T15:34:48.048Z", "vin": "5AZSL56XXKB10000", "tripid": "ZcJUYWgue", "driverid": "", "geolocation": {"latitude": 42.29979393052495, "longitude": -83.6988490144884, "altitude": 0, "heading": 0, "speed": 0.0396161169425, "location": [-83.6988490144884, 42.29979393052495]}, "communications": {"gsm": {"satelites": "", "fix": "", "networktype": "", "mnc": "", "mcc": "", "lac": "", "cid": ""}, "wifi": {"networkid ": ""}, "wired": {"networkid ": ""}}, "acceleration": {"maxlongitudinal": {"axis": 0, "value": 0.01088783}, "maxlateral": {"axis": 0, "value": 0.2442455635230658}}, "throttle": {"max": 30.665122822671627, "average": 0}, "speed": {"max": 0.0396161169425, "average": 0}, "odometer": {"metres": 4.26, "ticksfl": 0, "ticksfr": 0, "ticksrl": 0, "ticksrr": 0}, "fuel": 33.770159918496745, "name": "9gSDTBwZR", "oiltemp": 300.576

## Delete data for a VIN

In [None]:
def getDeviceIdFromVin(es, vin):
  """Return the device id stored for `vin`, or None if it cannot be found.

  The 'shared_cardata' index is consulted first and 'cardata' second. For each index a
  `match` query on the 'vin' field is run, and the id is read from the first hit at
  `_source.devices[0].deviceid`. Any exception raised while searching or while reading
  the response (no hits, missing keys, request failure) moves on to the next index.
  """
  for candidate in ('shared_cardata', 'cardata'):
    try:
      response = es.search(index=candidate, body={'query':{'match': {'vin':vin}}})
      first_hit = response['hits']['hits'][0]
      return first_hit['_source']['devices'][0]['deviceid']
    except Exception:
      # Best-effort lookup: fall through to the next candidate index.
      continue
  return None

def deleteDocsFromIndex(es, index, key, val):
  """Delete every document in `index` whose `key` field matches `val`.

  Uses the delete-by-query API with a `match` query. Searching and then deleting hit
  by hit only reaches the first page of results (10 hits by default), which leaves
  the remaining matching documents in place.

  Args:
    es: Elasticsearch client.
    index: Name of the index to delete from.
    key: Field name to match on.
    val: Value to match; when None no request is sent.

  Returns:
    The number of documents the server reports as deleted, or 0 when `val` is None
    or the request fails. Failures are printed rather than raised so that a loop
    over many indices keeps going.
  """
  if val is None:
    # Nothing to match on (for example a device id lookup that found nothing).
    return 0
  try:
    res = es.delete_by_query(index=index, body={'query':{'match': {key:val}}})
  except Exception as e:
    # Still best-effort, but no longer silent: say which delete failed and why.
    print(f"delete from {index} where {key}={val!r} failed: {e}")
    return 0
  return res.get('deleted', 0)

def showDocsFromIndex(es, index, key, val):
  """Print the search hits in `index` whose `key` field matches `val`.

  Runs a `match` query and prints each returned hit on its own line. Only the hits
  contained in the search response are shown (the first page, 10 by default), so
  this is a preview rather than a full listing.

  Args:
    es: Elasticsearch client.
    index: Name of the index to search.
    key: Field name to match on.
    val: Value to match; when None no request is sent and nothing is printed.

  Failures are printed rather than raised so that a loop over many indices keeps going.
  """
  if val is None:
    # Nothing to match on (for example a device id lookup that found nothing).
    return
  try:
    res = es.search(index=index, body={'query':{'match': {key:val}}} )
    hits = res['hits']['hits']
  except Exception as e:
    # Still best-effort, but no longer silent: say which search failed and why.
    print(f"search of {index} for {key}={val!r} failed: {e}")
    return
  for r in hits:
    print(r)

In [None]:
vin = '1AZZV88YXKA10000'

es = getClient(endpoint, region)

deviceid = getDeviceIdFromVin(es, vin)
print(deviceid)

# `indices` is the listing produced by the "Walk the indices" cell; run that cell first.
# Plain loops are used so the cell does not echo a list of None values as its output.
if deviceid is not None:
    # Only search by device id when the lookup actually found one.
    for i in indices:
        showDocsFromIndex(es, i['index'], 'deviceid', deviceid)
for i in indices:
    showDocsFromIndex(es, i['index'], 'vin', vin)


### and delete

In [None]:
es = getClient(endpoint, region)

# Uses `vin`, `deviceid` and `indices` as set by the earlier cells of this notebook;
# run those cells (and review what they print) before running this one.
# Plain loops are used so the cell does not echo a list of None values as its output.
if deviceid is not None:
    # Only delete by device id when the lookup actually found one.
    for i in indices:
        deleteDocsFromIndex(es, i['index'], 'deviceid', deviceid)
for i in indices:
    deleteDocsFromIndex(es, i['index'], 'vin', vin)