In [1]:
pip install --upgrade elasticsearch pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
from elasticsearch import Elasticsearch

username = %env ELASTICSEARCH_USERNAME
password = %env ELASTICSEARCH_PASSWORD
host = %env ELASTICSEARCH_HOST
port = %env ELASTICSEARCH_PORT

es = Elasticsearch(f'https://{username}:{password}@{host}:{port}', ca_certs='./http_ca.crt')

# Fetch up to 20 documents from the 'users' index with a match-all query.
res = es.search(
    index='users',
    size=20,
    query={"match_all": {}},
)

# Keep the raw list of hits in `hits` for further inspection.
hits = res['hits']['hits']

# Print the 'name' field of every returned document, one per line.
for doc in hits:
    source = doc['_source']
    print(source['name'])

Tiffany Griffin
Jennifer Hill
Thomas Abbott
John Hogan
Jacob Jones
Brenda Thomas
Heather Saunders
Garrett Fowler
Olivia Baxter
Kathryn Weiss
Glenn Griffin
Kathleen Jackson
Brian Salazar
John Blake
David Glass
Nicholas Martinez
Jonathan Castillo
Robert Brown
Richard Mills
Marc Gibbs


In [3]:
import pandas as pd

# Flatten the list of hit dicts into a table; nested keys such as
# hit['_source']['name'] become dotted columns ('_source.name').
df = pd.json_normalize(data=hits)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,_index,_id,_score,_source.name,_source.street,_source.city,_source.zip
0,users,pchEfYABgiurHmFaECTM,1.0,Tiffany Griffin,831 Robert Plaza,Lake Brendashire,21455
1,users,pshEfYABgiurHmFaECTM,1.0,Jennifer Hill,94293 Vance Crossing Suite 042,West Christophermouth,36185
2,users,p8hEfYABgiurHmFaECTM,1.0,Thomas Abbott,0386 Harding Square Apt. 617,West Omar,99921
3,users,qMhEfYABgiurHmFaECTM,1.0,John Hogan,832 Day Mews Suite 540,New Carol,96457
4,users,qchEfYABgiurHmFaECTM,1.0,Jacob Jones,6127 Davidson Forges,Joneshaven,30584


In [4]:
# Full-text `match` query against the 'name' field.
name_query = {"match": {"name": "Olivia Baxter"}}
res = es.search(index='users', query=name_query)

# Show the stored document of the first returned hit.
hits = res['hits']['hits']
hits[0]['_source']

{'name': 'Olivia Baxter',
 'street': '0525 Gina Center Apt. 737',
 'city': 'South Shannonfurt',
 'zip': '72078'}

In [5]:
# Lucene query-string syntax.
# The terms are grouped in parentheses so that BOTH are matched against the
# `name` field. In the ungrouped form `name:David Glass` only "David" is bound
# to `name`; "Glass" would be searched in the default field(s) instead.
res = es.search(index='users', q="name:(David Glass)")

# Show the stored document of the first returned hit.
hits = res['hits']['hits']
hits[0]['_source']

{'name': 'David Glass',
 'street': '24254 Joshua Lights Suite 960',
 'city': 'Lake Aprilville',
 'zip': '13500'}

In [6]:
# Compound `bool` query assembled from two `match` clauses:
#   - `must`:   the city has to match "West"
#   - `filter`: the zip has to match "99921" (per Elasticsearch bool-query
#               semantics, filter clauses restrict results without scoring)
city_clause = {"match": {"city": "West"}}
zip_clause = {"match": {"zip": "99921"}}
q = {"bool": {"must": city_clause, "filter": zip_clause}}

res = es.search(index='users', query=q)

# Display the complete hit records (metadata plus `_source`).
hits = res['hits']['hits']
hits

[{'_index': 'users',
  '_id': 'p8hEfYABgiurHmFaECTM',
  '_score': 2.3241208,
  '_source': {'name': 'Thomas Abbott',
   'street': '0386 Harding Square Apt. 617',
   'city': 'West Omar',
   'zip': '99921'}}]

In [7]:
# Open a scroll over all documents of the 'users' index: batches of 500,
# with a 20-minute keep-alive requested for the scroll.
res = es.search(
    index='users',
    query={"match_all": {}},
    size=500,
    scroll='20m',
)

# `sid` identifies the scroll for follow-up requests; `size` is the total
# hit count reported by the search.
sid, size = res['_scroll_id'], res['hits']['total']['value']
print(f'sid: {sid}')
print(f'size: {size}')

sid: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
size: 3000


In [8]:
# Page through the remaining batches of the scroll identified by `sid`.
# NOTE: the search that opened the scroll already returned the first batch of
# hits; this loop only sees the batches that follow it, so handle that first
# batch as well if the documents are actually consumed here.
page = 1
try:
    while size > 0:
        res = es.scroll(scroll_id=sid, scroll='20m')
        sid = res['_scroll_id']  # always continue with the id from the latest response
        hits = res['hits']['hits']
        size = len(hits)  # hits in this batch; an empty batch ends the loop
        print(f'PAGE {page}')
        print(f'SID: {sid}')
        print(f'PENDING SIZE: {size}')
        # Uncomment to print the name of every document in the batch:
        # for doc in hits:
        #     print(doc['_source']['name'])
        page += 1
finally:
    # Release the server-side scroll context right away instead of leaving it
    # open until the 20-minute keep-alive expires. A 404 is ignored so that
    # re-running this cell after the scroll was already cleared does not fail.
    es.options(ignore_status=404).clear_scroll(scroll_id=sid)

PAGE 1
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 500
PAGE 2
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 500
PAGE 3
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 500
PAGE 4
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 500
PAGE 5
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 500
PAGE 6
SID: FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFjluazhrc3BsUmQybUZCYW01WGpHdmcAAAAAAADofRZudC10SWdzcFRzeXduYVc2Mmc2QmhR
PENDING SIZE: 0
