In [80]:
# Data manipulation
import pandas as pd
import numpy as np

# Elasticsearch API python
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Env. Vars
from dotenv import load_dotenv

# System
import os

In [81]:
# Create the client instance.
# Connection settings are read from environment variables after calling
# load_dotenv on the local '.env' file.
load_dotenv('.env')

host = os.getenv('ELASTICSEARCH_HOST')
passwd = os.getenv('ELASTICSEARCH_PASSWORD')

# os.getenv returns None for unset variables; fail early with a clear message
# instead of building the URL 'None:9200' or authenticating with a None password.
_missing = [
    name
    for name, value in (
        ('ELASTICSEARCH_HOST', host),
        ('ELASTICSEARCH_PASSWORD', passwd),
    )
    if not value
]
if _missing:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(_missing)}")

es = Elasticsearch(
    [f'{host}:9200'],
    basic_auth=('elastic', passwd),
)

### Criação e deleção de índices

In [None]:
# Create the index. A 400 response (e.g. the index already exists) is ignored
# so the cell can be re-run safely.
# Passing `ignore=` directly to an API call is deprecated in the 8.x client;
# per-request transport options go through `es.options(...)` instead.
es.options(ignore_status=400).indices.create(index='teste-ramon')

In [None]:
# Create the index with an explicit mapping and settings.
# `mappings` and `settings` were not defined anywhere earlier in the notebook,
# so a fresh "Restart & Run All" raised NameError here. They are defined in this
# cell; the values mirror what `es.indices.get(index="teste-ramon")` reports for
# the movie fields (adjust as needed).
_text_with_keyword = {
    'type': 'text',
    'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
}
mappings = {
    'properties': {
        'genre': _text_with_keyword,
        'title': _text_with_keyword,
        'year': {'type': 'long'},
    }
}
settings = {
    'index': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
    }
}

# A 400 response (e.g. the index already exists) is ignored. `ignore=` as a
# per-call keyword is deprecated in the 8.x client; use `es.options(...)`.
es.options(ignore_status=400).indices.create(
    index='teste-ramon',
    mappings=mappings,
    settings=settings,
)

In [None]:
# Delete the index. 400 and 404 responses (e.g. the index does not exist) are
# ignored so the cell can be re-run safely.
# Passing `ignore=` directly to an API call is deprecated in the 8.x client;
# per-request transport options go through `es.options(...)` instead.
es.options(ignore_status=[400, 404]).indices.delete(index='teste-ramon')

### Informações sobre os índices

In [None]:
# General information about the Elasticsearch connection
es.info()

In [85]:
# List every index in the instance together with its aliases.
# NOTE(review): the "*" pattern also returns hidden/system indices (names
# starting with "."), which makes the output very long.
es.indices.get_alias(index="*")

  es.indices.get_alias(index="*")


ObjectApiResponse({'.kibana_task_manager_8.6.0_001': {'aliases': {'.kibana_task_manager': {'is_hidden': True}, '.kibana_task_manager_8.6.0': {'is_hidden': True}}}, '.kibana_security_session_1': {'aliases': {'.kibana_security_session': {'is_hidden': True}}}, '.items-default-000001': {'aliases': {'.items-default': {'is_write_index': True}}}, 'produtos_cayena_testes_outer': {'aliases': {}}, 'produtos_cayena_train': {'aliases': {}}, 'felipe_teste': {'aliases': {}}, '.apm-custom-link': {'aliases': {}}, '.security-profile-8': {'aliases': {'.security-profile': {'is_hidden': True}}}, 'search_app_cayena': {'aliases': {}}, '.transform-notifications-000002': {'aliases': {'.transform-notifications-read': {'is_hidden': True}}}, 'produtos_cayena': {'aliases': {}}, '.transform-internal-007': {'aliases': {}}, '.fleet-enrollment-api-keys-7': {'aliases': {'.fleet-enrollment-api-keys': {'is_write_index': True, 'is_hidden': True}}}, '.lists-default-000001': {'aliases': {'.lists-default': {'is_write_index'

In [121]:
# Count the documents in an index using the count API
es.count(index='teste-ramon')

ObjectApiResponse({'count': 6, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [122]:
# Count the documents in a given index using the cat API (plain-text response)
es.cat.count(index='teste-ramon')

TextApiResponse('1678328272 02:17:52 6\n')

In [96]:
# Refresh the index
es.indices.refresh(index='teste-ramon')

ObjectApiResponse({'_shards': {'total': 2, 'successful': 1, 'failed': 0}})

In [84]:
# Information about one specific index (aliases, mappings and settings)
es.indices.get(index="teste-ramon")

ObjectApiResponse({'teste-ramon': {'aliases': {}, 'mappings': {'properties': {'Email': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'Nome': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'Skills': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'Sobrenome': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'genre': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'timestamp': {'type': 'date'}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'year': {'type': 'long'}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'teste-ramon', 'creation_date': '1678275308280', 'number_of_replicas': '1', 'uuid': 'D2Bpl-D_Rq6HQeYUbz9rJw', 'version': {'created': '8060099'}}}}})

### Indexando documentos manualmente (um a um)

In [23]:
# Single sample movie document used for the manual indexing example below.
doc = dict(
    genre=['IMAX', 'sci-fi'],
    title='Insterstellar',
    year=2014,
)

In [24]:
# Index `doc` into 'teste-ramon' under id=1
es.index(index='teste-ramon', id=1, document=doc)

ObjectApiResponse({'_index': 'teste-ramon', '_id': '1', '_version': 3, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1})

In [22]:
# Remove the movie document with id=1
es.delete(index='teste-ramon', id=1)

ObjectApiResponse({'_index': 'teste-ramon', '_id': '1', '_version': 2, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1})

In [101]:
# Fetch the document with id=4 from index 'teste-ramon' and show its source.
# NOTE(review): id=4 is only written by the bulk-load cell further down, so on a
# fresh index this cell must run after that one.
res2 = es.get(index='teste-ramon', id=4)
res2['_source']

{'genre': 'terror', 'title': 'Terrifier 2', 'year': 2022}

### Adicionando vários filmes

In [102]:
# Sample movie rows as [genre, title, year]; genre is either a single string
# or a list of strings.
d = [
    ['action', 'The Davinci Code', 2005],
    [['sci-fi', 'IMAX'], 'Insterstellar', 2014],
    ['kids', 'Toy Story 1', 1995],
    ['kids', 'The Lion King', 1998],
    ['terror', 'Terrifier 2', 2022],
    ['terror', 'Terrifier', 2019]
]

In [103]:
# Build the movies DataFrame from the sample rows
filmes = pd.DataFrame(data=d, columns=['genre', 'title', 'year'])

In [104]:
# Display the movies DataFrame
filmes

Unnamed: 0,genre,title,year
0,action,The Davinci Code,2005
1,"[sci-fi, IMAX]",Insterstellar,2014
2,kids,Toy Story 1,1995
3,kids,The Lion King,1998
4,terror,Terrifier 2,2022
5,terror,Terrifier,2019


In [105]:
def generate_docs(df=None, index='teste-ramon'):
    """Yield one bulk-index action per row of a movies DataFrame.

    Args:
        df: DataFrame with 'genre', 'title' and 'year' columns. When omitted,
            the notebook-level `filmes` frame is used, so the original
            zero-argument call `generate_docs()` keeps working.
        index: Name of the target index written to each action's '_index'.

    Yields:
        dict: an action with '_index', '_id' (the row's index label) and
        '_source' (the row's genre, title and year).
    """
    if df is None:
        # Notebook global; only looked up when no frame is passed explicitly.
        df = filmes

    # to_dict('records') builds plain dicts in one pass instead of creating a
    # Series per row as iterrows() does.
    records = df[['genre', 'title', 'year']].to_dict('records')
    for row_label, source in zip(df.index, records):
        yield {
            '_index': index,
            '_id': row_label,
            '_source': source,
        }

# Send every action produced by generate_docs() to Elasticsearch via the bulk helper
helpers.bulk(es, generate_docs())

(6, [])

### Buscas

In [117]:
# Simple full-text search for a term on the 'title' field.
search = 'Toy Story'

# `search` is already a string, so it is passed directly instead of being
# wrapped in a redundant f-string. from_=0 and size=1 request only the first hit.
resp = es.search(
    index="teste-ramon",
    from_=0,
    size=1,
    query={'match': {'title': search}},
)

In [118]:
# Source of the first hit (indexing [0] assumes the search returned at least one hit)
resp['hits']['hits'][0]['_source']

{'genre': 'kids', 'title': 'Toy Story 1', 'year': 1995}