# Elastic basics

## install the client

In [4]:
!pip install elasticsearch

Collecting elasticsearch
  Using cached elasticsearch-8.11.1-py3-none-any.whl.metadata (5.4 kB)
Collecting elastic-transport<9,>=8 (from elasticsearch)
  Using cached elastic_transport-8.11.0-py3-none-any.whl.metadata (3.5 kB)
Using cached elasticsearch-8.11.1-py3-none-any.whl (412 kB)
Using cached elastic_transport-8.11.0-py3-none-any.whl (59 kB)
Installing collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-8.11.0 elasticsearch-8.11.1


## Import the client

In [4]:
from elasticsearch import Elasticsearch

## Create a client instance
- You need to provide the ip address of the database

In [5]:
es = Elasticsearch("http://elasticsearch:9200")

## Description of the database

In [6]:
es.info().body

{'name': 'c67dec3bdf8b',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': 'EyeuIT9wT2mT3qhJzMYkxw',
 'version': {'number': '8.10.2',
  'build_flavor': 'default',
  'build_type': 'docker',
  'build_hash': '6d20dd8ce62365be9b1aca96427de4622e970e9e',
  'build_date': '2023-09-19T08:16:24.564900370Z',
  'build_snapshot': False,
  'lucene_version': '9.7.0',
  'minimum_wire_compatibility_version': '7.17.0',
  'minimum_index_compatibility_version': '7.0.0'},
 'tagline': 'You Know, for Search'}

## Create an index

In [8]:
index_name = input('index name: ')
es.indices.create(index=index_name)

index name:  omar6


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'omar6'})

## Insert a document in an index

In [9]:
es.index(
    index="omar1",
    id="my_document_id",
    document={
        "foo": "foo",
        "bar": "bar",
    },
)


ObjectApiResponse({'_index': 'omar1', '_id': 'my_document_id', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 6})

## Get the documents of an index

In [10]:
index = input('index name:')
es.search(index=index)

index name: omar6


ObjectApiResponse({'took': 1, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}})

## Update a document

In [11]:
es.update(
    index="meteo",
    id="my_document_id",
    doc={
        "foo": "bar",
        "new_field": "new value",
    },
)

NotFoundError: NotFoundError(404, 'document_missing_exception', '[my_document_id]: document missing')

## Delete a document

In [12]:
es.delete(index="meteo", id="my_document_id")

NotFoundError: NotFoundError(404, "{'_index': 'meteo', '_id': 'my_document_id', '_version': 1, 'result': 'not_found', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7160741, '_primary_term': 9}")

## Delete an index

In [34]:
index = input('index to delete: ')
es.indices.delete(index=index)

index to delete:  meteo


ObjectApiResponse({'acknowledged': True})

## Information about an index
You can enter the name of the index, but also '*', or a list of index names: ['index_1', 'index_2', ...]

In [33]:
index = input('index name: ')
index_info = es.indices.get(index=index)
print(index_info)


index name:  meteo


{'meteo': {'aliases': {}, 'mappings': {}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'meteo', 'creation_date': '1700474388859', 'number_of_replicas': '1', 'uuid': '9RxhS57vR0u8tlIMM94jdw', 'version': {'created': '8100299'}}}}}


# Insert ndjson file

In [61]:
import json

def read_ndjson_file(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(json.loads(line))
    return data


In [62]:
def generate_bulk_actions(data, index_name):
    for record in data:
        yield {
            "_index": index_name,
            "_source": record,
        }


In [63]:
from elasticsearch import Elasticsearch, helpers

# Connexion to Elasticsearch
es = Elasticsearch("http://elasticsearch:9200")

# file
file_path = '/home/jovyan/extraction/test_mouleconnected.ndjson'


data = read_ndjson_file(file_path)

index_name = 'mouleconnected'

# send batch data to elastic
helpers.bulk(es, generate_bulk_actions(data, index_name))



(407, [])