## Documentation

To read more about the index API, visit the [docs](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html).



## Connect to Elasticsearch

In [None]:
from pprint import pprint
from elasticsearch import Elasticsearch
from elastic_transport import ObjectApiResponse

# Client for the node at localhost:9200 (plain HTTP, no credentials passed).
es = Elasticsearch('http://localhost:9200')
# Ask the server for its info; the success message below is only reached
# if this call does not raise.
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

In [8]:
try:
    # List the indices whose names match the 'apo*' wildcard, together with
    # their aliases. Pass index='*' instead to list every index.
    indices_info = es.indices.get_alias(index='apo*')
except Exception as e:
    # Notebook-level boundary: report the failure instead of raising.
    print(f"An error occurred: {e}")
else:
    # Only reached when the request succeeded.
    print("Elasticsearch Indices:")
    for index_name in indices_info.keys():
        print(f"- {index_name}")

Elasticsearch Indices:
- apod_n_gram
- apod
- apod_embedding
- apod_raw


## Insert one document

Create a dummy index to test inserting a single document. Elasticsearch expects each document as a JSON object, not as a PDF or plain-text file.

In [None]:
# Drop any previous 'movie_index' before creating it again.
# ignore_unavailable=True is passed on the delete, presumably so that a
# missing index is not treated as an error (confirm against the client docs).
es.indices.delete(index='movie_index', ignore_unavailable=True)
es.indices.create(index='movie_index')

In [None]:
# Sample document: a flat object with three string fields.
document = dict(
    title='Colossus: The Forbin Project',
    text='Eric Braeden stars as Dr. Charles Forbin, who has created a supercomputer named Colossus',
    created_on='2024-09-22',
)
# Index it into 'movie_index'; no document id is passed to the call.
response = es.index(body=document, index='movie_index')
# Bare name on the last line so the notebook displays the response.
response

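The `response` object contains the result of the operation. If the document was inserted successfully, then `result = created`. Each document gets an `_id`, and `_shards` reports how many shard copies (the primary and its replicas) the write was attempted on and how many succeeded or failed; the document itself is stored whole on a single primary shard rather than being split across shards.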

In [None]:
# Outcome reported for the index call (the 'result' field of the response).
print(response["result"])

In [None]:
# Shard bookkeeping reported for the index call (the '_shards' field).
print(response["_shards"])

In [None]:
# Document id returned in the response. No id was passed to es.index, so
# this is presumably generated by Elasticsearch (confirm against the docs).
print(response["_id"])

In [None]:
# Index name echoed back in the response (the '_index' field).
print(response["_index"])

## Insert multiple documents

Repeat the same step in a `for` loop, indexing one document per iteration.

In [None]:
import json

# Load the sample documents. The context manager closes the file handle as
# soon as parsing finishes, and the encoding is pinned to UTF-8 so the result
# does not depend on the platform's default encoding.
with open("../data/dummy_data.json", encoding="utf-8") as f:
    dummy_data = json.load(f)
# Bare name on the last line so the notebook displays the loaded data.
dummy_data

In [None]:
def insert_document(document: dict[str,str]) -> ObjectApiResponse:
    """Index *document* into ``movie_index`` and return the client's response.

    Uses the module-level ``es`` client; no document id is passed.
    """
    return es.index(index='movie_index', body=document)


def print_info(response: ObjectApiResponse) -> None:
    """Print a one-line summary of an index response.

    Reads ``_id``, ``result`` and ``_shards.total`` from *response* and
    writes them to stdout as a single line. Returns nothing.
    """
    doc_id = response['_id']
    outcome = response["result"]
    shard_total = response['_shards']['total']
    print(f"Document ID: {doc_id} is '{outcome}' and is split into {shard_total} shards.")


# Index every entry of dummy_data, one es.index call per document, and print
# a summary line for each response. Note that `document` and `response` are
# rebound here, so after the loop they refer to the last entry processed.
for document in dummy_data:
    response = insert_document(document)
    print_info(response)

## Print mapping

In [None]:
from pprint import pprint

# Read back the mapping of 'movie_index' and show only its field definitions.
# The index was created without an explicit mapping, so these are presumably
# the types Elasticsearch inferred from the inserted documents.
index_mapping = es.indices.get_mapping(index='movie_index')
pprint(index_mapping["movie_index"]["mappings"]["properties"])

## Manual mapping

In [None]:
# Recreate 'my_index' empty; the mapping is attached afterwards in a
# separate put_mapping request.
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index')

# 'created_on' is a date. 'text' and 'title' share one definition: a 'text'
# field with a 'keyword' sub-field (ignore_above=256). The comprehension
# builds a fresh dict per field, in the order text, title.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        **{
            field_name: {
                'type': 'text',
                'fields': {
                    'keyword': {'type': 'keyword', 'ignore_above': 256},
                },
            }
            for field_name in ('text', 'title')
        },
    }
}

es.indices.put_mapping(index='my_index', body=mapping)

# Read the mapping back and show the field definitions the index now holds.
index_mapping = es.indices.get_mapping(index='my_index')
pprint(index_mapping["my_index"]["mappings"]["properties"])

In [None]:
# Same field definitions as a single dict: 'created_on' is a date, while
# 'text' and 'title' are 'text' fields with a 'keyword' sub-field
# (ignore_above=256). A fresh dict is built per field, in the order
# text, title.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        **{
            field_name: {
                'type': 'text',
                'fields': {
                    'keyword': {'type': 'keyword', 'ignore_above': 256},
                },
            }
            for field_name in ('text', 'title')
        },
    }
}

# Recreate 'my_index', this time supplying the mapping in the create call
# itself instead of a follow-up put_mapping request.
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index', mappings=mapping)

# Read the mapping back and show the field definitions the index now holds.
index_mapping = es.indices.get_mapping(index='my_index')
pprint(index_mapping["my_index"]["mappings"]["properties"])