In [1]:
from elasticsearch import Elasticsearch

# Initialize the Elasticsearch client against the node at http://localhost:9200.
# `es` is a module-level name that the helper functions below read directly.
es = Elasticsearch(["http://localhost:9200"])

# Index names whose existence and store size are reported further down
indices = ["documents", "imagedocuments", "docutrail","clipdocuments"]  # Replace with your actual index names

# Function to check if an index exists
def index_exists(index_name):
    """Report whether the cluster has an index called ``index_name``.

    Delegates to ``es.indices.exists`` on the module-level client and hands
    back that call's result unchanged; callers use it in a truthiness check.
    """
    found = es.indices.exists(index=index_name)
    return found

# Function to get the size of documents in an index in GB
def get_index_size_gb(index_name):
    """Return the store size of ``index_name`` expressed in GB.

    The size is read from the index stats response
    (``indices[<name>].total.store.size_in_bytes``) and divided by 1024 ** 3.

    Returns 0 when the index does not exist, or when the stats response has
    no entry keyed by exactly ``index_name``.
    """
    # Guard: nothing to measure if the index is absent.
    if not index_exists(index_name):
        return 0

    per_index_stats = es.indices.stats(index=index_name)['indices']

    # Guard: the response is keyed by index name; bail out if ours is missing.
    if index_name not in per_index_stats:
        return 0

    stored_bytes = per_index_stats[index_name]['total']['store']['size_in_bytes']
    return stored_bytes / (1024 ** 3)  # bytes -> GB

# Check existence and size of each index in GB
for index in indices:
    exists = index_exists(index)
    if exists:
        # Look up the size only for indices that are present:
        # get_index_size_gb() performs its own existence check, so calling it
        # for a missing index just costs an extra request to print nothing.
        size_gb = get_index_size_gb(index)
        print(f"Index '{index}' exists. Size: {size_gb:.2f} GB.")
    else:
        print(f"Index '{index}' does not exist.")

Index 'documents' exists. Size: 6.68 GB.
Index 'imagedocuments' does not exist.
Index 'docutrail' does not exist.
Index 'clipdocuments' does not exist.


In [2]:
from elasticsearch import Elasticsearch

# Initialize Elasticsearch client (this cell creates its own client for http://localhost:9200)
es = Elasticsearch(["http://localhost:9200"])

# Name of the index whose mapping is inspected
index_name = 'documents'

# Fetch the mapping of that index from the cluster
mapping = es.indices.get_mapping(index=index_name)

# Print the raw mapping response exactly as the client returned it
print(mapping)

{'documents': {'mappings': {'properties': {'content': {'type': 'text'}, 'image_data': {'type': 'text'}, 'mistral_embedding': {'type': 'dense_vector', 'dims': 4096, 'index': True, 'similarity': 'cosine', 'index_options': {'type': 'int8_hnsw', 'm': 16, 'ef_construction': 100}}, 'title': {'type': 'text'}}}}}


In [None]:
# To list the indices and their sizes: curl -X GET "http://localhost:9200/_cat/indices?v"
# To delete an index (replace index_name with the actual index name): curl -X DELETE "http://localhost:9200/index_name"

In [4]:
from elasticsearch import Elasticsearch

# Initialize Elasticsearch client
es = Elasticsearch(["http://localhost:9200"])

# Index name
index_name = 'documents'

# New field to add to the existing mapping: a 3584-dimensional dense vector.
# Only `type` and `dims` are specified here; every other dense_vector setting
# is left for the server to fill in.
new_field_mapping = {
    "properties": {
        "gte_embedding": {
            "type": "dense_vector",
            "dims": 3584
        }
    }
}

# Update the mapping of the index.
# A failure is reported instead of raised so the notebook keeps running, but
# the verification step lives in the `else` branch: it must not run after a
# failed update, otherwise the unchanged mapping would be printed under the
# "Updated mapping:" label.
try:
    response = es.indices.put_mapping(index=index_name, body=new_field_mapping)
except Exception as e:
    print("An error occurred while updating mapping:", str(e))
else:
    print("Mapping update successful:", response)

    # Get and print the updated mapping to verify
    updated_mapping = es.indices.get_mapping(index=index_name)
    print("Updated mapping:", updated_mapping)

Mapping update successful: {'acknowledged': True}
Updated mapping: {'documents': {'mappings': {'properties': {'content': {'type': 'text'}, 'gte_embedding': {'type': 'dense_vector', 'dims': 3584, 'index': True, 'similarity': 'cosine', 'index_options': {'type': 'int8_hnsw', 'm': 16, 'ef_construction': 100}}, 'image_data': {'type': 'text'}, 'mistral_embedding': {'type': 'dense_vector', 'dims': 4096, 'index': True, 'similarity': 'cosine', 'index_options': {'type': 'int8_hnsw', 'm': 16, 'ef_construction': 100}}, 'title': {'type': 'text'}}}}}


In [1]:
# NOTE(review): prints only the label; no mapping value is passed to print() here.
# Looks like a leftover from the cell above — confirm whether it is still needed.
print("Updated mapping:")

Updated mapping:
