In [2]:
%reload_ext autoreload
%autoreload 2
import os, sys


# RAG using Elasticsearch

Step 1 is to run the Elasticsearch Docker image:

```md

RUN Elastic:

    docker network create demonet

    export ESI='-p 9200:9200 docker.elastic.co/elasticsearch/elasticsearch:8.16.1'
    export ESP='-e ELASTICSEARCH_PASSWORD=elastic -e ELASTICSEARCH_USERNAME=elastic'
    export ESS='-e xpack.security.enabled=false -e discovery.type=single-node'
    docker run --rm -it --name es01 --network=demonet ${ESS} ${ESP} ${ESI}

# Now connect using url http://localhost:9200
```

You may want to adjust some parameters.

Log in to the running container as root and install vim:

```sh
[ ]
    docker exec -u 0 -it es01 /bin/bash
    apt-get update
    apt-get install vim
```

In [12]:
#%%writefile ../db_elastic.py
#!/usr/bin/env python

from elasticsearch import Elasticsearch
from typing import Any, Dict, Optional, List
from gpt.ollama_embed import get_ollama_embedding

def esclient(url = "http://localhost:9200", user = 'elastic', pw = "elastic"):
    """Create an Elasticsearch client and confirm the cluster answers.

    Args:
        url: Base URL of the Elasticsearch node.
        user: Username sent via HTTP basic authentication.
        pw: Password sent via HTTP basic authentication.

    Returns:
        The ``Elasticsearch`` client instance.

    Raises:
        ConnectionError: If ``ping()`` reports that the cluster did not
            respond. ``info()`` may additionally raise the client library's
            own exceptions (presumably on transport/auth failures — confirm
            against the installed elasticsearch-py version).
    """
    # Certificate verification is explicitly disabled; pass a real bool
    # rather than the integer 0 so the intent is unambiguous.
    es = Elasticsearch(url, basic_auth=(user, pw), verify_certs=False)
    es.info()
    # ping() signals failure through its return value, so it has to be
    # checked; otherwise an unresponsive cluster would be returned silently.
    if not es.ping():
        raise ConnectionError(f"Elasticsearch at {url} did not respond to ping")
    return es

# Create an Elasticsearch index with a mapping for dense vector embeddings.
def create_index(es, index_name: str, dims=3064):
    """Create ``index_name`` with a text field and a dense-vector field.

    Nothing is created when the index already exists; a message is printed
    in either case.

    Args:
        es: Client object exposing ``indices.exists`` and ``indices.create``.
        index_name: Name of the index to create.
        dims: Number of dimensions declared for the ``embedding`` field; it
            has to match the vectors that will be stored.

    Returns:
        None.
    """
    already_present = es.indices.exists(index=index_name)
    if not already_present:
        # Indexed dense vector compared with cosine similarity.
        embedding_field = {
            "type": "dense_vector",
            "dims": dims,
            "index": True,
            "similarity": "cosine",
        }
        field_definitions = {
            "text": {"type": "text"},
            "embedding": embedding_field,
        }
        request_body = {"mappings": {"properties": field_definitions}}
        es.indices.create(index=index_name, body=request_body)
        print(f"Index '{index_name}' created successfully!")
    else:
        print(f"Index '{index_name}' already exists. Skipping creation.")

# Index documents with embeddings
def index_documents(es, index_name: str, documents: List[dict]):
    """Embed each document's text and store it in ``index_name``.

    Each document is written with its position in ``documents`` as the
    document id, and a confirmation line is printed per document.

    Args:
        es: Client object exposing an ``index`` method.
        index_name: Target index name.
        documents: Dicts that each carry a ``"text"`` entry.

    Returns:
        None.
    """
    for position, entry in enumerate(documents):
        # Compute the vector first, then assemble the stored payload.
        vector = get_ollama_embedding(entry["text"])
        payload = {"text": entry["text"], "embedding": vector}
        es.index(index=index_name, id=position, body=payload)
        print(f"Document {position} indexed successfully.")

# Main function
def example_load(index_name = "embeddings_index", dims=3024):
    """Create the demo index and load three sample documents into it.

    Connects with ``esclient()`` defaults, creates ``index_name`` if needed,
    then embeds and indexes a small fixed set of example sentences.

    Args:
        index_name: Name of the index to create and populate.
        dims: Embedding dimensionality forwarded to ``create_index``.
            NOTE(review): this default (3024) differs from ``create_index``'s
            own default (3064) — confirm which value matches the embedding
            model in use.

    Returns:
        None.
    """
    es = esclient()
    # Forward dims so the caller's value is actually applied; previously the
    # argument was accepted but never passed on.
    create_index(es, index_name, dims=dims)

    # Example data to index
    documents = [
        {"text": "Elasticsearch is a powerful search engine for text and vectors."},
        {"text": "OLLAMA can generate embeddings for semantic search."},
        {"text": "Combining Elasticsearch with OLLAMA enables vector search."},
    ]
    index_documents(es, index_name, documents)

    print("All documents indexed successfully.")

#example_load()


# Tests