In [68]:
import logging
import sys
import os
from dotenv import load_dotenv

load_dotenv()

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

import openai

# Configuring ElasticsearchStore

# Load documents, build VectorStoreIndex with Elasticsearch

In [69]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores import ElasticsearchStore

## Download data

In [70]:
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

--2024-01-17 14:36:42--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘data/paul_graham/paul_graham_essay.txt’


2024-01-17 14:36:43 (323 KB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]



## load documents

In [71]:
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()

from llama_index.storage.storage_context import StorageContext

vector_store = ElasticsearchStore(
    es_url="http://localhost:9200",
    # Or with Elastic Cloud
    # es_cloud_id="my_cloud_id",
    # es_user="elastic",
    # es_password="my_password",
    index_name="paul_graham",
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

INFO:elastic_transport.transport:GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
GET http://localhost:9200/ [status:200 duration:0.010s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Req

# Basic query example

In [72]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("what were his investments in Y Combinator?")
print(response)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:elastic_transport.transport:POST http://localhost:9200/paul_graham/_search [status:200 duration:0.020s]
POST http://localhost:9200/paul_graham/_search [status:200 duration:0.020s]
POST http://localhost:9200/paul_graham/_search [status:200 duration:0.020s]
POST http://localhost:9200/paul_graham/_search [status:200 dur

# Metadata Filters

In [73]:
from llama_index.schema import TextNode

nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
        },
    ),
    TextNode(
        text="Beautiful weather",
        metadata={
            "director": "Mark shuttle",
            "theme": "Mafia",
        },
    ),    
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
        },
    ),
]

# initialize the vector store
vector_store_metadata_example = ElasticsearchStore(
    index_name="movies_metadata_example",
    es_url="http://localhost:9200",
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store_metadata_example
)
index1 = VectorStoreIndex(nodes, storage_context=storage_context)


# Metadata filter
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters

filters = MetadataFilters(
    filters=[ExactMatchFilter(key="theme", value="Mafia")]
)

retriever = index1.as_retriever(filters=filters)

retriever.retrieve("weather is so beautiful")

INFO:elastic_transport.transport:GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
GET http://localhost:9200/ [status:200 duration:0.009s]
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Req

[NodeWithScore(node=TextNode(id_='1c023db8-edb9-48b7-8b06-ac2de1fa0ace', embedding=None, metadata={'director': 'Mark shuttle', 'theme': 'Mafia'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='cd0d432dd8907ae25697c3a94c8294b70185be23e73ec3ad93f316a3362a9cdc', text='Beautiful weather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=1.0),
 NodeWithScore(node=TextNode(id_='44597058-43d8-4162-8ffe-34a58163eef8', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='81cf4b9e847ba42e83fc401e31af8e17d629f0d5cf9c0c320ec7ac69dd0257e1', text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.9427951049198355)]

In [74]:
retriever.retrieve("The godfather is a nice person")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:elastic_transport.transport:POST http://localhost:9200/movies_metadata_example/_search [status:200 duration:0.015s]
POST http://localhost:9200/movies_metadata_example/_search [status:200 duration:0.015s]
POST http://localhost:9200/movies_metadata_example/_search [status:200 duration:0.015s]
POST http://localhost:9200

[NodeWithScore(node=TextNode(id_='44597058-43d8-4162-8ffe-34a58163eef8', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='81cf4b9e847ba42e83fc401e31af8e17d629f0d5cf9c0c320ec7ac69dd0257e1', text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=1.0),
 NodeWithScore(node=TextNode(id_='1c023db8-edb9-48b7-8b06-ac2de1fa0ace', embedding=None, metadata={'director': 'Mark shuttle', 'theme': 'Mafia'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='cd0d432dd8907ae25697c3a94c8294b70185be23e73ec3ad93f316a3362a9cdc', text='Beautiful weather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.9803472140656279)]

# Custom Filters and overriding Query

In [75]:
def custom_query(query, query_str):
    print("custom query", query)
    return query

query_engine = index.as_query_engine(
    vector_store_kwargs={
        "es_filter": [{"match": {"content": "growing up"}}],
        "custom_query": custom_query,
    }
)

response = query_engine.query("what were his investments in Y Combinator?")
print(response)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
custom query {'knn': {'filter': [{'match': {'content': 'growing up'}}], 'field': 'embedding', 'query_vector': [0.0026147642638534307, -0.03282194584608078, 0.01616211235523224, -0.02955910563468933, -0.006743430159986019, 0.016300367191433907, -0.03702492266893387, 0.0023157859686762094, -0.002994967857375741, -0.00354971