In [1]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(base_url="http://localhost:11434", model="bge-m3:latest")
embeddings

OllamaEmbeddings(model='bge-m3:latest', base_url='http://localhost:11434', client_kwargs={})

In [29]:
from langchain_elasticsearch import ElasticsearchStore, DenseVectorStrategy

vector_store = ElasticsearchStore(
    index_name="langchain-demo", 
    embedding=embeddings, 
    es_url="http://localhost:9200",
    es_user="Kstyle",
    es_password="12345",
    # strategy=DenseVectorStrategy(hybrid=True)
)

vector_store

<langchain_elasticsearch.vectorstores.ElasticsearchStore at 0x16af0216e40>

In [103]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :( I like python",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['6cd21bbd-b4a7-4de8-bb45-84e967258ed3',
 '2ae45386-4855-4798-8d22-bbc8427f3863',
 'e96727d1-4ef3-4c36-905d-cb22b4cd3d2b',
 '0154dc11-c5b3-42c6-b0b2-76237bc2113b',
 '1bb4a8e1-366e-40cc-9327-a77d8da588d4',
 '5ce99041-fcaa-4ebf-b5db-0d0c00ffb381',
 '6cb6309f-e2e9-4dc8-a236-4e64bf28c787',
 '3b5ac428-239a-4cfa-a8dd-794d0302456c',
 '48d8d1fb-ae9c-4b3f-9234-af90bea955a0',
 'bfb216fa-ab39-42e7-8ef0-7375412dbfb9']

In [95]:
results = vector_store.similarity_search(
    query="LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter=[{"term": {"metadata.source.keyword": "tweet"}}],
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [107]:
results = vector_store.similarity_search_with_score(
    query="Will it be hot tomorrow",
    k=1,
    filter=[{"term": {"metadata.source.keyword": "news"}}],
)
for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.841907] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]


In [108]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.2}
)
retriever.invoke("Stealing from the bank is a crime")

[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

In [109]:
import requests

url = 'http://127.0.0.1:9200/_cat/indices?v'
response = requests.get(url)

if response.status_code == 200:
    print(response.text)
else:
    print("Failed to retrieve indices. Status code: {response.status_code}")

health status index          uuid                   pri rep docs.count docs.deleted store.size pri.store.size dataset.size
yellow open   langchain-demo xV1SImphQQ-EpaGVTsSVsw   1   1         60            0    815.6kb        815.6kb      815.6kb



In [110]:
import requests

id = uuids[1]
print(id)
url = f'http://127.0.0.1:9200/_search?q=id:{id}&pretty'
response = requests.get(url)

if response.status_code == 200:
    print(response.text)
else:
    print("Failed to retrieve indices. Status code: {response.status_code}")

2ae45386-4855-4798-8d22-bbc8427f3863
{
  "took" : 34,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  }
}



In [112]:
import requests
import json

# id = uuids[1]
# print(id)
url = f'http://127.0.0.1:9200/_search?q=*:**&pretty'
response = requests.get(url)

if response.status_code == 200:
    data = json.loads(response.text)
    print(len(data['hits']['hits']))
    for text in data['hits']['hits']:
        print(text['_source']['text'])
        print(text['_source']['metadata'])

else:
    print("Failed to retrieve indices. Status code: {response.status_code}")

10
I had chocalate chip pancakes and scrambled eggs for breakfast this morning.
{'source': 'tweet'}
The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.
{'source': 'news'}
Building an exciting new project with LangChain - come check it out!
{'source': 'tweet'}
Robbers broke into the city bank and stole $1 million in cash.
{'source': 'news'}
Wow! That was an amazing movie. I can't wait to see it again.
{'source': 'tweet'}
Is the new iPhone worth the price? Read this review to find out.
{'source': 'website'}
The top 10 soccer players in the world right now.
{'source': 'website'}
LangGraph is the best framework for building stateful, agentic applications!
{'source': 'tweet'}
The stock market is down 500 points today due to fears of a recession.
{'source': 'news'}
I have a bad feeling I am going to get deleted :(
{'source': 'tweet'}


In [111]:
import requests
import json

# id = uuids[1]
# print(id)
url = f'http://127.0.0.1:9200/_search?q=*:**&pretty'
response = requests.get(url)

if response.status_code == 200:
    data = json.loads(response.text)
    print(len(data['hits']['hits']))
    for text in data['hits']['hits']:
        print(text['_source']['vector'])

else:
    print("Failed to retrieve indices. Status code: {response.status_code}")

10
[0.0157184, 0.04586467, -0.022512728, 0.013634988, -0.026848776, -0.02829377, 0.03410839, 0.03297639, -0.0043691755, -0.04646089, 0.007906058, -0.009406016, -0.032975372, -0.029201904, 0.031772774, -0.04317895, 0.02413713, -0.02075266, -0.054699454, -0.034766436, -0.02610009, 0.012181656, 0.0041269157, -0.018849172, -0.024924647, -0.0055686384, -0.027841905, 0.017047888, 0.035748728, -0.011216827, 0.030547254, -0.02914078, 0.015161842, -0.018850861, -0.020714894, -0.0071509103, 0.00091466325, 0.0056781177, -0.01814247, -0.014022484, -0.003984423, 0.010561492, 0.024609925, -0.024182359, -0.002200744, 0.014795598, -0.024718229, -0.020436687, -0.026240518, -0.019681502, 0.0049037756, 0.028966133, 0.023802625, -0.030757817, 0.04393722, 0.0081602605, -0.021759052, 0.030649941, -0.0615875, 0.00020364697, -0.00077069795, -0.015234266, 0.0732608, -0.02498566, -0.0014543184, 0.08634239, 0.028751183, 0.013255786, -0.02699404, -0.0124147525, -0.060797, 0.028668476, -0.010598533, -0.02777576, -