In [None]:
!pip install -q langchain
!pip install -q transformers
!pip install -q pymilvus
!pip install -q transformers
!pip install -q sentence-transformers

In [None]:
import os

from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.evaluation import load_evaluator
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Milvus

In [None]:
# Hugging Face model id of the sentence-transformers checkpoint used for all embeddings.
modelPath = "sentence-transformers/all-mpnet-base-v2"

# No extra model-construction options are passed.
model_kwargs = {}

# Encoding options: 'normalize_embeddings' is set to True, i.e. the encoder is asked
# to return normalized vectors (the previous comment incorrectly said False).
encode_kwargs = {'normalize_embeddings': True}

# Build the embedding wrapper shared by the distance evaluator and the vector store.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,        # which pre-trained model to load
    model_kwargs=model_kwargs,   # model configuration options (none)
    encode_kwargs=encode_kwargs  # options applied when encoding text
)

In [None]:
# Embed a single word, then show the vector's length followed by its first three components.
example = embeddings.embed_query("apple")
print(len(example), example[:3], sep="\n")

In [None]:
# Same probe for a second word; only the leading three components are shown.
example = embeddings.embed_query("pear")
print(example[0:3])

In [None]:
# Third probe, this time with a non-fruit word; prints the leading three components.
example = embeddings.embed_query("computer")
print(example[0:3])

In [None]:
# Build an "embedding_distance" evaluator that reuses the notebook's embedding model,
# so the scores below reflect the same vectors that are later stored in Milvus.
evaluator = load_evaluator("embedding_distance", embeddings=embeddings)

# First pair: two fruits. The cell displays the evaluator's result for this pair.
evaluator.evaluate_strings(prediction="apple", reference="pear")

In [None]:
# Second pair: a fruit against an unrelated word, for comparison with apple/pear.
evaluator.evaluate_strings(reference="computer", prediction="apple")

In [None]:
# Third pair: the other fruit against the same unrelated word.
evaluator.evaluate_strings(reference="computer", prediction="pear")

In [None]:
# Point a web loader at the source page, then fetch it; `data` holds whatever
# `load()` returns and is the input to the text splitter further down.
loader = WebBaseLoader(web_path="https://www.nutanixbible.com/classic")
data = loader.load()

In [None]:
#print(loader)

In [None]:
#print(data)

In [None]:
# Cut the loaded page into chunks of at most 500 characters with no overlap
# between neighbouring chunks, then report how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
docs = text_splitter.split_documents(data)
len(docs)

In [None]:
# Spot-check the split by printing the text of one chunk.
# NOTE(review): index 10 is hard-coded and raises IndexError if fewer than 11 chunks were produced.
print(docs[10].page_content)

In [None]:
# Embed every chunk with `embeddings` and store the result in the Milvus
# collection "nutanixbible_web".
# The server address is read from MILVUS_HOST / MILVUS_PORT so the notebook can run
# against another deployment without edits; the defaults are the original endpoint.
# NOTE(review): re-running this cell presumably inserts the same chunks again into
# the existing collection — confirm before re-executing.
vector_db = Milvus.from_documents(
    docs,
    embeddings,
    collection_name="nutanixbible_web",
    connection_args={
        "host": os.environ.get("MILVUS_HOST", "10.42.250.123"),
        "port": os.environ.get("MILVUS_PORT", "19530"),
    },
)

In [None]:
# Ask a natural-language question, retrieve the matching chunks from the vector
# store, and print the text of the first result returned.
question = "What is Nutanix Kubernetes Engine?"
result_docs = vector_db.similarity_search(query=question)
print(result_docs[0].page_content)