In [None]:
%pip install chromadb langchain
%pip install sentence-transformers

# Embedding

In [None]:
# import
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)


In [None]:

# Read the source text file from disk into LangChain `Document` objects.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of state_of_the_union.txt before running.
source_path = "/home/ubuntu/sources/langchain/docs/docs/modules/state_of_the_union.txt"
loader = TextLoader(source_path)
documents = loader.load()


In [None]:

# Cut the loaded documents into chunks (chunk_size=1000, no overlap between
# neighbouring chunks). `docs` is the chunk list that the vector stores index.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)


In [None]:

# Build the embedding function from the open-source "all-MiniLM-L6-v2"
# sentence-transformers model; it is passed to both vector stores below.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)


## Sample of Chroma

In [None]:

from langchain_community.vectorstores import Chroma

In [None]:

# Embed every chunk in `docs` and index the result in a Chroma vector store.
db = Chroma.from_documents(
    embedding=embedding_function,
    documents=docs,
)

In [None]:

# Query the Chroma index.
# The hits are stored under their own name on purpose: assigning them to
# `docs` would replace the split chunk list, and the Milvus cell further down
# (which calls `from_documents(docs, ...)`) would then index only these few
# search hits instead of the whole document.
query = "What did the president say about Ketanji Brown Jackson"
chroma_results = db.similarity_search(query)

# Print the text of the first returned chunk.
print(chroma_results[0].page_content)

## Sample of Milvus

In [None]:
%pip install --upgrade --quiet  pymilvus

In [None]:
from langchain_community.vectorstores import Milvus

In [None]:
# Embed `docs` and store them in the "collection_1" collection of the Milvus
# server at 127.0.0.1:19530 (a server must already be listening there).
# `docs` is expected to be the chunk list produced by the text splitter.
vector_db = Milvus.from_documents(
    documents=docs,
    embedding=embedding_function,
    connection_args={"host": "127.0.0.1", "port": "19530"},
    collection_name="collection_1",
)

In [None]:
# Ask Milvus the same question.
# The hits get a dedicated name so that this cell does not overwrite `docs`;
# otherwise re-running the Milvus ingest cell would index search hits rather
# than the document chunks.
query = "What did the president say about Ketanji Brown Jackson"
milvus_results = vector_db.similarity_search(query)
milvus_results

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate

In [None]:
# Prompt used by the retrieval QA chain. `{context}` receives the retrieved
# chunks and `{question}` the user query; the instructions ask for a short
# answer and an explicit "don't know" instead of a made-up one.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [None]:
# llama2 served through Ollama; generated tokens are streamed to stdout by the
# handler. Handlers are passed via `callbacks` because the `callback_manager`
# constructor argument is deprecated in LangChain.
llm = Ollama(model="llama2", callbacks=[StreamingStdOutCallbackHandler()])

# Retrieval QA chain: pulls context from the Milvus-backed retriever and
# answers using the custom QA_CHAIN_PROMPT.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_db.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [None]:
# Display the LLM object's repr as the cell output to inspect its configuration.
llm

In [None]:
# Run the QA chain on the question defined above. `invoke` is used because
# calling the chain object directly (`qa_chain({...})`) is deprecated.
# The chain returns a dict; the answer text should be under the "result" key.
result = qa_chain.invoke({"query": query})