<a href="https://colab.research.google.com/github/manideep-malyala/gen-ai-exp/blob/main/RAG_Ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain langchain-community sentence-transformers faiss-gpu pypdf langchain-ollama colab-xterm
%load_ext colabxterm
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Temporary helper cell: opens a terminal inside the notebook (colab-xterm).
# Use it to run commands by hand, in particular to start the Ollama server
# (e.g. `ollama serve`) before running the cells that talk to Ollama.
%xterm

In [None]:
# Download the gemma2:2b model. Both stdout and stderr of the pull are sent to
# /dev/null, so a failed pull prints nothing here.
# NOTE(review): presumably requires the Ollama server to be running already — confirm.
!ollama pull gemma2:2b > /dev/null 2>&1
# List the locally available models to verify the pull actually succeeded.
!ollama list

In [22]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from IPython.display import Markdown

In [23]:
# Path of the PDF to index.
file_path = r"/content/rs_paper.pdf"

# Chunking policy: split on blank lines, chunk_size=1000 with an overlap of 30.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=30,
)

# Load the PDF into `pages`, then break it into the chunks that get embedded.
loader = PyPDFLoader(file_path)
pages = loader.load()
documents = text_splitter.split_documents(pages)

In [26]:
# Load the sentence-embedding model exactly once.
# The model used to be instantiated twice: `embedding_model` (the one actually
# passed to FAISS) ignored `model_kwargs`, while the CUDA-configured
# `embed_model` was never used. A single instance now carries the device
# setting, which avoids loading the same weights twice.
model_kwargs = {"device": "cuda"}
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs=model_kwargs,
)
# Backward-compatible alias for any cell that still refers to `embed_model`.
embed_model = embedding_model

In [27]:
# Build the FAISS index from the chunked documents and persist it to disk.
vector_store = FAISS.from_documents(documents, embedding_model)
vector_store.save_local("faiss_index")

# Reload the persisted index. Recent langchain-community releases refuse to
# unpickle the saved docstore unless the caller opts in explicitly; that is
# acceptable here because the index was written by this very cell. Never set
# this flag when loading an index obtained from an untrusted source.
persistant_vector_store = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)
retriever = persistant_vector_store.as_retriever()

In [30]:
# LLM handle for the gemma2:2b model (the same tag pulled with `ollama pull`).
llm = Ollama(
    model="gemma2:2b",
)

In [None]:
# Retrieval-augmented QA chain: retrieved chunks are "stuffed" into a single
# prompt for the LLM, and the chunks used are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# Ask the user for a question and run the chain.
# `invoke` replaces calling the chain object directly (`qa({...})`), which
# goes through the deprecated `Chain.__call__` path.
user_query = input("Query : ")
ai_response = qa.invoke({"query": user_query})

print(60*"_")

# Render the model's answer as Markdown.
print("\nAI Response  :\n")
display(Markdown(ai_response['result']))

print(60*"_")

# Show the metadata of every source chunk that backed the answer.
print("\nReference  :\n")
for source in ai_response["source_documents"]:
    print(source.metadata)
