In [1]:
!pip -q install langchain langchain-community  pypdf langchain-huggingface faiss-cpu langchain-text-splitters

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_classic.chains import RetrievalQA

In [3]:
# Location of the source PDF (Colab's /content working directory).
PDF_PATH = "/content/got.pdf"

# Build the loader, then read the PDF into LangChain documents.
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

In [4]:
# Split the loaded documents on newlines into chunks of at most ~1000
# characters, with a 30-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
docs = text_splitter.split_documents(documents)

In [5]:
# Sanity check: how many chunks the splitter produced.
len(docs)

34

In [6]:
# Embedding model used to vectorise the chunks.
# Use the GPU when one is available and fall back to CPU otherwise, so this
# cell also runs on a CPU-only runtime instead of failing on a missing CUDA
# device.
import torch

embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {"device": device}
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
)

In [7]:
# Embed every chunk and index the resulting vectors in an in-memory FAISS store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

In [8]:
# Write the FAISS index to a local folder so it can be reloaded later
# without re-embedding the documents.
vectorstore.save_local(folder_path="faiss_index_")

In [9]:
# Reload the index that was just written to disk.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading.
# That is acceptable here only because the folder was produced by this
# notebook in the previous cell; never enable it for an index obtained from
# an untrusted source.
persisted_vectorstore = FAISS.load_local(
    folder_path="faiss_index_",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [10]:
# Wrap the reloaded vector store in a retriever (default settings) so it can
# be plugged into a retrieval chain.
retriever = persisted_vectorstore.as_retriever()

# Ollama

In [11]:
!pip install -q langchain_ollama

In [12]:
!pip install colab-xterm #https://pypi.org/project/colab-xterm/



In [13]:
# Register the colab-xterm extension, which provides the %xterm magic used in
# the next cell.
%load_ext colabxterm

In [14]:
# Open an interactive terminal inside the notebook.
# NOTE(review): this is a manual step. Presumably the terminal is used to
# install/start the Ollama server and pull the `llama3.1` model that the cells
# below rely on; those commands are not recorded in this notebook, so confirm
# and document them for reproducibility.
%xterm

Launching Xterm...

<IPython.core.display.Javascript object>

In [15]:
from langchain_ollama import OllamaLLM

In [16]:
# Name of the Ollama model to query.
# NOTE(review): presumably requires a running Ollama server with this model
# already pulled (see the terminal step above) — confirm.
OLLAMA_MODEL = "llama3.1"
llm = OllamaLLM(model=OLLAMA_MODEL)

In [17]:
# Smoke test: send a trivial prompt to confirm the LLM answers before wiring
# it into the retrieval chain.
smoke_prompt = "Tell me a joke"
response = llm.invoke(smoke_prompt)
print(response)

Here's one:

What do you call a fake noodle?

(wait for it...)

An impasta!

Hope that made you laugh! Do you want to hear another one?


In [18]:
# Retrieval-augmented QA chain: the retriever supplies context from the FAISS
# index and the Ollama LLM produces the answer, using the "stuff" chain type.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
)

In [19]:
# Ask a question answered from the indexed PDF; the result dict is displayed
# as the cell output.
question = "who is the author of the game of thrones book"
chain.invoke(question)

{'query': 'who is the author of the game of thrones book',
 'result': 'George R.R. Martin'}