In [2]:
!pip install langchain langchain-community chromadb ollama sentence-transformers

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting chromadb
  Downloading chromadb-1.0.16-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)
Collecting ollama
  Downloading ollama-0.5.3-py3-none-any.whl.metadata (4.3 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.w

In [3]:
!pip install pypdf langchain-google-genai

Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.9-py3-none-any.whl.metadata (7.2 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_google_genai-2.1.9-py3-none-any.whl (49 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl (1.4 MB)
[2K   [90m

In [4]:
!pip install -U langchain-ollama

Collecting langchain-ollama
  Downloading langchain_ollama-0.3.6-py3-none-any.whl.metadata (2.1 kB)
Downloading langchain_ollama-0.3.6-py3-none-any.whl (24 kB)
Installing collected packages: langchain-ollama
Successfully installed langchain-ollama-0.3.6


In [5]:
import getpass
import os

# Prompt for the Gemini key only when the variable is unset or empty, so the
# secret is typed interactively and never written into the notebook itself.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

Enter API key for Google Gemini: ··········


# Document Loader

In [6]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("1 Basic Chess Instructions.pdf")
pages = []
async for page in loader.alazy_load():
  pages.append(page)

In [7]:
# Sanity check: show the first page's metadata, a blank line, then its text.
print(pages[0].metadata, pages[0].page_content, sep="\n\n")

{'producer': 'GNU Ghostscript 7.05', 'creator': 'dvips(k) 5.92b Copyright 2002 Radical Eye Software', 'creationdate': '', 'title': 'chessrules3.dvi', 'source': '1 Basic Chess Instructions.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1'}

BASIC RULES OF CHESS
Introduction
Chess is a game of strategy believed to have been invented
more then 1500 years ago in India. It is a game for two play-
ers, one with the light pieces and one with the dark pieces.
The chessboard is eight squares long by eight squares wide.
When sitting across the board from another player, the
lighter color square goes on each player's right hand side
(light on right) and the white queen is placed on a white
square and the black queen on the black square (queen on
her own color). The starting position of the pieces is shown
in the following chessboard:
8
     
7     
6
5
4
3
2     
1    	 
a b c d e f g h
The player with the white pieces moves rst and the players
then take turns moving. One pi

# Text Splitting

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters, measured in characters; consecutive chunks overlap so
# text that straddles a boundary appears in both neighbours.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(pages)

# Embeddings

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client for the Gemini embedding model; it is handed to the vector
# store further down to embed the chunks.
# NOTE(review): presumably authenticates via the GOOGLE_API_KEY environment
# variable set earlier in the notebook - confirm.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Storing Embeddings in a Vector Store

In [10]:
from langchain_community.vectorstores import Chroma

# Embed every chunk and store the vectors in a Chroma collection kept on disk
# under ./vector_db. Passing `persist_directory` is sufficient: the explicit
# `vectordb.persist()` call is deprecated and only triggered a warning, so it
# has been removed.
# NOTE(review): re-running this cell against an existing ./vector_db presumably
# adds the same chunks again - confirm, and clear the directory if so.
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
    persist_directory="vector_db",
)

  vectordb.persist()


# Setting up an LLM

In [11]:
from langchain.chat_models import init_chat_model

# Chat model that generates the final answer at the end of the chain.
model = init_chat_model(
    model="gemini-2.5-flash",
    model_provider="google_genai",
)

# Creating a retrieval QA chain

In [12]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Fetch the "rlm/rag-prompt" template from the LangChain Hub (network call).
# The chain below supplies it with "context" and "question" inputs.
# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by one
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Build the RAG pipeline step by step:
#   1. the retriever looks up chunks for the question and format_docs joins
#      them into the "context" string, while the question itself is passed
#      through unchanged;
#   2. both values fill the prompt template;
#   3. the chat model answers and the parser reduces the reply to plain text.
retriever = vectordb.as_retriever()
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
qa_chain = rag_inputs | prompt | model | StrOutputParser()



# Let's start asking our chatbot questions

In [13]:
# In-scope question: the answer should come from the chess rules PDF.
rook_question = "How does a rook moves?"
qa_chain.invoke(rook_question)

"The Rook can move any number of squares in one direction, either vertically or horizontally. Its path must not be blocked by other pieces. For example, it can capture an opponent's piece if it's in its path, but cannot move through its own pieces."

In [15]:
# Out-of-scope question: nothing in the indexed PDF covers it, so the chain
# is expected to say that it does not know.
off_topic_question = "Which studio is behind development of Death Stranding 2?"
qa_chain.invoke(off_topic_question)

"I don't know the answer. The provided context describes the rules of chess and does not contain any information about the development studio of Death Stranding 2."

In [None]:
# Works as intended: the chain answers from the PDF and admits when the context has no answer.