In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/My Drive/RAG-Chatbot-Project/

/content/drive/My Drive/RAG-Chatbot-Project


In [3]:
pip install langchain langchain_community sentence-transformers chromadb pypdf



In [4]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

DATA_PATH = './data'

loader = DirectoryLoader(DATA_PATH, glob="*.txt", loader_cls=TextLoader)

documents = loader.load()

print(f"Loaded {len(documents)} document(s).")
print(f"First document content: {documents[0].page_content[:100]}...") # Print first 100 chars

Loaded 2 document(s).
First document content: "Hello World"...


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
print(f"Example chunk: {chunks[0].page_content}")

Split 2 documents into 2 chunks.
Example chunk: "Hello World"


In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=model_name)

persist_directory = './chroma_db'

vectorstore = Chroma.from_documents(
    chunks,
    embedding_model,
    persist_directory=persist_directory
)

print(f"Successfully created and saved vector store to '{persist_directory}'.")

  embedding_model = HuggingFaceEmbeddings(model_name=model_name)


Successfully created and saved vector store to './chroma_db'.


In [7]:
from google.colab import userdata
from huggingface_hub import login

HF_TOKEN = userdata.get('HF_TOKEN')
login(token=HF_TOKEN)

print("Successfully logged in to Hugging Face!")

Successfully logged in to Hugging Face!


In [8]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    repetition_penalty=1.15
)

llm = HuggingFacePipeline(pipeline=pipe)

print("LLM is loaded and ready.")

`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


LLM is loaded and ready.


  llm = HuggingFacePipeline(pipeline=pipe)


In [9]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer concise.

Context: {context}
Question: {question}
Helpful Answer:
"""

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True
)

print("Successfully created the RAG chain.")

Successfully created the RAG chain.


In [10]:
question = "What is the message?"
result = qa_chain.invoke({"query": question})

print(f"Question: {result['query']}\n")
print(f"Answer: {result['result']}\n")
print(f"Source Document: {result['source_documents'][0].page_content}")

Question: What is the message?

Answer: 
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer concise.

Context: "Hello World"

"Hello World"

"Hello World"

"Hello World"
Question: What is the message?
Helpful Answer:
The message is "Hello World". It is a simple message that is often used to introduce a program or script.

Source Document: "Hello World"


In [11]:
question = "What is the James Webb Space Telescope?"
result = qa_chain.invoke({"query": question})

print(f"Question: {result['query']}\n")
print(f"Answer: {result['result']}\n")
print(f"Source Document: {result['source_documents'][0].page_content}")

Question: What is the James Webb Space Telescope?

Answer: 
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer concise.

Context: The James Webb Space Telescope (JWST) is a space telescope designed to conduct infrared astronomy. Its high-resolution and high-sensitivity instruments allow it to view objects too old, distant, or faint for the Hubble Space Telescope.

"Hello World"

"Hello World"

"Hello World"
Question: What is the James Webb Space Telescope?
Helpful Answer:
The James Webb Space Telescope is a space telescope designed to conduct infrared astronomy. Its high-resolution and high-sensitivity instruments allow it to view objects too old, distant, or faint for the Hubble Space Telescope.

Source Document: The James Webb Space Telescope (JWST) is a space telescope designed to conduct infrared astronomy. Its high-resoluti