In [None]:
# 📦 Install required packages
!pip install langchain langchain_groq groq pymupdf faiss-cpu sentence-transformers langchain-community

# 📚 Import libraries
import os
from getpass import getpass

from google.colab import files
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_groq import ChatGroq

Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langsmith<0.4,>=0.1

In [None]:
# 📁 Step 1: Upload PDF
print("📤 Upload a PDF file")
uploaded = files.upload()  # mapping keyed by the name of each uploaded file

# If nothing was uploaded (e.g. the dialog was dismissed) there is no first
# key to take; fail with an actionable message instead of a bare IndexError.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select a PDF.")

# Only the first uploaded file is processed by the rest of the notebook.
file_path = next(iter(uploaded))

# 📄 Step 2: Load, split and embed PDF
def load_and_process_pdf(
    file_path,
    chunk_size=1000,
    chunk_overlap=100,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
):
    """Build a FAISS vector store from the text of a PDF file.

    The PDF is loaded with ``PyMuPDFLoader``, split into overlapping chunks
    with ``RecursiveCharacterTextSplitter``, embedded with a Hugging Face
    sentence-transformers model and indexed with FAISS. Progress messages are
    printed before each stage.

    Args:
        file_path: Path of the PDF file to load.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store built from the PDF's chunks.

    Raises:
        ValueError: If the PDF yields no text chunks (for example an empty
            file, or scanned pages without a text layer).
    """
    print("[+] Loading PDF...")
    docs = PyMuPDFLoader(file_path).load()

    print("[+] Splitting into chunks...")
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_docs = splitter.split_documents(docs)

    # With zero chunks there is nothing to embed and the index cannot be
    # built; stop here with a clear message rather than an opaque error
    # raised later from inside the vector store construction.
    if not split_docs:
        raise ValueError(
            f"No extractable text found in {file_path!r}; "
            "the PDF may be empty or contain only scanned images."
        )

    print("[+] Generating embeddings...")
    embedder = HuggingFaceEmbeddings(model_name=embedding_model)

    print("[+] Creating vector store...")
    return FAISS.from_documents(split_docs, embedder)

# Index the uploaded PDF; the retriever in the QA chain is built from `vectorstore`.
vectorstore = load_and_process_pdf(file_path=file_path)


📤 Upload a PDF file


Saving example.pdf to example.pdf
[+] Loading PDF...
[+] Splitting into chunks...
[+] Generating embeddings...


  embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[+] Creating vector store...


In [None]:
# 🤖 Step 3: Initialize Groq LLM
# SECURITY: never hardcode the API key in the notebook. It is read from the
# GROQ_API_KEY environment variable, falling back to a hidden prompt so the
# value is not echoed into the cell output.
# NOTE(review): a key was previously stored in plain text in this cell; treat
# it as leaked and revoke it with the provider.
groq_api_key = (os.environ.get("GROQ_API_KEY") or getpass("🔑 Enter your Groq API key: ")).strip()
if not groq_api_key:
    raise ValueError("A Groq API key is required: set GROQ_API_KEY or enter it at the prompt.")

# NOTE(review): confirm this model id is still served by Groq before relying on it.
GROQ_MODEL_NAME = "llama3-70b-8192"

llm = ChatGroq(
    temperature=0,
    groq_api_key=groq_api_key,
    model_name=GROQ_MODEL_NAME,
)

# Retrieval-augmented QA over the PDF's vector store. Source documents are
# returned alongside the answer under the chain's result mapping.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)

In [None]:
# 🗨️ Step 4: Chat Loop
# Reads questions from the user until 'quit' or 'exit' (case-insensitive) is
# entered, and prints the chain's answer for each question.
print("\n✅ PDF is ready! Type your questions below (type 'quit' or 'exit' to stop):\n")

while True:
    query = input("❓ Ask a question: ").strip()
    if not query:
        # Blank input: nothing to ask, so prompt again instead of calling the LLM.
        continue
    if query.lower() in ("exit", "quit"):
        print("👋 Exiting. Thank you!")
        break
    # Use `.invoke(...)`; calling the chain object directly is deprecated in
    # LangChain and emits a deprecation warning on every question.
    result = qa_chain.invoke({"query": query})
    print("💡 Answer:", result['result'], "\n")


✅ PDF is ready! Type your questions below (type 'quit' or 'exit' to stop):

❓ Ask a question: Q1: What is Artificial Intelligence (AI)?


  result = qa_chain({"query": query})


💡 Answer: Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and learn like humans. 

❓ Ask a question: What are the key technologies under AI?
💡 Answer: According to the provided context, the key technologies under AI are:

1. Machine learning
3. Natural language processing (NLP)
2. Computer vision
4. Robotics 

❓ Ask a question: What is machine learning?
💡 Answer: Machine learning is a subset of Artificial Intelligence (AI) that allows computers to learn from data without being explicitly programmed. It is widely used in predictive analytics, recommendation systems, and autonomous vehicles. 

❓ Ask a question: EXIT
👋 Exiting. Thank you!
