In [None]:
# Run this in your Colab notebook
!pip install langchain langchain-community pypdf sentence-transformers faiss-cpu chromadb langchain-ollama
!pip install langchain_text_splitters
!pip install langchain

In [6]:
# Uninstall potentially conflicting packages
!pip uninstall -y transformers sentence-transformers
# Ensure langchain-huggingface is installed and import from it
!pip install -U langchain-huggingface
!pip install sentence-transformers

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings # Updated import
from langchain_community.vectorstores import FAISS

# Load PDF
print("Loading PDF...")
loader = PyPDFLoader("/content/BNBC 2020_Earthquake load.pdf")
documents = loader.load()

# Split into chunks
print("Splitting documents...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
texts = text_splitter.split_documents(documents)
print(f"Created {len(texts)} text chunks")

# Create embeddings
print("Creating embeddings...")
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Create vector store
print("Building vector store...")
vectorstore = FAISS.from_documents(texts, embeddings)
print("✅ Vector store created!")

Collecting sentence-transformers
  Downloading sentence_transformers-5.2.2-py3-none-any.whl.metadata (16 kB)
Collecting transformers<6.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-5.1.0-py3-none-any.whl.metadata (31 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-1.4.1-py3-none-any.whl.metadata (13 kB)
Downloading sentence_transformers-5.2.2-py3-none-any.whl (494 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.1/494.1 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-5.1.0-py3-none-any.whl (10.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m96.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-1.4.1-py3-none-any.whl (553 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m553.3/553.3 kB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface-hub, transformer

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Building vector store...
✅ Vector store created!


In [7]:
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate

# Initialize the LLM (Ollama)
# Make sure Ollama is running and the desired model is pulled (e.g., 'llama2')
llm = Ollama(model="llama2")

# Create a retriever from the vector store
retriever = vectorstore.as_retriever()

# Define a prompt template
# This template will guide the LLM on how to answer questions based on retrieved context
prompt_template = ChatPromptTemplate.from_template(
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.

{context}

Question: {question}
Answer:"""
)

# Create the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff", # 'stuff' means to stuff all retrieved documents into the prompt
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template}
)

# Ask a question
query = "What are the earthquake load requirements for buildings?"

print(f"Question: {query}\n")
result = qa_chain({"query": query})

print("Answer:", result["result"])
print("\n--- Source Documents ---")
for i, doc in enumerate(result["source_documents"], 1):
    print(f"\nSource {i} (Page {doc.metadata.get('page', 'N/A')}):")
    print(doc.page_content[:300] + "...")

Question: What are the earthquake load requirements for buildings?



NameError: name 'qa_chain' is not defined