In [8]:
import warnings
warnings.filterwarnings("ignore")

from app.extractor import load_and_split
from app.embedder import embeddings
from app.llm_generator import get_llm
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Retrieval-QA walkthrough: load a PDF, index its chunks in FAISS, build a
# RetrievalQA chain on top of that index, and print answers to a fixed list
# of questions.

# Step 1: Load and split sample PDF
chunks = load_and_split("../data/sample_docs/sample.pdf")
print(f"Loaded {len(chunks)} chunks")

# Step 2: Build FAISS index from embedded chunks
vector_store = FAISS.from_documents(chunks, embedding=embeddings)

# Step 3: Create retriever and QA chain
retriever = vector_store.as_retriever()
llm = get_llm()

qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Step 4: Ask questions
# NOTE(review): `query` is never read in this cell; it is kept only in case
# code outside this view references it — confirm and remove if unused.
query = "What is the document about?"

questions = [
    "Who coined the term Artificial Intelligence?",
    "What major event marked the beginning of AI?",
    "How has AI evolved over the decades?",
    "What are AI winters?",
    "What modern tools use AI today?",
]

for q in questions:
    print(f"\n🔍 Question: {q}")
    # `Chain.run` is deprecated in LangChain (the warning is hidden by the
    # blanket warnings filter at the top of the cell). `invoke` is the
    # supported entry point; RetrievalQA takes its input under "query" and
    # returns the answer text under "result".
    answer = qa_chain.invoke({"query": q})["result"]
    print("🧠 Answer:", answer)




Loaded 1 chunks

🔍 Question: Who coined the term Artificial Intelligence?
🧠 Answer: According to the context, John McCarthy coined the term "Artificial Intelligence" in 1956 during the Dartmouth Conference.

🔍 Question: What major event marked the beginning of AI?
🧠 Answer: According to the text, the Dartmouth Conference in 1956 is widely considered the birth of AI as a field of research. This was also the year when John McCarthy coined the term "Artificial Intelligence".

🔍 Question: How has AI evolved over the decades?
🧠 Answer: According to the provided context, AI has evolved significantly since its inception in the 1950s. Initially inspired by the human brain's capabilities, early researchers sought to create machines that could perform tasks such as playing chess or solving algebraic equations. Over time, AI experienced multiple waves of hype and setbacks, often referred to as "AI winters." However, with the advent of deep learning, availability of large datasets, and high-perfor