# Aviation Engine Maintenance RAG Demo

This demo shows how to:
- Load FAA handbook PDFs
- Create embeddings + FAISS index
- Run retrieval-augmented QA with a local LLM


In [None]:
# Install dependencies if running standalone
!pip install langchain sentence-transformers faiss-cpu pypdf transformers accelerate

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import pipeline

PDF_DIR = "../data/pdfs"

# Collect all PDFs that sit directly in PDF_DIR (sub-directories are not walked).
# - sorted(): os.listdir returns entries in arbitrary order; sorting makes the
#   order in which the files are later loaded reproducible between runs.
# - lower(): also match files whose extension is upper- or mixed-case (".PDF").
pdf_files = [
    os.path.join(PDF_DIR, name)
    for name in sorted(os.listdir(PDF_DIR))
    if name.lower().endswith(".pdf")
]
# Notebook display: how many PDFs were found, plus the first two paths.
len(pdf_files), pdf_files[:2]

In [None]:
# Load every collected PDF through PyPDFLoader and gather the resulting
# documents into one flat list, in the same order as pdf_files.
docs = [doc for pdf_path in pdf_files for doc in PyPDFLoader(pdf_path).load()]

# Split the loaded documents into chunks; chunk_size / chunk_overlap are the
# splitter's size and overlap settings.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(docs)
# Notebook display: number of chunks produced.
len(chunks)

In [None]:
# Embedding model used to vectorise the chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the FAISS vector store from the chunks using that embedding model.
db = FAISS.from_documents(
    chunks,
    embeddings,
)

# Expose the store as a retriever; {"k": 3} is forwarded as its search kwargs.
retriever = db.as_retriever(
    search_kwargs={"k": 3},
)

In [None]:
# Build a Hugging Face text-generation pipeline and wrap it as a LangChain LLM.
# Alternative model id: "meta-llama/Llama-3.1-8B-Instruct"
model_id = "google/gemma-2-2b-it"
llm_pipeline = pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    max_new_tokens=256,
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Combine the LLM with the retriever into a RetrievalQA chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
)

# Run one sample question through the chain and print question and answer.
query = "What is the function of the ignition system in an aircraft engine?"
result = qa.run(query)
print("Q:", query)
print("A:", result)