In [1]:
!pip install PyPDF2 sentence-transformers faiss-cpu transformers torch

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Program Files\Python312\python.exe -m pip install --upgrade pip


In [2]:
from PyPDF2 import PdfReader

In [3]:
def extract_text_from_pdf(pdf_path):                                                           #Extract text from pdf
  reader = PdfReader(pdf_path)
  text = ""
  for page in reader.pages:
    text +=page.extract_text() + "\n"
  return text


In [4]:
def chunk_text(text,chunk_size=300):                                                          # splits text into chunks
  words = text.split()
  return [" ".join(words[i:i+chunk_size]) for i in range (0,len(words),chunk_size)]

In [5]:
from sentence_transformers import SentenceTransformer




In [6]:
import faiss

In [17]:
def create_faiss_index(chunks,model):                                                          # creating embeddings and faiss index
  embeddings = model.encode(chunks,convert_to_numpy=True,show_progress_bar=True)
  dim = embeddings.shape[1]
  index = faiss.IndexFlatIP(dim)  

  index.add(embeddings)
  return index,embeddings

In [18]:
def search_faiss(query,model,index,chunks,top_k=3):                             # Searches for the most relevant text chunks to the query using FAISS
  query_emb = model.encode([query],convert_to_numpy=True)
  distances,indices = index.search(query_emb,top_k)
  results = [chunks[i] for i in indices[0]]
  return results

In [19]:
from transformers import pipeline

In [20]:
summarizer = pipeline("summarization",model="sshleifer/distilbart-cnn-12-6")           # Load a pre-trained summarization model (DistilBART)

Device set to use cpu


In [24]:
def summarize_text(text):
  return summarizer(text,max_length=100,min_length=30,do_sample=False)[0]["summary_text"]

In [22]:
def rag_bot(pdf_path,query):                                                             #Combine everything into a chatbot
  print("extracting text...")
  text = extract_text_from_pdf(pdf_path)                                                 # extract text      

  print("chunking text....")                                                             #chunk text
  chunks = chunk_text(text)

  print("creating embeddings....")                                                       # Generate embeddings and create FAISS index
  model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
  index,embeddings=create_faiss_index(chunks,model)

  print("searching for relevant chunks...")                                              # Search for relevant chunks
  top_chunks = search_faiss(query,model,index,chunks)

  print("summarizing answer....")                                                        # Summarize top chunks into a concise answer
  combined_text = "".join(top_chunks)
  summary = summarize_text(combined_text)
  return summary,top_chunks

In [25]:
pdf_path = "small_helmet.pdf"
query = "What are the safety features of the helmet?"                                    # User query

                                                                                         # Run the chatbot and get answer and reference chunks
answer, refs = rag_bot(pdf_path, query)

                                                                                         # Display the final summarized answer
print("\n Final Answer:\n", answer)

                                                                                         # Display the top chunks used for reference
print("\n References:")
for i, ref in enumerate(refs, 1):
    print(f"{i}. {ref[:200]}...")

extracting text...
chunking text....
creating embeddings....


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

searching for relevant chunks...
summarizing answer....

 Final Answer:
  Smart Safety Helmet features real-time fall detection, heart rate monitoring and GPS tracking for location . This helmet ensures maximum protection for bikers on the road .

 References:
1. Smart Safety Helmet Features: 1. Real-time fall detection. 2. Heart rate monitoring. 3. GPS tracking for location. 4. Emergency alerts to contacts. 5. Built-in voice assistance. This helmet ensures ma...
2. Smart Safety Helmet Features: 1. Real-time fall detection. 2. Heart rate monitoring. 3. GPS tracking for location. 4. Emergency alerts to contacts. 5. Built-in voice assistance. This helmet ensures ma...
3. Smart Safety Helmet Features: 1. Real-time fall detection. 2. Heart rate monitoring. 3. GPS tracking for location. 4. Emergency alerts to contacts. 5. Built-in voice assistance. This helmet ensures ma...
