In [1]:

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the CTSE lecture notes (text file)

#Lecture notes file is in the same folder as this notebook
loader = PyPDFLoader('CTSE.pdf')  # Replace with your file name
documents = loader.load()

print(f"Loaded {len(documents)} document(s).")


text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)

docs = text_splitter.split_documents(documents)
print(f"Split into {len(docs)} document chunks.")

Loaded 37 document(s).
Split into 37 document chunks.


In [3]:
# Create embeddings using Huggingface model

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the FAISS vector database
db = FAISS.from_documents(docs, embeddings)
# Define retriever from vector store
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

print("Embeddings and vector store created successfully.")


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


Embeddings and vector store created successfully.


In [4]:
# Load the model
hf_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    max_length=512
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [5]:
# Set up the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)


In [None]:

# Configure logging to log unanswered or incorrect questions
logging.basicConfig(filename="incorrect_answers.log", level=logging.WARNING, format='%(asctime)s - %(message)s')
 
print("CTSE Chatbot is ready! Ask questions about the lecture notes. Type 'exit' to quit.\n")
 
# Chat loop
while True:
    query = input("Your Question: ")

    if query.lower().strip() == 'exit':
        print("👋 Goodbye!")
        break

    try:
        # Get top documents for context
        relevant_docs = retriever.get_relevant_documents(query)
        if not relevant_docs or all(len(doc.page_content.strip()) == 0 for doc in relevant_docs):
            print("❗ Sorry, I couldn’t find relevant information in the notes.")
            logging.warning(f"No relevant content for query: '{query}'")
            continue

        # Get answer using invoke
        response = qa_chain.invoke({"query": query})
        answer = response["result"].strip()

        if not answer or len(answer) < 10:
            print("❗ Sorry, I couldn’t find a clear answer in the notes.")
            logging.warning(f"Weak answer for query: '{query}' → '{answer}'")
        else:
            print(f"\n Answer: {answer}\n")

            

    except Exception as e:
        print(f"❌ Error: {e}")




CTSE Chatbot is ready! Ask questions about the lecture notes. Type 'exit' to quit.



Your Question:  hard


  relevant_docs = retriever.get_relevant_documents(query)


❗ Sorry, I couldn’t find a clear answer in the notes.


Your Question:  what is  Generative Pre-Trained Transformers



✅ Answer: text generation



Your Question:  What mean of  Generative Pre-Trained Transformers



✅ Answer: Transformers are particularly useful for tasks where the input and output sequences can have variable lengths, and where long-range dependencies are important.

