In [22]:
import fitz  # PyMuPDF
from langchain_groq import ChatGroq
from langchain.vectorstores import SKLearnVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Step 1: Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        One string holding each page's ``get_text()`` output, in page order.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding the string once per page.
        return "".join(page.get_text() for page in doc)

# Step 2: Build the shared Groq chat model that QAAgent sends its prompts to
llm = ChatGroq(
    max_retries=2,  # retry a failed request up to two times
    timeout=None,  # no request timeout configured
    max_tokens=None,  # do not cap the completion length
    temperature=0,  # lowest sampling temperature
    model="mixtral-8x7b-32768",  # Groq-hosted model identifier
)

# Step 3: Split the document into chunks for easier retrieval
def process_document(pdf_path, *, chunk_size=250, chunk_overlap=0):
    """Extract a PDF's text and split it into token-sized chunks.

    Args:
        pdf_path: Path of the PDF to load via ``extract_text_from_pdf``.
        chunk_size: Chunk size handed to the tiktoken-based splitter.
            Defaults to 250, the value previously hard-coded here.
        chunk_overlap: Overlap between consecutive chunks. Defaults to 0,
            the value previously hard-coded here.

    Returns:
        The list produced by ``split_documents`` for the single ``Document``
        that wraps the whole PDF text.
    """
    doc_obj = Document(page_content=extract_text_from_pdf(pdf_path))
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents([doc_obj])

# Chunk the three source PDFs (calendar, FAQ, course catalog), in that order
calendar_splits, faq_splits, course_catalog_splits = map(
    process_document,
    (
        r"D:\nexus\New folder\univesity_calander.pdf",
        r"D:\nexus\New folder\faq.pdf",
        r"D:\nexus\New folder\course-catalog.pdf",
    ),
)

# Step 4: Embed each document's chunks into its own vector store.
# One embedding model instance is shared by all three stores.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
calendar_vectorstore = SKLearnVectorStore.from_documents(calendar_splits, embedding)
faq_vectorstore = SKLearnVectorStore.from_documents(faq_splits, embedding)
course_catalog_vectorstore = SKLearnVectorStore.from_documents(course_catalog_splits, embedding)

# Retrieve the top 4 most relevant chunks per query. The number of results is
# configured through `search_kwargs`, which is the documented as_retriever()
# option for search parameters such as `k`.
calendar_retriever = calendar_vectorstore.as_retriever(search_kwargs={"k": 4})
faq_retriever = faq_vectorstore.as_retriever(search_kwargs={"k": 4})
course_catalog_retriever = course_catalog_vectorstore.as_retriever(search_kwargs={"k": 4})

# Define the base QA Agent class
class QAAgent:
    """Answer questions by retrieving context chunks and prompting a chat model.

    Args:
        retriever: Object exposing ``get_relevant_documents(question)`` whose
            results each carry a ``page_content`` attribute.
        llm: Optional chat model exposing ``invoke(prompt)`` and returning an
            object with a ``content`` string. When omitted, the module-level
            ``llm`` is looked up each time a question is asked.
    """

    def __init__(self, retriever, llm=None):
        self.retriever = retriever
        self.llm = llm

    def ask_question(self, question):
        """Return the model's answer to ``question`` using retrieved context.

        Args:
            question: The question text; used both for retrieval and in the
                prompt.

        Returns:
            The ``content`` of the model response with surrounding whitespace
            stripped.
        """
        # Retrieve the chunks most relevant to the question.
        relevant_chunks = self.retriever.get_relevant_documents(question)

        # Combine the chunks into one context, separated by blank lines.
        context = "\n\n".join(chunk.page_content for chunk in relevant_chunks)

        # Formulate the prompt with the relevant context.
        prompt = f"""
        Context: {context}
        
        Question: {question}
        
        Please answer the question clearly, using only the provided context. No extra information.
        """

        # Prefer an injected model; otherwise fall back to the module-level one.
        model = self.llm if self.llm is not None else llm
        response = model.invoke(prompt)

        return response.content.strip()

# Example usage of the QAAgent: a generic agent backed by the calendar retriever
qa_agent = QAAgent(calendar_retriever)

# Define specific agents for different tasks

class StudentQueryAgent(QAAgent):
    """Agent for free-form student questions.

    Construction is inherited unchanged from ``QAAgent``; an ``__init__`` that
    only forwards to the parent adds nothing.
    """

    def handle_student_query(self, query):
        """Return the answer to ``query`` as produced by ``ask_question``."""
        # Currently a direct pass-through; query-specific logic would go here.
        return self.ask_question(query)

class CalendarAgent(QAAgent):
    """Agent for calendar-related questions.

    Construction is inherited unchanged from ``QAAgent``; an ``__init__`` that
    only forwards to the parent adds nothing.
    """

    def get_event(self, event_name):
        """Ask when the event called ``event_name`` takes place.

        Args:
            event_name: Name of the event, embedded verbatim in the question.

        Returns:
            The answer string produced by ``ask_question``.
        """
        question = f"When is the event '{event_name}' happening?"
        return self.ask_question(question)

class CourseCatalogAgent(QAAgent):
    """Agent for questions about specific courses.

    Construction is inherited unchanged from ``QAAgent``; an ``__init__`` that
    only forwards to the parent adds nothing.
    """

    def get_course_info(self, course_name):
        """Ask for information about the course called ``course_name``.

        Args:
            course_name: Name of the course, embedded verbatim in the question.

        Returns:
            The answer string produced by ``ask_question``.
        """
        question = f"Tell me about the course '{course_name}'"
        return self.ask_question(question)

class FAQAgent(QAAgent):
    """Agent for FAQ questions.

    Construction is inherited unchanged from ``QAAgent``; an ``__init__`` that
    only forwards to the parent adds nothing.
    """

    def get_faq_answer(self, faq_question):
        """Return the answer to ``faq_question`` as produced by ``ask_question``."""
        # Currently a direct pass-through; FAQ-specific logic would go here.
        return self.ask_question(faq_question)

# Example usage: each agent is paired with a sample query and its answer is
# printed, in the order student query, calendar, course catalog, FAQ.

# General student question, answered from the calendar document
student_query_agent = StudentQueryAgent(calendar_retriever)
student_query = "What are the deadlines for course registration?"

# Calendar event lookup
calendar_agent = CalendarAgent(calendar_retriever)
calendar_event = "Orientation Day"

# Course catalog lookup
course_catalog_agent = CourseCatalogAgent(course_catalog_retriever)
course_info = "Data Structures"

# FAQ lookup
faq_agent = FAQAgent(faq_retriever)
faq_question = "How do I apply for financial aid?"

student_answer = student_query_agent.handle_student_query(student_query)
print(student_answer)

event_answer = calendar_agent.get_event(calendar_event)
print(event_answer)

course_answer = course_catalog_agent.get_course_info(course_info)
print(course_answer)

faq_answer = faq_agent.get_faq_answer(faq_question)
print(faq_answer)


The deadlines for course registration are:

- Winter Semester 2024-2025: Last Day to Register - December 16
- Spring Semester 2025: Last Day to Register - January 21
- Summer Semester 2024: Last Day to Register - June 11
- Fall Semester 2024: Last Day to Register - September 3

Please note that payment is due at registration for all semesters.
Based on the provided context, there are two Orientation Days mentioned:

1. New Student Orientation (Mandatory) for the Fall Semester 2024 is scheduled for September 3.
2. New Student Orientation (Mandatory) for the Winter Semester 2024-2025 is scheduled for December 17, which is also the First Day of Classes.

Therefore, the answer to your question depends on which semester you are asking about.
Based on the provided context, there is no specific course titled 'Data Structures'. However, some of the courses mentioned touch on concepts related to data structures.

CIS250 Digital Logic Design introduces basic principles and design procedures of d