<a href="https://colab.research.google.com/github/themuzzamil/Hackathon/blob/main/TutorAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



# Installation



<!-- Installation -->

In [2]:
!pip install -qU pypdf
!pip install -qU pypdf scikit-learn langchain_community
!pip install --quiet langchain langchain-text-splitters langchain_google_genai
!pip install --quiet langchain_chroma
!pip install --quiet cohere
!pip install --upgrade --quiet langchain
!pip install --quiet PyPDF2
!pip install --quiet google-api-python-client google-auth-httplib2 google-auth-oauthlib PyPDF2 langgraph

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.5/49.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m615.5/615.5 kB[0m [31m11.3 MB/s[0m eta [36m0:00

# Imports

In [3]:
from google.colab import userdata
from langchain_core.prompts import ChatPromptTemplate
import os
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAI
import sqlite3
from google.colab import files
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.embeddings import CohereEmbeddings
from langchain.schema import Document
from uuid import uuid4
from langgraph.store.memory import InMemoryStore
from langchain.schema import Document
from pprint import pprint
from typing import Any, List, TypedDict
from IPython.display import display, Markdown
from langchain_core.runnables.config import RunnableConfig
from langgraph.store.base import BaseStore


# LLM Call

In [4]:


# Fetch the Gemini API key through Colab's `userdata` helper (entry "GEMINI_API_KEY").
GEMINI_API_KEY = userdata.get("GEMINI_API_KEY")

# Configure the google.generativeai SDK with the same key.
genai.configure(api_key=GEMINI_API_KEY)

# LangChain Gemini wrapper; the quiz, chat and explanation functions call llm.invoke().
llm = GoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model="gemini-1.5-flash")

# Upload pdf and store in vector database

In [None]:
# Cell 1: Database and File Upload Handling




# SQLite database setup
db_path = "uploaded_files_metadata.db"

# Function to initialize the database and create table if not exists
def initialize_db():
    """Create the ``uploaded_files`` table in the SQLite file at ``db_path``.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this more
    than once is harmless. The connection is closed even if the statement
    fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS uploaded_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                collection_name TEXT UNIQUE,
                file_name TEXT
            )
        ''')
        conn.commit()
    finally:
        # sqlite3 connections are not closed automatically; release the file handle.
        conn.close()

# Function to insert file metadata into SQLite
def save_metadata(collection_name, file_name):
    """Record which file was indexed into which collection.

    Args:
        collection_name: Name of the vector-store collection (UNIQUE column).
        file_name: Name of the uploaded file stored in that collection.

    The insert uses ``INSERT OR IGNORE``, so a collection name that is already
    present keeps its existing row. The connection is always closed.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            '''
            INSERT OR IGNORE INTO uploaded_files (collection_name, file_name)
            VALUES (?, ?)
            ''',
            (collection_name, file_name),
        )
        conn.commit()
    finally:
        # Close the database connection (previously it was left open).
        conn.close()

# Function to load metadata from SQLite
def load_metadata():
    """Return every recorded upload as ``{collection_name: file_name}``.

    Reads all rows of ``uploaded_files`` from the SQLite file at ``db_path``.
    The connection is closed even if the query fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            'SELECT collection_name, file_name FROM uploaded_files'
        ).fetchall()
    finally:
        conn.close()

    # Each row is a (collection_name, file_name) pair, so dict() maps it directly.
    return dict(rows)



# Extract the text content of a PDF file
def load_pdf(file_path):
    """Extract the text of every page of the PDF at ``file_path``.

    Pages that yield no text are skipped; each remaining page's text is
    followed by a newline and the combined result is stripped.

    Returns:
        The extracted text, or ``None`` if anything goes wrong (the error is
        printed rather than raised).
    """
    try:
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            page_texts = (page.extract_text() for page in pdf_reader.pages)
            # Consumed inside the ``with`` block, so the file is still open.
            combined = "".join(
                page_text + "\n" for page_text in page_texts if page_text
            )
            return combined.strip()
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

# Process PDF to update vector store
def process_pdf(file_name, collection_name):
    """Index the text of a PDF into a Chroma collection and record it in SQLite.

    Args:
        file_name: Path of the PDF to read (passed to ``load_pdf``).
        collection_name: Name of the Chroma collection to add the chunks to.

    Does nothing when ``load_pdf`` returns no text. Otherwise the text is
    split into chunks, the chunks are added to the collection, a preview of
    each chunk is printed, and the (collection, file) pair is saved through
    ``save_metadata``.
    """
    pdf_content = load_pdf(file_name)
    if not pdf_content:
        return

    print("PDF content loaded successfully.")

    # Embedding function backed by the key stored under "Embedding_API".
    cohere_api_key = userdata.get("Embedding_API")
    embedding_function = CohereEmbeddings(
        model="embed-english-light-v2.0",
        cohere_api_key=cohere_api_key,
        user_agent="LangChainCohere"
    )

    # NOTE(review): no persist directory is passed to Chroma here - presumably
    # the collection lives only in memory for this runtime; confirm.
    vector_db = Chroma(
        collection_name=collection_name,
        embedding_function=embedding_function
    )

    # Wrap the whole text in one Document and let the splitter chunk it.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
    chunks = text_splitter.split_documents([Document(page_content=pdf_content)])
    chunk_texts = [chunk.page_content for chunk in chunks]

    vector_db.add_texts(chunk_texts)

    # Show the first 100 characters of every chunk as a sanity check.
    for idx, chunk_text in enumerate(chunk_texts):
        print(f"Chunk {idx}: {chunk_text[:100]}...")

    print(f"PDF data from {file_name} updated in vector_db.")
    save_metadata(collection_name, file_name)

# Handle file selection or upload
def _upload_and_index_pdf():
    """Ask for a file upload, index it into a fresh collection, return its name.

    Returns ``None`` when the upload yields no file.
    """
    uploaded = files.upload()
    if not uploaded:
        return None

    uploaded_file_path = next(iter(uploaded))
    print(f"Uploaded file: {uploaded_file_path}")

    # Every upload gets its own collection so files never mix.
    unique_collection_name = f"pdf_chunks_{uuid4()}"
    process_pdf(uploaded_file_path, unique_collection_name)
    return unique_collection_name


def handle_file_selection() -> str:
    """Let the user pick a previously indexed file or upload a new one.

    When earlier uploads are recorded, they are listed and the user is asked
    for a number or ``'N'``. Invalid input (non-numeric text, ``0``, or a
    number past the end of the list) re-prompts instead of raising or
    silently selecting the wrong entry. When nothing is recorded, the user is
    sent straight to the upload dialog.

    Returns:
        The name of the selected or newly created collection. ``None`` is
        returned when an upload was requested but no file was provided.
    """
    uploaded_files_metadata = load_metadata()
    if not uploaded_files_metadata:
        print("No files found in the vector DB. Please upload a new file.")
        return _upload_and_index_pdf()

    collections = list(uploaded_files_metadata)
    print("Previously uploaded files:")
    for number, collection in enumerate(collections, start=1):
        print(f"{number}: {uploaded_files_metadata[collection]} (Collection: {collection})")

    while True:
        choice = input("Enter the number of the file to use or 'N' to upload a new one: ").strip()

        if choice.upper() == 'N':
            return _upload_and_index_pdf()

        try:
            chosen_index = int(choice) - 1
        except ValueError:
            chosen_index = -1  # forces the "invalid" branch below

        # Explicit bounds check: a bare list index would accept 0 or negatives.
        if 0 <= chosen_index < len(collections):
            chosen_collection = collections[chosen_index]
            print(f"Using previously uploaded file: {uploaded_files_metadata[chosen_collection]}")
            return chosen_collection

        print(f"Invalid choice. Enter a number from 1 to {len(collections)}, or 'N'.")

# Make sure the metadata table exists before handle_file_selection reads it.
initialize_db()
# Ask the user to pick a recorded file or upload a new one; the resulting
# collection name is used later to open the matching Chroma collection.

collection_name = handle_file_selection()

No files found in the vector DB. Please upload a new file.


# Functions and Main query

In [None]:
class StateDocument(TypedDict):
    """Shared state dictionary passed between the chat and quiz functions."""

    # Number of quiz questions requested, as typed by the user.
    human_input: str
    # Quiz text returned by the LLM.
    ai_output: str
    # Most recent chat-mode query.
    human_input_2: str
    # Most recent chat-mode reply from the LLM.
    chat: str
    # Mode choice, lower-cased and stripped; "chat" selects chat mode.
    decision: str
    # Number of quiz questions answered correctly.
    score: int
    # LLM explanation of the quiz answers.
    reason: str




# Per-user configuration; node_3 reads config["configurable"]["user_id"] to pick the memory namespace.
config = {"configurable": {"user_id": input("enter your username")}}
# In-process store handed to node_3 as its memory backend.
across_thread_memory = InMemoryStore()


# Define a VectorDatabase class to interact with the Chroma database
class VectorDatabase:
    """Small wrapper around a Chroma vector store."""

    def __init__(self, chroma_db: Chroma):
        # Kept as a public attribute: node_2 calls ``chroma_db.get()`` on it directly.
        self.chroma_db = chroma_db

    def similarity_search(self, query: str, k: int) -> List[Any]:
        """Delegate to the wrapped store's ``similarity_search`` and return its result."""
        backend = self.chroma_db
        matches = backend.similarity_search(query, k=k)
        return matches

def questions(state: StateDocument) -> StateDocument:
    """Ask how many quiz questions to generate and store the answer in ``state``.

    The value is interpolated into the quiz prompt, so the user is re-prompted
    until a whole number greater than zero is entered. It is stored as a
    string in ``state["human_input"]``.

    Returns:
        The same ``state`` object, updated in place.
    """
    while True:
        total = input("How many questions you want to generate? ").strip()
        if total.isdecimal() and int(total) > 0:
            break
        print("Please enter a whole number greater than zero.")

    # Normalise forms such as "05" while keeping the string type callers expect.
    state["human_input"] = str(int(total))
    return state

def node_0(state: StateDocument) -> StateDocument:
    """Entry step: ask whether to chat or take a quiz, then run that flow.

    The answer is lower-cased, stripped and stored in ``state["decision"]``.
    ``"chat"`` starts the chat loop; anything else asks for a question count,
    generates the quiz and runs it. Both flows use the module-level
    ``vector_db``.

    Returns:
        The same ``state`` object, updated in place.
    """
    mode = input("IF You Want To Chat Then Type Chat Or Else We Will Continue Towards Quiz: ")
    state["decision"] = mode.lower().strip()

    if state["decision"] != "chat":
        # Quiz flow: question count -> quiz generation -> interactive quiz.
        questions(state)
        node_2(vector_db, state)
        display_quiz(state)
        return state

    chatbot(vector_db, state)
    return state

def chatbot(db: VectorDatabase, state: StateDocument) -> StateDocument:
    """Run the interactive chat loop, then optionally continue to the quiz.

    Each non-empty query is stored in ``state["human_input_2"]``, answered by
    ``node_3`` and rendered as Markdown. Typing ``quit`` (any case) leaves the
    loop. Blank input is ignored instead of being sent to the LLM. Afterwards
    the user may continue to the quiz by answering ``yes`` (or ``y``);
    surrounding whitespace is ignored, matching how ``node_0`` reads its
    answer.

    Args:
        db: Vector store wrapper used for retrieval and quiz generation.
        state: Shared state dictionary, updated in place.

    Returns:
        The ``state`` dictionary.
    """
    while True:
        state["human_input_2"] = input("What do you want to know about? (type 'quit' to exit chat): ").strip()

        if not state["human_input_2"]:
            # Nothing to ask; prompt again rather than query with an empty string.
            continue

        if state["human_input_2"].lower() == "quit":
            print("Exiting chat mode.")
            break

        state = node_3(db, state, config, across_thread_memory)
        display(Markdown(state["chat"]))

    quiz_ch = input("do you want to continue quiz yes/no").lower().strip()
    if quiz_ch in ("yes", "y"):
        questions(state)
        node_2(db, state)
        display_quiz(state)
    else:
        print("good bye")

    return state

def node_2(db: VectorDatabase, state: StateDocument) -> StateDocument:
    """Generate a multiple-choice quiz from the whole collection.

    Every document stored in the collection is fetched with
    ``db.chroma_db.get()`` and joined into one context string, which is sent
    to the LLM together with the formatting instructions. The raw LLM
    response is stored in ``state["ai_output"]``.

    Args:
        db: Vector store wrapper whose collection supplies the context.
        state: Shared state; ``state["human_input"]`` holds the requested
            number of questions.

    Returns:
        The same ``state`` object, updated in place.
    """
    question_count = state["human_input"]

    # Prompt text is kept exactly as before so the quiz format seen by
    # display_quiz does not change.
    prompt = f"""You are an expert quiz creator. Create {question_count} quiz questions. Each question should have four multiple-choice options (A, B, C, D),
    and provide the correct answer at the end of each question.
    Generate a quiz.


    Strictly adhere to the following format for each question:

    Question: [Question Text]
    A. [Option A]
    B. [Option B]
    C. [Option C]
    D. [Option D]
    Answer: [Correct Option Letter]  ## Emphasize the letter format

    For example:
    Question: What is the capital of France?
    A. Berlin
    B. Madrid
    C. Paris
    D. Rome
    Answer: C

    Do not deviate from this format. Do not hallucinate. Provide all questions in this exact pattern.
    Don't hallucinate in how many fuction you you are called keep the same format for question.

    """

    # The stored texts are used directly; wrapping them in Document objects
    # only to read page_content back added nothing. ``or []`` covers a store
    # that reports no documents.
    stored = db.chroma_db.get()
    context = " ".join(stored["documents"] or [])

    prompt_with_context = f"{prompt}\n\nContext: {context},\n\nQuestion:{question_count}"

    state["ai_output"] = llm.invoke(prompt_with_context)
    return state

def node_3(db: VectorDatabase, state: StateDocument, config: RunnableConfig, store: BaseStore) -> StateDocument:
    """Answer one chat query using retrieved context and per-user memory.

    Args:
        db: Vector store wrapper used to retrieve the two closest documents.
        state: Shared state; ``state["human_input_2"]`` is the query.
        config: Must contain ``config["configurable"]["user_id"]``.
        store: Memory store; the previous reply is read from and the new
            reply written to key ``"user_memory"`` in the user's namespace.

    Returns:
        The same ``state`` object with ``state["chat"]`` set to the LLM reply.
    """
    user_id = config["configurable"]["user_id"]
    # Store namespaces are tuples of strings. The previous ``(user_id)`` was
    # just the bare string, which the store would iterate character by character.
    namespace = (user_id,)
    existing_memory = store.get(namespace, "user_memory")

    if existing_memory:
        existing_memory_content = existing_memory.value.get('memory')
    else:
        existing_memory_content = "No existing memory found."

    query = state["human_input_2"]
    relevant_docs = db.similarity_search(query, k=2)
    context = " ".join([doc.page_content for doc in relevant_docs])

    # Construct the prompt with context
    prompt = f"""
    Provide information based on the context below.
    Don't hallucinate or web search information.
    If user greet you then interact with him greet him ask him how can you help him today.
    But when user ask any question then provide him with answer and at end say how can i assist you further
    or prompt similar to it just to help out user.
    Query input: "{query}"
    Context: {context}

    You are a helpful assistant with memory that provides information about the user.
    If you have memory for this user, use it to personalize your responses.
    Here is the memory (it may be empty): {existing_memory_content}
    User: {query}"""

    state["chat"] = llm.invoke(prompt)
    # Only the latest reply is kept as memory; it overwrites the previous entry.
    store.put(namespace, "user_memory", {"memory": state["chat"]})
    return state




# Cohere embedding function used when the Chroma collection is opened at the
# bottom of this cell. process_pdf builds its own instance with the same model
# and key when it indexes a file.
cohere_api_key = userdata.get("Embedding_API")  # key is read from the "Embedding_API" entry
embedding_function = CohereEmbeddings(
    model="embed-english-light-v2.0",
    cohere_api_key=cohere_api_key,
    user_agent="LangChainCohere"
)

def _parse_quiz(quiz_text):
    """Turn LLM quiz text into a list of ``(question, options, correct_answer)``.

    Blocks are separated by blank lines. Within a block, surrounding
    whitespace and empty lines are ignored; the first line is the question,
    the next four are the options and the sixth is the answer line. Blocks
    that are too short, or whose answer line does not yield a letter A-D
    after the first colon, are reported and skipped.
    """
    parsed = []
    for block in quiz_text.strip().split("\n\n"):
        lines = [line.strip() for line in block.splitlines() if line.strip()]
        if not lines:
            continue
        if len(lines) < 6:
            print(f"Skipping incomplete question block: {block}")
            continue

        # Text after the first colon, without Markdown emphasis such as "**".
        correct_answer = lines[5].partition(":")[2].strip(" *")
        if correct_answer[:1].upper() not in ("A", "B", "C", "D"):
            print(f"Skipping question block without a valid answer line: {block}")
            continue

        parsed.append((lines[0], lines[1:5], correct_answer))
    return parsed


def display_quiz(state: StateDocument) -> None:
    """Run the quiz in ``state["ai_output"]`` interactively and record the score.

    Each parsed question and its options are rendered as Markdown, the user
    is asked for a letter, and the reply is compared case-insensitively with
    the first character of the expected answer. ``state["score"]`` is reset
    to zero first and incremented for every correct reply; the final score is
    printed at the end.
    """
    quiz = _parse_quiz(state["ai_output"])
    state["score"] = 0  # Initialize score

    for question, options, correct_answer in quiz:
        display(Markdown(f"\n{question}"))
        for option in options:
            display(Markdown(option))

        answer = input("Choose an answer (A/B/C/D): ").strip().upper()
        # _parse_quiz guarantees a non-empty answer starting with A-D.
        if answer == correct_answer[0].upper():
            state["score"] += 1
            print("Correct!")
        else:
            print(f"Wrong! The correct answer is: {correct_answer}")

    print(f"\nYour final score: {state['score']} out of {len(quiz)}")

def reason(db: VectorDatabase, state: StateDocument) -> StateDocument:
    """Ask the LLM to explain the answers of the generated quiz.

    The quiz text in ``state["ai_output"]`` is used both as the similarity
    query (two closest documents become the context) and as part of the
    prompt. The explanation is stored in ``state["reason"]`` and rendered as
    Markdown.

    When no quiz was generated (for example a chat-only session) there is
    nothing to explain, so the state is returned untouched without querying
    the vector store or the LLM.

    Args:
        db: Vector store wrapper used to retrieve context.
        state: Shared state dictionary, updated in place.

    Returns:
        The same ``state`` object.
    """
    quiz_text = state["ai_output"]
    if not quiz_text.strip():
        return state

    prompt = ChatPromptTemplate.from_template("""
  You are a helpful assistant. Use the Context:{context} and provide reason why this answer is correct of quiz "{ai_output}"

  """)
    relevant_docs = db.similarity_search(quiz_text, k=2)
    context = " ".join([doc.page_content for doc in relevant_docs])
    # Quiz text is passed as a template variable, so braces in it are not interpreted.
    result = prompt.format(context=context, ai_output=quiz_text)
    state["reason"] = llm.invoke(result)
    display(Markdown(state["reason"]))
    return state



# Fresh state for this session; every field starts empty.
state: StateDocument = {"human_input": "", "human_input_2": "", "decision": "", "chat": "", "ai_output": "", "score": 0,"reason":""}


if collection_name:
    # Open the selected collection and run the chat/quiz flow against it.
    vector_db = VectorDatabase(Chroma(collection_name=collection_name, embedding_function=embedding_function))
    state = node_0(state)

    state = reason(vector_db, state)
else:
    # Without a collection there is no vector_db; running the flow would
    # fail with a NameError, so stop with an explanation instead.
    print("No PDF collection is available. Upload a PDF and run this cell again.")