**Google GenAI Hackathon - Socratic Chatbot for DSA**



In [None]:
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Install Necessary Packages
!pip install -q -U google-generativeai –upgrade
!pip install langchain langchain_community langchain-google-genai python-dotenv streamlit langchain_experimental sentence-transformers langchain_chroma langchainhub pypdf rapidocr-onnxruntime langchain_google_community


[31mERROR: Invalid requirement: '–upgrade': Expected package name at the start of dependency specifier
    –upgrade
    ^[0m[31m
[0mCollecting langchain
  Downloading langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting langchain_experimental
  Downloading langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting langchain_chroma
  Downloading langchain_chroma-0.1.4-py3-none-any.whl.metadata (1.6 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.

**Importing Necessary Libraries**

In [None]:
# General libraries
import os
import http.client
import urllib.request
import typing
import glob
import pprint
from collections.abc import Iterator
from typing import Any

# IPython libraries for interactive shells and display
from IPython.core.interactiveshell import InteractiveShell
import IPython.display

# Google Generative AI and Vertex AI libraries
import google.generativeai as genai
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Image,
    HarmBlockThreshold,
    HarmCategory,
    Part,
)

# Sentence Transformers and other NLP libraries
from sentence_transformers import SentenceTransformer

# Environment variable handling
from dotenv import load_dotenv

# Langchain Libraries
import langchain
import langchain_community
import langchain_google_genai
import langchain_experimental
import langchain_chroma
import langchainhub

# Importing Langchain Community Document Loader
from langchain_community.document_loaders import PyPDFLoader

# Langchain Core imports for Chat and Prompts
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool

# Importing necessary classes and methods from Langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Additional libraries for Langchain usage
from langchain.agents import AgentType, initialize_agent
from langchain.document_loaders import TextLoader
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.vectorstores.base import VectorStoreRetriever
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage

# Tavily Search tool integration
from langchain_community.tools.tavily_search import TavilySearchResults

# Utility Libraries
import numpy as np
import re
from tqdm import tqdm

# Additional libraries for loading PDF and performing OCR
import pypdf
import rapidocr_onnxruntime
import streamlit as st

# Import Google Search API wrapper
from langchain_google_community import GoogleSearchAPIWrapper

**Configure Generative AI API Keys**

In [None]:
## Load Environment Variables
# Secrets must never be written into the notebook. Put GOOGLE_API_KEY (required) and
# LANGCHAIN_API_KEY (optional) in a local `.env` file or export them in the shell;
# load_dotenv() copies the `.env` entries into os.environ.
# NOTE(review): the keys that used to be hard-coded in this cell should be treated as
# leaked — revoke and rotate them.
load_dotenv()

# Fail fast with an actionable message instead of a KeyError in a later cell.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before running."
    )


  and should_run_async(code)


**Configure Google Generative AI**

In [None]:
# Pass the API key from the environment to the google.generativeai client
genai.configure(
    api_key=os.environ['GOOGLE_API_KEY'],
)

  and should_run_async(code)


**Model Setup: Google Generative AI**

In [None]:
# Sampling / output settings collected in one mapping.
# NOTE(review): no visible cell reads `generation_config` — confirm it is still needed.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=40,
    response_mime_type="text/plain",
    max_tokens=100,
)


  and should_run_async(code)


In [None]:
# Gemini chat model (LangChain wrapper) shared by every chain in this notebook.
#
# Removed from the original call: `system_instruction`, `num_beams`,
# `no_repeat_ngram_size` and `repetition_penalty`. The last three are Hugging Face
# `generate()` options that the Gemini API does not offer, and the Socratic persona is
# already delivered through the system message of the chat prompt template.
# NOTE(review): confirm against the installed langchain-google-genai version that none
# of the removed keyword arguments were honoured.
multimodal_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    top_p=0.9,
    top_k=40,
    response_mime_type="text/plain",
    max_tokens=100,
    # Fold the system message into the first human turn before calling the API.
    convert_system_message_to_human=True,
)


  and should_run_async(code)


**Define Chat Prompt Structure**

In [None]:
# System prompt for the Socratic DSA tutor. The sections are concatenated with no
# separator; the trailing "{context}" placeholder is filled with the retrieved documents.
prompt = "".join(
    [
        # Persona
        "<Persona> You are a multilingual, helpful Socratic teacher and guide, aiming to push users to learn independently by asking insightful and thought-provoking questions. You specialize in Data Structures and Algorithms (DSA) and apply the Feynman Learning Technique to ensure deep understanding by breaking down concepts and encouraging students to explain them in their own words.</Persona>",
        # Task
        "<Task> Socratic Teaching Assistant for Data Structures and Algorithms </Task>",
        # Context
        "<Context> As a Socratic teaching assistant specializing in Data Structures and Algorithms, you have the ability to guide students in understanding DSA concepts by asking probing questions that lead them to discover solutions on their own. You accept input in multiple formats (text, images, audio, video, or PDFs) and are focused on making the student articulate their understanding, particularly when they encounter difficulties.</Context>",
        # Tone
        "<Tone> Polite, encouraging, and technical </Tone>",
        # Few-shot examples
        "<Examples> <Example> <User role=\"user\"> <Question> What is bubble sorting? </Question> </User> <Assistant role=\"model\"> <Response> Bubble sort is a way to sort numbers by repeatedly swapping adjacent elements. Can you describe how you think swapping two numbers helps in sorting a list? </Response> </Assistant> </Example> <Example> <User role=\"user\"> <InputType> Image </InputType> <Description> A diagram of a bubble sort algorithm </Description> </User> <Assistant role=\"model\"> <Response> Based on the diagram, can you explain the role of adjacent comparisons? How does this help organize the list in each pass? </Response> </Assistant> </Example> </Examples>",
        # Step-by-step instructions
        "<Instructions> <Step number=\"1\"> Analyze the user's input, determining if it falls under greetings, general queries, or specific DSA-related questions. Do not assume user intent; let the user guide the conversation.</Step>",
        "<Step number=\"2\"> If it's a DSA-related question, focus on asking questions that make the student explain the concept in their own words. This helps them identify gaps in their understanding. </Step>",
        "<Step number=\"3\"> Use the Feynman Technique: When the student answers, ask follow-up questions that require them to further simplify and clarify their explanations. Encourage the student to explain how they would teach this to someone else. </Step>",
        "<Step number=\"4\"> Provide positive reinforcement when the student demonstrates a deeper understanding or overcomes a challenge. Encourage further exploration by suggesting related topics they can delve into.</Step>",
        "<Step number=\"5\"> Ensure that your questions progressively build on the student's responses without being repetitive, pushing them to explore more advanced concepts once they grasp the basics.</Step>",
        "<Step number=\"6\"> Contextual Awareness: Use external resources when necessary by fetching relevant information from document formats or external search sources to provide real-time, updated knowledge. </Step> </Instructions>",
        # Placeholder for retrieved documents
        "{context}",
    ]
)


  and should_run_async(code)


In [None]:
# Wrap the system prompt in a LangChain chat template:
# the system message carries the tutor instructions, the human message carries the query.
socratic_messages = [
    ("system", prompt),
    ("human", "{input}"),
]
prompt1 = ChatPromptTemplate.from_messages(socratic_messages)

  and should_run_async(code)


**External Memory**

In [None]:
# Load a PDF document (for instance, a DSA book) to use as the retrieval corpus.
# The location defaults to "DSA_Book.pdf" in the working directory and can be overridden
# with the DSA_BOOK_PATH environment variable, so the notebook does not depend on one
# machine's file layout.
DSA_BOOK_PATH = os.environ.get("DSA_BOOK_PATH", "DSA_Book.pdf")

# Check up front so a missing file produces an actionable message.
if not os.path.isfile(DSA_BOOK_PATH):
    raise FileNotFoundError(
        f"PDF not found at {os.path.abspath(DSA_BOOK_PATH)!r}. "
        "Place the book there or set the DSA_BOOK_PATH environment variable before running this cell."
    )

loader = PyPDFLoader(DSA_BOOK_PATH)
data = loader.load()

  and should_run_async(code)


ValueError: File path DSA_Book.pdf is not a valid file or url

In [None]:
# Cut the loaded document into chunks (chunk_size=1000) so retrieval works on small passages
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

In [None]:
# Embed the chunks with Google Generative AI embeddings and index them in Chroma
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [None]:
# Expose the vector store as a similarity-search retriever (k=10)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=10),
)

**Build Retrieval-Augmented Generation (RAG) Chain**

In [None]:
# Build the RAG pipeline in two stages:
# 1. a chain that places the retrieved documents into the prompt and calls the model
# 2. a retrieval chain that fetches documents and hands them to stage 1
question_answer_chain = create_stuff_documents_chain(
    multimodal_model,
    prompt1,
)
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [None]:
# Example: run a single question through the RAG chain and show the answer
sample_query = {"input": "What is insertion sort?"}
response = rag_chain.invoke(sample_query)
print(response["answer"])

**Context-aware query reformulation**

In [None]:
# System prompt for rewriting a follow-up question into a standalone one.
# Adjacent string literals are concatenated as-is, so every fragment except the last
# ends with a space; without it the sentences run together.
retriever_prompt = (
    "Given the latest user question that may reference prior context from the chat history, "
    "rephrase it into a self-contained, standalone question that does not require prior context to be understood. "
    "Ensure the reformulated question maintains the original meaning. "
    "Do NOT provide an answer—only return the standalone question or leave it as is if no changes are needed."
)


In [None]:
# Template for question reformulation: instructions, then the running conversation,
# then the newest user message.
contextualize_messages = [
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

**Create history-aware retriever**

In [None]:
history_aware_retriever = create_history_aware_retriever(multimodal_model, retriever, contextualize_q_prompt)

In [None]:
# Track chat history between user and model
chat_history = []

In [None]:
## Example Interaction
question1 = "What is bubble sort?"
chat_history.append(HumanMessage(content=question1))

In [None]:
## Context-aware response
response = history_aware_retriever.invoke({"input": question1, "chat_history": chat_history})
print(response["answer"])

In [None]:
# Extending chat history with the first question and response
chat_history.extend(
    [
        HumanMessage(content=question1),
        AIMessage(content=message1["answer"]),
    ]
)

In [None]:
# Defining the second question and invoking the chain
second_question = "What are common ways of doing it?"
message2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

In [None]:
print(message2["answer"])

In [None]:
# Extending chat history with the second question and response
chat_history.extend(
    [
        HumanMessage(content=second_question),
        AIMessage(content=message2["answer"]),
    ]
)

In [None]:
# Defining the third question and invoking the chain
third_question = "What is the significance?"
message3 = rag_chain.invoke({"input": third_question, "chat_history": chat_history})


In [None]:
print(message3["answer"])