## Installing the dependencies

In [1]:
!pip install -q -U langchain langchain-community langchain-core langchain-text-splitters langchain-google-genai faiss-cpu langgraph langchain-huggingface

## Importing the relevant libraries and setting up the API key for Gemini

In [None]:
import os
import getpass
import torch
from google.colab import userdata
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.messages import HumanMessage
from langchain.agents import create_agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools.retriever import create_retriever_tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Read the Gemini key through Colab's `userdata` first; if that fails for any
# reason, fall back to asking for it interactively (input is not echoed).
try:
    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit still stop the cell instead of triggering the prompt.
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

### Importing the source data for RAG from my GitHub repo, then embedding it and storing it in a vector store

In [None]:
repo_url = "https://github.com/progyan01/DSA-Coach.git"
repo_name = "DSA-Coach"

if not os.path.exists(repo_name):
    print(f"Cloning repository: {repo_url}")
    !git clone {repo_url}
else:
    print("Repository already exists. Pulling latest changes...")
    !cd {repo_name} && git pull

print("Loading notes...")
# All notes are Markdown files inside the repo's /data folder. They are read as
# UTF-8 explicitly so decoding does not depend on the machine's default locale.
loader = DirectoryLoader(
    f"./{repo_name}/data",
    glob="**/*.md",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)
docs = loader.load()
print(f"Loaded {len(docs)} documents.")

# Stop here with a clear message instead of failing later while building the
# vector store from an empty list of chunks.
if not docs:
    raise FileNotFoundError(f"No .md notes found under ./{repo_name}/data")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2500,
    chunk_overlap=200,
    # Markdown-oriented separators: headings first, then code fences,
    # display-math markers, blank lines, single newlines and finally spaces.
    separators=["\n# ", "\n## ", "\n### ", "```", "\n$$", "\n\n", "\n", " "],
)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} chunks for the vector store.")

# Use the GPU when the runtime has one; otherwise fall back to the CPU so the
# cell still runs (more slowly) instead of failing on a hard-coded 'cuda'.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Embedding on: {embedding_device}")

embeddings = HuggingFaceEmbeddings(
    # Author's note: chosen as a widely recommended model; it is a large
    # download (about 2 GB per the original note), so a good connection helps.
    # A local model is used instead of Gemini embeddings because the hosted
    # embedding API kept hitting rate limits.
    model_name="BAAI/bge-m3",
    model_kwargs={"device": embedding_device},
    # Ask the embedding model for normalized vectors.
    encode_kwargs={"normalize_embeddings": True},
)

# FAISS is used as the vector database for its speed.
vectorstore = FAISS.from_documents(splits, embeddings)

print("Success!")

## Defining the system prompt for the model and setting up the agent

In [None]:
# temperature=0 keeps the coach's hints as factual as possible and discourages
# hallucinated answers.
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.5-flash")

# Expose the vector store to the agent as a single search tool.
retriever = vectorstore.as_retriever()
tool = create_retriever_tool(
    retriever,
    name="search_dsa_notes",
    description="Searches the notes for algorithms, data structures, and code examples.",
)
tools = [tool]

# System prompt for the agent. It defines the "DSA-Coach" persona and two phases:
#   1. analysing the input (including translating a story problem into the
#      underlying concept/algorithm before searching the notes), and
#   2. a debugging protocol for syntax and logic errors, with a guardrail that
#      the coach gives hints rather than the full corrected code.
system_prompt = """
You are "DSA-Coach", an expert Technical Interviewer.
Your goal is to help students solve problems by guiding them, NOT by giving the answer.

### PHASE 1: ANALYZE THE INPUT
The user input might contain:
1. A Story/Problem Statement (e.g., "Alice needs to visit cities...").
2. Code (optional).
3. An Error Message (optional).

**CRITICAL STEP (Story Translation):**
If the user provides a "Story Problem" without naming an algorithm:
1. You must DECONSTRUCT the story.
2. Identify the underlying CS Concept (e.g., "Cities are Nodes" -> "Graph").
3. Identify the Algorithm (e.g., "Shortest path with weights" -> "Dijkstra").
4. ONLY THEN use the tool to search for that specific algorithm.

### PHASE 2: DEBUGGING PROTOCOL
- **Syntax Errors:** If the code has simple syntax errors (missing semicolon, indentation), point them out immediately. DO NOT use tools for this.
- **Logic Errors (Silent Failures):** If the code runs but is wrong (or TLE), compare the user's logic to the Correct Algorithm found in the notes.
- **Guardrail:** DO NOT write the full corrected code. Give a hint: "You missed the base case for recursion."
"""
# The prompt above is written to cover all of the project's features.

# Still defined so that anything referring to `prompt` keeps working, but it is
# no longer handed to the agent (see below).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# `langchain.agents.create_agent` takes the system prompt as a plain string via
# the `system_prompt` keyword; it has no `prompt` keyword, so passing the
# ChatPromptTemplate raised a TypeError. The conversation itself is supplied at
# call time through the "messages" key of the input.
# The agent runs as a graph, so it can loop back, reconsider and search for
# something different when needed, which makes the response more refined.
agent_executor = create_agent(
    llm,
    tools,
    system_prompt=system_prompt,
)

print("Agent is ready to use!")

### Defining the function for user prompt and model response

In [None]:
from IPython.display import display, Markdown

def _build_user_context(question, code, error):
    """Merge the question, optional code and optional error into one prompt string."""
    user_context = f"QUESTION: {question}\n\n"
    if code:
        user_context += f"STUDENT CODE:\n{code}\n\n"
    if error:
        user_context += f"ERROR/ISSUE:\n{error}"
    elif code:
        # Code without an error message is treated as a silent logic failure.
        user_context += "ISSUE: Code runs but produces wrong output (Silent Failure)."
    return user_context


def _message_text(content):
    """Return the text of a message's content, given a str or a list of content blocks."""
    if isinstance(content, str):
        return content
    parts = []
    for block in content:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and block.get("type", "text") == "text":
            # Non-text blocks are skipped; only the visible text is kept.
            parts.append(block.get("text", ""))
    return "".join(parts)


def ask_coach(code=None, error=None, question=""):
    """Run an interactive coaching session in the notebook.

    Repeatedly asks for a question, optional code and an optional error
    message, sends them to ``agent_executor`` as a single human message and
    renders the agent's last message as Markdown. The loop ends when the user
    types 'exit' (case-insensitive, surrounding whitespace ignored) as the
    question.

    Args:
        code, error, question: Kept for backward compatibility. Their values
            are not used; each one is replaced by what the user types.

    Returns:
        None. Output is printed / displayed.

    Errors raised while calling the agent or rendering its answer are printed
    and the loop continues with the next question.
    """
    print("(Type 'exit' to quit)\n")
    while True:
        question = input("Enter Question: ")
        if question.strip().lower() == 'exit':
            break
        code = input("Paste Code (press Enter if none): ")
        error = input("Paste Error (press Enter if none): ")

        # Combine the separate inputs into the single prompt given to the model.
        user_context = _build_user_context(question, code, error)

        print("DSA-Coach is thinking...")
        try:
            result = agent_executor.invoke({
                "messages": [HumanMessage(content=user_context)]
            })

            # The content of the last message may be a plain string or a list
            # of content blocks; handle both instead of assuming a list.
            final_answer = _message_text(result["messages"][-1].content)

            # A space after '###' and a blank line are needed for the heading
            # to render as a heading.
            display(Markdown(f"### Coach's Feedback\n\n{final_answer}"))

        except Exception as e:
            # Best effort: report the problem and keep the session alive.
            print(f"Error: {e}")

# Start the interactive session; it keeps prompting until 'exit' is entered as the question.
ask_coach()