In [6]:
!pip install -q -U langchain langchain-community langchain-core langchain-text-splitters langchain-google-genai faiss-cpu langgraph langchain-huggingface

In [1]:
import os
import getpass
import torch
from google.colab import userdata

# API key setup: read the key from the Colab secret store, and fall back to an
# interactive prompt when that lookup fails.
try:
    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
except Exception:
    # `except Exception` instead of a bare `except:` so that Ctrl-C / kernel
    # interrupts still propagate rather than dropping into the prompt.
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

Enter your Google API Key: ··········


In [4]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

repo_url = "https://github.com/progyan01/DSA-Coach.git"
repo_name = "DSA-Coach"

if not os.path.exists(repo_name):
    print(f"Cloning repository: {repo_url}")
    !git clone {repo_url}
else:
    print("Repository already exists. Pulling latest changes...")
    !cd {repo_name} && git pull

print("Loading notes...")
loader = DirectoryLoader(f"./{repo_name}/data", glob="**/*.md", loader_cls=TextLoader)
docs = loader.load()
print(f"Loaded {len(docs)} documents.")

# Chunk the loaded notes for the vector store. The separator list runs from
# Markdown headings, through code fences and "$$" markers, down to paragraph
# breaks, newlines and single spaces.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n# ", "\n## ", "\n### ", "```", "\n$$", "\n\n", "\n", " "],
    chunk_overlap=200,
    chunk_size=2500,
)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} chunks for the vector store.")

# Run the embedding model on the GPU when one is attached, otherwise on the CPU,
# so the cell also works on a CPU-only runtime.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# Instruction prefix applied to search queries only (not to the indexed chunks).
query_prompt = "Represent this query for retrieving relevant coding and algorithm documentation: "

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs={'device': embedding_device},
    encode_kwargs={'normalize_embeddings': True},
    # `HuggingFaceEmbeddings` has no `query_instruction` field; query-specific
    # options go through `query_encode_kwargs`, which is forwarded to the
    # model's `encode` call. Normalisation is repeated here so query vectors
    # are normalised the same way as document vectors.
    query_encode_kwargs={'normalize_embeddings': True, 'prompt': query_prompt},
)

# Embed every chunk and build the in-memory FAISS index.
vectorstore = FAISS.from_documents(splits, embeddings)

print("Success!")

Cloning repository: https://github.com/progyan01/DSA-Coach.git
Cloning into 'DSA-Coach'...
remote: Enumerating objects: 188, done.[K
remote: Counting objects: 100% (188/188), done.[K
remote: Compressing objects: 100% (183/183), done.[K
remote: Total 188 (delta 8), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (188/188), 706.78 KiB | 15.71 MiB/s, done.
Resolving deltas: 100% (8/8), done.
Loading notes...
Loaded 66 documents.
Created 495 chunks for the vector store.


  embeddings = HuggingFaceBgeEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Success!


In [12]:
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools.retriever import create_retriever_tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Chat model that drives the agent.
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.5-flash")

# Wrap the vector store as a retriever and expose it to the agent as its only tool.
retriever = vectorstore.as_retriever()
tool = create_retriever_tool(
    retriever=retriever,
    name="search_dsa_notes",
    description="Searches the notes for algorithms, data structures, and code examples.",
)
tools = [tool]

# System instructions for the agent. The text is used verbatim as the "system"
# entry of the chat prompt, so any edit here changes what the model is told.
system_prompt = """
You are "DSA-Coach", an expert Technical Interviewer.
Your goal is to help students solve problems by guiding them, NOT by giving the answer.

### PHASE 1: ANALYZE THE INPUT
The user input might contain:
1. A Story/Problem Statement (e.g., "Alice needs to visit cities...").
2. Code (optional).
3. An Error Message (optional).

**CRITICAL STEP (Story Translation):**
If the user provides a "Story Problem" without naming an algorithm:
1. You must DECONSTRUCT the story.
2. Identify the underlying CS Concept (e.g., "Cities are Nodes" -> "Graph").
3. Identify the Algorithm (e.g., "Shortest path with weights" -> "Dijkstra").
4. ONLY THEN use the tool to search for that specific algorithm.

### PHASE 2: DEBUGGING PROTOCOL
- **Syntax Errors:** If the code has simple syntax errors (missing semicolon, indentation), point them out immediately. DO NOT use tools for this.
- **Logic Errors (Silent Failures):** If the code runs but is wrong (or TLE), compare the user's logic to the Correct Algorithm found in the notes.
- **Guardrail:** DO NOT write the full corrected code. Give a hint: "You missed the base case for recursion."
"""

# Prompt layout: the fixed system instructions first, then whatever is supplied
# under the "messages" key.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="messages"),
])

# Build the agent from the chat model, the tool list and the prompt.
agent_executor = create_react_agent(model=llm, tools=tools, prompt=prompt)

print("Agent is ready to use!")

Agent is ready to use!


/tmp/ipython-input-3641978954.py:47: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent_executor = create_react_agent(


In [15]:
from IPython.display import display, Markdown

def ask_coach(code=None, error=None, question=""):
    """Ask DSA-Coach for feedback, either once or in an interactive loop.

    Args:
        code: Optional student code to include in the request.
        error: Optional error message / issue description to include.
        question: The question or problem statement.

    Behaviour:
        * If any argument is supplied, a single request is built from the
          arguments and answered (previously the arguments were overwritten
          by ``input()`` and therefore ignored).
        * With no arguments, the function prompts for question, code and error
          in a loop until the user types ``exit`` as the question.

    Returns:
        None. The answer is rendered with ``display(Markdown(...))``; failures
        while contacting the agent are printed rather than raised.
    """
    if question or code or error:
        _coach_reply(question, code, error)
        return

    print("(Type 'exit' to quit)\n")
    while True:
        question = input("Enter Question: ")
        # Tolerate stray whitespace around the quit command.
        if question.strip().lower() == 'exit':
            break
        code = input("Paste Code (press Enter if none): ")
        error = input("Paste Error (press Enter if none): ")
        _coach_reply(question, code, error)


def _build_user_context(question, code=None, error=None):
    """Assemble the message text sent to the agent from question, code and error."""
    user_context = f"QUESTION: {question}\n\n"
    if code:
        user_context += f"STUDENT CODE:\n{code}\n\n"
    if error:
        user_context += f"ERROR/ISSUE:\n{error}"
    elif code:
        # Code without an error message is treated as a wrong-output report.
        user_context += "ISSUE: Code runs but produces wrong output (Silent Failure)."
    return user_context


def _message_text(content):
    """Return the text of a message whose content is a string or a list of blocks."""
    if isinstance(content, str):
        return content
    parts = []
    for block in content or []:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and isinstance(block.get("text"), str):
            # Keep every text block, not just the first one.
            parts.append(block["text"])
    return "".join(parts)


def _coach_reply(question, code=None, error=None):
    """Send one request to the agent and render its final message as Markdown."""
    user_context = _build_user_context(question, code, error)

    print("DSA-Coach is thinking...")
    try:
        result = agent_executor.invoke({
            "messages": [HumanMessage(content=user_context)]
        })

        final_answer = _message_text(result["messages"][-1].content)
        # A space after the hashes is required for the line to render as a heading.
        display(Markdown(f"### Coach's Feedback\n\n{final_answer}"))

    except Exception as e:
        print(f"❌ Error: {e}")


DSA-Coach is thinking...


###Coach's Feedback
The problem you're trying to solve, connecting all labs with the minimum total cable length, is a classic problem that can be solved using a Minimum Spanning Tree (MST) algorithm. Your approach seems to be based on Kruskal's algorithm, which is a good choice for this problem.

Let's re-examine the core idea of Kruskal's algorithm. It involves iteratively adding edges to build the MST. Think about how you would select edges to ensure the *minimum* total length. What kind of edges would you prioritize?

Consider the line where you sort the edges:
`edges.sort(key=lambda x: x[2], reverse=True)`

Does sorting in `descending` order align with the goal of finding the *minimum* spanning tree?