# Dependencies & Configurations

In [1]:
!pip install --upgrade pip --quiet

!pip install google-generativeai --quiet
!pip install langgraph pandas --quiet
!pip install sentencepiece protobuf datasets --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h

In [20]:
import pandas as pd
import google.generativeai as genai
from google.generativeai import types
from google.api_core import exceptions as google_api_exceptions # For API error handling
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Dict, Optional
import ast
import re
import logging
import warnings
import os
import time
from IPython.display import display

from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
GEMINI_API_KEY = user_secrets.get_secret("GEMINI_API_KEY")
os.environ['GEMINI_API_KEY'] = GEMINI_API_KEY
print("GEMINI_API_KEY found in Kaggle Secrets.")


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')
warnings.filterwarnings("ignore", category=UserWarning) # General user warnings

GEMINI_API_KEY found in Kaggle Secrets.


In [55]:
# Cell 3: Configuration
API_MODEL_NAME = "gemma-3-27b-it"

INPUT_DIR = "/kaggle/input/fpt-ai-residency-batch-6-entry-test"
TEST_DATA_PATH = os.path.join(INPUT_DIR, "b6_test_data.csv")
SUBMISSION_PATH = "submission.csv"

API_TEMPERATURE = 0.2
API_MAX_OUTPUT_TOKENS = 1500

PROCESS_LIMIT = None

MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 5

In [56]:
genai.configure(api_key=GEMINI_API_KEY)
client = genai.GenerativeModel(model_name=API_MODEL_NAME)

# Utils

In [57]:
def run_llm_api(prompt: str, model_name: str) -> str:
    """
    Sends a prompt to the configured Google GenAI model and returns the text response.
    Includes basic retry logic.
    """
    retries = 0
    while retries < MAX_RETRIES:
        try:
            model = genai.GenerativeModel(model_name)

            generation_config = types.GenerationConfig(
                candidate_count=1,
                max_output_tokens=API_MAX_OUTPUT_TOKENS,
                temperature=API_TEMPERATURE,
                top_p=0.95,
                top_k=64,
            )

            safety_settings = {
                types.HarmCategory.HARM_CATEGORY_HARASSMENT: 'BLOCK_MEDIUM_AND_ABOVE',
                types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: 'BLOCK_MEDIUM_AND_ABOVE',
                types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: 'BLOCK_MEDIUM_AND_ABOVE',
                types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: 'BLOCK_MEDIUM_AND_ABOVE',
            }

            response = model.generate_content(
                contents=[prompt], # Gemini API expects list of prompts/turns
                generation_config=generation_config,
                safety_settings=safety_settings,
            )

            if not response.candidates:
                block_reason = response.prompt_feedback.block_reason if response.prompt_feedback else 'Unknown'
                error_msg = f"API call blocked or returned no candidates. Reason: {block_reason}"
                logging.warning(error_msg)
                if block_reason == types.BlockReason.SAFETY and retries < MAX_RETRIES -1:
                     logging.info(f"Retrying ({retries + 1}/{MAX_RETRIES})...")
                     retries += 1
                     time.sleep(RETRY_DELAY_SECONDS)
                     continue
                else:
                     return f"[[ERROR: {error_msg}]]"


            generated_text = response.candidates[0].content.parts[0].text
            logging.debug(f"API Response (raw): {generated_text[:200]}...")
            return generated_text.strip()

        except google_api_exceptions.ResourceExhausted as e:
            logging.warning(f"API Quota Error: {e}. Retrying ({retries + 1}/{MAX_RETRIES}) after delay...")
            retries += 1
            time.sleep(RETRY_DELAY_SECONDS * (retries + 1))
        except Exception as e:
            logging.error(f"LLM API call failed unexpectedly: {e}", exc_info=True)
            if retries < MAX_RETRIES - 1:
                 logging.warning(f"Retrying ({retries + 1}/{MAX_RETRIES}) after general error...")
                 retries += 1
                 time.sleep(RETRY_DELAY_SECONDS)
            else:
                logging.error("Max retries reached for API call.")
                return f"[[ERROR: API call failed after retries: {e}]]"

    return f"[[ERROR: API call failed after {MAX_RETRIES} retries.]]"

In [58]:
def parse_choices(choices_str: str) -> (Dict[str, str], Optional[str]):
    """Safely parses the choices string into a dictionary {A: choice1, B: choice2,...}."""
    try:
        choices_list = ast.literal_eval(choices_str)
        if not isinstance(choices_list, list):
            return {}, "Parsed choices is not a list."
        max_choices = min(len(choices_list), 26)
        if max_choices == 0: return {}, "Parsed choices list is empty."
        parsed = {chr(ord('A') + i): str(choice) for i, choice in enumerate(choices_list[:max_choices])}
        return parsed, None
    except (ValueError, SyntaxError, TypeError) as e:
        logging.warning(f"ast.literal_eval failed: {e}. Trying regex fallback.")
        try:
            matches = re.findall(r"['\"](.*?)['\"]", choices_str)
            if matches and len(matches) <= 26:
                parsed = {chr(ord('A') + i): str(choice) for i, choice in enumerate(matches)}
                return parsed, f"Warning: Used regex fallback due to error: {e}"
            else:
                 return {}, f"Failed to parse choices: {e}. Regex fallback failed."
        except Exception as re_e:
            return {}, f"Primary parsing failed: {e}; Regex fallback failed: {re_e}"

def format_choices_for_prompt(parsed_choices: Dict[str, str]) -> str:
    """Formats choices for LLM prompts."""
    return "\n".join([f"({k}) {v}" for k, v in parsed_choices.items()])

In [59]:
class AgentState(TypedDict):
    task_id: str                 # Unique ID for the question
    question: str                # The question text
    choices_str: str             # The raw choices string (e.g., "['10', '9', '8']")
    parsed_choices: Dict[str, str] # Parsed choices like {'A': '10', 'B': '9', ...}
    analyses: Dict[str, str]     # Dictionary to store outputs from analysis nodes
    critique: Optional[str]      # Stores the synthesized critique of options
    final_answer: Optional[str]  # The final predicted letter (A, B, C, or D)
    error: Optional[str]         # Stores error messages if any step fails
    log: List[str]               # A running log of actions for debugging

In [60]:
def parse_input_node(state: AgentState) -> AgentState:
    log = state.get('log', [])
    task_id = state['task_id']
    log.append(f"--- Starting Task {task_id} ---")
    log.append(f"Parsing input choices: {state['choices_str']}")
    parsed_choices, error_msg = parse_choices(state['choices_str'])
    if not parsed_choices:
        state['error'] = f"Critical: Could not parse choices string. {error_msg}"
        state['final_answer'] = "A"
        log.append(f"ERROR: Critical choice parsing failed for task {task_id}. Defaulting final answer to A.")
    else:
        state['parsed_choices'] = parsed_choices
        if error_msg: log.append(error_msg)
        log.append(f"Parsed Choices: {parsed_choices}")
    state['log'] = log
    state['analyses'] = {}
    return state

# Agent Definitions & Graph Building

In [61]:
# Agent Node Definitions

def analyze_question_node(state: AgentState) -> AgentState:
    if state.get('error'): return state
    log = state['log']
    log.append("Node: Analyzing Question...")
    if not state.get('parsed_choices'):
        state['error'] = "Cannot analyze question without parsed choices."; log.append(state['error']); return state

    formatted_choices = format_choices_for_prompt(state['parsed_choices'])
    prompt = f"""You are an expert programming problem analyst.
Analyze the following multiple-choice question about programming/software engineering. Do not attempt to answer it yet.
Focus on:
1. Rephrasing the core problem or concept being tested in one sentence.
2. Identifying key code snippets, terms, or programming principles involved.
3. Briefly assessing the type of question (e.g., syntax error, execution prediction, conceptual understanding, debugging logic).

Question:
{state['question']}

Choices:
{formatted_choices}

Your analysis:"""
    analysis = run_llm_api(prompt, API_MODEL_NAME)

    if "[[ERROR:" in analysis:
        log.append(f"ERROR detected during question analysis: {analysis}")
        state['error'] = f"API failed during question analysis: {analysis}"
    else:
        state['analyses']['question_analysis'] = analysis
        log.append(f"Question Analysis Result:\n{analysis[:300]}...")
    state['log'] = log
    return state

def evaluate_options_node(state: AgentState) -> AgentState:
    if state.get('error'): return state
    log = state['log']
    log.append("Node: Evaluating Options...")
    if not state.get('parsed_choices'):
        state['error'] = "Cannot evaluate options without parsed choices."; log.append(state['error']); return state

    formatted_choices = format_choices_for_prompt(state['parsed_choices'])
    question_analysis = state['analyses'].get('question_analysis', 'Analysis not available.')
    if "[[ERROR:" in question_analysis: question_analysis = "Previous analysis failed."

    prompt = f"""You are a meticulous and unbiased code and logic evaluator.
Based on the following question and its initial analysis, evaluate EACH multiple-choice option step-by-step. For every single option (A, B, C, D...):
1. State clearly whether the option seems correct or incorrect.
2. Provide a detailed, step-by-step explanation for your conclusion, referencing the question, code (if any), and relevant programming principles.
3. If code execution is relevant, trace the logic. If it's conceptual, explain the concept.
**CRITICAL**: Evaluate each option based purely on its merits relative to the question. Do NOT let the position (A vs B vs C vs D) influence your judgment or the thoroughness of your evaluation. Be equally critical of all options.

Question:
{state['question']}

Initial Analysis:
{question_analysis}

Choices to Evaluate:
{formatted_choices}

Your detailed evaluation for EACH option:"""
    evaluation = run_llm_api(prompt, API_MODEL_NAME)

    if "[[ERROR:" in evaluation:
        log.append(f"ERROR detected during options evaluation: {evaluation}")
        state['error'] = f"API failed during options evaluation: {evaluation}"
    else:
        state['analyses']['options_evaluation'] = evaluation
        log.append(f"Options Evaluation Result:\n{evaluation[:300]}...")
    state['log'] = log
    return state

def synthesize_critique_node(state: AgentState) -> AgentState:
    if state.get('error'): return state
    log = state['log']
    log.append("Node: Synthesizing & Critiquing...")
    if not state.get('parsed_choices'):
        state['error'] = "Cannot synthesize without parsed choices."; log.append(state['error']); return state

    formatted_choices = format_choices_for_prompt(state['parsed_choices'])
    question_analysis = state['analyses'].get('question_analysis', 'Analysis not available.')
    options_evaluation = state['analyses'].get('options_evaluation', 'Evaluation not available.')
    if "[[ERROR:" in question_analysis: question_analysis = "Previous analysis failed."
    if "[[ERROR:" in options_evaluation: options_evaluation = "Previous evaluation failed."

    prompt = f"""You are a lead analyst responsible for making a final recommendation based on prior analysis.
Review the initial question analysis and the detailed option-by-option evaluation provided below. Your task is to synthesize these findings into a final critique:
1. Clearly state which option (A, B, C, D...) appears to be the most correct based *solely* on the provided evaluations.
2. Briefly summarize the primary reason(s) why this option is favored, according to the evaluation.
3. Briefly mention the key reason(s) why the other options were deemed incorrect, according to the evaluation.
4. Assess the overall confidence level suggested by the evaluation (e.g., High, Medium, Low).

Question:
{state['question']}

Choices:
{formatted_choices}

Initial Analysis:
{question_analysis}

Option-by-Option Evaluation:
{options_evaluation}

Your Synthesis and Final Critique:"""
    critique = run_llm_api(prompt, API_MODEL_NAME)

    if "[[ERROR:" in critique:
        log.append(f"ERROR detected during critique synthesis: {critique}")
        state['error'] = f"API failed during critique synthesis: {critique}"
    else:
        state['critique'] = critique
        log.append(f"Synthesis/Critique Result:\n{critique[:300]}...")
    state['log'] = log
    return state

def final_decision_node(state: AgentState) -> AgentState:
    if state.get('error'):
        if not state.get('final_answer'): state['final_answer'] = "A"; state['log'].append("Setting final answer to default 'A' due to prior error.")
        return state

    log = state['log']
    log.append("Node: Making Final Decision...")
    if not state.get('parsed_choices'):
        state['error'] = "Cannot make decision without parsed choices."; log.append(state['error']); state['final_answer'] = "A"; return state

    critique = state.get('critique', 'Critique not available.')
    if "[[ERROR:" in critique or critique == 'Critique not available.':
        log.append(f"ERROR: Critique unavailable or contains error ({critique[:50]}...). Defaulting final answer.")
        state['error'] = "Critique unavailable/error."; state['final_answer'] = "A"; return state

    valid_choices = list(state['parsed_choices'].keys())

    prompt = f"""You are an expert analyst tasked with selecting the single best answer from a multiple-choice list based *only* on the provided critique, even if the critique is uncertain or finds flaws in all options.

Your task:
1. Review the 'Synthesis and Critique' below.
2. Identify which choice ({', '.join(valid_choices)}) is determined to be the most plausible or least incorrect according to the critique's reasoning.
3. You *MUST* output the single capital letter corresponding to that choice.
4. Output *ONLY* the letter. Do NOT include any other words, explanation, punctuation, or refusal. Just the single letter.

Synthesis and Critique:
{critique}

The single best choice letter is:"""

    decision_raw = run_llm_api(prompt, API_MODEL_NAME)

    if "[[ERROR:" in decision_raw:
        log.append(f"ERROR detected during final decision API call: {decision_raw}")
        state['error'] = f"API failed during final decision: {decision_raw}"
        state['final_answer'] = "A" # Fallback
        state['log'] = log
        return state

    final_answer = None
    log_msg = f"Attempting to parse decision from API output: '{decision_raw}'"
    match_strict = re.search(r'(?::\s*|>\s*|^)([A-Z])\b', decision_raw.strip())
    match_paren = re.search(r'\(([A-Z])\)', decision_raw)
    match_standalone = re.search(r'\b([A-Z])\b', decision_raw)
    match_punct = re.search(r'^([A-Z])[.)]', decision_raw.strip())
    cleaned_decision = decision_raw.strip().upper()

    if match_strict and match_strict.group(1) in valid_choices: final_answer = match_strict.group(1); log_msg += f" -> Parsed '{final_answer}' (strict)."
    elif match_paren and match_paren.group(1) in valid_choices: final_answer = match_paren.group(1); log_msg += f" -> Parsed '{final_answer}' (paren)."
    elif match_standalone and match_standalone.group(1) in valid_choices: final_answer = match_standalone.group(1); log_msg += f" -> Parsed '{final_answer}' (standalone)."
    elif match_punct and match_punct.group(1) in valid_choices: final_answer = match_punct.group(1); log_msg += f" -> Parsed '{final_answer}' (punct)."
    elif len(cleaned_decision) == 1 and cleaned_decision in valid_choices: final_answer = cleaned_decision; log_msg += f" -> Parsed '{final_answer}' (direct)."
    else: final_answer = "A"; log_msg += f" -> ERROR: Could not parse single letter. Defaulting 'A'."; state['error'] = f"Failed to parse final decision: '{decision_raw}'"

    state['final_answer'] = final_answer
    log.append(log_msg)
    log.append(f"--- Task {state['task_id']} Finished ---")
    state['log'] = log
    return state

In [62]:
# Graph Building Function

def build_graph():
    """Builds the LangGraph StateGraph using API calls."""
    graph_builder = StateGraph(AgentState)
    graph_builder.add_node("parse_input", parse_input_node)
    graph_builder.add_node("analyze_question", analyze_question_node)
    graph_builder.add_node("evaluate_options", evaluate_options_node)
    graph_builder.add_node("synthesize_critique", synthesize_critique_node)
    graph_builder.add_node("final_decision", final_decision_node)

    graph_builder.set_entry_point("parse_input")
    graph_builder.add_edge("parse_input", "analyze_question")
    graph_builder.add_edge("analyze_question", "evaluate_options")
    graph_builder.add_edge("evaluate_options", "synthesize_critique")
    graph_builder.add_edge("synthesize_critique", "final_decision")
    graph_builder.add_edge("final_decision", END)

    app = graph_builder.compile()
    return app

In [63]:
# Build Graph

app = build_graph()

In [64]:
test_df = pd.read_csv(TEST_DATA_PATH)
required_cols = ['task_id', 'question', 'choices']
if not all(col in test_df.columns for col in required_cols):
    raise ValueError(f"Test CSV must contain columns: {required_cols}")
display(test_df)

if PROCESS_LIMIT is not None and PROCESS_LIMIT > 0:
    test_df = test_df.head(PROCESS_LIMIT)

Unnamed: 0,task_id,question,choices
0,k10171,Question: What will be output of the following...,"['10', '9', '8', 'Error']"
1,k10182,Question: Consider line 3. Identify the compil...,"['No compilation error', 'Only a lexical error..."
2,k10184,Question: Assume the conflicts part (a) of thi...,['Equal precedence and left associativity; exp...
3,k10206,Question: What will be output if you will exec...,"['2.00000', '4.00000', '6.00000', 'Compilation..."
4,k10215,Question: Select the output for code :\nstatic...,"['amish', 'ANKIT', 'harsh', 'Compile time error']"
...,...,...,...
1248,k00687,Question: Which React lifecycle method is call...,"['componentWillMount', 'componentDidMount', 'c..."
1249,k00689,Question: What do you need to change about thi...,"['Remove this', 'Capitalize clock', 'Remove th..."
1250,k00691,Question: What happens when the following rend...,['Error. Cannot use direct JavaScript code in ...
1251,k00699,Question: How many types of components are the...,"['1', '2', '3', '4']"


In [None]:
# Process Questions

results = []
total_rows = len(test_df)

for index, row in test_df.iterrows():
    task_id = row['task_id']
    logging.info(f"--- Processing Task ID: {task_id} ({index + 1}/{total_rows}) ---")
    initial_state = {
        "task_id": task_id,
        "question": str(row['question']),
        "choices_str": str(row['choices']),
        "parsed_choices": {}, "analyses": {}, "critique": None,
        "final_answer": None, "error": None, "log": []
    }

    final_state = None
    try:
        final_state = app.invoke(initial_state, {"recursion_limit": 10})

        current_answer = final_state.get('final_answer', 'A')
        current_error = final_state.get('error')
        current_log = final_state.get('log', [])

        results.append({
            "task_id": final_state['task_id'],
            "answer": current_answer
        })

        if current_error:
            logging.warning(f"Task {task_id} completed with error state: {current_error}")
            print(f"--- LOG FOR ERRORED TASK {task_id} ---\n" + "\n".join(current_log) + "\n--- END LOG ---")

    except Exception as e:
        logging.error(f"CRITICAL error invoking graph for task {task_id}: {e}", exc_info=True)
        results.append({"task_id": task_id, "answer": "A"}) # Default 'A'
        log_to_print = final_state['log'] if final_state and final_state.get('log') else initial_state.get('log', [])
        if log_to_print: print(f"--- LOG FOR CRASHED TASK {task_id} ---\n" + "\n".join(log_to_print) + "\n--- END LOG ---")

logging.info(f"Finished processing {len(results)} questions.")

--- LOG FOR ERRORED TASK k10323 ---
--- Starting Task k10323 ---
Parsing input choices: ['the child process kills the parent process', 'the parent process kills the child process', 'handler function executes as the signal arrives to the parent process', 'none of the mentioned']
Parsed Choices: {'A': 'the child process kills the parent process', 'B': 'the parent process kills the child process', 'C': 'handler function executes as the signal arrives to the parent process', 'D': 'none of the mentioned'}
Node: Analyzing Question...
Question Analysis Result:
## Question Analysis

1. **Core Problem:** The question tests understanding of process creation (fork), signal handling (specifically SIGKILL), and the consequences of a child process terminating its parent.

2. **Key Elements:**
   * `fork()`: Creates a child process.
   * `kill(getppid(), SIGKILL)...
Node: Evaluating Options...
ERROR detected during options evaluation: [[ERROR: API call failed after retries: list index (0) out of rang

In [None]:
results