In [1]:
import asyncio
import sys
import os
from pathlib import Path

# Add the project root to Python path.
# The location can be overridden with the SENTIENT_PROJECT_ROOT environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default for backward compatibility.
project_root = Path(
    os.environ.get(
        "SENTIENT_PROJECT_ROOT",
        "/Users/salahalzubi/cursor_projects/SentientResearchAgent",
    )
)
_src_dir = str(project_root / "src")
# Re-running this cell must not stack duplicate entries on sys.path, so any
# earlier copy is dropped before the path is put at the front again.
sys.path[:] = [entry for entry in sys.path if entry != _src_dir]
sys.path.insert(0, _src_dir)

# Basic imports to test the fix
from agno.agent import Agent as AgnoAgent
from agno.models.litellm import LiteLLM
from pydantic import BaseModel, Field
from typing import List, Optional
import json
import re

# Define the models directly to avoid circular imports
class SubTask(BaseModel):
    # One unit of work inside a plan, as produced by the planner agent.

    # What the sub-task must achieve.
    goal: str = Field(
        ...,
        description="Precise description of the sub-task goal.",
    )
    # Category of work the sub-task performs.
    task_type: str = Field(
        ...,
        description="Type of task (e.g., 'WRITE', 'THINK', 'SEARCH').",
    )
    # Whether the sub-task runs directly or is decomposed further.
    node_type: str = Field(
        ...,
        description="Node type ('EXECUTE' for atomic, 'PLAN' for complex).",
    )
    # Indices of sibling sub-tasks this one depends on; a fresh empty list by default.
    depends_on_indices: Optional[List[int]] = Field(
        default_factory=list,
    )

class PlanOutput(BaseModel):
    # Container for a complete plan: the sub-tasks in the order they were planned.
    sub_tasks: List[SubTask] = Field(
        ...,
        description="List of planned sub-tasks.",
    )

# Your JSON extraction function (from the fix)
def extract_json_block_with_backticks(raw_response: str) -> Optional[str]:
    """Return the first valid JSON payload found inside a triple-backtick fence.

    Args:
        raw_response: Raw LLM output, possibly containing fenced code blocks.

    Returns:
        The stripped text of the first fenced block that starts with ``{`` or
        ``[`` and parses as JSON, or ``None`` when no such block exists.
    """
    fence_regexes = (
        r'```(?:json)?\s*\n([\s\S]*?)\n?```',  # Extract content inside backticks
    )

    for fence_regex in fence_regexes:
        for fenced_body in re.findall(fence_regex, raw_response, re.IGNORECASE):
            candidate = fenced_body.strip()
            # Only arrays and objects are of interest; skip any other fenced text.
            if not candidate.startswith(('{', '[')):
                continue
            try:
                json.loads(candidate)  # Validation only; the parsed value is discarded
            except json.JSONDecodeError:
                continue
            print(f"✅ Found valid JSON code block: {len(candidate)} chars")
            return candidate

    print("❌ No valid JSON block found")
    return None

# System prompt for the hierarchical planner agent. It describes the input
# schema, the decomposition rules, and four worked examples delimited by
# [BEGIN]/[END]. The ```json fence around the first example's input is now
# closed so the markdown in the prompt is balanced.
DEEP_RESEARCH_PLANNER_SYSTEM_MESSAGE = """You are an elite Hierarchical Planning Agent. Your sole purpose is to receive a complex question or research goal and decompose it into a precise, logical, and actionable sequence of sub-tasks. You specialize in planning for information-retrieval, reasoning, and synthesis tasks. You operate with surgical precision, ensuring every plan is coherent, efficient, and directly aimed at producing a complete and accurate answer. You do not execute tasks; you only create the plan.

**Input Schema:**

You will receive input in JSON format with the following fields:

*   `current_task_goal` (string, mandatory): The specific goal for this planning instance.
*   `overall_objective` (string, mandatory): The ultimate high-level goal of the entire operation. This helps maintain alignment.
*   `parent_task_goal` (string, optional): The goal of the immediate parent task that led to this decomposition. Null if this is the root task.
*   `planning_depth` (integer, optional): Current recursion depth (e.g., 0 for initial, 1 for sub-tasks).
*   `execution_history_and_context` (object, mandatory):
    *   `prior_sibling_task_outputs` (array of objects, optional): Outputs from tasks at the same hierarchical level that executed before this planning step. Each object contains:
        *   `task_goal` (string): Goal of the sibling task.
        *   `outcome_summary` (string): Brief summary of what the sibling task achieved or produced.
        *   `full_output_reference_id` (string, optional): ID to fetch the full output if needed.
    *   `relevant_ancestor_outputs` (array of objects, optional): Key outputs from parent or higher-level tasks crucial for `current_task_goal`. Each object similar to sibling outputs.
    *   `global_knowledge_base_summary` (string, optional): Brief summary/keywords of available global knowledge.
*   `replan_request_details` (object, optional): If this is a re-plan, this object contains structured feedback. Null otherwise.
    *   `failed_sub_goal` (string): The specific sub-goal related to `current_task_goal` that previously failed.
    *   `reason_for_failure_or_replan` (string): Detailed explanation of the failure or re-plan need.
    *   `previous_attempt_output_summary` (string, optional): Summary of the failed attempt's output.
    *   `specific_guidance_for_replan` (string, optional): Concrete suggestions for the re-plan.
*   `global_constraints_or_preferences` (array of strings, optional): E.g., "Prioritize accuracy", "Maximum 3 sub-tasks".

**Core Task:**

1.  Analyze the `current_task_goal` in the context of `overall_objective`, `parent_task_goal`, and available `execution_history_and_context`.
2.  Decompose `current_task_goal` into a list of **3 to 6 granular sub-tasks.** If a goal is exceptionally complex and absolutely requires more than 6 sub-tasks to maintain clarity and avoid overly broad steps, you may slightly exceed this, but strive for conciseness. Aim for sub-tasks that represent meaningful, coherent units of work. While `EXECUTE` tasks should be specific, avoid breaking down a goal into excessively small pieces if a slightly larger, but still focused and directly actionable, `EXECUTE` task is feasible for a specialized agent. Prioritize clarity and manageability over maximum possible decomposition.
3.  For each sub-task, define:
    *   `goal` (string): The specific goal. Ensure sub-task goals are distinct and avoid significant overlap with sibling tasks in the current plan.
    *   `task_type` (string): 'WRITE', 'THINK', or 'SEARCH'.
    *   `node_type` (string): 'EXECUTE' (atomic) or 'PLAN' (needs more planning).
    *   `depends_on_indices` (list of integers, optional): A list of 0-based indices of other sub-tasks *in the current list of sub-tasks you are generating* that this specific sub-task directly depends on. Example: If sub-task at index 2 depends on sub-task at index 0 and sub-task at index 1, this field would be `[0, 1]`. If a sub-task can start as soon as the parent plan is approved (i.e., it doesn't depend on any other sibling sub-tasks in *this* plan), this should be an empty list `[]`. Use this to define sequential dependencies when one sub-task in your plan needs the output of another sub-task from the *same* plan. Ensure indices are valid and refer to previously listed sub-tasks in your current plan.
4.  **Task Ordering and Dependencies**:
    *   List sub-tasks in a logical order.
    *   Use `depends_on_indices` to explicitly state if a sub-task requires the completion of one or more *other sub-tasks from the current plan* before it can start.
    *   If tasks are largely independent and can run in parallel, their `depends_on_indices` should be `[]`.

**Re-planning Logic**: 

If `replan_request_details` is provided:
    *   Pay **critical attention** to `reason_for_failure_or_replan` and `specific_guidance_for_replan`.
    *   Your new plan **MUST** address the failure by:
        *   Being more granular for the `failed_sub_goal`.
        *   Altering the approach (e.g., different `task_type`s).
        *   Suggesting different information gathering if context was missing.
        *   Modifying sub-task goals based on `specific_guidance_for_replan`.
        *   Adjusting `depends_on_indices` if the previous dependency structure was flawed.
    *   Ensure the new plan for `current_task_goal` explicitly mitigates the previous failure.

**Planning Tips (Leveraging New Input):**

1.  **Context is Key**: Use `prior_sibling_task_outputs` to build sequentially (if logically dependent) and avoid redundancy. Leverage `relevant_ancestor_outputs`.
2.  **Mutual Exclusivity & Complementation**:
    *   Strive for sub-tasks that cover different aspects of the `current_task_goal` without significant overlap. They should be complementary, together achieving the parent goal.
    *   Before finalizing sub-tasks, review them as a set: Do they make sense together? Is there redundancy? Are there gaps? Are dependencies correctly defined using `depends_on_indices`?
3.  **CRITICAL - Balanced Granularity for SEARCH Tasks**:
    *   **`SEARCH/EXECUTE` Specificity**: A `SEARCH/EXECUTE` sub-task goal **MUST** be so specific that it typically targets a single fact, statistic, definition, or a very narrow aspect of a topic.
        *   *Good `SEARCH/EXECUTE` examples*: "Find the 2023 import tariff rate for Chinese-made solar panels in the US.", "List the main arguments for the Jones Act."
        *   *Bad `SEARCH/EXECUTE` examples (these should be `SEARCH/PLAN` or broken down)*: "Research US solar panel tariffs.", "Understand the Jones Act."
    *   **Avoiding Over-Fragmentation**: While specificity is key, if multiple *very small, extremely closely related pieces of data* can be retrieved with a single, well-crafted, targeted search query (and an agent can easily parse them), you can group them into one `SEARCH/EXECUTE` task. Example: Instead of three tasks "Find 2022 EV sales", "Find 2023 EV sales", "Find 2024 EV sales", one task "Find annual US EV sales figures for 2022, 2023, and 2024" is acceptable if the search agent can handle it. However, do not combine distinct conceptual questions.
    *   **When to use `SEARCH/PLAN`**: If a research sub-goal still requires investigating multiple *distinct conceptual areas* or is too broad for one or two highly targeted queries (even if slightly grouped as above), that sub-task **MUST** be `task_type: 'SEARCH'` and `node_type: 'PLAN'`. This ensures it gets further decomposed.

**Required Output Attributes per Sub-Task:**
`goal`, `task_type` (string: 'WRITE', 'THINK', or 'SEARCH'), `node_type` (string: 'EXECUTE' or 'PLAN'), `depends_on_indices` (list of integers).

**Output Format:**
- Respond ONLY with a JSON list of sub-task objects.
- Or an empty list if the `current_task_goal` cannot or should not be broken down further (e.g., it's already atomic enough given the context).

---
### Examples

[BEGIN]
**Input:**
```json
{
  "current_task_goal": "Explain how the invention of the transistor led to the development of the modern internet.",
  "overall_objective": "Answer a user's question about technological history.",
  "execution_history_and_context": {}
}
```

**Output:**
[
  {
    "goal": "Find the date and primary function of the transistor's invention, focusing on its role in replacing vacuum tubes.",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Research how transistors enabled the creation of smaller, more reliable, and more powerful computers via integrated circuits (microchips).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": [0]
  },
  {
    "goal": "Research the origins of ARPANET and identify its core requirement for a network of interconnected, powerful computers at various nodes.",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": [1]
  },
  {
    "goal": "Synthesize the findings to construct the causal chain: transistors led to powerful/small computers (via ICs), which were a necessary precondition for a distributed network like ARPANET, the precursor to the internet.",
    "task_type": "THINK",
    "node_type": "EXECUTE",
    "depends_on_indices": [2]
  },
  {
    "goal": "Write a clear, step-by-step explanation answering the original question.",
    "task_type": "WRITE",
    "node_type": "EXECUTE",
    "depends_on_indices": [3]
  }
]

[END]

[BEGIN]
**Input:**
{
  "current_task_goal": "Compare and contrast the economic policies of Reaganomics in the 1980s and Clintonomics in the 1990s, focusing on their stated goals and impact on the US national debt.",
  "overall_objective": "Answer a user's question about economic policy.",
  "execution_history_and_context": {}
}

**Output:**
[
  {
    "goal": "Identify the core principles and stated goals of Reaganomics (e.g., supply-side economics, tax cuts, deregulation).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Identify the core principles and stated goals of Clintonomics (e.g., deficit reduction, targeted investments, trade liberalization).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Find US national debt figures for the periods 1981-1989 and 1993-2001.",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Analyze and summarize the similarities and differences in the stated goals and principles of the two economic policies.",
    "task_type": "THINK",
    "node_type": "EXECUTE",
    "depends_on_indices": [0, 1]
  },
  {
    "goal": "Synthesize the policy principles and debt figures to compare the actual impact of each administration's policies on the national debt.",
    "task_type": "THINK",
    "node_type": "EXECUTE",
    "depends_on_indices": [0, 1, 2]
  },
  {
    "goal": "Write a final answer that first compares the policies' goals and then contrasts their effects on the national debt, citing the data found.",
    "task_type": "WRITE",
    "node_type": "EXECUTE",
    "depends_on_indices": [3, 4]
  }
]

[END]

[BEGIN]
**Input:**

{
  "current_task_goal": "What is Quantum Computing, and what are its most significant potential benefits and risks?",
  "overall_objective": "Provide a comprehensive but accessible explanation of a complex topic.",
  "execution_history_and_context": {}
}

**Output:**
[
  {
    "goal": "Find a clear, concise definition of quantum computing, including its core principles like superposition and entanglement.",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Identify 3-4 of the most significant potential benefits and applications of quantum computing (e.g., drug discovery, financial modeling, materials science).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Identify 3-4 of the most significant risks or challenges associated with quantum computing (e.g., breaking current encryption, high error rates, decoherence).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "Synthesize the collected information to structure a balanced answer: first the definition, then the benefits, and finally the risks.",
    "task_type": "THINK",
    "node_type": "EXECUTE",
    "depends_on_indices": [0, 1, 2]
  },
  {
    "goal": "Write the final explanation in clear, accessible language, suitable for a non-expert audience.",
    "task_type": "WRITE",
    "node_type": "EXECUTE",
    "depends_on_indices": [3]
  }
]

[END]

[BEGIN]
**Input:**

{
  "current_task_goal": "What are the primary challenges and proposed solutions for establishing a sustainable human colony on Mars?",
  "overall_objective": "Answer a user's question about space colonization.",
  "execution_history_and_context": {}
}

**Output:**
[
  {
    "goal": "Identify the top 3-4 primary survival challenges for a Mars colony (e.g., radiation, thin atmosphere/pressure, resource scarcity, psychological effects).",
    "task_type": "SEARCH",
    "node_type": "EXECUTE",
    "depends_on_indices": []
  },
  {
    "goal": "For each identified challenge, research the leading proposed solutions (e.g., for radiation: subsurface habitats, magnetic shielding; for resources: In-Situ Resource Utilization (ISRU) for water and oxygen).",
    "task_type": "SEARCH",
    "node_type": "PLAN",
    "depends_on_indices": [0]
  },
  {
    "goal": "Synthesize the research by mapping each challenge directly to its most promising proposed solution(s).",
    "task_type": "THINK",
    "node_type": "EXECUTE",
    "depends_on_indices": [1]
  },
  {
    "goal": "Write a structured answer that first lists the primary challenges and then, for each challenge, explains the corresponding proposed solutions.",
    "task_type": "WRITE",
    "node_type": "EXECUTE",
    "depends_on_indices": [2]
  }
]
[END]
"""


In [2]:
# Enhanced extraction function that handles more cases
def _looks_like_plan(parsed) -> bool:
    """Return True when decoded JSON has the shape of a planner result.

    A planner result is either a non-empty list whose first item is a dict
    with a 'goal' key, or a dict with a 'sub_tasks' key.
    """
    if isinstance(parsed, list):
        return bool(parsed) and isinstance(parsed[0], dict) and 'goal' in parsed[0]
    return isinstance(parsed, dict) and 'sub_tasks' in parsed


def _iter_embedded_json(text: str):
    """Yield (parsed, source_text, start) for each JSON array/object embedded in text.

    Every '[' or '{' is tried as the start of a JSON document with
    ``json.JSONDecoder.raw_decode``, which handles nesting and brackets inside
    string values correctly. After a successful decode the scan resumes after
    the decoded value, so nested values are not reported separately.
    """
    decoder = json.JSONDecoder()
    openers = re.compile(r'[\[{]')
    position = 0
    while True:
        opener = openers.search(text, position)
        if opener is None:
            return
        start = opener.start()
        try:
            parsed, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not valid JSON from this bracket; try the next opening bracket.
            position = start + 1
            continue
        yield parsed, text[start:end], start
        position = end


def extract_json_from_response(raw_response: str) -> Optional[str]:
    """
    Enhanced JSON extraction that handles multiple formats.

    Strategies, in order:
      1. JSON inside triple-backtick fences (any valid array/object).
      2. After removing ``<think>...</think>`` blocks, the first embedded JSON
         value that looks like a plan (task list or ``{"sub_tasks": ...}``),
         including nested and single-line JSON.
      3. As a last resort, the first embedded JSON array/object that starts at
         the beginning of a line, without shape validation.

    Args:
        raw_response: Raw text returned by the model. ``None``, non-string
            and blank values are treated as "nothing to extract".

    Returns:
        The JSON text that was found, or ``None`` when nothing valid exists.
    """
    if not isinstance(raw_response, str) or not raw_response.strip():
        print("❌ No valid JSON found with any method")
        return None

    print(f"🔍 Analyzing response (length: {len(raw_response)})")

    # Method 1: JSON in triple backticks
    backtick_patterns = [
        r'```(?:json)?\s*\n([\s\S]*?)\n?```',  # Standard backticks
        r'```([\s\S]*?)```',  # Simple backticks
    ]

    for pattern in backtick_patterns:
        for match in re.findall(pattern, raw_response, re.IGNORECASE):
            cleaned = match.strip()
            if not cleaned.startswith(('[', '{')):
                continue
            try:
                json.loads(cleaned)
            except json.JSONDecodeError:
                continue
            print(f"✅ Found JSON in backticks: {len(cleaned)} chars")
            return cleaned

    # Method 2: Remove <think> tags and look for JSON arrays/objects
    cleaned_response = re.sub(r'<think>.*?</think>', '', raw_response, flags=re.DOTALL | re.IGNORECASE)
    print(f"🧹 After removing <think> tags: {len(cleaned_response)} chars")

    # Decode once; both remaining strategies work on the same candidates.
    candidates = list(_iter_embedded_json(cleaned_response))

    for parsed, source_text, _ in candidates:
        if _looks_like_plan(parsed):
            kind = 'array' if isinstance(parsed, list) else 'object'
            print(f"✅ Found JSON {kind} without backticks: {len(source_text)} chars")
            return source_text

    # Method 3: fall back to any JSON value that begins a line (only
    # whitespace before it on that line); mid-sentence fragments such as
    # "see [1]" are ignored.
    for _, source_text, start in candidates:
        line_start = cleaned_response.rfind('\n', 0, start) + 1
        if not cleaned_response[line_start:start].strip():
            print(f"✅ Found JSON by line parsing: {len(source_text)} chars")
            return source_text

    print("❌ No valid JSON found with any method")
    return None

# Enhanced test function with better debugging
async def debug_deepresearch_response():
    """Debug the actual response from DeepResearchPlanner.

    Runs the planner prompt against the model without a response model,
    prints the raw output, extracts JSON with ``extract_json_from_response``
    and tries to build a ``PlanOutput`` from it.

    Returns:
        The ``PlanOutput`` when one could be built, otherwise ``None``
        (no JSON found, JSON of an unexpected shape, validation failure, or
        any error raised while calling the model).
    """

    print("🔍 DEBUGGING DeepResearchPlanner Response")
    print("="*60)

    model = LiteLLM(id="fireworks_ai/accounts/fireworks/models/deepseek-r1-0528")

    # Test WITHOUT response_model first to see raw output
    agent_raw = AgnoAgent(
        model=model,
        system_message=DEEP_RESEARCH_PLANNER_SYSTEM_MESSAGE,
        name="DeepResearchPlanner_Debug",
        # No response_model - get raw output
        markdown=False
    )

    test_prompt = """Overall Objective: Research machine learning in healthcare

Current Task Goal: Create a 3-task research plan to analyze ML applications in diagnostics, treatment planning, and drug discovery

Based on the 'Current Task Goal', generate a plan to achieve it."""

    try:
        print("📤 Getting raw response from model...")
        result = await agent_raw.arun(test_prompt)

        if hasattr(result, 'content'):
            content = result.content
            if asyncio.iscoroutine(content):
                content = await content
        else:
            content = result

        # The extraction and the diagnostics below need text; a missing or
        # non-string payload would otherwise fail on len() / `in` checks.
        if content is None:
            content = ""
        elif not isinstance(content, str):
            content = str(content)

        print("📄 Full raw response:")
        print("-" * 50)
        print(content)
        print("-" * 50)

        # An opening <think> without its closing tag means the output stopped
        # inside the reasoning section, so no final answer was produced.
        lowered = content.lower()
        if '<think>' in lowered and '</think>' not in lowered:
            print("⚠️ Unclosed <think> block: the response was likely truncated before the final answer")

        # Try enhanced extraction
        print("\n🔧 Trying enhanced JSON extraction...")
        extracted = extract_json_from_response(content)

        if extracted:
            print("\n✅ Extracted JSON:")
            print(extracted)

            # Try to convert to PlanOutput
            try:
                parsed_data = json.loads(extracted)
                if isinstance(parsed_data, list):
                    sub_tasks = [SubTask(**item) for item in parsed_data]
                    plan = PlanOutput(sub_tasks=sub_tasks)
                    print(f"\n🎉 SUCCESS: Created PlanOutput with {len(plan.sub_tasks)} tasks")
                    return plan
                elif isinstance(parsed_data, dict) and 'sub_tasks' in parsed_data:
                    plan = PlanOutput(**parsed_data)
                    print("\n🎉 SUCCESS: Created PlanOutput from dict")
                    return plan
                else:
                    # Previously this case returned None without any explanation.
                    print("\n❌ Extracted JSON is neither a task list nor an object with 'sub_tasks'")
            except Exception as e:
                print(f"\n❌ Failed to create PlanOutput: {e}")
        else:
            print("\n❌ No JSON could be extracted")

            # Let's see if we can find any JSON-like patterns
            print("\n🔍 Looking for any JSON-like patterns...")

            # Look for common JSON indicators
            if '[' in content and ']' in content:
                print("   Found array brackets")
                start = content.find('[')
                end = content.rfind(']') + 1
                if start < end:
                    potential = content[start:end]
                    print(f"   Potential array: {potential[:200]}...")

            if '{' in content and '}' in content:
                print("   Found object brackets")
                start = content.find('{')
                end = content.rfind('}') + 1
                if start < end:
                    potential = content[start:end]
                    print(f"   Potential object: {potential[:200]}...")

        return None

    except Exception as e:
        print(f"❌ Debug failed: {e}")
        import traceback
        traceback.print_exc()
        return None


In [3]:
# Run the debugging coroutine and keep its result: a PlanOutput, or None when
# no plan could be built. Top-level `await` relies on the notebook kernel
# already running an event loop.
debug_result = await debug_deepresearch_response()

🔍 DEBUGGING DeepResearchPlanner Response
📤 Getting raw response from model...
📄 Full raw response:
--------------------------------------------------
<think>
We are given an overall objective and a current task goal that explicitly asks for a research plan focused on three areas.
 The task goal is: "Create a 3-task research plan to analyze ML applications in diagnostics, treatment planning, and drug discovery"
 This is a planning task (PLAN node) and the task_type is THINK because we are creating a plan, not directly searching for information.
 However, note that the current task goal is to "Create" a plan. Therefore, we are generating a plan for a plan? 
 But let's clarify: the current task is to create a research plan that has 3 tasks (each covering one area). 
 Since the output of this task will be a plan (a list of tasks) for researching the three areas, we are acting in a planning capacity.

 However, note the role: We are a Hierarchical Planning Agent. We are to decompose the cur

  PydanticSerializationUnexpectedValue(Expected 9 fields but got 5: Expected `Message` - serialized value may not be as expected [input_value=Message(content='<think>\...er_specific_fields=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [input_value=Choices(finish_reason='le...r_specific_fields=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(


In [4]:
# Show what the debug run returned; None means no PlanOutput could be built.
print(debug_result)

None
