# Phase 1: Clarification - User Requirements and Brief Generation

**Goal**: Clarify user requirements and transform them into a structured `SimulationBrief`.

This notebook implements the first phase of our MD setup workflow:

1. **User Clarification** - Determines if additional information is needed
2. **Brief Generation** - Transforms the conversation into a structured simulation brief

Based on `deep_research_from_scratch/notebooks/1_scoping.ipynb`


In [None]:
# Load environment variables and set up auto-reload
from dotenv import load_dotenv
load_dotenv()

%load_ext autoreload
%autoreload 2


## State and Schemas

First, we'll define the state objects and schemas for our clarification process.

The state object stores and passes context between different phases.

**Note**: We use `%%writefile` to save each code cell to a file. This lets us reuse the code in later notebooks and leaves us with importable, deployable modules!


In [None]:
%%writefile ../src/mcp_md/state_scope.py

"""State Definitions and Pydantic Schemas for Clarification Phase.

This module defines the state objects and structured schemas used for
the clarification workflow, including user clarification and simulation brief generation.
"""

import operator
from typing import Optional, Annotated, Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph import MessagesState
from langgraph.graph.message import add_messages
from pydantic import BaseModel, Field

# ===== STATE DEFINITIONS =====

class AgentInputState(MessagesState):
    """Input state for the full agent; adds no fields beyond those inherited from MessagesState."""

class AgentState(MessagesState):
    """Main state for the full multi-phase MD setup system.
    
    Extends MessagesState with additional fields for MD setup coordination.
    Fields are grouped below by workflow phase.
    """
    # NOTE(review): MessagesState is presumably a TypedDict upstream; confirm
    # whether the class-level defaults assigned below are actually applied by
    # LangGraph or are only informational.

    # Phase 1: Clarification
    # String rendering of the brief (the brief node stores str(model_dump())).
    research_brief: Optional[str] = None  # Compatibility with deep_research pattern
    # Quoted forward reference: SimulationBrief is defined later in this module.
    simulation_brief: Optional['SimulationBrief'] = None
    
    # Phase 2: Setup (will be added in Notebook 2)
    # Separate message channel for the setup phase, reduced with add_messages.
    setup_messages: Annotated[Sequence[BaseMessage], add_messages] = []
    # Reduced with operator.add, i.e. list updates are concatenated.
    decision_log: Annotated[list[dict], operator.add] = []
    outputs: dict = {}
    
    # Phase 3: Validation & Export (will be added in Notebook 4)
    qc_results: dict = {}
    exports: dict = {}
    final_report: str = ""

# ===== STRUCTURED OUTPUT SCHEMAS =====

class ClarifyWithUser(BaseModel):
    """Schema for user clarification decision and questions."""

    # The class docstring and the field descriptions are part of the schema
    # handed to the model for structured output, so they are kept verbatim.
    # All three fields are required (`...` marks "no default").

    # True  -> `question` carries the text to send back to the user.
    # False -> `verification` carries the acknowledgement before setup starts.
    need_clarification: bool = Field(
        ..., description="Whether the user needs to be asked a clarifying question."
    )
    question: str = Field(
        ..., description="A question to ask the user to clarify the simulation requirements."
    )
    verification: str = Field(
        ...,
        description="Verification message that we will start setup after user provides information.",
    )

class SimulationBrief(BaseModel):
    """Schema for structured simulation brief generation."""

    # Every field has a default, so the model can be populated from a
    # conversation that only mentions some of the settings.

    # Structure: all optional, None when the user did not supply the item.
    pdb_id: Optional[str] = Field(default=None, description="PDB ID (e.g., 1ABC)")
    fasta_sequence: Optional[str] = Field(default=None, description="FASTA sequence for de novo generation")
    ligand_smiles: Optional[str] = Field(default=None, description="Ligand SMILES string")

    # Simulation parameters
    ph: float = Field(default=7.4, description="pH value")
    salt_concentration: float = Field(default=0.15, description="Salt concentration (M)")
    water_model: str = Field(default="TIP3P", description="Water model")
    # Unit symbol repaired: the description previously contained the
    # mis-decoded byte sequence "Ã…" instead of the angstrom sign.
    box_padding: float = Field(default=12.0, description="Box padding (Å)")
    force_field: str = Field(default="ff19SB", description="Protein force field")

    # Workflow preferences
    use_boltz2_docking: bool = Field(default=True, description="Use Boltz-2 for docking")
    refine_with_smina: bool = Field(default=False, description="Refine with Smina")
    output_formats: list[str] = Field(default=["amber"], description="Output formats")


## Prompts

Define prompts for clarification and brief generation.


In [None]:
%%writefile ../src/mcp_md/prompts.py

"""Prompt templates for the MD setup system.

This module contains all prompt templates used across the workflow components.
"""

# Prompt for the clarification decision.
# str.format fields: {messages} (the conversation rendered as text) and {date}.
# The doubled braces ({{ ... }}) render as literal braces, so the two JSON
# examples survive formatting unchanged.
clarify_requirements_prompt = """
These are the messages exchanged so far with the user requesting MD setup:
<Messages>
{messages}
</Messages>

Today's date is {date}.

Assess whether you need to ask a clarifying question, or if the user has provided enough information to start MD system setup.

Required information for MD setup:
- Protein structure (PDB ID or FASTA sequence)
- Ligand (SMILES string) if protein-ligand complex

If clarification needed, return:
{{"need_clarification": true, "question": "<your question>", "verification": ""}}

If no clarification needed, return:
{{"need_clarification": false, "question": "", "verification": "<acknowledgement>"}}
"""

# Prompt for extracting the simulation brief from the conversation.
# str.format field: {messages} only; this template has no {date} field.
# The listed keys are the same names as the SimulationBrief fields.
generate_simulation_brief_prompt = """
Extract all simulation requirements from these messages:
<Messages>
{messages}
</Messages>

Return a structured JSON with: pdb_id, fasta_sequence, ligand_smiles, ph, salt_concentration, water_model, box_padding, force_field, use_boltz2_docking, refine_with_smina, output_formats.
"""


## Clarification Agent

Now we implement the clarification workflow as a graph with two nodes:
1. `clarify_requirements` - Determines whether more information is needed from the user
2. `generate_simulation_brief` - Creates the structured `SimulationBrief`


In [None]:
%%writefile ../src/mcp_md/clarification_agent.py

"""User Clarification and Simulation Brief Generation."""

from datetime import datetime
from typing import Literal

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, AIMessage, get_buffer_string
from langgraph.graph import StateGraph, START, END
from langgraph.types import Command

from mcp_md.prompts import clarify_requirements_prompt, generate_simulation_brief_prompt
from mcp_md.state_scope import AgentState, ClarifyWithUser, SimulationBrief, AgentInputState

def get_today_str() -> str:
    """Return today's local date formatted like ``Mon Jan 5, 2026``.

    The day of the month is written without zero padding. It is taken from
    ``datetime.day`` instead of the ``%-d`` strftime directive, because ``%-d``
    is a platform-specific extension that is not accepted on Windows.

    Returns:
        The current date as ``"<weekday abbr> <month abbr> <day>, <year>"``.
    """
    today = datetime.now()
    return f"{today:%a %b} {today.day}, {today:%Y}"

# Module-level chat model shared by both graph nodes below; it is created
# once at import time with temperature 0.0.
model = init_chat_model(model="openai:gpt-4o", temperature=0.0)

def clarify_requirements(state: AgentState) -> Command[Literal["generate_simulation_brief", "__end__"]]:
    """Decide whether to ask the user a question or move on to brief generation.

    The conversation in ``state["messages"]`` is rendered to text, inserted
    into ``clarify_requirements_prompt`` together with today's date, and sent
    to the model, which answers with a ``ClarifyWithUser`` object.

    Args:
        state: Current graph state; ``state["messages"]`` must be present.

    Returns:
        A ``Command`` that appends one ``AIMessage`` to ``messages`` and routes
        either to ``END`` (carrying the clarifying question) or to the
        ``"generate_simulation_brief"`` node (carrying the verification text).
    """
    decision_model = model.with_structured_output(ClarifyWithUser)

    transcript = get_buffer_string(messages=state["messages"])
    prompt_text = clarify_requirements_prompt.format(
        messages=transcript,
        date=get_today_str(),
    )
    decision = decision_model.invoke([HumanMessage(content=prompt_text)])

    # Enough information: acknowledge and hand over to brief generation.
    if not decision.need_clarification:
        acknowledgement = AIMessage(content=decision.verification)
        return Command(goto="generate_simulation_brief", update={"messages": [acknowledgement]})

    # Otherwise stop the graph here and surface the question to the user.
    follow_up = AIMessage(content=decision.question)
    return Command(goto=END, update={"messages": [follow_up]})

def generate_simulation_brief(state: AgentState):
    """Turn the conversation into a structured ``SimulationBrief``.

    Args:
        state: Current graph state; a missing ``"messages"`` key is treated as
            an empty conversation.

    Returns:
        A state update with three keys: ``simulation_brief`` (the model's
        ``SimulationBrief``), ``research_brief`` (``str()`` of its dict dump)
        and ``setup_messages`` (one ``HumanMessage`` embedding the brief as
        JSON).
    """
    brief_model = model.with_structured_output(SimulationBrief)

    transcript = get_buffer_string(state.get("messages", []))
    # `date` is passed along as well; str.format ignores keyword arguments
    # that the template does not reference.
    prompt_text = generate_simulation_brief_prompt.format(
        messages=transcript,
        date=get_today_str(),
    )
    brief = brief_model.invoke([HumanMessage(content=prompt_text)])

    brief_as_text = str(brief.model_dump())
    kickoff = HumanMessage(content=f"Starting MD setup with: {brief.model_dump_json()}")

    return {
        "simulation_brief": brief,
        "research_brief": brief_as_text,
        "setup_messages": [kickoff],
    }

# Build graph
# AgentState is the full state schema; AgentInputState is the input schema.
clarification_builder = StateGraph(AgentState, input_schema=AgentInputState)
clarification_builder.add_node("clarify_requirements", clarify_requirements)
clarification_builder.add_node("generate_simulation_brief", generate_simulation_brief)
# Only the entry and exit edges are declared statically. No edge leaves
# "clarify_requirements" because that node returns a Command whose `goto`
# selects either END or "generate_simulation_brief".
clarification_builder.add_edge(START, "clarify_requirements")
clarification_builder.add_edge("generate_simulation_brief", END)
# Compiled graph object exported by this module.
clarification_graph = clarification_builder.compile()
