In [131]:
from narrative_llm_agent.crews.job_crew import JobCrew
from langchain_openai import ChatOpenAI, OpenAI
import openai
import os
from crewai import Crew, Agent, Task
from narrative_llm_agent.agents.kbase_agent import KBaseAgent
from langchain.tools import tool
import json
from pydantic import BaseModel
from narrative_llm_agent.agents.metadata import MetadataAgent
from narrative_llm_agent.agents.analyst import AnalystAgent
import re

In [132]:
from pydantic import BaseModel
from typing import List

# Define a model for each analysis step
class AnalysisStep(BaseModel):
    Step: int
    Name: str
    App: str
    Description: str
    expect_new_object: bool
    app_id: str
    input_data_object: List[str]  # Added
    output_data_object: List[str]  # Added

# Define a model for the complete workflow
class AnalysisPipeline(BaseModel):
    steps_to_run: List[AnalysisStep]

In [134]:
used_llm = ChatOpenAI(
    model="gpt-4o",
    api_key=os.getenv("OPENAI_API_KEY"),
)
used_llm_anal = ChatOpenAI(
    model="o1",
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [135]:
llm = OpenAI(model="o1", api_key=os.getenv("OPENAI_API_KEY"))

In [136]:
def extract_json_from_string(string_data):
    # Use regex to find the JSON content within the string
    json_match = re.search(r'\[.*\]', string_data, re.DOTALL)
    
    if json_match:
        json_str = json_match.group(0)
        try:
            # Load the JSON string as Python object
            json_data = json.loads(json_str)
            return json_data
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
    else:
        print("No JSON data found in the string.")
        return None
def extract_json_from_string_curly(string_data):
    # First try to find JSON content within curly braces (for objects)
    json_match = re.search(r'\{.*\}', string_data, re.DOTALL)
    
    if json_match:
        json_str = json_match.group(0)
    else:
        # Try to find JSON content within square brackets (for arrays)
        json_match = re.search(r'\[.*\]', string_data, re.DOTALL)
        if json_match:
            json_str = json_match.group(0)
        else:
            print("No JSON data found in the string.")
            return None
    
    try:
        # Load the JSON string as Python object
        json_data = json.loads(json_str)
        return json_data
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return None

In [137]:
class AppRunInputs(BaseModel):
    narrative_id: int
    app_id: str
    input_object_upa: str

class WorkflowRunner(KBaseAgent):
    job_crew: JobCrew
    role: str = "You are a workflow runner, your role is to efficiently run KBase workflows."
    goal: str = "Your goal is to create and run elegant and scientifically meaningful computational biology workflows."
    backstory: str = "You are a dedicated and effective computational biologist. You have deep knowledge of how to run workflows in the DOE KBase system and have years of experience using this to produce high quality scientific knowledge."
    
    def __init__(self, llm, token: str = None):
        self.job_crew = JobCrew(llm)
        self._llm = llm
        self._token = token

        @tool(args_schema=AppRunInputs)
        def do_app_run(narrative_id: int, app_id: str, input_object_upa: str):
            """
            This invokes a CrewAI crew to run a new KBase app from start to finish and
            returns the results. It takes in the narrative_id, app_id (formalized as module_name/app_name), and
            UPA of the input object.
            """
            return self.run_app_crew(narrative_id, app_id, input_object_upa)
            
        self.agent = Agent(
            role=self.role,
            goal=self.goal,
            backstory=self.backstory,
            verbose=True,
            tools=[
                do_app_run
            ],  # + human_tools,
            llm=self._llm,
            allow_delegation=False,
            memory=True,
        )
    
    def run_app_crew(self, narrative_id: int, app_id: str, input_object_upa: str):
        return self.job_crew.start_job(app_id, input_object_upa, narrative_id, app_id=app_id)

wf_runner = WorkflowRunner(used_llm)

In [138]:
#modify the run_apps_task to run apps one at a time
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Dict, Any, Optional
from pydantic import BaseModel, Field
import json
from typing_extensions import Annotated

# Define the state schema
class WorkflowState(TypedDict):
    description: str 
    steps_to_run: List[Dict[str, Any]]
    last_executed_step: Dict[str, Any]
    narrative_id: int
    reads_id: str
    step_result: Optional[str]
    error: Optional[str]
    results: Optional[str]

# Create the workflow node
def workflow_runner_node(state: WorkflowState):
    try:
        steps_to_run = state["steps_to_run"]
        narrative_id = state["narrative_id"]
        reads_id = state["reads_id"]
        #Step run management, #TODO: improve this

        current_step = steps_to_run[0]
        print("current step to run:", current_step)
        remaining_steps = steps_to_run[1:]
        
        # Initialize the workflow runner
        wf_runner = WorkflowRunner(used_llm)
        

        # Create the task, for a sinple step/app run
        # Here we assume that the first step is the one to run-->#TODO: make this more general
        run_apps_task = Task(
            description=f"""
            This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
            Here are the tasks in JSON format: {json.dumps(current_step)}.
            If any task has "expect_new_object" set to True, then that should receive a new data object in its output as a "created_object". That object should be used as input for the next task.
            If a task as "expect_new_object" set to False, then that should not receive a new object to use in the next task. In that case, use the same input object from the previous step for the next one.
            These steps must be run sequentially. 
            These must be run in the narrative with id {narrative_id}. The input from the user is the UPA of the input object, which is {reads_id}.
            If any step ends with an error, immediately stop the task and end with an error.
            In the end, return a brief summary of steps taken and resulting output objects.
            """,
            expected_output="A summary of task completion, the number of apps run, and the upa of any output objects.",
            agent=wf_runner.agent
        )
        # Create and run the crew
        crew = Crew(
            agents=[wf_runner.agent],
            tasks=[run_apps_task],
            verbose=True,
        )
        print("Running execution crew...")
        result = crew.kickoff()
        
        # Return updated state with results
        return {
            **state,
            "step_result": result,
            "steps_to_run": remaining_steps,
            "last_executed_step": current_step,
            "error": None
        }
    except Exception as e:
        # Handle errors
        return {
            **state,
            "results": None,
            "error": str(e)
        }

In [139]:
class WorkflowValidatorAgent(KBaseAgent):
    role: str = "You are a workflow validator, responsible for analyzing app run results and determining next steps."
    goal: str = "Ensure that each step in a computational biology workflow produces expected results and that subsequent steps are appropriate."
    backstory: str = """You are an experienced computational biologist with deep expertise in KBase workflows. 
    You analyze results from each step and determine if the workflow should continue as planned or be modified based on input/output data objects for the apps."""
    
    def __init__(self, llm, token: str = None):
        self._llm = llm
        self._token = token
        
        self.agent = Agent(
            role=self.role,
            goal=self.goal,
            backstory=self.backstory,
            verbose=True,
            llm=self._llm,
            allow_delegation=False,
            memory=True,
        )

# Create the validator node function
def workflow_validator_node(state: WorkflowState):
    try:
        # Extract the relevant information from the state
        last_step_result = state.get("step_result", "")
        last_executed_step = state.get("last_executed_step", {})
        remaining_steps = state.get("steps_to_run", [])
        next_step = remaining_steps[0] if remaining_steps else None
        
        # If there's no next step, we're done
        if not next_step:
            return {
                **state,
                "results": "Workflow complete. All steps were successfully executed.",
                "error": None
            }
        
        # Initialize the validator agent
        validator = WorkflowValidatorAgent(used_llm_anal, token=os.environ.get("KB_AUTH_TOKEN"))
        
        # Create the validation task
        validation_task = Task(
            description=f"""
            Analyze the result of the last executed step and determine if the next planned step is appropriate.
            
            Last step executed:
            {json.dumps(last_executed_step)}
            
            Result of the last step:
            {last_step_result}
            
            Next planned step:
            {json.dumps(next_step)}
            
            Based on the outcome of the last step, evaluate if the next step is still appropriate or needs to be modified.
            Keep in mind that the output object from the last step should be used as input for the next step.
            Consider these factors:
            1. Did the last step complete successfully?
            2. Did it produce the expected output objects if any were expected? 
            3. Are there any warnings or errors that suggest we should take a different approach?
            4. Is the next step still scientifically appropriate given the results we've seen?
            
            Return your decision as a JSON with this structure:
            ```json
            {{
                "continue_as_planned": true/false,
                "reasoning": "Your explanation for the decision",
                "modified_next_steps": [] // If modifications are needed, include the modified steps here
            }}
            ```
            """,
            expected_output="A JSON decision on whether to proceed with the next step as planned or modify the workflow.",
            agent=validator.agent
        )
        
        # Create and run the crew
        crew = Crew(
            agents=[validator.agent],
            tasks=[validation_task],
            verbose=True,
        )
        
        result = crew.kickoff()
        
        # Parse the result to get the decision
        decision_text = result.raw
        # Extract JSON from the result text
        decision_json = extract_json_from_string_curly(decision_text)
        print(f"Decision JSON: {decision_json}")
        if not decision_json:
            # Fallback if JSON extraction fails
            decision_json = {
                "continue_as_planned": True,
                "reasoning": "Unable to parse decision, continuing with original plan as a fallback."
            }
        
        # Update the state based on the decision
        if decision_json.get("continue_as_planned", True):
            return {
                **state,
                "validation_reasoning": decision_json.get("reasoning", ""),
                "error": None
            }
        else:
            # Replace the remaining steps with the modified steps if provided
            modified_steps = decision_json.get("modified_next_steps", [])
            return {
                **state,
                "steps_to_run": modified_steps if modified_steps else remaining_steps,
                "validation_reasoning": decision_json.get("reasoning", ""),
                "error": None
            }
    except Exception as e:
        return {
            **state,
            "error": f"Validation error: {str(e)}"
        }

In [140]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Dict, Any, Optional

# # Define the state schema for the unified workflow
# class GenomeAnalysisState(TypedDict):
#     narrative_id: str
#     reads_id: str
#     description: str  # Full description as you provide it currently
#     analysis_plan: Optional[List[Dict[str, Any]]]
#     steps_to_run: Optional[List[Dict[str, Any]]]
#     results: Optional[str]
#     error: Optional[str]

# Create the analyst node function
def analyst_node(state: WorkflowState):
    try:
        # Get the existing description from the state
        description = state["description"]
        
        # Initialize the analyst agent with your existing configuration
        #analyst_expert = AnalystAgent(used_llm_anal, cborg_api_key =os.environ.get('OPENAI_API_KEY'), token=os.environ["KB_AUTH_TOKEN"],tools_model="o1")
        analyst_expert = AnalystAgent(used_llm_anal, token=os.environ["KB_AUTH_TOKEN"],tools_model="o1",provider="OpenAI")
        # Create the analysis task using your existing format
        analysis_agent_task = Task(
            description=description,
            expected_output="a json of the analysis workflow",
            output_json=AnalysisPipeline,
            agent=analyst_expert.agent
        )
        
        # Create and run the crew
        crew = Crew(
            agents=[analyst_expert.agent],
            tasks=[analysis_agent_task],
            verbose=True,
        )
        
        output = crew.kickoff()
        
        # Extract the JSON from the output using your existing function
        analysis_plan = extract_json_from_string(output.raw)
        
        # Return updated state with analysis plan
        return {
            **state,
            "steps_to_run": analysis_plan,  
            "error": None
        }
    except Exception as e:
        # Handle errors
        return {
            **state,
            "steps_to_run": None,
            "error": str(e)
        }

# Function to determine the next node based on the state
def next_step_router(state: WorkflowState):
    if state.get("error"):
        return "handle_error"
    if state["steps_to_run"]:
        return "validate_step"  # Go to validation after running a step
    else:
        return "workflow_end"
    
def router(state: WorkflowState):
    if state["error"]:
        return "handle_error"
    else:
        return "run_workflow_step"
def handle_error(state):
    return {**state, "results": f"Error: {state.get('error', 'Unknown error')}"}  # safe fallback
# Router after validation to decide next action
def post_validation_router(state: WorkflowState):
    if state.get("error"):
        return "handle_error"
    if state["steps_to_run"]:
        return "run_workflow_step"  # Continue with next step
    else:
        return "workflow_end"
# # Build the complete graph with both analyst and workflow nodes
# def build_genome_analysis_graph():
#     # Create a new graph
#     genome_graph = StateGraph(WorkflowState)
    
#     # Add the nodes
#     genome_graph.add_node("analyst", analyst_node)
#     genome_graph.add_node("run_workflow_step", workflow_runner_node)
#     #genome_graph.add_node("handle_error", lambda state: {**state, "results": f"Error: {state['error']}"})
#     genome_graph.add_node("handle_error", handle_error)
#     genome_graph.add_node("workflow_end", lambda state: {**state, "results": "✅ Workflow complete."})
#     # Define the edges with the router
#     genome_graph.add_conditional_edges(
#         "analyst",
#         router,
#         {
#             "run_workflow_step": "run_workflow_step",
#             "handle_error": "handle_error"
#         }
#     )
#     genome_graph.add_conditional_edges(
#         "run_workflow_step",
#         next_step_router,
#         {
#             "run_workflow_step": "run_workflow_step",
#             "workflow_end": "workflow_end",
#             "handle_error": "handle_error"
#         }
#     )

#     #genome_graph.add_edge("run_workflow", END)
#     genome_graph.add_edge("handle_error", END)
#     genome_graph.add_edge("workflow_end", END)
#     # Set the entry point
#     genome_graph.set_entry_point("analyst")
    
#     # Compile the graph
#     return genome_graph.compile()
# Updated graph building function
def build_genome_analysis_graph():
    # Create a new graph
    genome_graph = StateGraph(WorkflowState)
    
    # Add the nodes
    genome_graph.add_node("analyst", analyst_node)
    genome_graph.add_node("run_workflow_step", workflow_runner_node)
    genome_graph.add_node("validate_step", workflow_validator_node)  # Add the validator node
    genome_graph.add_node("handle_error", handle_error)
    genome_graph.add_node("workflow_end", lambda state: {**state, "results": "✅ Workflow complete."})
    
    # Define the edges with updated routing
    genome_graph.add_conditional_edges(
        "analyst",
        router,
        {
            "run_workflow_step": "run_workflow_step",
            "handle_error": "handle_error"
        }
    )
    
    # After running a workflow step, always go to validator
    genome_graph.add_conditional_edges(
        "run_workflow_step",
        next_step_router,
        {
            "validate_step": "validate_step",  # Go to validator after step execution
            "workflow_end": "workflow_end",
            "handle_error": "handle_error"
        }
    )
    
    # After validation, decide whether to run next step or end
    genome_graph.add_conditional_edges(
        "validate_step",
        post_validation_router,
        {
            "run_workflow_step": "run_workflow_step",  # Run the next step if validation passes
            "workflow_end": "workflow_end",
            "handle_error": "handle_error"
        }
    )

    genome_graph.add_edge("handle_error", END)
    genome_graph.add_edge("workflow_end", END)
    
    # Set the entry point
    genome_graph.set_entry_point("analyst")
    
    # Compile the graph
    return genome_graph.compile()
# Example usage of the complete graph
def run_genome_analysis(narrative_id, reads_id, description):
    graph = build_genome_analysis_graph()
    
    # Initialize the state using the exact description you provide
    initial_state = {
        "narrative_id": narrative_id,
        "reads_id": reads_id,
        "description": description,
        "analysis_plan": None,
        "steps_to_run": None,
        "results": None,
        "error": None
    }
    
    # Execute the graph and get the final state
    final_state = graph.invoke(initial_state)
    return final_state

In [141]:
graph = build_genome_analysis_graph()
print(graph.get_graph().draw_mermaid())

---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	analyst(analyst)
	run_workflow_step(run_workflow_step)
	validate_step(validate_step)
	handle_error(handle_error)
	workflow_end(workflow_end)
	__end__([<p>__end__</p>]):::last
	__start__ --> analyst;
	handle_error --> __end__;
	workflow_end --> __end__;
	analyst -.-> run_workflow_step;
	analyst -.-> handle_error;
	run_workflow_step -.-> validate_step;
	run_workflow_step -.-> workflow_end;
	run_workflow_step -.-> handle_error;
	validate_step -.-> run_workflow_step;
	validate_step -.-> workflow_end;
	validate_step -.-> handle_error;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [None]:
#Test the graph manually
sequencing_technology="Illumina sequencing"
organism = "Bacillus subtilis sp. strain UAMC"
genome_type = "isolate"
narrative_id = "214727"
reads_id = "214727/2/1"
sample_description = f"""The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:
sequencing_technology : {sequencing_technology}
organism: {organism}
genome type : {genome_type}
I want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from {sequencing_technology} for a {genome_type} genome using KBase apps.
The goal is to have a complete annotated genome and classify the microbe
This analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:
- The analysis steps should begin with read quality assessment. 
- Make sure you select appropriate KBase apps based on genome type.
-Relevant statistics for the assembly (e.g., number of contigs and N50 values).
-Estimates of genome completeness, where applicable.
-Classify the microbe for taxonomy, where relevant.
Based on the metadata, devise a detailed step-by-step analysis workflow, the apps and app_ids should be from the app graph.
The analysis plan should be a json with schema exactly in the following format: 
```json
{{"Step": "Integer number indicating the step",
 "Description": "Describe the step",
 "App": "Name of the app",
 "app_id": "Id of the KBase app",
 "input_data_object_types": "types of input data object for the app",
 "output_data_object_types": "types of output data object for the app"
 }}
```
If there is nothing for a field, please put "None" in the field.
Always check the knowledge graph for the app_ids and data object information. Do not make this up.
Make sure that the analysis plan is included in the final response.
"""
initial_state = {
        "narrative_id": narrative_id,
        "reads_id": reads_id,
        "description": sample_description,
        "analysis_plan": None,
        "steps_to_run": None,
        "results": None,
        "error": None
    }
analyst_node_state = analyst_node(initial_state)

In [None]:
analyst_node_state

{'narrative_id': '214727',
 'reads_id': '214727/2/1',
 'description': 'The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:\nsequencing_technology : Illumina sequencing\norganism: Bacillus subtilis sp. strain UAMC\ngenome type : isolate\nI want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.\nThe goal is to have a complete annotated genome and classify the microbe\nThis analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:\n- The analysis steps should begin with read quality assessment. \n- Make sure you select appropriate KBase apps based on genome type.\n-Relevant statistics for the assembly (e.g., number of contigs and N50 values).\n-Estimates of genome completeness, where applicable.\n-Classify the microbe for taxonomy, where relevant.\nBased on

In [None]:
state_after_one_app_run = workflow_runner_node(analyst_node_state)



current step to run: {'Step': 1, 'Name': 'Quality Assessment of Reads', 'App': 'Assess Read Quality with FastQC', 'Description': 'Evaluate read quality before further processing', 'expect_new_object': False, 'app_id': 'kb_fastqc/runFastQC', 'input_data_object': ['KBaseFile.PairedEndLibrary'], 'output_data_object': ['None']}
Running execution crew...
[1m[95m# Agent:[00m [1m[92mYou are a workflow runner, your role is to efficiently run KBase workflows.[00m
[95m## Task:[00m [92m
            This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
            Here are the tasks in JSON format: {"Step": 1, "Name": "Quality Assessment of Reads", "App": "Assess Read Quality with FastQC", "Description": "Evaluate read quality before further processing", "expect_new_object": false, "app_id": "kb_fastqc/runFastQC", "input_data_object": ["KBaseFile.PairedEndLibrary"], "output_data_object": ["None"]}.
        



[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_fastqc/runFastQC, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "214727/2/1", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input object name,
            always create a new name.
            If the input objec

In [None]:
state_after_one_app_run

{'narrative_id': '214727',
 'reads_id': '214727/2/1',
 'description': 'The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:\nsequencing_technology : Illumina sequencing\norganism: Bacillus subtilis sp. strain UAMC\ngenome type : isolate\nI want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.\nThe goal is to have a complete annotated genome and classify the microbe\nThis analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:\n- The analysis steps should begin with read quality assessment. \n- Make sure you select appropriate KBase apps based on genome type.\n-Relevant statistics for the assembly (e.g., number of contigs and N50 values).\n-Estimates of genome completeness, where applicable.\n-Classify the microbe for taxonomy, where relevant.\nBased on

In [None]:
#Use the validation node
state_after_validation = workflow_validator_node(state_after_one_app_run)



[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Task:[00m [92m
            Analyze the result of the last executed step and determine if the next planned step is appropriate.

            Last step executed:
            {"Step": 1, "Name": "Quality Assessment of Reads", "App": "Assess Read Quality with FastQC", "Description": "Evaluate read quality before further processing", "expect_new_object": false, "app_id": "kb_fastqc/runFastQC", "input_data_object": ["KBaseFile.PairedEndLibrary"], "output_data_object": ["None"]}

            Result of the last step:
            The task has been completed successfully. The "Assess Read Quality with FastQC" app was run. No new output objects were created as expected. The process involved running 1 app in total. The report generated can be found with UPA: 214727/12/1.
```

            Next planned step:
            {"Step": 2, "Name": "Trim Reads", "

In [None]:
state_after_validation

{'narrative_id': '214727',
 'reads_id': '214727/2/1',
 'description': 'The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:\nsequencing_technology : Illumina sequencing\norganism: Bacillus subtilis sp. strain UAMC\ngenome type : isolate\nI want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.\nThe goal is to have a complete annotated genome and classify the microbe\nThis analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:\n- The analysis steps should begin with read quality assessment. \n- Make sure you select appropriate KBase apps based on genome type.\n-Relevant statistics for the assembly (e.g., number of contigs and N50 values).\n-Estimates of genome completeness, where applicable.\n-Classify the microbe for taxonomy, where relevant.\nBased on

In [None]:
#check the state after running one app and then passing the state to the workflow node for another app run
#state_after_two_app_run = workflow_runner_node(state_after_one_app_run)

In [None]:
state_after_two_app_run_with_validation = workflow_runner_node(state_after_validation)



current step to run: {'Step': 2, 'Name': 'Trim Reads', 'App': 'Trim Reads with Trimmomatic', 'Description': 'Remove low-quality bases and adapters from raw reads', 'expect_new_object': True, 'app_id': 'kb_trimmomatic/run_trimmomatic', 'input_data_object': ['KBaseFile.PairedEndLibrary'], 'output_data_object': ['KBaseFile.PairedEndLibrary']}
Running execution crew...
[1m[95m# Agent:[00m [1m[92mYou are a workflow runner, your role is to efficiently run KBase workflows.[00m
[95m## Task:[00m [92m
            This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
            Here are the tasks in JSON format: {"Step": 2, "Name": "Trim Reads", "App": "Trim Reads with Trimmomatic", "Description": "Remove low-quality bases and adapters from raw reads", "expect_new_object": true, "app_id": "kb_trimmomatic/run_trimmomatic", "input_data_object": ["KBaseFile.PairedEndLibrary"], "output_data_object": ["KBaseFi



[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_trimmomatic/run_trimmomatic, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "214727/2/1", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input object name,
            always create a new name.
            If the 

In [None]:
state_after_two_app_run_with_validation

{'narrative_id': '214727',
 'reads_id': '214727/2/1',
 'description': 'The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:\nsequencing_technology : Illumina sequencing\norganism: Bacillus subtilis sp. strain UAMC\ngenome type : isolate\nI want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.\nThe goal is to have a complete annotated genome and classify the microbe\nThis analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:\n- The analysis steps should begin with read quality assessment. \n- Make sure you select appropriate KBase apps based on genome type.\n-Relevant statistics for the assembly (e.g., number of contigs and N50 values).\n-Estimates of genome completeness, where applicable.\n-Classify the microbe for taxonomy, where relevant.\nBased on

In [None]:
state_after_three_app_run = workflow_runner_node(state_after_two_app_run)

Overriding of current TracerProvider is not allowed


[1m[95m# Agent:[00m [1m[92mYou are a workflow runner, your role is to efficiently run KBase workflows.[00m
[95m## Task:[00m [92m
            This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
            Here are the tasks in JSON format: {"Step": 3, "Name": "Re-check read quality", "Description": "Re-run FastQC on trimmed reads to confirm improved read quality.", "expect_new_object": true, "app_id": "kb_fastqc/runFastQC"}.
            If any task has "expect_new_object" set to True, then that should receive a new data object in its output as a "created_object". That object should be used as input for the next task.
            If a task as "expect_new_object" set to False, then that should not receive a new object to use in the next task. In that case, use the same input object from the previous step for the next one.
            These steps must be run sequentially. 
            These must 

Overriding of current TracerProvider is not allowed


[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_fastqc/runFastQC, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "214727/2/1", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input object name,
            always create a new name.
            If the input objec

In [None]:
'''
next steps:
A function that takes in the state looks at the step_result and the steps_to_run and determines if the next step in steps to run is appropriate.
'''

In [None]:
# graph = build_genome_analysis_graph()
# print(graph.get_graph().draw_mermaid())

In [None]:

sample_description = f"""The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:
sequencing_technology : {sequencing_technology}
organism: {organism}
genome type : {genome_type}
I want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from {sequencing_technology} for a {genome_type} genome using KBase apps.
The goal is to have a complete annotated genome and classify the microbe
This analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:
- The analysis steps should begin with read quality assessment. 
- Make sure you select appropriate KBase apps based on genome type.
-Relevant statistics for the assembly (e.g., number of contigs and N50 values).
-Estimates of genome completeness, where applicable.
-Classify the microbe for taxonomy, where relevant.
Based on the metadata, devise a detailed step-by-step analysis workflow, the apps and app_ids should be from the app graph.
The analysis plan should be a json with schema as: 
```json
{{"Step": "Integer number indicating the step",
 "Name": "Name of the step",
 "Description": "Describe the step",
 "App": "Name of the app",
 "expect_new_object": boolean indicating if this step creates a new data object,
 "app_id": "Id of the KBase app"}}
```
Ensure that app_ids are obtained from the app graph and are correct.
Make sure that the analysis plan is included in the final response.
"""


run_genome_analysis("214727", "214727/2/1", sample_description)