In [None]:

from narrative_llm_agent.crews.job_crew import JobCrew
from langchain_openai import ChatOpenAI, OpenAI
import openai
import os
from crewai import Crew, Agent, Task
from narrative_llm_agent.agents.kbase_agent import KBaseAgent
from langchain.tools import tool
import json
from pydantic import BaseModel
from narrative_llm_agent.agents.metadata import MetadataAgent
from narrative_llm_agent.agents.analyst import AnalystAgent
import re


True

In [2]:
import os
import requests


# Report the current spend recorded for the CBorg API key.
# Fail fast with a readable message when the key is missing: concatenating
# None onto "Bearer " would otherwise raise an opaque TypeError.
cborg_api_key = os.environ.get("CBORG_API_KEY")
if not cborg_api_key:
    raise RuntimeError("CBORG_API_KEY environment variable is not set")

# The timeout keeps the cell from hanging forever if the service never answers.
response = requests.get(
    "https://api.cborg.lbl.gov/key/info",
    headers={"Authorization": "Bearer " + cborg_api_key},
    timeout=30,
)
# Surface HTTP errors (e.g. a rejected key) here rather than as a confusing
# KeyError when indexing the JSON body below.
response.raise_for_status()


print("Current spend: ", response.json()["info"]["spend"])

Current spend:  2.780775


In [3]:
from narrative_llm_agent.workflow_graph.nodes import WorkflowState, WorkflowNodes
from narrative_llm_agent.workflow_graph.routers import analyst_router, next_step_router, post_validation_router

from typing import List, Dict, Any, Optional, TypedDict
# Identifiers for this run: the narrative and the reads object to analyse.
narrative_id, reads_id = "215269", "215269/2/1"

# Shared node collection; its bound methods are registered on the graph
# built in the next cell.
nodes = WorkflowNodes()

In [4]:
from langgraph.graph import StateGraph, END
def build_genome_analysis_graph():
    """Assemble and compile the genome-analysis state graph.

    The graph is a ``StateGraph`` over ``WorkflowState`` whose node callables
    come from the module-level ``nodes`` object. Execution enters at
    ``"analyst"``; ``"validate_step"`` and ``"run_workflow_step"`` route to
    each other through their routers, and both ``"handle_error"`` and
    ``"workflow_end"`` lead to ``END``.

    Returns:
        The object returned by ``StateGraph.compile()``.
    """

    def _mark_complete(state):
        # Terminal node: copy the state and stamp the completion message.
        return {**state, "results": "✅ Workflow complete."}

    workflow = StateGraph(WorkflowState)

    # (node name, callable) pairs, registered in this order.
    node_actions = (
        ("analyst", nodes.analyst_node),
        ("run_workflow_step", nodes.workflow_runner_node),
        ("validate_step", nodes.workflow_validator_node),
        ("handle_error", nodes.handle_error),
        ("workflow_end", _mark_complete),
    )
    for node_name, action in node_actions:
        workflow.add_node(node_name, action)

    # (source node, router, reachable targets). Every router label maps to
    # the node of the same name, so the path map is an identity mapping.
    routing = (
        # The analyst hands off to the validator or bails out on error.
        ("analyst", analyst_router, ("validate_step", "handle_error")),
        # After validation: run the next step, finish, or report an error.
        (
            "validate_step",
            post_validation_router,
            ("run_workflow_step", "workflow_end", "handle_error"),
        ),
        # After a step has run: validate it, finish, or report an error.
        (
            "run_workflow_step",
            next_step_router,
            ("validate_step", "workflow_end", "handle_error"),
        ),
    )
    for source, router, targets in routing:
        workflow.add_conditional_edges(
            source, router, {target: target for target in targets}
        )

    # Both terminal nodes lead straight to END.
    for terminal in ("handle_error", "workflow_end"):
        workflow.add_edge(terminal, END)

    workflow.set_entry_point("analyst")
    return workflow.compile()
# Example usage of the complete graph
def run_custom_analysis(narrative_id, reads_id, description):
    """Build the genome-analysis graph, invoke it once, and return the result.

    Args:
        narrative_id: Stored in the initial state under ``"narrative_id"``.
        reads_id: Stored in the initial state under ``"reads_id"``.
        description: Stored unchanged under ``"description"``.

    Returns:
        Whatever ``graph.invoke`` returns for the initial state.
    """
    workflow = build_genome_analysis_graph()

    # Every state field other than the three inputs starts out as None.
    unset_fields = (
        "analysis_plan",
        "steps_to_run",
        "input_object_upa",
        "results",
        "error",
    )
    initial_state = {
        "narrative_id": narrative_id,
        "reads_id": reads_id,
        "description": description,
        **dict.fromkeys(unset_fields),
    }

    return workflow.invoke(initial_state)

In [5]:
from narrative_llm_agent.workflow_graph.graph import AnalysisWorkflow

custom_workflow = AnalysisWorkflow()

# Sample metadata interpolated into the analyst prompt below.
sequencing_technology = "Illumina sequencing"
organism = "Bacillus subtilis sp. strain UAMC"
genome_type = "isolate"

# Prompt handed to the workflow as its description. Doubled braces in the
# JSON schema are f-string escapes for literal `{` and `}`.
sample_description = f"""The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:
sequencing_technology : {sequencing_technology}
organism: {organism}
genome type : {genome_type}
I want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from {sequencing_technology} for a {genome_type} genome using KBase apps.
The goal is to have a complete annotated genome and classify the microbe
This analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:
- The analysis steps should begin with read quality assessment. 
- Make sure you select appropriate KBase apps based on genome type.
-Relevant statistics for the assembly (e.g., number of contigs and N50 values).
-Estimates of genome completeness, where applicable.
-Classify the microbe for taxonomy, where relevant.
Based on the metadata, devise a detailed step-by-step analysis workflow, the apps and app_ids should be from the app graph.
The analysis plan should be a json with schema as: 
```json
{{"Step": "Integer number indicating the step",
 "Name": "Name of the step",
 "Description": "Describe the step",
 "App": "Name of the app",
 "expect_new_object": boolean indicating if this step creates a new data object,
 "app_id": "Id of the KBase app"}}
```
Ensure that app_ids are obtained from the app graph and are correct.
Make sure that the analysis plan is included in the final response.
"""

# Reuse the narrative/reads identifiers defined in the configuration cell
# rather than repeating the literals, so the two copies cannot drift apart.
# The call stays the cell's last expression so its return value is displayed.
custom_workflow.run(
    narrative_id=narrative_id,
    reads_id=reads_id,
    description=sample_description,
)

[1m[95m# Agent:[00m [1m[92mKBase Analyst and Information Provider[00m
[95m## Task:[00m [92mThe user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:
sequencing_technology : Illumina sequencing
organism: Bacillus subtilis sp. strain UAMC
genome type : isolate
I want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.
The goal is to have a complete annotated genome and classify the microbe
This analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:
- The analysis steps should begin with read quality assessment. 
- Make sure you select appropriate KBase apps based on genome type.
-Relevant statistics for the assembly (e.g., number of contigs and N50 values).
-Estimates of genome completeness, where applicable.
-Classify the microbe for taxonomy, where r

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mKBase Analyst and Information Provider[00m
[95m## Final Answer:[00m [92m
{
  "steps_to_run": [
    {
      "Step": 1,
      "Name": "Assess Read Quality",
      "App": "FastQC",
      "Description": "Perform quality assessment of the raw Illumina paired-end reads",
      "expect_new_object": true,
      "app_id": "kb_fastqc/runFastQC",
      "input_data_object": ["paired_end_reads"],
      "output_data_object": ["fastqc_report_raw"]
    },
    {
      "Step": 2,
      "Name": "Trim Reads",
      "App": "Trimmomatic",
      "Description": "Trim adapters and remove low-quality bases from paired-end reads",
      "expect_new_object": true,
      "app_id": "kb_trimmomatic/run_trimmomatic",
      "input_data_object": ["paired_end_reads"],
      "output_data_object": ["trimmed_paired_end_reads"]
    },
    {
      "Step": 3,
      "Name": "Assess Read Quality (Post-Trim)",
      "App": "FastQC",
      "Description": "Perform quality assessment of the tri

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Final Answer:[00m [92m
{
  "continue_as_planned": true,
  "reasoning": "This is the first step, so no previous step to validate. Proceed with FastQC on the raw reads as planned.",
  "input_object_upa": "215269/2/1",
  "modified_next_steps": []
}[00m


Decision JSON: {'continue_as_planned': True, 'reasoning': 'This is the first step, so no previous step to validate. Proceed with FastQC on the raw reads as planned.', 'input_object_upa': '215269/2/1', 'modified_next_steps': []}
current step to run: {'Step': 1, 'Name': 'Assess Read Quality', 'App': 'FastQC', 'Description': 'Perform quality assessment of the raw Illumina paired-end reads', 'expect_new_object': True, 'app_id': 'kb_fastqc/runFastQC', 'input_data_object': ['paired_end_reads'], 'output_data_object': ['fastqc_report_raw']}
Running execution crew...
[1m[95m# Agent:[00m [1m[92mK

Overriding of current TracerProvider is not allowed


starting an app run: 215269 kb_fastqc/runFastQC 215269/2/1
[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_fastqc/runFastQC, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "215269/2/1", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input object name,
         

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Final Answer:[00m [92m
The FastQC app was run to assess the quality of raw Illumina paired-end reads. A total of 1 app was run successfully. The resulting output object (FastQC report) has the UPA: '215269/24/1'.
```[00m


[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Task:[00m [92m
                Analyze the result of the last executed step and determine if the next planned step is appropriate.
                
                Last step executed:
                {"Step": 1, "Name": "Assess Read Quality", "App": "FastQC", "Description": "Perform quality assessment of the raw Illumina paired-end reads", "expect_new_object": true, "app_id": "kb_fastqc/runFastQC", "input_data_object": ["paired_end_reads"], "output_data_object": ["fastqc_report_raw"]}
                
                Result of the last step:
          

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Final Answer:[00m [92m
{
  "continue_as_planned": true,
  "input_object_upa": "215269/2/1",
  "modified_next_steps": []
}[00m


current step to run: {'Step': 2, 'Name': 'Trim Reads', 'App': 'Trimmomatic', 'Description': 'Trim adapters and remove low-quality bases from paired-end reads', 'expect_new_object': True, 'app_id': 'kb_trimmomatic/run_trimmomatic', 'input_data_object': ['paired_end_reads'], 'output_data_object': ['trimmed_paired_end_reads']}
Running execution crew...
[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Task:[00m [92m
                This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
                Here is the current the task in JSON format: {"Step": 2, "Name": "Trim Reads", "App": "Trimmomatic", "Description": "

Overriding of current TracerProvider is not allowed


starting an app run: 215269 kb_trimmomatic/run_trimmomatic 215269/2/1
[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_trimmomatic/run_trimmomatic, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "215269/2/1", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input 

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Final Answer:[00m [92m
The "Trim Reads" step was successfully completed using the "Trimmomatic" app. The app run was completed without any errors, producing three new output objects. Here is a summary of the task completion:

- Total number of apps run: 1
- Output objects created:
  - Object Name: Bsubtilis_rawdata_trimmed_unpaired_rev, Object UPA: 215269/6/5
  - Object Name: Bsubtilis_rawdata_trimmed_unpaired_fwd, Object UPA: 215269/5/5
  - Object Name: Bsubtilis_rawdata_trimmed_paired, Object UPA: 215269/4/5
- Report UPA: 215269/25/1
```[00m


[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Task:[00m [92m
                Analyze the result of the last executed step and determine if the next planned step is appropriate.
                
                Last step executed:
                {"Step": 2, "Name": "Trim Rea

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Final Answer:[00m [92m
{
  "continue_as_planned": true,
  "reasoning": "The Trimmomatic step ran successfully without errors and produced the expected trimmed reads object. Assessing the quality of these trimmed reads with FastQC is the appropriate next step.",
  "input_object_upa": "215269/4/5",
  "modified_next_steps": []
}[00m


Decision JSON: {'continue_as_planned': True, 'reasoning': 'The Trimmomatic step ran successfully without errors and produced the expected trimmed reads object. Assessing the quality of these trimmed reads with FastQC is the appropriate next step.', 'input_object_upa': '215269/4/5', 'modified_next_steps': []}
current step to run: {'Step': 3, 'Name': 'Assess Read Quality (Post-Trim)', 'App': 'FastQC', 'Description': 'Perform quality assessment of the trimmed paired-end reads', 'expect_new_object': True, 'app_id':

Overriding of current TracerProvider is not allowed


starting an app run: 215269 kb_fastqc/runFastQC 215269/4/5
[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_fastqc/runFastQC, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "215269/4/5", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to the input object name,
         

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Final Answer:[00m [92m
The task "Assess Read Quality (Post-Trim)" has been completed successfully. One app was run: "kb_fastqc/runFastQC". The resulting output object is a FastQC report with the UPA "215269/26/1".
```[00m


[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Task:[00m [92m
                Analyze the result of the last executed step and determine if the next planned step is appropriate.
                
                Last step executed:
                {"Step": 3, "Name": "Assess Read Quality (Post-Trim)", "App": "FastQC", "Description": "Perform quality assessment of the trimmed paired-end reads", "expect_new_object": true, "app_id": "kb_fastqc/runFastQC", "input_data_object": ["trimmed_paired_end_reads"], "output_data_object": ["fastqc_report_trimmed"]}
                
                Result of the l

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Final Answer:[00m [92m
```[00m


current step to run: {'Step': 4, 'Name': 'Assemble Genome', 'App': 'SPAdes', 'Description': 'Assemble the trimmed reads into a draft genome', 'expect_new_object': True, 'app_id': 'kb_SPAdes/run_SPAdes', 'input_data_object': ['trimmed_paired_end_reads'], 'output_data_object': ['genome_assembly']}
Running execution crew...
[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Task:[00m [92m
                This task involves running an app, this app is a part of a workflow where the output of one app (if any) is fed into the next as input. 
                Here is the current the task in JSON format: {"Step": 4, "Name": "Assemble Genome", "App": "SPAdes", "Description": "Assemble the trimmed reads into a draft genome", "expect_new_object": true, "app_id": "kb_SPAdes/run_SPAdes", "input_data_ob

Overriding of current TracerProvider is not allowed


starting an app run: 215269 kb_SPAdes/run_SPAdes trimmed_paired_end_reads
[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_SPAdes/run_SPAdes, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "trimmed_paired_end_reads", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be identical to th

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mKBase workflow runner[00m
[95m## Final Answer:[00m [92m
The attempt to run the SPAdes app in narrative 215269 failed due to an invalid UPA for the input object 'trimmed_paired_end_reads.' No apps were successfully run, and no output objects were created. The process was halted to prevent further errors in the workflow. Please check the UPA and try again.
```[00m


[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Task:[00m [92m
                Analyze the result of the last executed step and determine if the next planned step is appropriate.
                
                Last step executed:
                {"Step": 4, "Name": "Assemble Genome", "App": "SPAdes", "Description": "Assemble the trimmed reads into a draft genome", "expect_new_object": true, "app_id": "kb_SPAdes/run_SPAdes", "input_data_object": ["trimmed_paired_end_reads"], "output_data_o

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mYou are a workflow validator, responsible for analyzing app run results and determining next steps.[00m
[95m## Final Answer:[00m [92m
{
  "continue_as_planned": false,
  "reasoning": "The SPAdes assembly step failed due to invalid UPA, so the expected genome_assembly object was not created. Therefore, we cannot proceed with QUAST. We must correct the input for SPAdes before assessing assembly quality.",
  "input_object_upa": "",
  "modified_next_steps": [
    {
      "Step": 4,
      "Name": "Assemble Genome",
      "App": "SPAdes",
      "Description": "Assemble the trimmed reads into a draft genome",
      "expect_new_object": true,
      "app_id": "kb_SPAdes/run_SPAdes",
      "input_data_object": [
        "correct_trimmed_paired_end_reads"
      ],
      "output_data_object": [
        "genome_assembly"
      ]
    }
  ]
}[00m


Decision JSON: {'continue_as_planned': False, 'reasoning': 'The SPAdes assembly step failed due to invalid UPA, so 

Overriding of current TracerProvider is not allowed


starting an app run: 215269 kb_SPAdes/run_SPAdes correct_trimmed_paired_end_reads
[1m[95m# Agent:[00m [1m[92mProject coordinator[00m
[95m## Task:[00m [92m
            From the given KBase app id, kb_SPAdes/run_SPAdes, fetch the list of parameters needed to run it. Use the App and Job manager agent
            for assistance. Using the data object with UPA "correct_trimmed_paired_end_reads", populate a dictionary
            with the parameters where the keys are parameter ids, and values are the proper parameter values, or their
            default values if no value can be found or calculated.
            Any input object parameter must be the input object UPA.
            Be sure to make sure there is a non-null value for any parameter that is not optional.
            Any parameter that has a true value for "is_output_object" must have a valid name for the new object.
            The new object name should be based on the input object name, not its UPA. But it must NEVER be

{'description': 'The user has uploaded paired-end sequencing reads into the narrative. Here is the metadata for the reads:\nsequencing_technology : Illumina sequencing\norganism: Bacillus subtilis sp. strain UAMC\ngenome type : isolate\nI want you to generate an analysis plan for annotating the uploaded pair-end reads obtained from Illumina sequencing for a isolate genome using KBase apps.\nThe goal is to have a complete annotated genome and classify the microbe\nThis analysis is for a Microbiology Resource Announcements (MRA) paper so these need to be a part of analysis. Always keep in mind the following:\n- The analysis steps should begin with read quality assessment. \n- Make sure you select appropriate KBase apps based on genome type.\n-Relevant statistics for the assembly (e.g., number of contigs and N50 values).\n-Estimates of genome completeness, where applicable.\n-Classify the microbe for taxonomy, where relevant.\nBased on the metadata, devise a detailed step-by-step analysis