# Testing stuff

Scratch notebook for testing the CLI invocation, version-parsing, container-lookup and help-parsing agents.

In [18]:
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langchain.chat_models import init_chat_model
from langgraph.graph import START, StateGraph, END, MessagesState

from typing import List, Optional, Dict, Any
from typing_extensions import TypedDict

import re
import requests

import subprocess
import json

from pydantic import BaseModel, Field

In [37]:
class Parameter(TypedDict, total=False):
    """Represents a CLI parameter.

    Declared with ``total=False``, so any key may be absent from a given
    instance; ``Optional`` fields may additionally be present but ``None``.
    """
    description: Optional[str]  # description of the parameter, if provided
    type: Optional[str]  # str, int, float, path, bool, etc.
    required: bool  # true if required, false if optional
    is_input: bool  # true if input file or directory
    is_output: bool  # true if output file or directory
    default_value: Optional[str]  # default value, kept as a string
    is_flag: bool  # true if flag (no argument), false if takes argument
    short: str  # short name like -h
    long: Optional[str]  # long name like --help

class GlobalParameters(TypedDict, total=False):
    """Global parameters of a tool, i.e. parameters that are not tied to a single subcommand."""
    parameters: Optional[List[Parameter]]  # extracted parameters; may be empty or None

class ContainerInfo(TypedDict, total=False):
    """Container image information for a tool from BioContainers.

    Each field holds an image name for the corresponding image type, or
    ``None`` when no image of that type is known.
    """
    bioconda: Optional[str]  # image name for the "conda" image type
    docker: Optional[str]  # image name for the "docker" image type
    singularity: Optional[str]  # image name for the "singularity" image type

class EdamInput(TypedDict, total=False):
    """Represents an EDAM-standardized input.

    All keys are optional at the type level (``total=False``).
    """
    name: str  # input name
    suffix: str  # file extension like .bam, .fasta
    edam: str  # EDAM ontology term like data_1383
    optional: bool  # whether the input may be omitted

class EdamOutput(TypedDict, total=False):
    """Represents an EDAM-standardized output.

    All keys are optional at the type level (``total=False``).
    """
    name: str  # output name
    suffix: str  # file extension like .bam, .sam
    edam: str  # EDAM ontology term like format_2572
    optional: bool  # whether the output may be absent

class StandardizedTool(TypedDict, total=False):
    """Represents a standardized tool with EDAM ontology terms.

    Pairs a tool/subcommand identifier with its EDAM-annotated inputs and
    outputs and a command template.
    """
    id: str  # tool_subcommand format like "samtools_view"
    name: str  # uppercase name like "SAMTOOLS_VIEW"
    label: str  # process label like "process_low"
    inputs: List[EdamInput]  # EDAM-annotated inputs
    outputs: List[EdamOutput]  # EDAM-annotated outputs
    commands: str  # template command string

class Subcommand(TypedDict, total=False):
    """Represents a CLI subcommand.

    All keys are optional at the type level (``total=False``); entries are
    created with only ``name`` set and enriched by later workflow steps.
    """
    name: str  # subcommand name as typed on the command line
    description: Optional[str]  # short description, if known
    parameters: Optional[List[Parameter]]  # parameters specific to this subcommand
    usage: Optional[str]  # usage line, if known
    # Raw `<tool> <subcommand> --help` output. The subcommand invocation step
    # stores this key on each entry, so it is declared here to keep the schema
    # in sync with the data actually produced.
    help_text: Optional[str]
    
class SubcommandNames(TypedDict, total=False):
    """Names of tool (sub)commands."""
    names: Optional[List[str]]  # subcommand names; empty or None when the tool has none

class ToolInfo(TypedDict, total=False):
    """Represents information about a CLI tool.

    All keys are optional at the type level (``total=False``). The record is
    built up incrementally: raw help/version text first, then the parsed
    version, container images, subcommands and global parameters. When a step
    fails, ``error`` is set and later steps pass the record through untouched.
    """
    tool: str  # executable name
    version: Optional[str]  # cleaned version number parsed from version_text
    description: Optional[str]  # tool description, if known
    subcommands: List[Subcommand]  # discovered subcommands (empty list when none)
    # Parameters that apply to all subcommands. The workflow stores a plain list
    # of Parameter dicts here (an empty list initially), so the annotation is the
    # list type rather than the GlobalParameters wrapper used for LLM output.
    global_parameters: Optional[List[Parameter]]
    help_text: Optional[str]  # raw `<tool> --help` output
    version_text: Optional[str]  # raw `<tool> --version` output
    containers: Optional[ContainerInfo]  # BioContainers image names
    error: Optional[str]  # error message when invocation failed

class WorkflowState(MessagesState):
    """State passed between agents in the cmdsaw workflow.

    Extends ``MessagesState`` with the fields each agent reads or writes.
    Fields are grouped below by the agent that produces them; every field is
    ``Optional`` because it stays unset until the producing agent has run.
    """
    # Input
    executable: Optional[str]  # name of the CLI executable to inspect
    target_format: Optional[str]  # 'wdl' or 'nextflow'
    
    # Invocation agent outputs
    tool_info: Optional[ToolInfo]  # accumulated tool record, updated by successive agents
    
    # Parsing agent outputs  
    parsed_subcommands: Optional[List[Subcommand]]
    
    # Standardization agent outputs
    standardized_tools: Optional[List[StandardizedTool]]
    standardized_parameters: Optional[List[Parameter]]
    
    # Troubleshooting agent outputs
    validation_errors: Optional[List[str]]
    suggested_fixes: Optional[List[str]]
    
    # Generator agent outputs
    generated_nextflow: Optional[str]
    generated_wdl: Optional[str]
    metadata: Optional[Dict[str, Any]]

In [None]:
# Prompt instructing the model to do nothing but run the help/version tools and
# store their raw output on the ToolInfo object.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent string literals are concatenated verbatim, so each piece
            # must carry its own trailing space to keep the sentences apart.
            "Do not make up information, just run the `execute_command_help` and "
            "`execute_command_version` tools on the provided input command "
            "and store their output as the 'help_text' and 'version_text' fields "
            "of the ToolInfo object. "
            "Do nothing else.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

# Sample `--help` output for a fictional "toastbox" CLI with global options and
# subcommands. It is not referenced by the code visible in this file; presumably
# it is kept for trying the parsing steps by hand.
test_help = """
toastbox - A delightful notification and message management tool

Usage: toastbox [OPTIONS] COMMAND

A command-line tool for creating, managing, and delivering toast notifications
and messages across your system and network.

Options:
  -v, --version          Show version information
  -c, --config PATH      Path to config file (default: ~/.toastbox/config.yml)
  -q, --quiet           Suppress all non-error output
  -h, --help            Show this help message

Commands:
  send      Send a toast notification
  listen    Start listening for incoming toasts
  history   View notification history
  config    Manage toastbox configuration

Run 'toastbox COMMAND --help' for more information on a command.
"""

# Sample `--version` output for the same fictional tool.
test_version = "toastbox version 2.4"

In [51]:
def capture_help(executable: str, subcommand: Optional[str] = None) -> Dict[str, str]:
    """Run ``<executable> [subcommand] --help`` and return its output.

    Args:
        executable: Name or path of the program to run.
        subcommand: Optional subcommand inserted before ``--help``.

    Returns:
        ``{"help_text": <text>}`` where the text is the stripped stdout, or the
        stripped stderr when stdout is empty (many tools print help to stderr).

    Raises:
        Exception: If the executable cannot be found, the command exceeds the
            30 second timeout, or it fails to run for any other reason. The
            underlying error is attached as ``__cause__``.
    """
    # Build the argument list; a list (not a shell string) avoids shell injection.
    cmd = [executable]
    if subcommand:
        cmd.append(subcommand)
    cmd.append("--help")
    command_str = " ".join(cmd)

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Help output occasionally contains bytes that are invalid in the
            # locale encoding; replace them instead of failing the whole capture
            # (same policy as capture_version).
            errors="replace",
            timeout=30,
        )
    except FileNotFoundError as e:
        raise Exception(
            f"Executable '{executable}' not found. Check that it is installed."
        ) from e
    except subprocess.TimeoutExpired as e:
        raise Exception(f"Timeout running {command_str}") from e
    except Exception as e:
        raise Exception(f"Error running {command_str}: {e}") from e

    help_text = (result.stdout or result.stderr).strip()
    return {"help_text": help_text}


def capture_version(executable: str) -> Dict[str, Optional[str]]:
    """Run ``<executable> --version`` and return its output (best effort).

    Args:
        executable: Name or path of the program to run.

    Returns:
        ``{"version_text": <text>}``. The text is the stripped stdout; when
        stdout is empty and the command exited successfully, stderr is used
        instead because some tools print their version there. The value is
        ``None`` when nothing was printed or the command could not be run.
    """
    try:
        result = subprocess.run(
            [executable, "--version"],
            capture_output=True,
            text=True,
            timeout=10,
            errors="replace",
        )
    except Exception:
        # Deliberately best-effort: a missing version must not abort the
        # workflow, so any failure to run the command yields "no version".
        return {"version_text": None}

    version_text = (result.stdout or "").strip()
    if not version_text and result.returncode == 0:
        # Only trust stderr on success; on failure it usually holds an
        # "unknown option" message rather than a version.
        version_text = (result.stderr or "").strip()

    return {"version_text": version_text or None}


def invocation_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Invocation agent: runs the executable's ``--help`` and ``--version``.

    Reads ``executable`` from the state and returns a ``tool_info`` update
    holding the raw help and version text, with empty ``subcommands`` and
    ``global_parameters`` lists. If capturing fails, the returned ``tool_info``
    carries an ``error`` message instead of the text fields.

    Raises:
        Exception: If the state has no executable name.
    """
    executable = state.get("executable")
    if not executable:
        raise Exception("Executable name missing from state")

    info: ToolInfo
    try:
        # Raw CLI text; parsing is left to the downstream agents.
        captured_help = capture_help(executable)
        captured_version = capture_version(executable)

        info = {
            "tool": executable,
            "help_text": captured_help["help_text"],
            "version_text": captured_version["version_text"],
            "subcommands": [],
            "global_parameters": [],
        }
        print("Complete invocation")
    except Exception as exc:
        # Record the failure so later agents can pass the record through.
        info = {
            "tool": executable,
            "error": str(exc),
            "subcommands": [],
            "global_parameters": [],
        }

    return {"tool_info": info}


In [10]:
# Core version shape:
#   \d+                      - one or more digits (major version)
#   (?:\.\d+)*               - zero or more ".digits" groups (minor, patch, ...)
#   (?:[-+][a-zA-Z0-9.]+)*   - zero or more pre-release / build metadata segments
_VERSION_CORE = r'\d+(?:\.\d+)*(?:[-+][a-zA-Z0-9.]+)*'

# Preferred match: a version that starts its own token (optionally prefixed by
# "v"), i.e. not glued to a preceding letter, digit or dot. This keeps digits
# that belong to a tool name ("bowtie2", "minimap2") from being mistaken for
# the version.
_STANDALONE_VERSION_RE = re.compile(r'(?<![A-Za-z0-9.])[vV]?(' + _VERSION_CORE + r')')

# Fallback: the first digit run anywhere in the string.
_ANY_VERSION_RE = re.compile(_VERSION_CORE)


def parse_version(version_string: str) -> str:
    """
    Extract numeric version from a version string.

    Strips common prefixes and extracts the version number pattern.
    Keeps semantic versioning including pre-release identifiers.
    Digits embedded in a word (such as the "2" in "bowtie2") are skipped in
    favour of a standalone version token when one exists.

    Args:
        version_string: Raw version string from command output

    Returns:
        Cleaned version string with just numeric components. An empty string
        is returned for empty input; if no digits are found at all, the
        whitespace-stripped input is returned unchanged.

    Examples:
        >>> parse_version("v0.1.8")
        '0.1.8'
        >>> parse_version("running 9.2")
        '9.2'
        >>> parse_version("version: 1.8.4-a")
        '1.8.4-a'
        >>> parse_version("toastbox version 2.4.1")
        '2.4.1'
        >>> parse_version("Version 3.2.1-beta.1+build.123")
        '3.2.1-beta.1+build.123'
        >>> parse_version("bowtie2-align-s version 2.5.1")
        '2.5.1'
    """
    if not version_string:
        return ""

    standalone = _STANDALONE_VERSION_RE.search(version_string)
    if standalone:
        return standalone.group(1)

    # No standalone token (e.g. "STAR_2.7"): accept the first digit run.
    embedded = _ANY_VERSION_RE.search(version_string)
    if embedded:
        return embedded.group(0)

    # If no match found, return cleaned string (strip whitespace)
    return version_string.strip()


def parsing_version_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Version parsing agent: derives a clean version number from raw version text.

    Reads ``tool_info`` from the state. When it carries an ``error`` it is
    returned untouched; otherwise ``version_text`` (if present) is run through
    ``parse_version`` and the result is stored under ``version``.

    Raises:
        Exception: If the state has no ``tool_info``.
    """
    info = state.get("tool_info")
    if not info:
        raise Exception("Tool info missing from state")

    # An errored record is passed through as-is; only healthy records are parsed.
    if not info.get("error"):
        raw_version = info.get("version_text")
        if not raw_version:
            print("No version_text found, skipping version extraction")
        else:
            parsed = parse_version(raw_version)
            # Copy rather than mutate, so the incoming record stays intact.
            info = {**info, "version": parsed}
            print(f"Extracted {parsed} as version")

    return {"tool_info": info}
    

# Test cases
# if __name__ == "__main__":
#     test_cases = [
#         ("""v0.1.8 rahblah blah
#         toast is oh so tasty, yes
#         """, "0.1.8"),
#         ("running 9.2", "9.2"),
#         ("version: 1.8.4-a", "1.8.4-a"),
#         ("toastbox version 2.4.1", "2.4.1"),
#         ("Version 3.2.1", "3.2.1"),
#         ("v1.2.3-beta.1+build.123", "1.2.3-beta.1+build.123"),
#         ("Git version 2.39.1", "2.39.1"),
#         ("docker 24.0.5", "24.0.5"),
#         ("Python 3.11.4", "3.11.4"),
#         ("v10.0.0-rc.1", "10.0.0-rc.1"),
#         ("1.0", "1.0"),
#         ("5", "5"),
#     ]
    
#     print("Testing version parser:")
#     print("-" * 60)
#     for input_str, expected in test_cases:
#         result = parse_version(input_str)
#         status = "✓" if result == expected else "✗"
#         print(f"{status} parse_version('{input_str}')")
#         print(f"  Expected: '{expected}'")
#         print(f"  Got:      '{result}'")
#         if result != expected:
#             print("  FAILED!")
#         print()

In [36]:
def request_biocontainers(executable: str, version: str) -> dict:
    """Look up container images for ``executable`` at ``version`` on BioContainers.

    Returns a dict with ``bioconda``, ``docker`` and ``singularity`` image names
    (``None`` where absent) on HTTP 200. On any failure a dict with ``error``
    and ``message`` keys is returned instead; this function does not raise.
    """
    # Example: https://api.biocontainers.pro/ga4gh/trs/v2/tools/samtools/versions/samtools-1.19
    url = f"https://api.biocontainers.pro/ga4gh/trs/v2/tools/{executable}/versions/{executable}-{version}"
    print(f"Running request for {executable}-{version}")

    # Maps the API's image_type (lower-cased) to the key used in the result.
    result_key_for = {
        "conda": "bioconda",
        "docker": "docker",
        "singularity": "singularity",
    }

    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            # In the case of a non-success HTTP status code
            return {"error": response.status_code, "message": response.text}

        payload = response.json()
        found = {"bioconda": None, "docker": None, "singularity": None}
        # When several images share a type, the last one listed wins.
        for image in payload.get("images", []):
            key = result_key_for.get(image.get("image_type", "").lower())
            if key is not None:
                found[key] = image.get("image_name")
        return found
    except requests.Timeout:
        # Handle timeout specifically
        return {"error": "timeout", "message": "Request timed out"}
    except Exception as exc:
        # For any other exceptions
        return {"error": "exception", "message": str(exc)}


def container_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Container agent: looks up container images for the tool and version.

    Reads ``tool_info`` from the state and returns it with a ``containers``
    entry added. The lookup is best effort: when no version is known, or the
    BioContainers request fails, all container fields are ``None`` and the
    workflow continues. An errored ``tool_info`` is passed through untouched.

    Raises:
        Exception: If ``tool_info`` or the executable name is missing.
    """
    tool_info = state.get("tool_info")
    if not tool_info:
        raise Exception("Tool info missing from state")

    if tool_info.get("error"):
        # Pass through error state
        return {"tool_info": tool_info}

    executable = tool_info.get("tool")
    if not executable:
        raise Exception("Executable name missing from state")
    version = tool_info.get("version")

    # Default result, used whenever the lookup is skipped or fails.
    containers: ContainerInfo = {
        "bioconda": None,
        "docker": None,
        "singularity": None,
    }

    if not version:
        # The version step tolerates tools without a usable --version, so a
        # missing version must not abort the remaining help-parsing steps.
        print("No version available, skipping container request")
    else:
        try:
            container_info = request_biocontainers(executable, version)
            print(container_info)

            if container_info.get("error"):
                # request_biocontainers reports failures as an error dict
                # rather than raising; surface them instead of claiming success.
                print(f"Container request failed: {container_info.get('message')}")
            else:
                containers = {
                    "bioconda": container_info.get("bioconda"),
                    "docker": container_info.get("docker"),
                    "singularity": container_info.get("singularity"),
                }
                print("Complete container request")
        except Exception as e:
            print(f"Error in container function: {e}")

    # Copy rather than mutate, so the incoming record stays intact.
    updated_tool_info: ToolInfo = {
        **tool_info,
        "containers": containers,
    }
    return {"tool_info": updated_tool_info}

In [39]:
def find_subcommands_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Takes the raw help text from invocation agent and extracts names of any subcommands.

    The help text is sent to an LLM (``llama3.2:3b`` via Ollama) with structured
    output constrained to ``SubcommandNames``. Each distinct, non-empty name
    becomes a ``Subcommand`` entry in ``tool_info["subcommands"]``. When no
    names are found, ``tool_info`` is returned unchanged. An errored
    ``tool_info`` is passed through untouched.

    Raises:
        Exception: If ``tool_info`` or its help text is missing.
    """
    tool_info = state.get("tool_info")
    if not tool_info:
        raise Exception("Tool info missing from state")
    
    if tool_info.get("error"):
        # Pass through error state
        return {"tool_info": tool_info}
    
    help_text = tool_info.get("help_text")
    
    if not help_text:
        raise Exception("Help text missing from tool info")
    
    # System instruction for detailed parsing
    system_msg = SystemMessage(content="""
You are an expert CLI parser that extracts any and all subcommands from help output text for a given executable.
Given a command-line tool's help output, produce a list of subcommands (if any).
If no subcommands are found, return an empty list.

Rules:
- Only output the list, no additional text.

Example help text:
    toastbox - A delightful notification and message management tool

    Usage: toastbox [OPTIONS] COMMAND

    A command-line tool for creating, managing, and delivering toast notifications
    and messages across your system and network.

    Options:
    -t, --threads INT     Number of threads to use (default: 1)
    -m, --mode STR        Operation mode (default: "standard"), can be "standard", "silent", or "verbose"
    -aa, --all            Include all files if provided (default: false)
    -i, --input STR       Path to input file
    -o, --output STR      Path to output file
    -v, --version         Show version information
    -c, --config STR      Path to config file (default: ~/.toastbox/config.yml)
    -q, --quiet           Suppress all non-error output
    -h, --help            Show this help message

    Basic Commands:
    send      Send a toast notification
    listen    Start listening for incoming toasts
    history   View notification history
    config    Manage toastbox configuration
    
    Other Commands:
    update    Update toastbox to the latest version
    remove    Uninstall toastbox from your system

    Run 'toastbox COMMAND --help' for more information on a command.
    
Example output for toastbox help text:
    ["send", "listen", "history", "config", "update", "remove"]

Other example help text:
    rahblah - Spouts nonsense for fun
    
    rahblah [OPTIONS]
    
    -l, --length int    Length of nonsense to generate in words (default: 10)
    -f, --funny bool    Make the nonsense funny (default: false)
    -r, --repeat int   Number of times to repeat the nonsense (default: 1)
    -o, --output str    Path to output file (default: stdout)
    -h, --help         Show this help message
    
Example output for rahblah help text:
    []
""")

    # Human input
    human_msg = HumanMessage(content=f"""
Parse the below help ouput and extract the name of any subcommands.

--help output:
{help_text}


DO NOT GENERATE ANY EXTRA or ADDITIONAL TEXT
""")
    
    llm = init_chat_model("llama3.2:3b", model_provider="ollama")
    llm = llm.with_structured_output(SubcommandNames)

    messages = [system_msg, human_msg]
    
    print("Invoking LLM to identify (sub)commands")

    ai_msg = llm.invoke(messages)

    # The structured result may be None when the model returns no usable
    # payload; treat that the same as "no subcommands" instead of crashing.
    raw_names = (ai_msg or {}).get("names") or []

    # Drop blank/non-string entries and repeats (keeping first-seen order) so
    # the same subcommand is not invoked twice downstream.
    sub_names = list(dict.fromkeys(
        name.strip()
        for name in raw_names
        if isinstance(name, str) and name.strip()
    ))
    
    if sub_names:
        print(f"Identified subcommands: {sub_names}")
        subcommands = [Subcommand(name=name) for name in sub_names]
        
        # Update tool_info with parsed results
        tool_info: ToolInfo = {
            **tool_info,
            "subcommands": subcommands
        }

    else:
        print("No subcommands found, skipping subcommand extraction")
    
    return {
        "tool_info": tool_info
    }

In [52]:
def parse_globals_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Takes the raw help text from invocation agent and identifies any global parameters, 
    including their short and long forms, default values, type, if an input or output file (or directory), and if required or optional.

    The help text is sent to an LLM (``deepseek-r1:14b`` via Ollama) with
    structured output constrained to ``GlobalParameters``. When parameters are
    returned they are stored under ``tool_info["global_parameters"]``;
    otherwise ``tool_info`` is returned unchanged. An errored ``tool_info`` is
    passed through untouched.

    Raises:
        Exception: If ``tool_info`` or its help text is missing.
    """
    tool_info = state.get("tool_info")
    if not tool_info:
        raise Exception("Tool info missing from state")
    
    if tool_info.get("error"):
        # Pass through error state
        return {"tool_info": tool_info}
    
    help_text = tool_info.get("help_text")
    
    if not help_text:
        raise Exception("Help text missing from tool info")
    
    # System instruction for detailed parsing.
    # This is a raw string so the \" escapes inside the example reach the model
    # intact; in a normal string Python would strip the backslashes and the
    # example would no longer be valid JSON. The example also uses JSON literals
    # (false/true/null) to agree with the "valid JSON" rule it illustrates.
    system_msg = SystemMessage(content=r"""
You are an expert CLI parser that extracts any and all global parameters from a tool's help output text.
Given a command-line tool's help output, produce a list of global parameters (if any).
If no global parameters are found, return an empty list.

Rules:
- Only output the list, no additional text.
- Each parameter should be valid JSON following the schema:
{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "title": "Parameter",
    "description": "Represents a CLI parameter.",
    "properties": {
        "description": {
            "type": ["string", "null"],
            "description": "Description of the parameter, if provided"
        },
        "type": {
            "type": ["string", "null"],
            "description": "Parameter type (str, int, float, path, bool, etc.)"
        },
        "required": {
            "type": "boolean",
            "description": "True if required, false if optional"
        },
        "is_input": {
            "type": "boolean",
            "description": "True if input file or directory"
        },
        "is_output": {
            "type": "boolean",
            "description": "True if output file or directory"
        },
        "default_value": {
            "type": ["string", "null"],
            "description": "Default value for the parameter"
        },
        "is_flag": {
            "type": "boolean",
            "description": "True if flag (no argument), false if takes argument"
        },
        "short": {
            "type": "string",
            "description": "Short name like -h"
        },
        "long": {
            "type": ["string", "null"],
            "description": "Long name like --help"
        }
    },
    "required": ["required", "is_input", "is_output", "is_flag", "short"],
    "additionalProperties": false
}

Example help text:
    toastbox - A totally necessary interface to my toaster from the CLI

    Usage: toastbox [OPTIONS] COMMAND

    A command-line tool for controlling your toaster.

    Options:
    -m, --mode STR        Operation mode (default: "standard"), can be "standard" or "bagel"
    -t, --toastiness INT  Toast level from 1 (light) to 5 (dark) (default: 3)
    -b, --buzz BOOL       Enable buzzer sound when done (default: false)
    -l, --log FILE       Path to log file (default: /var/log/toastbox.log)
    -v, --version         Show version information
    -h, --help            Show this help message
    
Example output for toastbox help text:
    [
        {
            "description": "Operation mode (default: \"standard\"), can be \"standard\" or \"bagel\"",
            "type": "str", 
            "required": false,
            "is_input": false,
            "is_output": false,
            "default_value": "standard",
            "is_flag": false,
            "short": "-m",
            "long": "--mode"
        },
        {
            "description": "Toast level from 1 (light) to 5 (dark) (default: 3)",
            "type": "int",
            "required": false, 
            "is_input": false,
            "is_output": false,
            "default_value": "3",
            "is_flag": false,
            "short": "-t",
            "long": "--toastiness"
        },
        {
            "description": "Enable buzzer sound when done (default: false)",
            "type": "bool",
            "required": false,
            "is_input": false,
            "is_output": false, 
            "default_value": "false",
            "is_flag": true,
            "short": "-b",
            "long": "--buzz"
        },
        {
            "description": "Path to log file (default: /var/log/toastbox.log)",
            "type": "path",
            "required": false,
            "is_input": false,
            "is_output": true,
            "default_value": "/var/log/toastbox.log",
            "is_flag": false,
            "short": "-l", 
            "long": "--log"
        },
        {
            "description": "Show version information",
            "type": null,
            "required": false,
            "is_input": false,
            "is_output": false,
            "default_value": null,
            "is_flag": true,
            "short": "-v",
            "long": "--version"
        },
        {
            "description": "Show this help message", 
            "type": null,
            "required": false,
            "is_input": false,
            "is_output": false,
            "default_value": null,
            "is_flag": true,
            "short": "-h",
            "long": "--help"
        }
    ]

Other example help text:
    todo - Create and manage a todo list from the command line
    
    todo [COMMAND] [OPTIONS]

    commands:
      add       Add a new todo item
      list      List all todo items
      remove    Remove a todo item by its ID
      complete  Mark a todo item as completed by its ID
      clear     Clear all completed todo items
    
Example output for todo help text:
    []
""")

    # Human input
    human_msg = HumanMessage(content=f"""
Parse the below help ouput and extract the global parameters.

--help output:
{help_text}


DO NOT GENERATE ANY EXTRA or ADDITIONAL TEXT
""")
    
    llm = init_chat_model("deepseek-r1:14b", model_provider="ollama")
    llm = llm.with_structured_output(GlobalParameters)

    messages = [system_msg, human_msg]
    
    print("Invoking LLM to identify global parameters")

    ai_msg = llm.invoke(messages)

    # The structured result may be None when the model returns no usable
    # payload; treat that the same as "no parameters" instead of crashing.
    parameters = (ai_msg or {}).get("parameters")
    print(parameters)
    
    if parameters:
        print(f"Identified {len(parameters)} global parameters")
        
        # Update tool_info with parsed results
        tool_info: ToolInfo = {
            **tool_info,
            "global_parameters": parameters
        }

    else:
        print("No global parameters found, skipping global parameter extraction")
    
    return {
        "tool_info": tool_info
    }


In [53]:
def subcommand_invocation_agent(state: WorkflowState) -> Dict[str, Any]:
    """
    Subcommand invocation agent: runs ``<tool> <subcommand> --help`` for each subcommand.

    Reads ``tool_info`` from the state and returns it with every subcommand
    entry copied and extended by a ``help_text`` key. Entries without a name,
    and entries whose help capture fails, are kept as they were. An errored
    ``tool_info``, or one without subcommands, is returned unchanged.

    Raises:
        Exception: If ``tool_info`` or the executable name is missing.
    """
    info = state.get("tool_info")
    if not info:
        raise Exception("Tool info missing from state")

    if info.get("error"):
        # Pass through error state
        return {"tool_info": info}

    executable = info.get("tool")
    known_subcommands = info.get("subcommands", [])

    if not executable:
        raise Exception("Executable name missing from tool info")

    if not known_subcommands:
        print("No subcommands found, skipping subcommand help capture")
        return {"tool_info": info}

    print(f"Capturing help text for {len(known_subcommands)} subcommands...")

    enriched = []
    for entry in known_subcommands:
        name = entry.get("name")
        if not name:
            print("Warning: Subcommand missing name, skipping")
        else:
            try:
                # Rebind to an extended copy; on failure `entry` stays the original.
                entry = {
                    **entry,
                    "help_text": capture_help(executable, name)["help_text"],
                }
                print(f"✓ Captured help for subcommand: {name}")
            except Exception as exc:
                print(f"✗ Failed to capture help for subcommand '{name}': {exc}")
        enriched.append(entry)

    print("Complete subcommand invocation")

    result_info: ToolInfo = {**info, "subcommands": enriched}
    return {"tool_info": result_info}


# Invocation & parser subgraph: a straight pipeline in which each agent's
# output feeds the next one. Nodes are listed in execution order.
_PIPELINE_NODES = (
    ("invocation_agent", invocation_agent),
    ("parsing_version_agent", parsing_version_agent),
    ("container_agent", container_agent),
    ("find_subcommands_agent", find_subcommands_agent),
    ("parse_globals_agent", parse_globals_agent),
    ("subcommand_invocation_agent", subcommand_invocation_agent),
)

graph_builder = StateGraph(WorkflowState)
for _node_name, _node_fn in _PIPELINE_NODES:
    graph_builder.add_node(_node_name, _node_fn)

# Chain START -> node 1 -> ... -> node N -> END.
_route = [START, *(_entry[0] for _entry in _PIPELINE_NODES), END]
for _source, _target in zip(_route, _route[1:]):
    graph_builder.add_edge(_source, _target)

# Compile the invocation graph for export
invocation_graph = graph_builder.compile()

if __name__ == "__main__":
    # Smoke run against a real executable.
    test_state = {"executable": "salmon"}
    result = invocation_graph.invoke(test_state)
    print(result)

Complete invocation
Extracted 1.10.3 as version
Running request for salmon-1.10.3
{'bioconda': 'salmon==1.10.3--h45fbf2d_5', 'docker': 'quay.io/biocontainers/salmon:1.10.3--h45fbf2d_5', 'singularity': 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--haf24da9_3'}
Complete container request
Invoking LLM to identify (sub)commands
{'bioconda': 'salmon==1.10.3--h45fbf2d_5', 'docker': 'quay.io/biocontainers/salmon:1.10.3--h45fbf2d_5', 'singularity': 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--haf24da9_3'}
Complete container request
Invoking LLM to identify (sub)commands
Identified subcommands: ['index', 'quant', 'alevin', 'swim', 'quantmerge']
Invoking LLM to identify global parameters
Identified subcommands: ['index', 'quant', 'alevin', 'swim', 'quantmerge']
Invoking LLM to identify global parameters
[]
No global parameters found, skipping global parameter extraction
Capturing help text for 5 subcommands...
✓ Captured help for subcommand: index
[]
No global parame

In [None]:
# JSON Schema for Parameter class, expressed as a Python dict.
# Note: Python booleans are capitalised (False), unlike JSON's `false`;
# json.dumps() renders them back as JSON literals.
parameter_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "title": "Parameter",
    "description": "Represents a CLI parameter.",
    "properties": {
        "description": {
            "type": ["string", "null"],
            "description": "Description of the parameter, if provided"
        },
        "type": {
            "type": ["string", "null"],
            "description": "Parameter type (str, int, float, path, bool, etc.)"
        },
        "required": {
            "type": "boolean",
            "description": "True if required, false if optional"
        },
        "is_input": {
            "type": "boolean",
            "description": "True if input file or directory"
        },
        "is_output": {
            "type": "boolean",
            "description": "True if output file or directory"
        },
        "default_value": {
            "type": ["string", "null"],
            "description": "Default value for the parameter"
        },
        "is_flag": {
            "type": "boolean",
            "description": "True if flag (no argument), false if takes argument"
        },
        "short": {
            "type": "string",
            "description": "Short name like -h"
        },
        "long": {
            "type": ["string", "null"],
            "description": "Long name like --help"
        }
    },
    "required": ["required", "is_input", "is_output", "is_flag", "short"],
    "additionalProperties": False
}


In [None]:
# Example output for toastbox help text, written as one row per parameter.
# Column order matches the key order of each resulting dict.
_TOASTBOX_FIELDS = (
    "description",
    "type",
    "required",
    "is_input",
    "is_output",
    "default_value",
    "is_flag",
    "short",
    "long",
)

_TOASTBOX_ROWS = (
    ('Operation mode (default: "standard"), can be "standard" or "bagel"',
     "str", False, False, False, "standard", False, "-m", "--mode"),
    ("Toast level from 1 (light) to 5 (dark) (default: 3)",
     "int", False, False, False, "3", False, "-t", "--toastiness"),
    ("Enable buzzer sound when done (default: false)",
     "bool", False, False, False, "false", True, "-b", "--buzz"),
    ("Path to log file (default: /var/log/toastbox.log)",
     "path", False, False, True, "/var/log/toastbox.log", False, "-l", "--log"),
    ("Show version information",
     None, False, False, False, None, True, "-v", "--version"),
    ("Show this help message",
     None, False, False, False, None, True, "-h", "--help"),
)

toastbox_parameters = [dict(zip(_TOASTBOX_FIELDS, _row)) for _row in _TOASTBOX_ROWS]

print("Example toastbox parameters:")
print(json.dumps(toastbox_parameters, indent=2))