In [None]:
import json

def load_local(fn):
    """Load a JSON Lines file into a list.

    Parameters
    ----------
    fn : str
        Path to a UTF-8 encoded file holding one JSON document per line.

    Returns
    -------
    list
        The decoded value of every non-blank line, in file order.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Plain read mode: "r+" additionally requires write permission, which
    # made loading fail on read-only files although nothing is written.
    with open(fn, "r", encoding="utf-8") as f:
        return [json.loads(text) for line in f if (text := line.strip())]

def print_json(d, fn):
    """Write the items of ``d`` to ``fn`` in JSON Lines form.

    Parameters
    ----------
    d : iterable
        JSON-serialisable objects; each one becomes a single output line.
    fn : str
        Destination path; the file is created or truncated and written as
        UTF-8 with non-ASCII characters left unescaped.
    """
    with open(fn, "w+", encoding="utf-8") as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in d
        )

def yield_local(fn):
    """Lazily iterate over the records of a JSON Lines file.

    Blank lines and lines that are not valid JSON are skipped, so a
    partially corrupted file can still be streamed.

    Parameters
    ----------
    fn : str
        Path to a UTF-8 encoded file holding one JSON document per line.

    Yields
    ------
    object
        The decoded value of each valid, non-blank line, in file order.
    """
    # Plain read mode: "r+" would also demand write permission.
    with open(fn, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Best effort: ignore malformed lines only.
                continue
            # Yield outside the try block. With the former bare ``except``
            # around the yield, closing the generator early (GeneratorExit)
            # was swallowed and ended in a RuntimeError.
            yield record

In [2]:
import ast
import json
from typing import Any, Dict, List, Optional


class ChemistryToolParser:
    """Parser for extracting tool information from Python class definitions.

    A class counts as a tool when its body assigns string literals to both
    ``name`` and ``description``. Inputs and outputs are taken from the
    NumPy-style docstring of its ``_run`` method, with the method signature
    used for whatever the docstring does not provide.
    """

    def parse_file(self, filepath: str) -> List[Dict[str, str]]:
        """Parse a Python file and extract all tool definitions.

        Parameters
        ----------
        filepath : str
            Path to the Python file containing tool class definitions

        Returns
        -------
        List[Dict[str, str]]
            List of dictionaries containing tool information
        """
        # Python source defaults to UTF-8; do not depend on the locale.
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        return self.parse_content(content)

    def parse_content(self, content: str) -> List[Dict[str, str]]:
        """Parse Python code content and extract tool definitions.

        Parameters
        ----------
        content : str
            Python code content as string

        Returns
        -------
        List[Dict[str, str]]
            List of dictionaries containing tool information, one per class
            that defines both a name and a description

        Raises
        ------
        SyntaxError
            If ``content`` is not valid Python source
        """
        tree = ast.parse(content)
        tools = []

        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                tool_info = self._extract_tool_info(node)
                if tool_info:
                    tools.append(tool_info)

        return tools

    def _extract_tool_info(self, class_node: ast.ClassDef) -> Optional[Dict[str, str]]:
        """Extract tool information from a class definition node.

        Parameters
        ----------
        class_node : ast.ClassDef
            AST node representing a class definition

        Returns
        -------
        Optional[Dict[str, str]]
            Dictionary with name, description, inputs, and outputs, or
            ``None`` when the class lacks a name or a description
        """
        tool_info = {
            "name": "",
            "description": "",
            "inputs": "",
            "outputs": ""
        }

        # Extract class attributes (name and description). Both annotated
        # (``name: str = "X"``) and plain (``name = "X"``) assignments count.
        for item in class_node.body:
            if isinstance(item, ast.AnnAssign):
                targets = [item.target]
            elif isinstance(item, ast.Assign):
                targets = item.targets
            else:
                continue

            value = item.value
            # Only string literals are usable; skip anything computed.
            if not (isinstance(value, ast.Constant) and isinstance(value.value, str)):
                continue

            for target in targets:
                if isinstance(target, ast.Name) and target.id in ("name", "description"):
                    tool_info[target.id] = value.value

        # Find the _run method and extract inputs/outputs from docstring
        for item in class_node.body:
            if isinstance(item, ast.FunctionDef) and item.name == "_run":
                inputs, outputs = self._parse_docstring(ast.get_docstring(item), item)
                tool_info["inputs"] = inputs
                tool_info["outputs"] = outputs
                break

        # Only return if we found a valid tool (has name and description)
        if tool_info["name"] and tool_info["description"]:
            return tool_info

        return None

    def _parse_docstring(self, docstring: str, func_node: ast.FunctionDef) -> tuple:
        """Parse function docstring to extract parameters and return types.

        A line is a section header when it reads ``Parameters`` or
        ``Returns`` (an optional trailing colon is allowed). Any other line
        underlined with dashes (``Raises``, ``Notes``, ...) ends the current
        section. Whatever the docstring leaves empty is filled in from the
        function signature.

        Parameters
        ----------
        docstring : str
            The docstring of the function
        func_node : ast.FunctionDef
            AST node of the function

        Returns
        -------
        tuple
            (inputs_str, outputs_str)
        """
        if not docstring:
            # Fall back to function signature if no docstring
            return self._extract_from_signature(func_node)

        lines = [raw.strip() for raw in docstring.split('\n')]
        inputs = []
        outputs = []

        current_section = None

        for index, line in enumerate(lines):
            following = lines[index + 1] if index + 1 < len(lines) else ''
            underlined = len(following) >= 3 and set(following) == {'-'}

            # Detect sections. Compare the whole line so that prose such as
            # "Returns the score ..." is not mistaken for a header.
            heading = line.rstrip(':').rstrip()
            if heading == 'Parameters':
                current_section = 'parameters'
                continue
            if heading == 'Returns':
                current_section = 'returns'
                continue
            if line and underlined:
                # Some other section starts here; stop collecting.
                current_section = None
                continue

            # Skip blanks, underlines and bullet-like lines
            if not line or line.startswith('-'):
                continue

            # Parse content based on current section
            if current_section == 'parameters':
                # Format: "param_name: type" or "param_name : type"
                if ':' in line:
                    inputs.append(line)
            elif current_section == 'returns':
                outputs.append(line)

        # Use the signature for anything the docstring did not document
        sig_inputs, sig_outputs = "", ""
        if func_node is not None and not (inputs and outputs):
            sig_inputs, sig_outputs = self._extract_from_signature(func_node)

        inputs_str = ', '.join(inputs) if inputs else sig_inputs
        outputs_str = ' '.join(outputs) if outputs else sig_outputs

        return inputs_str, outputs_str

    def _extract_from_signature(self, func_node: ast.FunctionDef) -> tuple:
        """Extract input/output info from function signature as fallback.

        Parameters
        ----------
        func_node : ast.FunctionDef
            AST node of the function

        Returns
        -------
        tuple
            (inputs_str, outputs_str); inputs are rendered as ``name: type``
            (or just ``name`` when unannotated) and joined with ``", "``
        """
        arguments = func_node.args
        # Positional-only and keyword-only parameters are inputs as well
        declared = arguments.posonlyargs + arguments.args + arguments.kwonlyargs

        inputs = []

        # Extract parameters (skip 'self')
        for arg in declared:
            if arg.arg == 'self':
                continue
            if arg.annotation:
                inputs.append(f"{arg.arg}: {ast.unparse(arg.annotation)}")
            else:
                inputs.append(arg.arg)

        # Extract return type
        outputs = ""
        if func_node.returns:
            outputs = ast.unparse(func_node.returns)

        return ', '.join(inputs), outputs


# Sample tool definition used to demonstrate the parser. It is only parsed
# as text, so the names it references (BaseTool, sascorer, ...) need not exist.
example_code = '''
class CalculateSA(BaseTool):
    """Calculate the SA of the compound."""

    name: str = "CalculateSA"
    description: str = "Used to compute the synthetic accessibility (SA) of the given molecule."

    def _run(self, compound: str) -> float:
        """Compute Synthetic Accessibility (SA) of the given SMILES string. Ertl & Schuffenhauer 2009.

        Parameters
        ----------
        compound: Compound in SMILES format

        Returns
        -------
        float: The SA between 1 (easy) and 10 (hard)
        """
        return sascorer.calculateScore(MolFromSmiles(compound))

    async def _arun(self, compound: str) -> float:
        """Use the convert_to_SMILES tool asynchronously."""
        raise NotImplementedError()
'''

# Extract the tool metadata from the sample source.
parser = ChemistryToolParser()
tools = parser.parse_content(example_code)

# Show a heading followed by the pretty-printed result.
print("Extracted Tools:", json.dumps(tools, indent=2), sep="\n")

Extracted Tools:
[
  {
    "name": "CalculateSA",
    "description": "Used to compute the synthetic accessibility (SA) of the given molecule.",
    "inputs": "compound: Compound in SMILES format",
    "outputs": "float: The SA between 1 (easy) and 10 (hard)"
  }
]


In [None]:
import inspect
from typing import get_type_hints

def query_name_to_cas(molecule_name: str) -> str:
    """
    Query a molecule name and return its CAS number in Markdown format.

    The identifier is first resolved to a PubChem CID (looked up as SMILES
    when ``is_smiles`` accepts it, otherwise as a name). The CAS entry is
    then read from the compound's PUG View record, under
    "Names and Identifiers" > "Other Identifiers" > "CAS".

    Parameters
    ----------
    molecule_name : str
        Molecule name or SMILES string to look up.

    Returns
    -------
    str
        A Markdown snippet with the CAS number and a PubChem link,
        ``"CAS number not found."`` when the record has no CAS entry, or
        ``"Invalid molecule input, no Pubchem entry."`` when a request fails
        or the response does not have the expected structure.
    """
    # Local import so the function does not depend on other cells' imports.
    from urllib.parse import quote

    # Never wait indefinitely on the remote service; a timeout surfaces as a
    # RequestException and is reported like any other failed lookup.
    request_timeout = 30  # seconds

    try:
        mode = "name"
        if is_smiles(molecule_name):
            mode = "smiles"
        # Percent-encode the identifier: characters such as '#', '?' or '/'
        # (all legal in SMILES) would otherwise change the URL's structure.
        identifier = quote(molecule_name, safe="")
        url_cid = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/{mode}/{identifier}/cids/JSON"
        cid_response = requests.get(url_cid, timeout=request_timeout)
        cid_response.raise_for_status()
        cid = cid_response.json()["IdentifierList"]["CID"][0]

        url_data = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON"
        data_response = requests.get(url_data, timeout=request_timeout)
        data_response.raise_for_status()
        data = data_response.json()

        cas_number = None
        for section in data["Record"]["Section"]:
            if section.get("TOCHeading") == "Names and Identifiers":
                for subsection in section["Section"]:
                    if subsection.get("TOCHeading") == "Other Identifiers":
                        for subsubsection in subsection["Section"]:
                            if subsubsection.get("TOCHeading") == "CAS":
                                # Take the first listed CAS value.
                                cas_number = subsubsection["Information"][0]["Value"]["StringWithMarkup"][0]["String"]
                                break

        if cas_number:
            markdown_result = f"""
### Molecule: {molecule_name}

#### CAS Number

- **CAS**: `{cas_number}`

#### More Information

- [PubChem Compound](https://pubchem.ncbi.nlm.nih.gov/compound/{cid})
"""
            return markdown_result
        else:
            return "CAS number not found."

    except (
        requests.exceptions.RequestException,
        KeyError,
        IndexError,
        TypeError,
        ValueError,
    ):
        # IndexError/TypeError/ValueError cover empty lists, unexpected
        # payload shapes and undecodable JSON in addition to missing keys.
        return "Invalid molecule input, no Pubchem entry."
    
# Introspect query_name_to_cas and print its parameters and return type.
sig = inspect.signature(query_name_to_cas)
try:
    type_hints = get_type_hints(query_name_to_cas)
except Exception:
    # Hints that cannot be resolved (e.g. unknown forward references) are
    # not fatal here. Unlike a bare ``except``, this does not swallow
    # KeyboardInterrupt or SystemExit.
    type_hints = {}

print(f"Function: {query_name_to_cas.__name__}")
print("Parameters:")
for param_name, param in sig.parameters.items():
    # ``empty`` is a sentinel: compare by identity so that annotations or
    # defaults with a custom ``__ne__`` cannot affect the check.
    if param.annotation is not inspect.Parameter.empty:
        # Get the name of the type
        type_name = getattr(param.annotation, '__name__', str(param.annotation))
    else:
        type_name = 'No type hint'
    print(f"  - {param_name}: {type_name}")
    if param.default is not inspect.Parameter.empty:
        print(f"    Default: {param.default}")

# Get return type name
if sig.return_annotation is not inspect.Signature.empty:
    return_type_name = getattr(sig.return_annotation, '__name__', str(sig.return_annotation))
else:
    return_type_name = 'No type hint'
print(f"Return type: {return_type_name}")
print()

Function: query_name_to_cas
Parameters:
  - molecule_name: str
Return type: str

