In [1]:
# Install the package in development mode if needed
# !pip install -e '.[mcp]'

import asyncio
import os
import sys
import logging
from pathlib import Path
import json

# Import the MCP components
from napistu.mcp.server import create_server, start_server
from napistu.mcp import documentation, codebase, tutorials, execution

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("napistu")

# Helper function to run async code in Jupyter
async def run_async(coro):
    return await coro

# Create a dummy session context for execution components
session_context = {}
object_registry = {}

In [2]:
from napistu.mcp.profiles import get_profile
# define the types of assets to load
profile = get_profile("full")

In [3]:
# register the relevant components
mcp_server = create_server(profile)
# initialize the relevant components
live_server = start_server(mcp_server)

In [8]:
from napistu.mcp import documentation_utils
from napistu.mcp.constants import READMES
from napistu.mcp.constants import NAPISTU_PY_READTHEDOCS

In [46]:
# readmes
readme = await run_async(documentation_utils.load_readme_content(READMES["napistu"]))
readme

INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/napistu/napistu/main/README.md "HTTP/1.1 200 OK"


"# Napistu\n\nThe Napistu project is an approach for creating and working with genome-scale mechanistic networks. Pathways of interest can be created from multiple sources (e.g., Reactome, STRING, TRRUST), aggregated across sources, and refined to add additional information. This pathway representation can then be turned into a graphical network to identify molecular neighborhoods, find paths between molecules, and to carryout network propagation.\n\nNapistu is an active project which we hope will be used for both simple analyses (e.g., basically replacing GSEA) as well as more complex analyses (e.g., multimodal data integration). \n\nWith Napistu you can:\n\n- Represent a range of publicly-available data sources using a common data structure, `sbml_dfs`, which is meant to faithfully encode molecular biology and biochemistry.\n- Aggregate complementary sources into a consensus model which allows high-quality but incomplete interactions to be supported by data sources which more compreh

In [45]:
NAPISTU_PY_READTHEDOCS

'https://napistu.readthedocs.io/en/latest/'

In [90]:
# to scrape read the docs, currently just focusing on the API section
# 1. read the package overview to find the urls of the subpackages
# 2. for each subpackage list modules
# 3. for the main package list its modules dropping the entries for subpackages
# 4. for each module scrape its page

NAPISTU_PY_READTHEDOCS = "https://napistu.readthedocs.io/en/latest"
NAPISTU_PY_READTHEDOCS_API = NAPISTU_PY_READTHEDOCS + '/api.html'
READTHEDOCS_TOC_CSS_SELECTOR = "td"

In [91]:
from bs4 import BeautifulSoup

rdc_index = await run_async(documentation_utils.load_html_page(NAPISTU_PY_READTHEDOCS_API))

soup = BeautifulSoup(rdc_index, 'html.parser')
toc = soup.select(READTHEDOCS_TOC_CSS_SELECTOR)

# Fetch each section
packages_dict = _parse_module_tags(toc)

INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/api.html "HTTP/1.1 200 OK"


In [110]:
from pydantic import BaseModel, validator, RootModel
from typing import Callable, Dict

def _parse_module_tags(td_list: list, base_url: str = "") -> dict:
    """
    Parse a list of <td> elements containing module links and return a dict of {name: url}.
    Optionally prepends base_url to relative hrefs.
    """
    result = {}
    for td in td_list:
        a = td.find("a", class_="reference internal")
        if a:
            # Get the module name from the <span class="pre"> tag
            span = a.find("span", class_="pre")
            if span:
                name = span.text.strip()
                href = a.get("href")
                # Prepend base_url if href is relative
                if href and not href.startswith("http"):
                    href = base_url.rstrip("/") + "/" + href.lstrip("/")
                result[name] = href
    return result

CSS_SELECTORS = {
    "module": {
        "css_selector": "td",
        "parser": _parse_module_tags
    }
}

class SelectorConfig(BaseModel):
    css_selector: str
    parser: Callable

    @validator('parser')
    def parser_must_be_callable(cls, v):
        if not callable(v):
            raise ValueError("parser must be callable")
        return v

class CSSSelectorsModel(RootModel[Dict[str, SelectorConfig]]):
    def __contains__(self, item):
        return item in self.root

    def __getitem__(self, item):
        return self.root[item]

css_selectors_model = CSSSelectorsModel.parse_obj(CSS_SELECTORS)

async def _process_rtd_page(url: str, page_type: str, css_selector: CSSSelectorsModel = css_selectors_model) -> dict:
    if page_type not in css_selector:
        raise ValueError(f"page_type '{page_type}' not in allowed types: {list(css_selector.root.keys())}")
    page_html = await documentation_utils.load_html_page(url)
    soup = BeautifulSoup(page_html, 'html.parser')
    selected = soup.select(css_selector[page_type].css_selector)
    return css_selector[page_type].parser(selected)

/var/folders/vy/h0gsmzyd4wq4gprq_vwwfclh0000gn/T/ipykernel_19997/180114448.py:35: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator('parser')
/var/folders/vy/h0gsmzyd4wq4gprq_vwwfclh0000gn/T/ipykernel_19997/180114448.py:48: PydanticDeprecatedSince20: The `parse_obj` method is deprecated; use `model_validate` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  css_selectors_model = CSSSelectorsModel.parse_obj(CSS_SELECTORS)


In [108]:
# load top-level package TOC
packages_dict =await run_async(_process_rtd_page(NAPISTU_PY_READTHEDOCS_API, page_type="module"))

# load subpackage TOCs
modules_dict = {}
for package_name, package_url in packages_dict.items():
    package_url = NAPISTU_PY_READTHEDOCS + package_url
    modules_dict[package_name] = await run_async(_process_rtd_page(package_url, page_type="module"))


INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/api.html "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.html#module-napistu "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.network.html#module-napistu.network "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.ingestion.html#module-napistu.ingestion "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.modify.html#module-napistu.modify "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.rpy2.html#module-napistu.rpy2 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.gcs.html#module-napistu.gcs "HTTP/1.1 200 OK"


In [151]:
from types import SimpleNamespace
PACKAGE_DEFS = SimpleNamespace(
    NAPISTU = "napistu",
)
def _create_module_css_selector(package_name: str, module_name: str, modules_dict: dict) -> str:

    if (package_name == PACKAGE_DEFS.NAPISTU):
        return f"#module-{PACKAGE_DEFS.NAPISTU}\.{module_name}"
    else:
        return f"#module-{PACKAGE_DEFS.NAPISTU}\.{package_name}\.{module_name}"

package_name = "napistu"
module_name = "sbml_dfs_core"
if package_name is not PACKAGE_DEFS.NAPISTU:
    package_name = f"{PACKAGE_DEFS.NAPISTU}.{package_name}"

url = NAPISTU_PY_READTHEDOCS + "/generated" + modules_dict[package_name][module_name]
css_selector = _create_module_css_selector(package_name="modify", module_name="curation", modules_dict=modules_dict)

In [152]:
page_html = await documentation_utils.load_html_page(url)



INFO:httpx:HTTP Request: GET https://napistu.readthedocs.io/en/latest/generated/napistu.sbml_dfs_core.html#module-napistu.sbml_dfs_core "HTTP/1.1 200 OK"


In [162]:
from bs4 import BeautifulSoup, Tag

def extract_function_blocks(soup: BeautifulSoup) -> list[Tag]:
    """
    Extract all function definition blocks (<dl class="py function">) from the soup.
    """
    return soup.find_all("dl", class_="py function")

def extract_class_blocks(soup: BeautifulSoup) -> list[Tag]:
    """
    Extract all class definition blocks (<dl class="py class">) from the soup.
    """
    return soup.find_all("dl", class_="py class")

def parse_function_block(dl: Tag) -> dict:
    """
    Parse a <dl class="py function"> block into a dict with name, signature, summary, and docstring.
    """
    result = {}
    dt = dl.find("dt")
    if dt:
        # Function name
        name_span = dt.find("span", class_="sig-name descname")
        result["name"] = name_span.text if name_span else ""
        # Signature
        sig = dt.get_text(" ", strip=True)
        result["signature"] = sig
        # Doc URL (from id or headerlink)
        result["doc_url"] = "#" + dt.get("id", "")
    # Summary/Docstring
    dd = dl.find("dd")
    if dd:
        summary = dd.find("p")
        result["summary"] = summary.text if summary else ""
        # Optionally, get full docstring
        result["docstring"] = dd.get_text(" ", strip=True)
    return result

def parse_class_block(dl: Tag) -> dict:
    """
    Parse a <dl class="py class"> block into a dict with name, summary, and docstring.
    """
    result = {}
    dt = dl.find("dt")
    if dt:
        name_span = dt.find("span", class_="sig-name descname")
        result["name"] = name_span.text if name_span else ""
        sig = dt.get_text(" ", strip=True)
        result["signature"] = sig
        result["doc_url"] = "#" + dt.get("id", "")
    dd = dl.find("dd")
    if dd:
        summary = dd.find("p")
        result["summary"] = summary.text if summary else ""
        result["docstring"] = dd.get_text(" ", strip=True)
    return result

def parse_rtd_module_page(html: str, url: str = None) -> dict:
    soup = BeautifulSoup(html, "html.parser")
    result = {
        "module": None,
        "url": url,
        "functions": [],
        "classes": []
    }

    # Get module name from <h1>
    h1 = soup.find("h1")
    if h1:
        # Remove headerlink icon if present
        module_name = h1.get_text(strip=True).replace("\uf0c1", "").strip()
        result["module"] = module_name

    # Parse top-level functions
    for func_dl in soup.find_all("dl", class_="py function"):
        func = {}
        sig = func_dl.find("dt")
        if sig:
            func["name"] = sig.find("span", class_="sig-name").get_text(strip=True)
            func["signature"] = sig.get_text(strip=True)
            func["id"] = sig.get("id")
        doc = func_dl.find("dd")
        if doc:
            func["doc"] = doc.get_text(" ", strip=True)
        result["functions"].append(func)

    # Parse classes and their methods/attributes
    for class_dl in soup.find_all("dl", class_="py class"):
        cls = {
            "name": None,
            "signature": None,
            "id": None,
            "doc": None,
            "methods": [],
            "attributes": []
        }
        sig = class_dl.find("dt")
        if sig:
            cls["name"] = sig.find("span", class_="sig-name").get_text(strip=True)
            cls["signature"] = sig.get_text(strip=True)
            cls["id"] = sig.get("id")
        doc = class_dl.find("dd")
        if doc:
            cls["doc"] = doc.get_text(" ", strip=True)
            # Methods
            for meth_dl in doc.find_all("dl", class_="py method"):
                meth = {}
                meth_sig = meth_dl.find("dt")
                if meth_sig:
                    meth["name"] = meth_sig.find("span", class_="sig-name").get_text(strip=True)
                    meth["signature"] = meth_sig.get_text(strip=True)
                    meth["id"] = meth_sig.get("id")
                meth_doc = meth_dl.find("dd")
                if meth_doc:
                    meth["doc"] = meth_doc.get_text(" ", strip=True)
                cls["methods"].append(meth)
            # Attributes
            for attr_dl in doc.find_all("dl", class_="py attribute"):
                attr = {}
                attr_sig = attr_dl.find("dt")
                if attr_sig:
                    attr["name"] = attr_sig.find("span", class_="sig-name").get_text(strip=True)
                    attr["signature"] = attr_sig.get_text(strip=True)
                    attr["id"] = attr_sig.get("id")
                attr_doc = attr_dl.find("dd")
                if attr_doc:
                    attr["doc"] = attr_doc.get_text(" ", strip=True)
                cls["attributes"].append(attr)
        result["classes"].append(cls)

    return result

x = parse_rtd_module_page(page_html)
x["classes"]

{'module': 'napistu.sbml_dfs_core',
 'url': None,
 'functions': [{'name': '_sbml_dfs_from_edgelist_check_cspecies_merge',
   'signature': 'napistu.sbml_dfs_core._sbml_dfs_from_edgelist_check_cspecies_merge(merged_species:DataFrame,original_species:DataFrame)→None\uf0c1',
   'id': 'napistu.sbml_dfs_core._sbml_dfs_from_edgelist_check_cspecies_merge',
   'doc': 'Check for a mismatch between the provided species data and species implied by the edgelist.'},
  {'name': '_sbml_dfs_from_edgelist_validate_inputs',
   'signature': 'napistu.sbml_dfs_core._sbml_dfs_from_edgelist_validate_inputs(interaction_edgelist:DataFrame,species_df:DataFrame,compartments_df:DataFrame)→None\uf0c1',
   'id': 'napistu.sbml_dfs_core._sbml_dfs_from_edgelist_validate_inputs',
   'doc': 'Check that the inputs for creating an SBML_dfs from an edgelist are appropriate.'},
  {'name': '_stub_compartments',
   'signature': "napistu.sbml_dfs_core._stub_compartments(stubbed_compartment:str='CELLULAR_COMPONENT')→DataFrame\uf

In [158]:
page_html

'\n\n<!DOCTYPE html>\n<html class="writer-html5" lang="en" data-content_root="../">\n<head>\n  <meta charset="utf-8" />\n  <meta name="readthedocs-addons-api-version" content="1"><meta name="viewport" content="width=device-width, initial-scale=1" />\n\n  <meta name="viewport" content="width=device-width, initial-scale=1.0" />\n  <title>napistu.sbml_dfs_core &mdash; Napistu Python library 0.2.1 documentation</title>\n      <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />\n      <link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=e59714d7" />\n\n  \n      <script src="../_static/jquery.js?v=5d32c60e"></script>\n      <script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>\n      <script src="../_static/documentation_options.js?v=37f418d5"></script>\n      <script src="../_static/doctools.js?v=9bcbadda"></script>\n      <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>\n    <script src="..

In [144]:
selected

[<section id="module-napistu.modify.curation">
 <span id="napistu-modify-curation"></span><h1>napistu.modify.curation<a class="headerlink" href="#module-napistu.modify.curation" title="Link to this heading"></a></h1>
 <p class="rubric">Functions</p>
 <table class="autosummary longtable docutils align-default">
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="#napistu.modify.curation.curate_sbml_dfs" title="napistu.modify.curation.curate_sbml_dfs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">curate_sbml_dfs</span></code></a>(curation_dir, sbml_dfs[, ...])</p></td>
 <td><p>Curate SBML_dfs</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#napistu.modify.curation.format_curated_entities" title="napistu.modify.curation.format_curated_entities"><code class="xref py py-obj docutils literal notranslate"><span class="pre">format_curated_entities</span></code></a>(entity_type, ...[, ...])</p></td>
 <td><p>For