In [None]:
#| default_exp apiguru
#| test: false

In [ ]:
#| hide
#| eval: false

# APIs.guru directory listing (JSON); downloaded by `_get_index`.
INDEX_URL = "https://api.apis.guru/v2/list.json"

In [ ]:
#| hide
#| eval: false

import re, httpx, yaml, json


# Matches every character that is NOT allowed in a cleaned operationId.
VALID = re.compile(r"[^a-zA-Z0-9_-]")

def load_and_clean_spec(url: str) -> dict:
    """Download an OpenAPI/Swagger document and normalise its operationIds.

    The document is parsed as YAML when the URL path ends in ``.yml`` or
    ``.yaml`` (query string and fragment are ignored for that check) and as
    JSON otherwise.  Every operation then receives an ``operationId`` that
    contains only ``[a-z0-9_-]`` and is at most 64 characters long; a missing
    id is synthesised from the HTTP verb and the path.

    Args:
        url: Location of the spec document.

    Returns:
        The parsed spec, modified in place.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    response = httpx.get(url, timeout=30)
    # Fail loudly instead of trying to parse an HTML/JSON error page as a spec.
    response.raise_for_status()
    text = response.text

    # Decide the format from the path only, so "spec.yaml?ref=main" is YAML.
    location = url.split("?", 1)[0].split("#", 1)[0].lower()
    if location.endswith((".yml", ".yaml")):
        spec = yaml.safe_load(text)
    else:
        spec = json.loads(text)

    # "paths" may be absent or null in a syntactically valid document.
    for path, methods in (spec.get("paths") or {}).items():
        if not isinstance(methods, dict):
            continue
        for verb, op in methods.items():
            # Path items also carry non-operation members ("parameters",
            # "summary", "$ref", ...) whose values are lists or strings.
            if not isinstance(op, dict):
                continue
            # ensure an operationId exists
            op_id = op.get("operationId") or f"{verb}_{path.strip('/').replace('/','_')}"
            op_id = VALID.sub("_", op_id)[:64]  # clean + truncate
            op["operationId"] = op_id.lower()
    return spec


In [ ]:
#| hide
#| eval: false

async def _load_openapi_spec(url: str) -> dict:
    """Return the parsed spec found at *url*, memoised in ``SPEC_CACHE``.

    A cached entry is returned immediately.  Otherwise the document is
    downloaded with a 10 second timeout, a non-success HTTP status raises,
    and the body is parsed with ``yaml.safe_load`` before being cached.
    """
    try:
        return SPEC_CACHE[url]
    except KeyError:
        pass  # not cached yet - fall through to the download

    async with httpx.AsyncClient(timeout=10) as client:
        response = await client.get(url)
    response.raise_for_status()

    parsed = yaml.safe_load(response.text)
    SPEC_CACHE[url] = parsed
    return parsed

In [ ]:
#| hide
#| eval: false


_VALID_RE   = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")
_SANITIZE_RE = re.compile(r"[^a-zA-Z0-9_-]")

def _clean(name: str) -> str:
    """
    Return a schema-compliant tool name (letters, digits, _-, ≤64 chars).
    Dots/slashes/spaces → “_”; consecutive underscores collapsed.
    """
    cleaned = _SANITIZE_RE.sub("_", name)           # illegal → _
    cleaned = re.sub(r"__+", "_", cleaned)          # collapse runs
    return cleaned[:64] or "t"                      # never empty

def _dedupe(names: set[str], base: str) -> str:
    """If *base* already exists, append '_1', '_2' etc. until unique."""
    if base not in names:
        return base
    for i in itertools.count(1):
        candidate = f"{base}_{i}"
        if candidate not in names and len(candidate) <= 64:
            return candidate


In [ ]:
#| hide
#| eval: false


# Parsed specs keyed by the URL they were downloaded from.
SPEC_CACHE: Dict[str, dict] = {}

@lru_cache
def _get_index() -> Dict[str, dict]:
    """Download the API index from ``INDEX_URL`` and memoise the result.

    The first successful call performs a blocking HTTP GET (10 second
    timeout); later calls return the cached mapping.

    Raises:
        httpx.HTTPStatusError: If the index endpoint answers with 4xx/5xx.
            Raising keeps ``lru_cache`` from memoising an error payload as
            if it were the index.
    """
    response = httpx.get(INDEX_URL, timeout=10)
    response.raise_for_status()
    return response.json()

def _score(title: str, query: str) -> float:
    q, t = query.lower(), title.lower()
    return (q in t) * 5 + sum(w in t for w in q.split())

def _pick_spec_url(ver: dict) -> str | None:
    """Return the best downloadable spec URL or None if absent."""
    return (
        ver.get("openapiUrl")      # OpenAPI 3+
        or ver.get("swaggerUrl")   # Swagger 2.0
        or ver.get("link")         # catch-all (rare)
    )

def _requires_credentials(spec: dict) -> bool:
    """Return *True* if the OpenAPI spec declares any mandatory security.

    Logic (conservative – errs on the side of *requiring* auth):
    • If *components.securitySchemes* is **missing** → assume *no* creds.
    • Else, if the global *security* list is **present & non‑empty* → creds.
    • Else, scan every operation for a non‑empty *security* list → creds.

    We don’t attempt to be clever about *optional* auth – that’s rarely used
    in public APIs and would require heuristic scoring beyond this scope.
    """

    if not spec.get("components", {}).get("securitySchemes"):
        return False 

    # Global requirement?
    if spec.get("security"):
        return True

    # Operation‑level requirement?
    for path_item in spec.get("paths", {}).values():
        for verb, operation in path_item.items():
            if isinstance(operation, dict) and operation.get("security"):
                return True

    return False 



def search_public_apis(query: str, limit: int = 10) -> List[Dict]:
    """Return at most *limit* APIs matching *query* whose specs need no creds.

    Index entries are ranked with ``_score`` against their title; entries
    that do not match the query at all are dropped so that an unrelated API
    is never returned (or downloaded) just to fill the list.  Each remaining
    candidate's spec is then fetched, in rank order, and kept only if
    ``_requires_credentials`` reports that no authentication is needed.

    Args:
        query: Free-text search string.
        limit: Maximum number of results; values <= 0 yield an empty list.

    Returns:
        A list of ``{"id", "title", "spec_url"}`` dicts, best match first.
    """
    if limit <= 0:
        return []

    idx = _get_index()

    # (score, api_id, title, spec_url) for every entry relevant to the query.
    scored: List[tuple] = []
    for api_id, meta in idx.items():
        ver_meta = meta["versions"][meta["preferred"]]
        if not ver_meta:
            continue
        title = ver_meta["info"]["title"]
        relevance = _score(title, query)
        if relevance <= 0:
            continue
        scored.append((relevance, api_id, title, _pick_spec_url(ver_meta)))
    # Stable sort: equally scored entries keep their index order.
    scored.sort(key=lambda row: row[0], reverse=True)

    # Keep only specs that have a downloadable URL *and* require no auth
    results: List[Dict] = []
    for _, api_id, title, spec_url in scored:
        if not spec_url:
            continue
        try:
            response = httpx.get(spec_url, timeout=8)
            response.raise_for_status()
            spec = response.json()
        except Exception:
            # Deliberately best-effort: a dead link, an error status or a
            # body that is not JSON just disqualifies this candidate.
            continue
        # A JSON body that is not an object cannot be a spec.
        if not isinstance(spec, dict) or _requires_credentials(spec):
            continue
        results.append({"id": api_id, "title": title, "spec_url": spec_url})
        if len(results) >= limit:
            break
    return results


    


@mcp.tool(
    name="quick_mount_openapi",
    description=(
        "Search the public-API index by keyword and mount the first spec that "
        "doesn’t require credentials."
    ),
)
async def quick_mount_openapi(
    query: str,
    mcp_root: object | None = None,   # optional override for tests / sub-routers
) -> List[str]:
    """Mount the first credential-free API that matches *query*.

    1. Ask ``search_public_apis`` for up to 10 public specs matching
       **query**.
    2. Try them one by one until ``mount_openapi`` succeeds and return
       whatever it returns (the newly created tool names).
    3. If none mounts, return ``[]``.  When *mcp_root* is given, the index
       entries relevant to **query** are additionally stored in
       ``mcp_root.state["needs_creds"]`` so the UI can prompt the user.

    Args:
        query: Free-text search string.
        mcp_root: Optional object forwarded to ``mount_openapi``; it must
            expose a mutable ``state`` mapping for step 3.

    Returns:
        The result of the first successful ``mount_openapi`` call, or ``[]``.
    """

    # Try at most 10 public hits
    for hit in search_public_apis(query, limit=10):
        slug = re.sub(r"[^a-z0-9_]+", "_", hit["id"].lower())[:30]
        try:
            return await mount_openapi(
                name=slug,
                spec_url=hit["spec_url"],
                mcp_root=mcp_root,
            )
        except Exception:
            # Best-effort: a spec that fails to mount must not stop us from
            # trying the next candidate.
            continue

    # Nothing mounted.  Without a root there is nowhere to record the
    # fallback list, so skip walking the whole index.
    if mcp_root is None:
        return []

    # Gather every query-relevant candidate (public or not)
    candidates: List[Dict[str, Any]] = []
    for api_id, meta in _get_index().items():
        ver_meta = meta["versions"][meta["preferred"]]
        spec_url = _pick_spec_url(ver_meta)
        if not spec_url:
            continue
        title = ver_meta["info"]["title"]
        # Only offer APIs related to the search, not the entire index.
        if _score(title, query) <= 0:
            continue
        candidates.append({
            "id": api_id,
            "title": title,
            "spec_url": spec_url,
            "needs_credentials": True,   # we already tried the public ones
        })

    # ensure a consistent place for the UI/agent to inspect
    mcp_root.state["needs_creds"] = candidates

    return []   # signal to the agent/UI that mounting failed

In [ ]:
#| hide
#| eval: false

@app.on_event("startup")
async def _load_specs() -> None:
    """Warm the APIs.guru index and log a preview (first 10 records)."""
    index: dict = _get_index()          # synchronous helper you already wrote