pathintegral-institute · GabrielDrapor · Aug 20, 2025 · Aug 18, 2025 · Aug 19, 2025 · coderabbitai
diff --git a/.github/workflows/generate-manifest.yml b/.github/workflows/generate-manifest.yml
@@ -1,4 +1,4 @@
-name: (TESTING) Generate MCP Manifest
+name: Generate MCP Manifest
 
 on:
   workflow_dispatch:

diff --git a/scripts/get_manifest.py b/scripts/get_manifest.py
@@ -18,14 +18,14 @@
 def extract_json_from_content(content: str) -> Optional[dict]:
     """Extract JSON from the API response content."""
     # Look for JSON code block
-    json_match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL)
+    json_match = re.search(r"```json\n(.*?)\n```", content, re.DOTALL)
     if json_match:
         try:
             return json.loads(json_match.group(1))
         except json.JSONDecodeError as e:
             print(f"Error parsing JSON: {e}")
             return None
-    
+
     # Try to find JSON without code block markers
     try:
         return json.loads(content)
@@ -37,16 +37,16 @@ def extract_json_from_content(content: str) -> Optional[dict]:
 def get_repo_name_from_url(repo_url: str) -> str:
     """Extract repository name from URL for filename."""
     # Remove .git suffix if present
-    if repo_url.endswith('.git'):
+    if repo_url.endswith(".git"):
         repo_url = repo_url[:-4]
-    
+
     # Extract owner/repo from URL
-    match = re.search(r'github\.com[:/]([^/]+/[^/]+)', repo_url)
+    match = re.search(r"github\.com[:/]([^/]+/[^/]+)", repo_url)
     if match:
-        return match.group(1).replace('/', '-')
-    
+        return match.group(1).replace("/", "-")
+
     # Fallback to last part of URL
-    return repo_url.split('/')[-1]
+    return repo_url.split("/")[-1]
 
 
 def generate_manifest(repo_url: str) -> Optional[dict]:
@@ -55,38 +55,30 @@ def generate_manifest(repo_url: str) -> Optional[dict]:
     if not api_key:
         print("Error: ANYON_API_KEY environment variable not set")
         return None
-    
+
     url = "https://anyon.chatxiv.org/api/v1/openai/v1/chat/completions"
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
     payload = {
         "model": "x",
         "messages": [
             {
                 "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": f"help me generate manifest json for this repo: {repo_url}"
-                    }
-                ]
+                "content": [{"type": "text", "text": f"help me generate manifest json for this repo: {repo_url}"}],
             }
-        ]
+        ],
     }
-    
+
     try:
         print(f"Generating manifest for {repo_url}...")
         response = requests.post(url, headers=headers, json=payload)
         response.raise_for_status()
-        
+
         data = response.json()
         content = data["choices"][0]["message"]["content"]
-        
+
         return extract_json_from_content(content)
-        
+
-        response = requests.post(url, headers=headers, json=payload)
-        response.raise_for_status()
-        
-
-        data = response.json()
-        content = data["choices"][0]["message"]["content"]
-        
-
-        return extract_json_from_content(content)
-        
+    try:
+        response = requests.post(url, headers=headers, json=payload, timeout=30)
+        response.raise_for_status()
+
+        data = response.json()
+        content = data["choices"][0]["message"]["content"]
+
+        return extract_json_from_content(content)
+    except requests.RequestException as e:
+        print(f"API request failed: {e}")
+        return None
+    except (KeyError, IndexError, ValueError, json.JSONDecodeError) as e:
+        print(f"Unexpected API response format: {e}")
+        return None
-        response = requests.post(url, headers=headers, json=payload)
-        response.raise_for_status()
-        
-
-        data = response.json()
-        content = data["choices"][0]["message"]["content"]
-        
-
-        return extract_json_from_content(content)
-        
+    try:
+        response = requests.post(url, headers=headers, json=payload, timeout=30)
+        response.raise_for_status()
+
+        data = response.json()
+        content = data["choices"][0]["message"]["content"]
+
+        return extract_json_from_content(content)
+    except requests.RequestException as e:
+        print(f"API request failed: {e}")
+        return None
+    except (KeyError, IndexError, ValueError, json.JSONDecodeError) as e:
+        print(f"Unexpected API response format: {e}")
+        return None
     except requests.RequestException as e:
         print(f"API request failed: {e}")
         return None
@@ -95,24 +87,57 @@ def generate_manifest(repo_url: str) -> Optional[dict]:
         return None
 
 
+def validate_installation_entry(install_type: str, entry: dict) -> bool:
+    """Validate a single installation entry against MCP registry schema."""
+    # Required fields for all installation types
+    required_fields = {"type", "command", "args"}
+
+    # Check all required fields exist
+    if not all(field in entry for field in required_fields):
+        return False
+
+    # Type must match install_type
+    if entry.get("type") != install_type:
+        return False
+
+    # Type-specific validation based on MCP registry patterns
+    if install_type == "npm":
+        # npm type should use npx command with -y flag
+        if entry.get("command") != "npx":
+            return False
+        args = entry.get("args", [])
+        if not args or args[0] != "-y":
+            return False
+    elif install_type == "uvx":
+        # uvx type should use uvx command
+        if entry.get("command") != "uvx":
+            return False
+    elif install_type == "docker":
+        # docker type should use docker command
+        if entry.get("command") != "docker":
+            return False
+        args = entry.get("args", [])
+        if not args or args[0] != "run":
+            return False
+
+    return True
+
-def validate_installation_entry(install_type: str, entry: dict) -> bool:
-    """Validate a single installation entry against MCP registry schema."""
-    # Required fields for all installation types
-    required_fields = {"type", "command", "args"}
-
-    # Check all required fields exist
-    if not all(field in entry for field in required_fields):
-        return False
-
-    # Type must match install_type
-    if entry.get("type") != install_type:
-        return False
-
-    # Type-specific validation based on MCP registry patterns
-    if install_type == "npm":
-        # npm type should use npx command with -y flag
-        if entry.get("command") != "npx":
-            return False
-        args = entry.get("args", [])
-        if not args or args[0] != "-y":
-            return False
-    elif install_type == "uvx":
-        # uvx type should use uvx command
-        if entry.get("command") != "uvx":
-            return False
-    elif install_type == "docker":
-        # docker type should use docker command
-        if entry.get("command") != "docker":
-            return False
-        args = entry.get("args", [])
-        if not args or args[0] != "run":
-            return False
-
-    return True
+def validate_installation_entry(install_type: str, entry: dict) -> bool:
+    """Validate a single installation entry against MCP registry schema."""
+    if not isinstance(entry, dict):
+        return False
+
+    # Entry type must match key
+    if entry.get("type") != install_type:
+        return False
+
+    # Type-specific validation
+    if install_type == "npm":
+        # Must be: command=npx, args=["-y", "<package>", ...]
+        if entry.get("command") != "npx":
+            return False
+        args = entry.get("args") or []
+        if len(args) < 2 or args[0] != "-y" or not isinstance(args[1], str) or not args[1]:
+            return False
+        return True
+
+    if install_type == "uvx":
+        # Must be: command=uvx, args=[...at least one token...]
+        if entry.get("command") != "uvx":
+            return False
+        args = entry.get("args") or []
+        if len(args) < 1:
+            return False
+        return True
+
+    if install_type == "docker":
+        # Must be: command=docker, args=["run", ...]
+        if entry.get("command") != "docker":
+            return False
+        args = entry.get("args") or []
+        if not args or args[0] != "run" or len(args) < 2:
+            # require at least an image or a flag after "run"
+            return False
+        return True
+
+    if install_type == "http":
+        # HTTP uses URL/headers instead of command/args
+        url = entry.get("url")
+        if not isinstance(url, str) or not url.strip():
+            return False
+        headers = entry.get("headers")
+        if headers is not None and not isinstance(headers, dict):
+            return False
+        return True
+
+    # Unknown types: reject
+    return False
-def validate_installation_entry(install_type: str, entry: dict) -> bool:
-    """Validate a single installation entry against MCP registry schema."""
-    # Required fields for all installation types
-    required_fields = {"type", "command", "args"}
-
-    # Check all required fields exist
-    if not all(field in entry for field in required_fields):
-        return False
-
-    # Type must match install_type
-    if entry.get("type") != install_type:
-        return False
-
-    # Type-specific validation based on MCP registry patterns
-    if install_type == "npm":
-        # npm type should use npx command with -y flag
-        if entry.get("command") != "npx":
-            return False
-        args = entry.get("args", [])
-        if not args or args[0] != "-y":
-            return False
-    elif install_type == "uvx":
-        # uvx type should use uvx command
-        if entry.get("command") != "uvx":
-            return False
-    elif install_type == "docker":
-        # docker type should use docker command
-        if entry.get("command") != "docker":
-            return False
-        args = entry.get("args", [])
-        if not args or args[0] != "run":
-            return False
-
-    return True
+def validate_installation_entry(install_type: str, entry: dict) -> bool:
+    """Validate a single installation entry against MCP registry schema."""
+    if not isinstance(entry, dict):
+        return False
+
+    # Entry type must match key
+    if entry.get("type") != install_type:
+        return False
+
+    # Type-specific validation
+    if install_type == "npm":
+        # Must be: command=npx, args=["-y", "<package>", ...]
+        if entry.get("command") != "npx":
+            return False
+        args = entry.get("args") or []
+        if len(args) < 2 or args[0] != "-y" or not isinstance(args[1], str) or not args[1]:
+            return False
+        return True
+
+    if install_type == "uvx":
+        # Must be: command=uvx, args=[...at least one token...]
+        if entry.get("command") != "uvx":
+            return False
+        args = entry.get("args") or []
+        if len(args) < 1:
+            return False
+        return True
+
+    if install_type == "docker":
+        # Must be: command=docker, args=["run", ...]
+        if entry.get("command") != "docker":
+            return False
+        args = entry.get("args") or []
+        if not args or args[0] != "run" or len(args) < 2:
+            # require at least an image or a flag after "run"
+            return False
+        return True
+
+    if install_type == "http":
+        # HTTP uses URL/headers instead of command/args
+        url = entry.get("url")
+        if not isinstance(url, str) or not url.strip():
+            return False
+        headers = entry.get("headers")
+        if headers is not None and not isinstance(headers, dict):
+            return False
+        return True
+
+    # Unknown types: reject
+    return False
+
 def validate_installations(manifest: dict, repo_url: str) -> Optional[dict]:
     """Validate and correct the installations field by having API check the original README."""
     if not manifest:
         return manifest
-    
+
     api_key = os.getenv("ANYON_API_KEY")
     if not api_key:
         print("Error: ANYON_API_KEY environment variable not set, skipping validation")
         return manifest
-    
+
     url = "https://anyon.chatxiv.org/api/v1/openai/v1/chat/completions"
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
-    }
-
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
     current_installations = manifest.get("installations", {})
-    
+
     payload = {
         "model": "x",
         "messages": [
@@ -121,50 +146,105 @@ def validate_installations(manifest: dict, repo_url: str) -> Optional[dict]:
                 "content": [
                     {
                         "type": "text",
-                        "text": f"""Please carefully validate and correct the installations field in this manifest by checking the original README.md from the repository.
-
-Repository: {repo_url}
+                        "text": f"""Please read the README.md from {repo_url} and create accurate installation entries following the MCP registry schema.
 
-Current manifest installations:
+Current installations:
 {json.dumps(current_installations, indent=2)}
 
-IMPORTANT INSTRUCTIONS:
-1. Access the README.md from the repository URL: {repo_url}
-2. Compare the current installations against the exact commands and configurations shown in the README.md
-3. Ensure the command, args, and env variables exactly match what's documented in the README. Remove the installation methods which are not mentioned in README.
-4. Pay special attention to:
-   - Exact command names (npx, uvx, docker, python, etc.)
-   - Correct package names and arguments (e.g., for npx command, it should usually be "-y [package_name]"
-   - Proper environment variable names and formats
-   - Installation type matching the command used
-5. Fix any discrepancies between the manifest and the README
-6. Return ONLY a valid JSON object with the corrected installations field
-7. The response should be in this exact format: {{"installations": {{...}}}}
-
-Focus on accuracy - the installations must work exactly as documented in the README. If the README shows different installation methods, include all valid ones."""
+TASK: Find installation instructions in the README and convert them to the exact schema format used in the MCP registry.
+
+INSTALLATION SCHEMA EXAMPLES:
+
+1. NPX INSTALLATIONS (most common):
+```json
+"npm": {{
+  "type": "npm", 
+  "command": "npx",
+  "args": ["-y", "@package/name"],
+  "description": "Install with npx"
+}}
+```
+
+2. UVX INSTALLATIONS:
+```json
+"uvx": {{
+  "type": "uvx",
+  "command": "uvx", 
+  "args": ["package-name"],
+  "description": "Run with Claude Desktop"
+}}
+```
+OR with git URL:
+```json
+"uvx": {{
+  "type": "uvx",
+  "command": "uvx",
+  "args": ["--from", "git+https://github.com/user/repo", "command-name"],
+  "description": "Install from git"
+}}
+```
+
+3. DOCKER INSTALLATIONS:
+```json
+"docker": {{
+  "type": "docker",
+  "command": "docker",
+  "args": ["run", "-i", "--rm", "image-name"],
+  "description": "Run using Docker"
+}}
+```
+
+PROCESS:
+1. Read the README.md from {repo_url}
+2. Find installation sections (Installation, Setup, Usage, Getting Started, etc.)
+3. For each installation method found:
+   - If README shows "npx package-name" → create npm entry with npx command
+   - If README shows "uvx package-name" → create uvx entry  
+   - If README shows "docker run ..." → create docker entry
+   - Copy the EXACT package names and arguments from README
+
+CRITICAL RULES:
+- Use exact package names from README (don't guess or modify)
+- Match the schema format exactly as shown in examples
+- Include ALL installation methods mentioned in README
+- Remove installation methods NOT mentioned in README
+- For npx: always use type "npm" with command "npx" and args ["-y", "package-name"]
+
+Return ONLY: {{"installations": {{...}}}}""",
                     }
-                ]
+                ],
             }
-        ]
+        ],
     }
-    
+
     try:
         print("Validating installations against README...")
         response = requests.post(url, headers=headers, json=payload)
         response.raise_for_status()
-        
+
         data = response.json()
         content = data["choices"][0]["message"]["content"]
-        
+
         validated_data = extract_json_from_content(content)
         if validated_data and "installations" in validated_data:
-        print("Validating installations against README...")
-        response = requests.post(url, headers=headers, json=payload)
-        response.raise_for_status()
-        
-
-        data = response.json()
-        content = data["choices"][0]["message"]["content"]
-        
-
-        validated_data = extract_json_from_content(content)
-        if validated_data and "installations" in validated_data:
+        print("Validating installations against README...")
+        response = requests.post(url, headers=headers, json=payload, timeout=60)
+        response.raise_for_status()
+
+        data = response.json()
+        content = data["choices"][0]["message"]["content"]
+
+        validated_data = extract_json_from_content(content)
+        if validated_data and "installations" in validated_data:
+            ...
+
+    except (requests.RequestException, ValueError, json.JSONDecodeError, KeyError, IndexError) as e:
+        print(f"Error validating installations: {e}")
+        return manifest
-        print("Validating installations against README...")
-        response = requests.post(url, headers=headers, json=payload)
-        response.raise_for_status()
-        
-
-        data = response.json()
-        content = data["choices"][0]["message"]["content"]
-        
-
-        validated_data = extract_json_from_content(content)
-        if validated_data and "installations" in validated_data:
+        print("Validating installations against README...")
+        response = requests.post(url, headers=headers, json=payload, timeout=60)
+        response.raise_for_status()
+
+        data = response.json()
+        content = data["choices"][0]["message"]["content"]
+
+        validated_data = extract_json_from_content(content)
+        if validated_data and "installations" in validated_data:
+            ...
+
+    except (requests.RequestException, ValueError, json.JSONDecodeError, KeyError, IndexError) as e:
+        print(f"Error validating installations: {e}")
+        return manifest
-            print("✓ Installations validated and corrected")
-            manifest["installations"] = validated_data["installations"]
+            # Additional validation of each installation entry
+            cleaned_installations = {}
+            for install_type, entry in validated_data["installations"].items():
+                if validate_installation_entry(install_type, entry):
+                    cleaned_installations[install_type] = entry
+                else:
+                    print(f"⚠ Removing invalid {install_type} installation entry")
+
+            if cleaned_installations:
+                print("✓ Installations validated and corrected")
+                manifest["installations"] = cleaned_installations
+            else:
+                print("⚠ No valid installations found, keeping original")
             return manifest
         else:
             print("⚠ Validation failed, keeping original installations")
             return manifest
-            
+
     except Exception as e:
         print(f"Error validating installations: {e}")
         return manifest
@@ -175,19 +255,19 @@ def save_manifest(manifest: dict, repo_url: str) -> bool:
     # Create directory if it doesn't exist
     servers_dir = Path("mcp-registry/servers")
     servers_dir.mkdir(parents=True, exist_ok=True)
-    
+
     # Generate filename from repo URL
     repo_name = get_repo_name_from_url(repo_url)
     filename = f"{repo_name}.json"
     filepath = servers_dir / filename
-    
+
     try:
-        with open(filepath, 'w', encoding='utf-8') as f:
+        with open(filepath, "w", encoding="utf-8") as f:
             json.dump(manifest, f, indent=2, ensure_ascii=False)
-        
+
         print(f"Manifest saved to {filepath}")
         return True
-        
+
     except IOError as e:
         print(f"Failed to save manifest: {e}")
         return False
@@ -196,26 +276,26 @@ def save_manifest(manifest: dict, repo_url: str) -> bool:
 def main():
     parser = argparse.ArgumentParser(description="Generate MCP manifest JSON from repository URL")
     parser.add_argument("repo_url", help="Repository URL to generate manifest for")
-    
+
     args = parser.parse_args()
-    
+
     # Step 1: Generate initial manifest
     print("Step 1: Generating initial manifest...")
     manifest = generate_manifest(args.repo_url)
     if not manifest:
         print("Failed to generate manifest")
         sys.exit(1)
-    
+
     # Step 2: Validate and correct installations
     print("Step 2: Validating installations against README...")
     manifest = validate_installations(manifest, args.repo_url)
-    
+
     # Step 3: Save manifest
     print("Step 3: Saving manifest...")
     if not save_manifest(manifest, args.repo_url):
         print("Failed to save manifest")
         sys.exit(1)
-    
+
     print("✓ Manifest generation completed successfully!")