In [1]:
import io
import zipfile
import requests
import frontmatter

def read_repo_data(repo_owner, repo_name, branch='main', timeout=30):
    """
    Download and parse all markdown files from a GitHub repository.

    The requested branch is fetched as a single zip archive from
    codeload.github.com and processed entirely in memory.

    Args:
        repo_owner: GitHub username or organization
        repo_name: Repository name
        branch: Name of the branch to download. Defaults to 'main'.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            connection cannot hang the caller forever. Defaults to 30.

    Returns:
        List of dictionaries, one per ``.md`` / ``.mdx`` file (extension
        matched case-insensitively). Each dictionary is the result of
        ``post.to_dict()`` for the parsed file plus a 'filename' key holding
        the file's path inside the archive. Files that fail to parse are
        reported on stdout and skipped.

    Raises:
        RuntimeError: If the download does not return HTTP status 200.
    """
    prefix = 'https://codeload.github.com'
    url = f'{prefix}/{repo_owner}/{repo_name}/zip/refs/heads/{branch}'
    resp = requests.get(url, timeout=timeout)

    if resp.status_code != 200:
        # RuntimeError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise RuntimeError(f"Failed to download repository: {resp.status_code}")

    repository_data = []

    # The context manager closes the archive even if something unexpected
    # (e.g. KeyboardInterrupt) escapes the loop.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        for file_info in zf.infolist():
            filename = file_info.filename

            # str.endswith accepts a tuple of suffixes.
            if not filename.lower().endswith(('.md', '.mdx')):
                continue

            try:
                with zf.open(file_info) as f_in:
                    content = f_in.read().decode('utf-8', errors='ignore')
                post = frontmatter.loads(content)
                data = post.to_dict()
                data['filename'] = filename
                repository_data.append(data)
            except Exception as e:
                # Best effort: one bad file must not abort the whole import.
                print(f"Error processing {filename}: {e}")
                continue

    return repository_data




In [2]:
# Pull every markdown document from vectara/awesome-agent-failures.
vectara = read_repo_data(
    repo_owner='vectara',
    repo_name='awesome-agent-failures',
)
print(f"Vectara docs: {len(vectara)}")

# Leave the first parsed document as the cell's last expression so the
# notebook displays it.
vectara[0]

Vectara docs: 25


{'content': '# 🤝 Contributing to Awesome AI Agent Failures\n\nThank you for your interest in contributing to this project! This repository thrives on community contributions that help us build a comprehensive understanding of AI agent failure modes and their solutions.\n\n## 🎯 How You Can Contribute\n\n### 📝 1. Share Failure Cases\nDocument real-world failures you\'ve encountered:\n- Follow our failure case submission guidelines\n- Include reproduction steps when possible\n- Anonymize sensitive information\n\n### 🔧 2. Propose Mitigation Strategies\nShare solutions and prevention techniques:\n- Describe implementation details\n- Link to GitHub repositories with working examples\n- Reference related academic work where possible\n\n### 📊 3. Contribute Research\nAdd academic insights and empirical studies:\n- Link to relevant papers and studies\n- Summarize key findings\n- Discuss practical implications\n- Suggest future research directions\n\n### 🛠️ 4. Build Tools\nDevelop diagnostic and 