In [1]:
import io, zipfile, requests
from dataclasses import dataclass

In [None]:
@dataclass
class RawRepositoryFile:
    """A single text file pulled out of a repository archive."""

    # Path of the file as recorded by whoever constructs the instance.
    filename: str
    # Text content of the file.
    content: str


def read_github_repo(
    owner: str,
    repo: str,
    folder: str = "",
    exts=("md",),
    branch="main",
) -> "list[RawRepositoryFile]":
    """Download a GitHub repository archive and extract selected files.

    The branch is fetched as a single zip archive, which is unpacked in
    memory; nothing is written to disk.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        folder: Optional path inside the repository. When given, only that
            folder's contents (or a file with exactly that path) are
            returned. Leading/trailing slashes are ignored.
        exts: File extensions to keep, without the leading dot. A single
            string is accepted as shorthand for a one-element tuple.
        branch: Branch whose archive is downloaded.

    Returns:
        A list of ``RawRepositoryFile`` objects whose ``filename`` is the
        path relative to the repository root and whose ``content`` is the
        UTF-8 decoded, whitespace-stripped file text.

    Raises:
        requests.HTTPError: If the archive download returns an error status.
        requests.RequestException: On connection problems or timeout.
        zipfile.BadZipFile: If the response body is not a valid zip archive.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(exts, str):
        exts = (exts,)
    # Materialise once so one-shot iterables survive the loop below and
    # str.endswith can test every suffix in a single call.
    suffixes = tuple(f".{ext}" for ext in exts)

    # Match on a directory boundary so "docs" does not also select
    # "docs-old/..." or a root-level "docs.md".
    folder = folder.strip("/")
    folder_prefix = f"{folder}/" if folder else ""

    url = f"https://codeload.github.com/{owner}/{repo}/zip/refs/heads/{branch}"
    # Without a timeout a stalled connection would block forever.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    files = []
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        for info in zf.infolist():
            if info.is_dir():
                continue

            # Every entry name begins with one top-level folder segment;
            # drop it so paths are relative to the repository root.
            _, sep, path = info.filename.partition("/")
            if not sep:
                continue

            if folder and path != folder and not path.startswith(folder_prefix):
                continue
            if not path.endswith(suffixes):
                continue

            # Undecodable bytes are dropped rather than failing the whole run.
            content = zf.read(info).decode("utf-8", errors="ignore").strip()
            files.append(RawRepositoryFile(path, content))

    return files