In [9]:
# Install required packages (run once)
%pip install requests python-dotenv openai

Note: you may need to restart the kernel to use updated packages.


In [10]:
import base64
import json
import os
from urllib.parse import quote

import requests
from dotenv import load_dotenv

# Pull settings from the .env file that sits one directory above the working directory
load_dotenv(dotenv_path=os.path.join(os.path.split(os.getcwd())[0], ".env"))

def get_token():
    """Return the GitHub token from the environment.

    ``GH_TOKEN`` wins when it is set to a non-empty value; otherwise whatever
    ``GITHUB_TOKEN`` holds (possibly ``None``) is returned.
    """
    preferred = os.environ.get("GH_TOKEN")
    if preferred:
        return preferred
    return os.environ.get("GITHUB_TOKEN")

class GitHubClient:
    """GitHub API client - connects to the remote GitHub API.

    Every request carries the headers built in ``__init__`` and is bounded by
    ``self.timeout`` so a stalled connection cannot hang the notebook kernel.
    """

    def __init__(self, token, timeout=30):
        """Store the credential and build the headers sent with each request.

        Args:
            token: GitHub token used as the Bearer credential.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.token = token
        self.timeout = timeout
        self.base = "https://api.github.com"
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "github-repos-summarizer",
            "X-GitHub-Api-Version": "2022-11-28"
        }

    def _get(self, url, params=None):
        """Issue a GET with the client's headers and timeout."""
        return requests.get(url, headers=self.headers, params=params, timeout=self.timeout)

    def _contents_url(self, owner, repo, path):
        """Build the contents endpoint URL for ``path``."""
        # Percent-encode the path ("/" is kept) so characters such as "#" or
        # "?" in a file name are not interpreted as a fragment or query string.
        return f"{self.base}/repos/{owner}/{repo}/contents/{quote(path)}"

    def list_user_repos(self, per_page=100):
        """List all repositories for the authenticated user.

        Follows the pagination links until no "next" page is advertised.

        Args:
            per_page: Page size requested from the API.

        Returns:
            A list with the decoded JSON entries of every page, in API order
            (most recently updated first).

        Raises:
            Exception: If the API answers 401 (bad or missing token).
            requests.HTTPError: For any other error status.
        """
        url = f"{self.base}/user/repos"
        params = {"per_page": per_page, "sort": "updated", "direction": "desc"}
        repos = []

        while url:
            resp = self._get(url, params=params)
            if resp.status_code == 401:
                raise Exception("‚ùå Unauthorized: Check your GH_TOKEN")
            resp.raise_for_status()
            repos.extend(resp.json())

            # requests parses the Link header for us; a missing "next" entry
            # yields None and ends the loop.
            url = resp.links.get("next", {}).get("url")
            # The "next" URL already embeds the query string, so the params
            # are only sent with the first request.
            params = None
        return repos

    def get_repo_details(self, owner, repo):
        """Get detailed repository information.

        Returns:
            The decoded JSON body of ``/repos/{owner}/{repo}``.

        Raises:
            requests.HTTPError: On any error status.
        """
        resp = self._get(f"{self.base}/repos/{owner}/{repo}")
        resp.raise_for_status()
        return resp.json()

    def get_repo_contents(self, owner, repo, path=""):
        """Get contents of a repository path.

        Returns:
            The decoded JSON body, or ``[]`` when the path does not exist (404).

        Raises:
            requests.HTTPError: On any other error status.
        """
        resp = self._get(self._contents_url(owner, repo, path))
        if resp.status_code == 404:
            return []
        resp.raise_for_status()
        return resp.json()

    def get_file_content(self, owner, repo, path):
        """Get decoded content of a specific file.

        Returns:
            The file text (base64 payloads are decoded as UTF-8, undecodable
            bytes dropped), or ``None`` when the path does not exist or does
            not refer to a single file.

        Raises:
            requests.HTTPError: On any error status other than 404.
        """
        resp = self._get(self._contents_url(owner, repo, path))
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            # A directory path is answered with a JSON array (a listing)
            # rather than a file object, so there is no content to decode.
            return None
        if data.get("encoding") == "base64":
            return base64.b64decode(data.get("content") or "").decode("utf-8", errors="ignore")
        return data.get("content", "")

    def get_repo_languages(self, owner, repo):
        """Get languages used in the repository.

        Returns:
            The decoded JSON body of the languages endpoint.

        Raises:
            requests.HTTPError: On any error status.
        """
        resp = self._get(f"{self.base}/repos/{owner}/{repo}/languages")
        resp.raise_for_status()
        return resp.json()

    def get_repo_tree(self, owner, repo, sha="HEAD", recursive=True):
        """Get the full file tree of a repository.

        Args:
            sha: Tree SHA or ref name to list.
            recursive: When true, ask the API to include nested entries.

        Returns:
            The decoded JSON body, or ``{"tree": []}`` when no tree is
            available (404, or 409 which GitHub uses for an empty repository).

        Raises:
            requests.HTTPError: On any other error status.
        """
        url = f"{self.base}/repos/{owner}/{repo}/git/trees/{sha}"
        params = {"recursive": "1"} if recursive else {}
        resp = self._get(url, params=params)
        if resp.status_code in (404, 409):
            return {"tree": []}
        resp.raise_for_status()
        return resp.json()

# Build the shared client, or leave it as None when no token is configured
token = get_token()
github = GitHubClient(token) if token else None

if github is None:
    print("‚ùå No GitHub token found! Set GH_TOKEN in .env file.")
else:
    print("‚úÖ GitHub client initialized")

‚úÖ GitHub client initialized


In [11]:
# Cell 3: Show every repository in a numbered table so one can be picked by number
if not github:
    repo_list = []
else:
    repos = github.list_user_repos()

    # One template drives both the header and the data rows so the columns line up
    row_template = "{:<4} {:<35} {:<12} {:<5} {}"
    rule = "=" * 75

    print(f"üì¶ Your GitHub Repositories ({len(repos)} total):\n")
    print(row_template.format("#", "Name", "Language", "‚≠ê", "Updated"))
    print(rule)

    for number, repo in enumerate(repos, start=1):
        print(row_template.format(
            number,
            repo.get("name", "")[:34],
            (repo.get("language") or "‚Äî")[:11],
            repo.get("stargazers_count", 0),
            repo.get("updated_at", "")[:10],
        ))

    print("\n" + rule)
    print("üìù Enter the number of the repo you want to analyze in the next cell.")

    # Keep the listing around for the selection and analysis cells
    repo_list = repos

üì¶ Your GitHub Repositories (33 total):

#    Name                                Language     ‚≠ê     Updated
1    AI-Agents                           Jupyter Not  0     2025-11-30
2    solidityHomeworks                   TypeScript   6     2024-01-12
3    MapData                             Jupyter Not  0     2024-01-06
4    langchain                           Jupyter Not  0     2023-11-14
5    HF-Audio                            Jupyter Not  0     2023-07-09
6    HF-DeepRL                           Jupyter Not  0     2023-07-02
7    fastai-dl                           Jupyter Not  0     2023-06-30
8    hf-nlp                              Jupyter Not  0     2023-05-30
9    nlp-transformers                    Jupyter Not  0     2023-05-15
10   openai-bc                           Jupyter Not  0     2023-05-09
11   nn-bc                               Jupyter Not  0     2023-04-22
12   dsml-bc                             Jupyter Not  0     2023-04-05
13   springmast                    

In [12]:
# Cell 4: Enter the repo number to analyze
# CHANGE THIS NUMBER to select which repo to analyze: it is the 1-based "#" shown in the listing printed by Cell 3.
# NOTE: that listing is requested sorted by last update (newest first), so the number
# belonging to a given repository can shift whenever any repository is updated.
SELECTED_REPO_NUMBER = 15  # pointed at openai-stackhack-2023 when this notebook was last run

In [13]:
# Cell 5: Analyze the selected repository
class RepoAnalyzer:
    """Agent that analyzes a GitHub repository and provides detailed summaries.

    ``analyze`` collects raw data through the injected GitHub client and keeps
    it in ``self.analysis``; ``print_summary`` renders that data as a plain
    text report on stdout.
    """

    # Files (matched by exact path, i.e. at the repository root) whose
    # contents reveal the tech stack and architecture.
    KEY_FILE_PATTERNS = (
        "README.md", "readme.md", "README.MD",
        "package.json", "requirements.txt", "Pipfile", "pyproject.toml",
        "Cargo.toml", "go.mod", "pom.xml", "build.gradle",
        "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
        "tsconfig.json", "hardhat.config.ts", "hardhat.config.js",
        "foundry.toml", "truffle-config.js",
        ".env.example", "Makefile",
    )

    # Stored file contents are cut to this many characters.
    MAX_FILE_CHARS = 5000

    # Report limits.
    MAX_DIRS_SHOWN = 15
    MAX_ROOT_FILES_SHOWN = 10
    README_SCAN_LINES = 50
    README_EXCERPT_LINES = 15
    MAX_PY_DEPS_SHOWN = 10
    MAX_NPM_DEPS_SHOWN = 8
    MAX_NPM_DEV_DEPS_SHOWN = 5

    # (substring looked for in the lower-cased file text, label reported)
    _NPM_MARKERS = (
        ("react", "React"),
        ("next", "Next.js"),
        ("vue", "Vue.js"),
        ("angular", "Angular"),
        ("express", "Express.js"),
        ("hardhat", "Hardhat (Ethereum)"),
        ("ethers", "Ethers.js"),
        ("web3", "Web3.js"),
        ("typescript", "TypeScript"),
    )
    _PYTHON_MARKERS = (
        ("django", "Django"),
        ("flask", "Flask"),
        ("fastapi", "FastAPI"),
        ("torch", "PyTorch"),
        ("tensorflow", "TensorFlow"),
        ("langchain", "LangChain"),
        ("openai", "OpenAI API"),
    )
    # (config files whose mere presence implies the tool, label reported)
    _CONFIG_MARKERS = (
        (("hardhat.config.ts", "hardhat.config.js"), "Hardhat (Solidity)"),
        (("foundry.toml",), "Foundry (Solidity)"),
        (("truffle-config.js",), "Truffle (Solidity)"),
        (("Dockerfile",), "Docker"),
        (("docker-compose.yml", "docker-compose.yaml"), "Docker Compose"),
    )

    def __init__(self, github_client):
        """Keep the client used for all API calls and start with no results."""
        self.github = github_client
        self.analysis = {}

    def analyze(self, owner, repo_name):
        """Perform full analysis of a repository.

        Fetches details, languages, the file tree and the key files, printing
        a progress line before each step.

        Returns:
            ``self.analysis``, a dict with the keys ``details``, ``languages``,
            ``tree`` and ``key_files``.
        """
        print(f"üîç Analyzing repository: {owner}/{repo_name}\n")
        print("=" * 70)

        # Start from a clean slate so a failure part-way through cannot leave
        # results of a previously analyzed repository mixed into this one.
        self.analysis = {}

        # 1. Get repo details
        print("üìã Fetching repository details...")
        self.analysis["details"] = self.github.get_repo_details(owner, repo_name)

        # 2. Get languages
        print("üíª Analyzing languages/tech stack...")
        self.analysis["languages"] = self.github.get_repo_languages(owner, repo_name)

        # 3. Get file tree
        print("üìÇ Mapping repository structure...")
        tree = self.github.get_repo_tree(owner, repo_name)
        self.analysis["tree"] = tree

        # 4. Get key files
        print("üìÑ Reading key configuration files...")
        self.analysis["key_files"] = self._get_key_files(owner, repo_name, tree)

        print("\n‚úÖ Analysis complete!\n")
        return self.analysis

    def _get_key_files(self, owner, repo, tree):
        """Read important files that reveal tech stack and architecture.

        Returns:
            Dict mapping each ``KEY_FILE_PATTERNS`` entry found in the tree (and
            having non-empty content) to its text, cut to ``MAX_FILE_CHARS``.
        """
        blob_paths = {
            entry["path"] for entry in tree.get("tree", []) if entry.get("type") == "blob"
        }

        files_content = {}
        for pattern in self.KEY_FILE_PATTERNS:
            if pattern not in blob_paths:
                continue
            content = self.github.get_file_content(owner, repo, pattern)
            if content:
                # Truncate large files
                files_content[pattern] = content[:self.MAX_FILE_CHARS]
        return files_content

    def print_summary(self):
        """Print a formatted summary of the analysis.

        Reads whatever ``analyze`` stored in ``self.analysis``; missing parts
        are treated as empty.
        """
        key_files = self.analysis.get("key_files", {})

        self._print_overview(self.analysis.get("details", {}))
        self._print_tech_stack(self.analysis.get("languages", {}), key_files)
        self._print_structure(self.analysis.get("tree", {}))
        self._print_functionality(key_files)

        print("\n" + "=" * 70)

    def _print_overview(self, details):
        """Print the basic repository facts."""
        print("=" * 70)
        print("üì¶ REPOSITORY OVERVIEW")
        print("=" * 70)
        print(f"Name:        {details.get('full_name', 'N/A')}")
        print(f"Description: {details.get('description') or 'No description'}")
        print(f"URL:         {details.get('html_url', 'N/A')}")
        # "or ''" keeps the date slice working when a timestamp is null
        print(f"Created:     {(details.get('created_at') or '')[:10]}")
        print(f"Updated:     {(details.get('updated_at') or '')[:10]}")
        print(f"Stars:       {details.get('stargazers_count', 0)} ‚≠ê")
        print(f"Forks:       {details.get('forks_count', 0)}")
        print(f"Open Issues: {details.get('open_issues_count', 0)}")
        print(f"Default Branch: {details.get('default_branch', 'main')}")

    def _print_tech_stack(self, languages, key_files):
        """Print the language share bars and the detected frameworks."""
        print("\n" + "=" * 70)
        print("üíª TECH STACK & LANGUAGES")
        print("=" * 70)

        total_bytes = sum(languages.values()) if languages else 0
        if total_bytes > 0:  # also guards the percentage division below
            for lang, bytes_count in sorted(languages.items(), key=lambda item: -item[1]):
                pct = (bytes_count / total_bytes) * 100
                filled = int(pct / 5)  # 20-cell bar: one cell per 5 %
                bar = "‚ñà" * filled + "‚ñë" * (20 - filled)
                print(f"{lang:<15} {bar} {pct:>5.1f}%")
        else:
            print("No language data available")

        # Detect frameworks from key files
        print("\nüìö Detected Frameworks/Tools:")
        frameworks = self._detect_frameworks(key_files)
        if frameworks:
            for fw in frameworks:
                print(f"  ‚Ä¢ {fw}")
        else:
            print("  No specific frameworks detected")

    def _print_structure(self, tree):
        """Print file/directory counts and the top-level layout."""
        print("\n" + "=" * 70)
        print("üèóÔ∏è ARCHITECTURE & STRUCTURE")
        print("=" * 70)

        tree_items = tree.get("tree", [])

        # Entries below each top-level directory, gathered in a single pass.
        items_per_dir = {}
        for entry in tree_items:
            head, sep, _rest = entry["path"].partition("/")
            if sep:
                items_per_dir[head] = items_per_dir.get(head, 0) + 1
        dirs = sorted(items_per_dir)

        blobs = [entry["path"] for entry in tree_items if entry.get("type") == "blob"]
        files_root = [path for path in blobs if "/" not in path]

        print(f"Total files: {len(blobs)}")
        print(f"Total directories: {len(dirs)}")

        print("\nüìÅ Top-level structure:")
        for d in dirs[:self.MAX_DIRS_SHOWN]:
            print(f"  üìÇ {d}/ ({items_per_dir[d]} items)")
        if len(dirs) > self.MAX_DIRS_SHOWN:
            print(f"  ... and {len(dirs) - self.MAX_DIRS_SHOWN} more directories")

        for f in files_root[:self.MAX_ROOT_FILES_SHOWN]:
            print(f"  üìÑ {f}")
        if len(files_root) > self.MAX_ROOT_FILES_SHOWN:
            print(f"  ... and {len(files_root) - self.MAX_ROOT_FILES_SHOWN} more files")

    def _print_functionality(self, key_files):
        """Print the README excerpt and the dependency lists."""
        print("\n" + "=" * 70)
        print("‚öôÔ∏è FUNCTIONALITY & PURPOSE")
        print("=" * 70)

        # Print README excerpt if available
        readme_content = key_files.get("README.md") or key_files.get("readme.md") or key_files.get("README.MD")
        if readme_content:
            print("\nüìñ From README:")
            self._print_readme_excerpt(readme_content)

        # Print dependencies
        if "package.json" in key_files:
            print("\nüì¶ NPM Dependencies (from package.json):")
            self._print_npm_deps(key_files["package.json"])

        if "requirements.txt" in key_files:
            print("\nüêç Python Dependencies (from requirements.txt):")
            stripped = (line.strip() for line in key_files["requirements.txt"].split("\n"))
            # Test the stripped text so indented comment lines are dropped too.
            deps = [line for line in stripped if line and not line.startswith("#")]
            for dep in deps[:self.MAX_PY_DEPS_SHOWN]:
                print(f"  ‚Ä¢ {dep}")
            # Compare against the full list; comparing after slicing could never be true.
            if len(deps) > self.MAX_PY_DEPS_SHOWN:
                print(f"  ... and {len(deps) - self.MAX_PY_DEPS_SHOWN} more")

    def _print_readme_excerpt(self, readme_content):
        """Print the first meaningful README lines, then "..." if more remain."""
        def is_meaningful(line):
            # Skip blank lines and badge/image lines
            return bool(line.strip()) and not line.startswith(("![", "<img"))

        lines = readme_content.split("\n")
        candidates = [line for line in lines[:self.README_SCAN_LINES] if is_meaningful(line)]
        excerpt = candidates[:self.README_EXCERPT_LINES]
        print("\n".join(excerpt))

        if sum(1 for line in lines if is_meaningful(line)) > len(excerpt):
            print("...")

    def _detect_frameworks(self, key_files):
        """Detect frameworks based on config files.

        Manifest files are searched for marker substrings (case-insensitive);
        some config files count by presence alone.

        Returns:
            Labels in detection order, without duplicates.
        """
        frameworks = []

        if "package.json" in key_files:
            pkg_text = key_files["package.json"].lower()
            frameworks.extend(label for marker, label in self._NPM_MARKERS if marker in pkg_text)

        if "requirements.txt" in key_files or "pyproject.toml" in key_files:
            py_text = (key_files.get("requirements.txt", "") + key_files.get("pyproject.toml", "")).lower()
            frameworks.extend(label for marker, label in self._PYTHON_MARKERS if marker in py_text)

        for filenames, label in self._CONFIG_MARKERS:
            if any(name in key_files for name in filenames):
                frameworks.append(label)

        # dict.fromkeys de-duplicates while keeping a stable order; a set would
        # reorder the labels from one kernel session to the next.
        return list(dict.fromkeys(frameworks))

    def _print_npm_deps(self, package_json_content):
        """Parse and print NPM dependencies.

        Prints a fallback line instead of raising when the text is not a JSON
        object. That includes a package.json that ``_get_key_files`` cut off at
        ``MAX_FILE_CHARS``, since the truncated text is no longer valid JSON.
        """
        try:
            pkg = json.loads(package_json_content)
        except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
            pkg = None

        if not isinstance(pkg, dict):
            print("  (Could not parse package.json)")
            return

        self._print_dep_group("  Dependencies:", pkg.get("dependencies"), self.MAX_NPM_DEPS_SHOWN)
        self._print_dep_group("  Dev Dependencies:", pkg.get("devDependencies"), self.MAX_NPM_DEV_DEPS_SHOWN)

    @staticmethod
    def _print_dep_group(heading, deps, limit):
        """Print up to ``limit`` names from a dependency mapping, with a remainder note."""
        names = list(deps) if isinstance(deps, dict) else []
        if not names:
            return
        print(heading)
        for name in names[:limit]:
            print(f"    ‚Ä¢ {name}")
        if len(names) > limit:
            print(f"    ... and {len(names) - limit} more")

# Analyze the repository chosen via SELECTED_REPO_NUMBER and print its report
if not (github and repo_list):
    print("‚ùå Run the previous cells first to load repos.")
else:
    # The listing is numbered from 1; convert to a list index
    idx = SELECTED_REPO_NUMBER - 1
    if idx < 0 or idx >= len(repo_list):
        print(f"‚ùå Invalid selection. Choose a number between 1 and {len(repo_list)}")
    else:
        selected = repo_list[idx]
        owner = selected["owner"]["login"]
        repo_name = selected["name"]

        analyzer = RepoAnalyzer(github)
        analyzer.analyze(owner, repo_name)
        analyzer.print_summary()

üîç Analyzing repository: tenkara/openai-stackhack-2023

üìã Fetching repository details...
üíª Analyzing languages/tech stack...
üíª Analyzing languages/tech stack...
üìÇ Mapping repository structure...
üìÇ Mapping repository structure...
üìÑ Reading key configuration files...

‚úÖ Analysis complete!

üì¶ REPOSITORY OVERVIEW
Name:        tenkara/openai-stackhack-2023
Description: No description
URL:         https://github.com/tenkara/openai-stackhack-2023
Created:     2023-02-25
Updated:     2023-03-11
Stars:       1 ‚≠ê
Forks:       0
Open Issues: 0
Default Branch: main

üíª TECH STACK & LANGUAGES
Jupyter Notebook ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñë  97.7%
TypeScript      ‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë   1.1%
Python          ‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë   1.0%
JavaScript      ‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë   0.1%
Dockerfile      ‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚ñë‚