In [9]:
# Install required packages via the %pip magic (run once; restart the kernel
# afterwards if any package was installed or upgraded)
%pip install requests python-dotenv openai

Note: you may need to restart the kernel to use updated packages.


In [10]:
import os
import requests
from dotenv import load_dotenv

# Load environment variables from the .env file that sits in the parent of the
# current working directory (not the parent of this notebook file, if the two differ)
load_dotenv(os.path.join(os.path.dirname(os.getcwd()), ".env"))

def get_token():
    """Return the GitHub token from the environment.

    GH_TOKEN wins when it is set to a non-empty value; otherwise whatever
    GITHUB_TOKEN holds is returned (None when that variable is unset).
    """
    preferred = os.environ.get("GH_TOKEN")
    if preferred:
        return preferred
    return os.environ.get("GITHUB_TOKEN")

class GitHubClient:
    """GitHub API client - connects to the remote GitHub API.

    Every request is sent with the bearer-token headers built in ``__init__``
    and with ``self.timeout`` so a stalled connection cannot block forever.
    """

    def __init__(self, token, timeout=30):
        """Store the token and prepare the headers sent with every request.

        Args:
            token: GitHub token placed in the ``Authorization`` header.
            timeout: Seconds passed as ``timeout=`` to each ``requests.get``.
        """
        self.token = token
        self.base = "https://api.github.com"
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "github-repos-summarizer",
            "X-GitHub-Api-Version": "2022-11-28"
        }

    def _get(self, url, params=None):
        """Send one authenticated GET request using the client's timeout."""
        return requests.get(url, headers=self.headers, params=params, timeout=self.timeout)

    def _contents_url(self, owner, repo, path):
        """Build the contents-endpoint URL, percent-encoding the file path."""
        from urllib.parse import quote
        # "/" stays literal so nested paths keep their structure; characters
        # such as "#", "?" or "%" would otherwise change the meaning of the URL.
        return f"{self.base}/repos/{owner}/{repo}/contents/{quote(path, safe='/')}"

    @staticmethod
    def _next_page_url(link_header):
        """Return the URL tagged rel="next" in a Link header, or None."""
        for part in link_header.split(","):
            if 'rel="next"' in part:
                return part.split(";")[0].strip().strip("<>")
        return None

    def list_user_repos(self, per_page=100):
        """List all repositories for the authenticated user.

        Follows the ``Link`` response header until no ``rel="next"`` entry
        remains and returns the concatenated JSON items.

        Raises:
            Exception: when the API answers 401.
            requests.HTTPError: for any other error status (raise_for_status).
        """
        url = f"{self.base}/user/repos"
        params = {"per_page": per_page, "sort": "updated", "direction": "desc"}
        repos = []

        while url:
            resp = self._get(url, params=params)
            if resp.status_code == 401:
                raise Exception("‚ùå Unauthorized: Check your GH_TOKEN")
            resp.raise_for_status()
            repos.extend(resp.json())

            url = self._next_page_url(resp.headers.get("Link", ""))
            # The "next" URL already carries the query string, so the initial
            # params are only sent with the first request.
            params = None
        return repos

    def get_repo_details(self, owner, repo):
        """Get detailed repository information (parsed JSON of /repos/{owner}/{repo})."""
        resp = self._get(f"{self.base}/repos/{owner}/{repo}")
        resp.raise_for_status()
        return resp.json()

    def get_repo_contents(self, owner, repo, path=""):
        """Get contents of a repository path; returns [] when the API answers 404."""
        resp = self._get(self._contents_url(owner, repo, path))
        if resp.status_code == 404:
            return []
        resp.raise_for_status()
        return resp.json()

    def get_file_content(self, owner, repo, path):
        """Get decoded content of a specific file.

        Returns:
            The text of the file (base64 payloads are decoded as UTF-8 with
            undecodable bytes dropped), or None when the API answers 404 or
            the response is not a single JSON object (e.g. ``path`` names a
            directory, for which the endpoint returns a list).
        """
        import base64
        resp = self._get(self._contents_url(owner, repo, path))
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            # Directory listing: there is no single file body to return.
            return None
        if data.get("encoding") == "base64":
            return base64.b64decode(data["content"]).decode("utf-8", errors="ignore")
        return data.get("content", "")

    def get_repo_languages(self, owner, repo):
        """Get languages used in the repository (parsed JSON of the /languages endpoint)."""
        resp = self._get(f"{self.base}/repos/{owner}/{repo}/languages")
        resp.raise_for_status()
        return resp.json()

    def get_repo_tree(self, owner, repo, sha="HEAD", recursive=True):
        """Get the full file tree of a repository.

        Sends ``recursive=1`` when ``recursive`` is true. Returns
        ``{"tree": []}`` when the API answers 404.
        """
        url = f"{self.base}/repos/{owner}/{repo}/git/trees/{sha}"
        params = {"recursive": "1"} if recursive else {}
        resp = self._get(url, params=params)
        if resp.status_code == 404:
            return {"tree": []}
        resp.raise_for_status()
        return resp.json()

# Build the shared client, or leave `github` as None when no token is configured
token = get_token()
github = GitHubClient(token) if token else None
if github is None:
    print("‚ùå No GitHub token found! Set GH_TOKEN in .env file.")
else:
    print("‚úÖ GitHub client initialized")

‚úÖ GitHub client initialized


In [11]:
# Cell 3: List repositories and select one
if not github:
    # No client was created (token missing), so there is nothing to pick from
    repo_list = []
else:
    repos = github.list_user_repos()
    rule = "=" * 75

    print(f"üì¶ Your GitHub Repositories ({len(repos)} total):\n")
    print(f"{'#':<4} {'Name':<35} {'Language':<12} {'‚≠ê':<5} {'Updated'}")
    print(rule)

    # One fixed-width row per repository, numbered from 1
    for i, r in enumerate(repos, start=1):
        name = r.get("name", "")[:34]
        lang = (r.get("language") or "‚Äî")[:11]
        stars = r.get("stargazers_count", 0)
        updated = r.get("updated_at", "")[:10]
        print(f"{i:<4} {name:<35} {lang:<12} {stars:<5} {updated}")

    print("\n" + rule)
    print("üìù Enter the number of the repo you want to analyze in the next cell.")

    # Store repos for later use
    repo_list = repos

üì¶ Your GitHub Repositories (33 total):

#    Name                                Language     ‚≠ê     Updated
1    AI-Agents                           Jupyter Not  0     2025-11-30
2    solidityHomeworks                   TypeScript   6     2024-01-12
3    MapData                             Jupyter Not  0     2024-01-06
4    langchain                           Jupyter Not  0     2023-11-14
5    HF-Audio                            Jupyter Not  0     2023-07-09
6    HF-DeepRL                           Jupyter Not  0     2023-07-02
7    fastai-dl                           Jupyter Not  0     2023-06-30
8    hf-nlp                              Jupyter Not  0     2023-05-30
9    nlp-transformers                    Jupyter Not  0     2023-05-15
10   openai-bc                           Jupyter Not  0     2023-05-09
11   nn-bc                               Jupyter Not  0     2023-04-22
12   dsml-bc                             Jupyter Not  0     2023-04-05
13   springmast                    

In [12]:
# Cell 4: Enter the repo number to analyze
# CHANGE THIS NUMBER to select which repo to analyze. It is the 1-based "#"
# value from the table printed by the listing cell; the analysis cell
# subtracts 1 to index into repo_list.
SELECTED_REPO_NUMBER = 15  # this is the repo for openai-stackhack-2023

In [15]:
# Cell 5: Analyze the selected repository
import re
import json

class RepoAnalyzer:
    """Agent that analyzes a GitHub repository and provides detailed summaries."""
    
    def __init__(self, github_client):
        """Keep the client used for all API calls and start with an empty result dict.

        Args:
            github_client: object whose get_repo_details, get_repo_languages,
                get_repo_tree and get_file_content methods are called by the
                analysis steps.
        """
        self.github = github_client
        # Filled key by key by analyze(); read back by print_summary().
        self.analysis = {}
    
    def analyze(self, owner, repo_name):
        """Perform full analysis of a repository."""
        print(f"üîç Analyzing repository: {owner}/{repo_name}\n")
        print("=" * 70)
        
        # 1. Get repo details
        print("üìã Fetching repository details...")
        details = self.github.get_repo_details(owner, repo_name)
        self.analysis["details"] = details
        
        # 2. Get languages
        print("üíª Analyzing languages/tech stack...")
        languages = self.github.get_repo_languages(owner, repo_name)
        self.analysis["languages"] = languages
        
        # 3. Get file tree
        print("üìÇ Mapping repository structure...")
        tree = self.github.get_repo_tree(owner, repo_name)
        self.analysis["tree"] = tree
        
        # 4. Get key config files
        print("üìÑ Reading configuration files...")
        key_files = self._get_key_files(owner, repo_name, tree)
        self.analysis["key_files"] = key_files
        
        # 5. Analyze source code
        print("üî¨ Analyzing source code...")
        code_analysis = self._analyze_source_code(owner, repo_name, tree)
        self.analysis["code_analysis"] = code_analysis
        
        # 6. Infer business functionality
        print("üíº Inferring business functionality...")
        business_analysis = self._analyze_business_functionality(key_files, code_analysis, details)
        self.analysis["business"] = business_analysis
        
        print("\n‚úÖ Analysis complete!\n")
        return self.analysis
    
    def _get_key_files(self, owner, repo, tree):
        """Read important files that reveal tech stack and architecture."""
        key_file_patterns = [
            "README.md", "readme.md", "README.MD",
            "package.json", "requirements.txt", "Pipfile", "pyproject.toml",
            "Cargo.toml", "go.mod", "pom.xml", "build.gradle",
            "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
            "tsconfig.json", "hardhat.config.ts", "hardhat.config.js",
            "foundry.toml", "truffle-config.js",
            ".env.example", "Makefile", "AGENTS.md", "CONTRIBUTING.md"
        ]
        
        files_content = {}
        tree_files = [f["path"] for f in tree.get("tree", []) if f["type"] == "blob"]
        
        for pattern in key_file_patterns:
            if pattern in tree_files:
                content = self.github.get_file_content(owner, repo, pattern)
                if content:
                    files_content[pattern] = content[:8000] if len(content) > 8000 else content
        
        return files_content
    
    def _analyze_source_code(self, owner, repo, tree):
        """Analyze source code files to understand functionality."""
        tree_files = tree.get("tree", [])
        
        # Categorize files by type
        code_files = {
            "python": [], "javascript": [], "typescript": [],
            "solidity": [], "jupyter": [], "other": []
        }
        
        for f in tree_files:
            if f["type"] != "blob":
                continue
            path = f["path"]
            if path.endswith(".py"): code_files["python"].append(path)
            elif path.endswith((".js", ".jsx")): code_files["javascript"].append(path)
            elif path.endswith((".ts", ".tsx")): code_files["typescript"].append(path)
            elif path.endswith(".sol"): code_files["solidity"].append(path)
            elif path.endswith(".ipynb"): code_files["jupyter"].append(path)
        
        analysis = {
            "file_counts": {k: len(v) for k, v in code_files.items()},
            "main_modules": [],
            "contracts": [],
            "all_functions": [],
            "all_classes": [],
            "all_routes": []
        }
        
        # Find priority files (entry points)
        priority_files = []
        for files in [code_files["python"], code_files["javascript"], code_files["typescript"]]:
            for f in files:
                name = f.split("/")[-1].lower()
                if name in ["main.py", "app.py", "index.py", "server.py", "__main__.py",
                           "index.js", "index.ts", "app.js", "app.ts", "server.js", "server.ts"]:
                    priority_files.append(f)
                elif any(x in name for x in ["route", "api", "controller", "service", "model", "view"]):
                    priority_files.append(f)
        
        # Also check src/ and lib/ directories
        for f in tree_files:
            path = f["path"]
            if f["type"] == "blob" and (path.startswith("src/") or path.startswith("lib/")):
                if path.endswith((".py", ".js", ".ts", ".jsx", ".tsx")):
                    if path not in priority_files:
                        priority_files.append(path)
        
        # Analyze up to 12 key files
        files_to_analyze = priority_files[:12]
        if not files_to_analyze:
            all_code = code_files["python"] + code_files["javascript"] + code_files["typescript"]
            files_to_analyze = [f for f in all_code if "node_modules" not in f and "test" not in f.lower()][:8]
        
        for file_path in files_to_analyze:
            content = self.github.get_file_content(owner, repo, file_path)
            if content:
                file_analysis = self._analyze_file_content(file_path, content)
                if file_analysis:
                    analysis["main_modules"].append({"path": file_path, "analysis": file_analysis})
                    analysis["all_functions"].extend(file_analysis.get("functions", []))
                    analysis["all_classes"].extend(file_analysis.get("classes", []))
                    analysis["all_routes"].extend(file_analysis.get("api_routes", []))
        
        # Analyze Solidity contracts
        for sol_file in code_files["solidity"][:5]:
            content = self.github.get_file_content(owner, repo, sol_file)
            if content:
                contracts = self._extract_solidity_info(content)
                if contracts:
                    analysis["contracts"].extend(contracts)
        
        # Analyze Jupyter notebooks
        for nb_file in code_files["jupyter"][:3]:
            content = self.github.get_file_content(owner, repo, nb_file)
            if content:
                nb_info = self._analyze_notebook(nb_file, content)
                if nb_info:
                    analysis["main_modules"].append({"path": nb_file, "analysis": nb_info})
        
        return analysis
    
    def _analyze_file_content(self, path, content):
        """Extract functions, classes, and purpose from a source file."""
        analysis = {"purpose": "", "functions": [], "classes": [], "api_routes": [], "business_hints": []}
        lines = content.split("\n")
        
        # Extract top comment/docstring
        for i, line in enumerate(lines[:25]):
            if line.strip().startswith("#") or line.strip().startswith("//"):
                analysis["purpose"] += line.strip().lstrip("#/").strip() + " "
            elif '"""' in line or "'''" in line:
                doc_lines = []
                for dl in lines[i:i+15]:
                    doc_lines.append(dl)
                    if len(doc_lines) > 1 and ('"""' in dl or "'''" in dl):
                        break
                analysis["purpose"] = " ".join(doc_lines).replace('"""', '').replace("'''", '').strip()
                break
        
        # Python analysis
        if path.endswith(".py"):
            for match in re.finditer(r'def\s+(\w+)\s*\([^)]*\)', content):
                func_name = match.group(1)
                if not func_name.startswith("_") or func_name == "__init__":
                    analysis["functions"].append(func_name)
            for match in re.finditer(r'class\s+(\w+)', content):
                analysis["classes"].append(match.group(1))
            for match in re.finditer(r'@(?:app|router|api)\.(?:get|post|put|delete|route)\s*\([\'"]([^\'"]+)', content, re.I):
                analysis["api_routes"].append(match.group(1))
        
        # JS/TS analysis
        elif path.endswith((".js", ".ts", ".jsx", ".tsx")):
            for pattern in [r'function\s+(\w+)', r'const\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>']:
                for match in re.finditer(pattern, content):
                    analysis["functions"].append(match.group(1))
            for match in re.finditer(r'class\s+(\w+)', content):
                analysis["classes"].append(match.group(1))
            for match in re.finditer(r'\.(?:get|post|put|delete|patch)\s*\([\'"]([^\'"]+)', content):
                analysis["api_routes"].append(match.group(1))
        
        # Extract business-related keywords from function/class names
        business_keywords = ["user", "auth", "login", "payment", "order", "product", "cart", "checkout",
                          "invoice", "customer", "account", "transaction", "wallet", "token", "mint",
                          "transfer", "swap", "stake", "claim", "reward", "vote", "proposal", "dao",
                          "nft", "marketplace", "auction", "bid", "listing", "subscription", "plan"]
        
        all_names = " ".join(analysis["functions"] + analysis["classes"]).lower()
        for kw in business_keywords:
            if kw in all_names:
                analysis["business_hints"].append(kw)
        
        analysis["functions"] = list(set(analysis["functions"]))[:12]
        analysis["classes"] = list(set(analysis["classes"]))[:8]
        analysis["api_routes"] = list(set(analysis["api_routes"]))[:10]
        
        return analysis
    
    def _extract_solidity_info(self, content):
        """Extract contract information from Solidity files."""
        contracts = []
        for match in re.finditer(r'contract\s+(\w+)(?:\s+is\s+([^{]+))?', content):
            contract_name = match.group(1)
            inherits = match.group(2).strip() if match.group(2) else ""
            funcs = re.findall(r'function\s+(\w+)\s*\([^)]*\)[^{]*(?:public|external)', content)
            contracts.append({"name": contract_name, "inherits": inherits, "functions": funcs[:10]})
        return contracts
    
    def _analyze_notebook(self, path, content):
        """Analyze Jupyter notebook content."""
        try:
            nb = json.loads(content)
            cells = nb.get("cells", [])
            code_cells = [c for c in cells if c.get("cell_type") == "code"]
            markdown_cells = [c for c in cells if c.get("cell_type") == "markdown"]
            
            title = ""
            description = []
            for mc in markdown_cells[:3]:
                md_content = "".join(mc.get("source", []))
                if md_content.startswith("#") and not title:
                    title = md_content.split("\n")[0].lstrip("#").strip()
                description.append(md_content[:200])
            
            imports = []
            for cell in code_cells[:15]:
                source = "".join(cell.get("source", []))
                for line in source.split("\n"):
                    if line.strip().startswith(("import ", "from ")):
                        mod = line.split()[1].split(".")[0]
                        if mod not in imports:
                            imports.append(mod)
            
            return {
                "purpose": title or f"Notebook: {path.split('/')[-1]}",
                "description": " ".join(description)[:300],
                "functions": [], "classes": [], "api_routes": [],
                "imports": imports[:15],
                "notebook_info": {"code_cells": len(code_cells), "markdown_cells": len(markdown_cells)},
                "business_hints": []
            }
        except:
            return None
    
    def _analyze_business_functionality(self, key_files, code_analysis, details):
        """Infer business functionality from all gathered data."""
        business = {
            "domain": [],
            "core_features": [],
            "user_facing": [],
            "data_operations": [],
            "integrations": [],
            "business_model": [],
            "summary": ""
        }
        
        # Analyze README for business context
        readme = key_files.get("README.md") or key_files.get("readme.md") or ""
        readme_lower = readme.lower()
        
        # Domain detection
        domain_patterns = {
            "DeFi / Blockchain": ["defi", "blockchain", "ethereum", "solidity", "smart contract", "web3", "nft", "token", "wallet", "metamask"],
            "E-Commerce": ["shop", "cart", "checkout", "payment", "product", "order", "inventory", "shipping"],
            "AI / Machine Learning": ["machine learning", "ai ", "neural", "model", "training", "prediction", "nlp", "gpt", "llm", "openai"],
            "Data Analytics": ["analytics", "dashboard", "visualization", "report", "metrics", "data analysis"],
            "SaaS / Web App": ["saas", "subscription", "user management", "authentication", "api", "rest"],
            "DevOps / Infrastructure": ["deploy", "ci/cd", "docker", "kubernetes", "infrastructure", "automation"],
            "Education / Learning": ["course", "tutorial", "learn", "education", "bootcamp", "homework"],
            "Finance": ["finance", "banking", "trading", "investment", "portfolio", "stock"],
            "Healthcare": ["health", "medical", "patient", "clinical", "diagnosis"],
            "Social / Community": ["social", "community", "chat", "messaging", "forum", "profile"]
        }
        
        for domain, keywords in domain_patterns.items():
            if any(kw in readme_lower or kw in (details.get("description") or "").lower() for kw in keywords):
                business["domain"].append(domain)
        
        # Feature detection from code
        all_functions = [f.lower() for f in code_analysis.get("all_functions", [])]
        all_classes = [c.lower() for c in code_analysis.get("all_classes", [])]
        all_routes = code_analysis.get("all_routes", [])
        contracts = code_analysis.get("contracts", [])
        
        feature_patterns = {
            "User Authentication": ["login", "logout", "signup", "register", "auth", "session", "jwt", "oauth"],
            "User Management": ["user", "profile", "account", "settings", "preferences"],
            "Data CRUD Operations": ["create", "read", "update", "delete", "save", "load", "fetch", "get", "set"],
            "API Endpoints": ["api", "endpoint", "route", "handler", "controller"],
            "Payment Processing": ["payment", "pay", "charge", "invoice", "billing", "stripe", "checkout"],
            "File Management": ["upload", "download", "file", "image", "document", "storage"],
            "Notifications": ["notify", "notification", "alert", "email", "sms", "push"],
            "Search & Filter": ["search", "filter", "query", "find", "sort"],
            "Analytics & Reporting": ["analytics", "report", "stats", "metrics", "dashboard"],
            "Token Operations": ["mint", "burn", "transfer", "approve", "stake", "unstake", "claim"],
            "NFT Functionality": ["nft", "tokenuri", "metadata", "royalty", "marketplace"],
            "DAO Governance": ["vote", "proposal", "governance", "delegate", "quorum"],
            "DeFi Operations": ["swap", "liquidity", "pool", "yield", "farm", "lend", "borrow"]
        }
        
        code_text = " ".join(all_functions + all_classes)
        for feature, keywords in feature_patterns.items():
            if any(kw in code_text for kw in keywords):
                business["core_features"].append(feature)
        
        # User-facing features from routes
        if all_routes:
            route_features = []
            for route in all_routes:
                route_lower = route.lower()
                if "user" in route_lower or "auth" in route_lower:
                    route_features.append(f"User endpoint: {route}")
                elif "api" in route_lower:
                    route_features.append(f"API: {route}")
                else:
                    route_features.append(f"Route: {route}")
            business["user_facing"] = route_features[:8]
        
        # Smart contract business logic
        if contracts:
            for c in contracts:
                contract_features = []
                funcs_lower = [f.lower() for f in c.get("functions", [])]
                if any("mint" in f for f in funcs_lower):
                    contract_features.append("Token/NFT minting")
                if any("transfer" in f for f in funcs_lower):
                    contract_features.append("Asset transfers")
                if any("stake" in f or "deposit" in f for f in funcs_lower):
                    contract_features.append("Staking/Deposits")
                if any("vote" in f or "propose" in f for f in funcs_lower):
                    contract_features.append("Governance")
                if any("swap" in f or "trade" in f for f in funcs_lower):
                    contract_features.append("Trading/Swaps")
                if contract_features:
                    business["data_operations"].append(f"{c['name']}: {', '.join(contract_features)}")
        
        # Integrations from dependencies
        pkg_json = key_files.get("package.json", "")
        requirements = key_files.get("requirements.txt", "")
        deps_text = pkg_json.lower() + requirements.lower()
        
        integration_patterns = {
            "OpenAI / GPT": ["openai", "gpt-"],
            "Stripe Payments": ["stripe"],
            "AWS Services": ["aws-sdk", "boto3", "s3", "dynamodb"],
            "Firebase": ["firebase"],
            "MongoDB": ["mongodb", "mongoose", "pymongo"],
            "PostgreSQL": ["pg", "psycopg", "postgres"],
            "Redis": ["redis", "ioredis"],
            "Ethereum/Web3": ["ethers", "web3", "hardhat"],
            "IPFS": ["ipfs", "pinata"],
            "Twilio": ["twilio"],
            "SendGrid": ["sendgrid"],
            "Auth0": ["auth0"],
            "Supabase": ["supabase"]
        }
        
        for integration, keywords in integration_patterns.items():
            if any(kw in deps_text for kw in keywords):
                business["integrations"].append(integration)
        
        # Business model hints
        if "subscription" in readme_lower or "premium" in readme_lower:
            business["business_model"].append("Subscription-based")
        if "marketplace" in readme_lower or "sell" in readme_lower:
            business["business_model"].append("Marketplace")
        if "open source" in readme_lower or "mit license" in readme_lower:
            business["business_model"].append("Open Source")
        if "hackathon" in readme_lower or "demo" in readme_lower:
            business["business_model"].append("Hackathon/Demo Project")
        if contracts:
            business["business_model"].append("Blockchain/Smart Contracts")
        
        # Generate summary
        domain_str = ", ".join(business["domain"][:2]) if business["domain"] else "General software"
        features_str = ", ".join(business["core_features"][:4]) if business["core_features"] else "various features"
        
        business["summary"] = f"This is a {domain_str} project that implements {features_str}."
        if business["integrations"]:
            business["summary"] += f" It integrates with {', '.join(business['integrations'][:3])}."
        if business["business_model"]:
            business["summary"] += f" ({', '.join(business['business_model'][:2])})"
        
        return business
    
    def print_summary(self):
        """Print a formatted summary of the analysis."""
        details = self.analysis.get("details", {})
        languages = self.analysis.get("languages", {})
        tree = self.analysis.get("tree", {})
        key_files = self.analysis.get("key_files", {})
        code_analysis = self.analysis.get("code_analysis", {})
        business = self.analysis.get("business", {})
        
        # === HEADER ===
        print("=" * 70)
        print("üì¶ REPOSITORY OVERVIEW")
        print("=" * 70)
        print(f"Name:        {details.get('full_name', 'N/A')}")
        print(f"Description: {details.get('description') or 'No description'}")
        print(f"URL:         {details.get('html_url', 'N/A')}")
        print(f"Created:     {details.get('created_at', '')[:10]}")
        print(f"Updated:     {details.get('updated_at', '')[:10]}")
        print(f"Stars:       {details.get('stargazers_count', 0)} ‚≠ê  |  Forks: {details.get('forks_count', 0)}")
        
        # === BUSINESS SUMMARY (NEW) ===
        print("\n" + "=" * 70)
        print("üíº BUSINESS FUNCTIONALITY SUMMARY")
        print("=" * 70)
        
        if business.get("summary"):
            print(f"\nüìù {business['summary']}")
        
        if business.get("domain"):
            print(f"\nüéØ Domain: {', '.join(business['domain'])}")
        
        if business.get("core_features"):
            print("\n‚ú® Core Business Features:")
            for feat in business["core_features"][:8]:
                print(f"   ‚Ä¢ {feat}")
        
        if business.get("user_facing"):
            print("\nüë§ User-Facing Endpoints:")
            for uf in business["user_facing"][:6]:
                print(f"   ‚Ä¢ {uf}")
        
        if business.get("data_operations"):
            print("\nüìä Data/Contract Operations:")
            for op in business["data_operations"][:5]:
                print(f"   ‚Ä¢ {op}")
        
        if business.get("integrations"):
            print(f"\nüîå External Integrations: {', '.join(business['integrations'])}")
        
        if business.get("business_model"):
            print(f"\nüí∞ Business Model: {', '.join(business['business_model'])}")
        
        # === TECH STACK ===
        print("\n" + "=" * 70)
        print("üíª TECH STACK & LANGUAGES")
        print("=" * 70)
        
        if languages:
            total_bytes = sum(languages.values())
            for lang, bytes_count in sorted(languages.items(), key=lambda x: -x[1])[:6]:
                pct = (bytes_count / total_bytes) * 100
                bar = "‚ñà" * int(pct / 5) + "‚ñë" * (20 - int(pct / 5))
                print(f"{lang:<15} {bar} {pct:>5.1f}%")
        
        print("\nüìö Frameworks/Tools:")
        frameworks = self._detect_frameworks(key_files)
        for fw in frameworks[:8]:
            print(f"   ‚Ä¢ {fw}")
        
        # === ARCHITECTURE ===
        print("\n" + "=" * 70)
        print("üèóÔ∏è ARCHITECTURE & STRUCTURE")
        print("=" * 70)
        
        tree_items = tree.get("tree", [])
        dirs = sorted(set(f["path"].split("/")[0] for f in tree_items if "/" in f["path"]))
        
        file_counts = code_analysis.get("file_counts", {})
        counts_str = ", ".join([f"{k}: {v}" for k, v in file_counts.items() if v > 0])
        print(f"\nüìä Source Files: {counts_str}")
        
        print(f"\nüìÅ Structure ({len(dirs)} directories):")
        for d in dirs[:10]:
            subfiles = len([f for f in tree_items if f["path"].startswith(d + "/")])
            print(f"   üìÇ {d}/ ({subfiles} items)")
        
        # === CODE FUNCTIONALITY ===
        print("\n" + "=" * 70)
        print("‚öôÔ∏è CODE MODULES & FUNCTIONS")
        print("=" * 70)
        
        main_modules = code_analysis.get("main_modules", [])
        for mod in main_modules[:6]:
            path = mod["path"]
            analysis = mod["analysis"]
            print(f"\nüìÑ {path}")
            if analysis.get("purpose"):
                purpose = analysis["purpose"][:120]
                print(f"   Purpose: {purpose}{'...' if len(analysis.get('purpose', '')) > 120 else ''}")
            if analysis.get("classes"):
                print(f"   Classes: {', '.join(analysis['classes'][:5])}")
            if analysis.get("functions"):
                print(f"   Functions: {', '.join(analysis['functions'][:8])}")
            if analysis.get("api_routes"):
                print(f"   Routes: {', '.join(analysis['api_routes'][:5])}")
            if analysis.get("notebook_info"):
                nb = analysis["notebook_info"]
                print(f"   Notebook: {nb['code_cells']} code, {nb['markdown_cells']} markdown cells")
        
        # Smart contracts
        contracts = code_analysis.get("contracts", [])
        if contracts:
            print("\nüìú Smart Contracts:")
            for c in contracts[:4]:
                inherits = f" ‚Üí {c['inherits']}" if c.get("inherits") else ""
                print(f"   ‚Ä¢ {c['name']}{inherits}")
                if c.get("functions"):
                    print(f"     Functions: {', '.join(c['functions'][:6])}")
        
        # === README EXCERPT ===
        print("\n" + "=" * 70)
        print("üìñ PROJECT DESCRIPTION (README)")
        print("=" * 70)
        
        readme = key_files.get("README.md") or key_files.get("readme.md") or ""
        if readme:
            lines = [l for l in readme.split("\n")[:40] 
                    if l.strip() and not l.strip().startswith(("![", "<img", "[![", "---"))][:15]
            print("\n" + "\n".join(lines))
        else:
            print("\nNo README found")
        
        # === DEPENDENCIES ===
        print("\n" + "=" * 70)
        print("üì¶ KEY DEPENDENCIES")
        print("=" * 70)
        
        if "package.json" in key_files:
            try:
                pkg = json.loads(key_files["package.json"])
                deps = list(pkg.get("dependencies", {}).keys())[:8]
                if deps:
                    print(f"\nNPM: {', '.join(deps)}")
            except:
                pass
        
        if "requirements.txt" in key_files:
            deps = [l.split("==")[0].split(">=")[0].strip() 
                   for l in key_files["requirements.txt"].split("\n") 
                   if l.strip() and not l.startswith("#")][:8]
            if deps:
                print(f"\nPython: {', '.join(deps)}")
        
        print("\n" + "=" * 70)
    
    def _detect_frameworks(self, key_files):
        """Detect frameworks based on config files."""
        frameworks = []
        pkg = key_files.get("package.json", "").lower()
        req = key_files.get("requirements.txt", "").lower()
        
        checks = [
            (pkg, "react", "React"), (pkg, "next", "Next.js"), (pkg, "vue", "Vue.js"),
            (pkg, "express", "Express.js"), (pkg, "hardhat", "Hardhat"), (pkg, "ethers", "Ethers.js"),
            (pkg, "typescript", "TypeScript"), (pkg, "vite", "Vite"),
            (req, "django", "Django"), (req, "flask", "Flask"), (req, "fastapi", "FastAPI"),
            (req, "torch", "PyTorch"), (req, "tensorflow", "TensorFlow"),
            (req, "langchain", "LangChain"), (req, "openai", "OpenAI"),
            (req, "pandas", "Pandas"), (req, "numpy", "NumPy")
        ]
        
        for source, keyword, name in checks:
            if keyword in source:
                frameworks.append(name)
        
        if "Dockerfile" in key_files:
            frameworks.append("Docker")
        
        return list(set(frameworks)) or ["No specific frameworks detected"]

# Run the analysis for the repo picked via SELECTED_REPO_NUMBER (1-based)
if not (github and repo_list):
    print("‚ùå Run the previous cells first to load repos.")
else:
    idx = SELECTED_REPO_NUMBER - 1
    if idx < 0 or idx >= len(repo_list):
        print(f"‚ùå Invalid selection. Choose a number between 1 and {len(repo_list)}")
    else:
        selected = repo_list[idx]
        owner = selected["owner"]["login"]
        repo_name = selected["name"]

        analyzer = RepoAnalyzer(github)
        analyzer.analyze(owner, repo_name)
        analyzer.print_summary()

üîç Analyzing repository: tenkara/openai-stackhack-2023

üìã Fetching repository details...
üíª Analyzing languages/tech stack...
üíª Analyzing languages/tech stack...
üìÇ Mapping repository structure...
üìÇ Mapping repository structure...
üìÑ Reading configuration files...
üî¨ Analyzing source code...
üìÑ Reading configuration files...
üî¨ Analyzing source code...
üíº Inferring business functionality...

‚úÖ Analysis complete!

üì¶ REPOSITORY OVERVIEW
Name:        tenkara/openai-stackhack-2023
Description: No description
URL:         https://github.com/tenkara/openai-stackhack-2023
Created:     2023-02-25
Updated:     2023-03-11
Stars:       1 ‚≠ê  |  Forks: 0

üíº BUSINESS FUNCTIONALITY SUMMARY

üìù This is a General software project that implements various features.

üë§ User-Facing Endpoints:
   ‚Ä¢ Route: /public
   ‚Ä¢ Route: /
   ‚Ä¢ Route: /sp/chat
   ‚Ä¢ Route: /private
   ‚Ä¢ Route: /chat

üíª TECH STACK & LANGUAGES
Jupyter Notebook ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà