In [5]:
import gradio as gr
import requests
import os
from github import Github
from github import GithubException
import openai
from pathlib import Path
import base64
import tiktoken
import time
from typing import Dict, List, Tuple

In [6]:
# Hand the OpenAI SDK its API key from the environment (None when the variable is unset)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Build the GitHub client: token-backed when GITHUB_TOKEN is set, anonymous otherwise
github_token = os.getenv("GITHUB_TOKEN")
if github_token:
    g = Github(github_token)
else:
    g = Github()

In [7]:
def count_tokens(text: str) -> int:
    """Return the number of tokens `text` encodes to under the "gpt-4" encoding.

    The encoding is looked up through ``tiktoken.encoding_for_model`` on every
    call; the result is the length of the encoded token sequence.
    """
    gpt4_encoding = tiktoken.encoding_for_model("gpt-4")
    token_ids = gpt4_encoding.encode(text)
    return len(token_ids)

In [8]:
def _split_owner_and_repo(repo_url: str) -> Tuple[str, str]:
    """Extract ``(owner, repo_name)`` from a GitHub URL or an ``owner/repo`` slug.

    When the text contains ``github.com/`` the first two path segments after
    the host are used, so deep links such as ``.../owner/repo/tree/main``
    still resolve to ``owner/repo``. Without that marker the last two
    segments are used. Query strings, fragments, surrounding whitespace and
    a trailing ``.git`` are ignored.

    Raises:
        ValueError: if an owner and a repository name cannot both be found.
    """
    # Drop "#fragment" and "?query" so they never end up in the repo name.
    location = repo_url.strip().split('#', 1)[0].split('?', 1)[0]

    host_marker = "github.com/"
    marker_at = location.find(host_marker)
    if marker_at != -1:
        after_host = location[marker_at + len(host_marker):]
        segments = [segment for segment in after_host.split('/') if segment][:2]
    else:
        segments = [segment for segment in location.split('/') if segment][-2:]

    if len(segments) == 2:
        owner, repo_name = segments
        if repo_name.endswith('.git'):
            repo_name = repo_name[:-len('.git')]
        if owner and repo_name:
            return owner, repo_name

    raise ValueError(
        "Invalid GitHub repository URL: expected https://github.com/<owner>/<repository>"
    )


def get_repo_contents(repo_url: str) -> Tuple[bool, Dict]:
    """
    Fetch the text files of a GitHub repository.

    Walks the repository tree depth-first through the module-level client
    ``g`` and collects at most 20 UTF-8 decodable files. Files larger than
    100000 bytes and files with a known image/PDF extension are skipped.

    Args:
        repo_url: Repository URL (deep links and a ``.git`` suffix are
            accepted) or an ``owner/repo`` slug.

    Returns:
        ``(is_public, content_dict)``. On success the flag is True and the
        dict maps each file path to ``{'content', 'size', 'type'}``. On
        failure the flag is False and the dict holds a single ``'error'``
        message. The flag only reports whether the fetch succeeded.
    """
    max_files = 20  # Limit number of files to process
    max_file_size = 100000  # Bytes; larger files are skipped
    skipped_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.pdf')

    try:
        try:
            owner, repo_name = _split_owner_and_repo(repo_url)
        except ValueError as e:
            # Report a malformed URL directly instead of asking GitHub for a
            # repository that cannot exist.
            return False, {"error": str(e)}

        repo = g.get_repo(f"{owner}/{repo_name}")

        contents = {}

        def process_contents(path=""):
            if len(contents) >= max_files:
                return

            items = repo.get_contents(path)
            if not isinstance(items, list):
                # get_contents yields a single object when the path is a file.
                items = [items]

            for item in items:
                if len(contents) >= max_files:
                    break

                if item.type == "dir":
                    process_contents(item.path)
                    continue

                # Skip binary files and very large files (extension match is
                # case-insensitive so "LOGO.PNG" is skipped as well).
                if item.size > max_file_size or item.name.lower().endswith(skipped_extensions):
                    continue

                try:
                    content = base64.b64decode(item.content).decode('utf-8')
                except Exception:
                    # Deliberate best-effort: anything that cannot be read as
                    # UTF-8 text is left out rather than failing the whole run.
                    continue

                contents[item.path] = {
                    'content': content,
                    'size': item.size,
                    'type': item.type
                }

        process_contents()
        return True, contents

    except GithubException as e:
        if e.status == 404:
            return False, {"error": "Repository not found or private"}
        return False, {"error": f"GitHub API error: {str(e)}"}
    except Exception as e:
        return False, {"error": f"Unexpected error: {str(e)}"}

In [10]:
def generate_readme(repo_contents: Dict) -> str:
    """Generate README using GPT-4.

    Builds one prompt from a preview of each file (its first 1000 characters,
    followed by "..." when truncated). Files are added in iteration order
    until the next one would push the running total past the 6000-token
    context budget. The prompt is then sent to the chat completions API.

    Args:
        repo_contents: Mapping of file path to a dict holding at least a
            ``'content'`` string.

    Returns:
        The generated Markdown, or a message starting with "Error" when the
        OpenAI call fails.
    """

    # Prepare context for GPT-4
    context = "Repository contents:\n\n"
    total_tokens = 0
    max_tokens = 6000  # Leave room for response

    for path, info in repo_contents.items():
        file_preview = info['content'][:1000] + "..." if len(info['content']) > 1000 else info['content']
        file_context = f"File: {path}\n{file_preview}\n\n"
        file_tokens = count_tokens(file_context)

        if total_tokens + file_tokens > max_tokens:
            break

        context += file_context
        total_tokens += file_tokens

    try:
        prompt = f"""Based on the following repository contents, generate a comprehensive README.md file. 
        Include sections for:
        - Project title and description
        - Features
        - Installation instructions
        - Usage examples
        - Project structure
        - Dependencies
        - Contributing guidelines (if applicable)
        - License information (if found)

        Format the output in proper Markdown.

        {context}"""

        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a technical writer specialized in creating clear, comprehensive README files for software projects."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=2000,
            temperature=0.7
        )

        return response.choices[0].message.content

    # The request above uses the openai>=1.0 interface, where the exception
    # classes live at the package top level and InvalidRequestError is named
    # BadRequestError. The old `openai.error.*` names no longer exist there,
    # so referencing them raised AttributeError and masked the real failure.
    except openai.RateLimitError:
        return "Error: OpenAI API rate limit exceeded. Please try again later."
    except openai.BadRequestError as e:
        return f"Error: Invalid request to OpenAI API: {str(e)}"
    except Exception as e:
        return f"Error generating README: {str(e)}"

In [14]:
def process_repository(repo_url: str, progress=gr.Progress()) -> str:
    """Main function to process repository and generate README.

    Validates the URL, fetches the repository files with
    ``get_repo_contents`` and passes them to ``generate_readme``, reporting
    progress along the way.

    Args:
        repo_url: Text entered by the user; surrounding whitespace is ignored.
        progress: Callable invoked with a completion fraction and a ``desc``
            label at each stage.

    Returns:
        The generated README Markdown, or a Markdown "# Error" block that
        describes why the repository could not be processed.
    """

    # Pasted URLs often carry stray spaces or a trailing newline; normalise
    # before validating so an otherwise valid URL is not rejected.
    repo_url = (repo_url or "").strip()

    if not repo_url or not repo_url.startswith("https://github.com/"):
        return "# Error\n---\nPlease enter a valid GitHub repository URL"

    progress(0.1, desc="Validating repository URL...")

    is_public, contents = get_repo_contents(repo_url)

    if not is_public:
        return f"# Error\n---\n{contents.get('error', 'Unknown error')}"

    progress(0.4, desc="Analyzing repository contents...")

    if not contents:
        return "# Error\n---\nNo readable contents found in repository"

    progress(0.7, desc="Generating README with GPT-4...")
    readme = generate_readme(contents)

    progress(1.0, desc="Done!")
    return readme

In [17]:
def create_interface():
    """Build the Gradio Blocks UI for the README generator and return it.

    Layout: a title and short description, then one row with two columns.
    The narrower left column (scale=1) holds the URL textbox, the generate
    button and usage notes; the wider right column (scale=2) holds the
    Markdown area that shows the result. Clicking the button calls
    ``process_repository`` with the textbox value and writes its return
    value into the output area.

    The interface is only constructed here; it is not launched.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        # Page header
        gr.Markdown("# 📚 GitHub README Generator")
        gr.Markdown("Enter a public GitHub repository URL to generate a comprehensive README.md file.")
        
        with gr.Row():
            # Left column: input controls and usage notes
            with gr.Column(scale=1):
                repo_url = gr.Textbox(
                    label="GitHub Repository URL",
                    placeholder="https://github.com/username/repository"
                )
                generate_btn = gr.Button("🔮 Generate README", variant="primary")
                
                gr.Markdown("""
                ### 📝 Notes:
                - Only public repositories are supported
                - Maximum 20 files will be processed per repository
                - Large files and binary files are skipped
                - The tool uses GPT-4 for README generation
                """)
            
            # Right column: rendered README (or error message)
            with gr.Column(scale=2):
                # Placeholder text is shown until the first generation finishes
                output = gr.Markdown(
                    value="Your generated README will appear here...",
                    container=True
                )
        
        # Wire the button: textbox value in, Markdown output out
        generate_btn.click(
            fn=process_repository,
            inputs=[repo_url],
            outputs=[output],
            show_progress=True
        )
    
    return interface


In [18]:
# Build the Gradio app and launch it when this code runs as the main module.
# `interface` is kept as a module-level name so a later cell can close it.
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()

* Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


In [None]:
# Close the interface launched above; run this cell when finished with the app.
interface.close()