In [1]:
import os
import requests
import shutil
import glob
import torch
from nbconvert import PythonExporter
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import langchain

# Set up Hugging Face GPT-2 model and tokenizer
model_name = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Function to fetch repositories from GitHub user URL using GitHub API
def fetch_user_repositories(url):
    """
    Fetches the repositories of a GitHub user using GitHub API.
    Args:
        url (str): The GitHub user URL.
    Returns:
        list: List of repository objects.
    Raises:
        Exception: If failed to fetch repositories.
    """
    try:
        user = url.split('/')[-1]
        api_url = f"https://api.github.com/users/{user}/repos"
        headers = {'Accept': 'application/vnd.github.v3+json'}
        response = requests.get(api_url, headers=headers)
        response.raise_for_status()
        repositories = response.json()
        return repositories
    except requests.exceptions.RequestException as e:
        raise Exception(f"Failed to fetch repositories from the given GitHub user URL: {str(e)}")

# Function to preprocess Jupyter notebooks to Python files
def preprocess_jupyter_notebooks(repo_path):
    """
    Converts Jupyter notebooks to Python files.
    Args:
        repo_path (str): The path to the repository.
    """
    notebook_files = glob.glob(os.path.join(repo_path, '**/*.ipynb'), recursive=True)
    for notebook_file in notebook_files:
        notebook_dir = os.path.dirname(notebook_file)
        notebook_name = os.path.basename(notebook_file)
        python_exporter = PythonExporter()
        python_code, _ = python_exporter.from_filename(notebook_file)
        python_file = os.path.splitext(notebook_name)[0] + '.py'
        with open(os.path.join(notebook_dir, python_file), 'w') as f:
            f.write(python_code)
        os.remove(notebook_file)

# Function to evaluate code complexity using LangChain
def evaluate_code_complexity(code):
    """
    Evaluates the code complexity using LangChain.
    Args:
        code (str): The code to evaluate.
    Returns:
        float: The code complexity score.
    """
    complexity_score = langchain.evaluate(code)
    return complexity_score

# Function to analyze repositories and find the most technically complex one
def analyze_repositories(user_url):
    """
    Analyzes repositories of a GitHub user and finds the most technically complex one.
    Args:
        user_url (str): The GitHub user URL.
    Returns:
        str: The URL of the most technically complex repository.
    Raises:
        Exception: If no repositories found or an error occurred during analysis.
    """
    repositories = fetch_user_repositories(user_url)
    if repositories:
        most_complex_repo = None
        max_complexity_score = float('-inf')

        for repo in repositories:
            # Extract the clone URL for the repository
            repo_url = repo['clone_url']
            temp_repo_path = 'temp_repo'

            try:
                # Clone the repository using Git command
                os.makedirs(temp_repo_path, exist_ok=True)
                os.system(f"git clone {repo_url} {temp_repo_path}")

                # Preprocess Jupyter notebooks to Python files
                preprocess_jupyter_notebooks(temp_repo_path)

                # Evaluate each code file's complexity and determine the maximum
                complexity_score = 0
                code_files = glob.glob(os.path.join(temp_repo_path, '**/*.py'), recursive=True)
                for code_file in code_files:
                    with open(code_file, 'r') as f:
                        code = f.read()
                    complexity_score += evaluate_code_complexity(code)

                if complexity_score > max_complexity_score:
                    max_complexity_score = complexity_score
                    most_complex_repo = repo_url
            finally:
                # Clean up temporary repository
                shutil.rmtree(temp_repo_path)

        return most_complex_repo
    else:
        raise Exception("No repositories found for the given GitHub user URL.")

# Main function for analyzing GitHub repositories
def analyze_github_repositories():
    """
    Main function to analyze GitHub repositories and find the most technically complex one.
    """
    try:
        user_url = input("Enter the GitHub user URL for analysis: ")
        most_complex_repo = analyze_repositories(user_url)

        # Display the most complex repository and justification
        print("The most technically complex repository is:", most_complex_repo)
        
        # Use GPT-2 or any other model to generate a justification for the selection
        justification = generate_justification(most_complex_repo)
        print("Justification:", justification)
        
    except Exception as e:
        print(f"Error occurred during analysis: {str(e)}")

# Function to generate a justification for repository selection using GPT-2
def generate_justification(repo_url):
    """
    Generates a justification for repository selection using GPT-2.
    Args:
        repo_url (str): The URL of the selected repository.
    Returns:
        str: The generated justification.
    """
    # Generate a prompt using the repository URL
    prompt = f"The repository {repo_url} was selected as the most technically complex repository on GitHub"

    # Tokenize the prompt
    tokens = tokenizer.encode(prompt, add_special_tokens=True, return_tensors='pt')

    # Generate predictions using GPT-2
    with torch.no_grad():
        outputs = model.generate(tokens, max_length=100, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)

    # Decode the generated tokens
    justification = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Remove the unwanted paragraph
    justification = justification.split('\n\n')[0]

    return justification

# Run the GitHub repository analysis
analyze_github_repositories()

Enter the GitHub user URL for analysis: https://github.com/samsoir


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The most technically complex repository is: https://github.com/samsoir/adapterpattern.com.git




Justification: The repository https://github.com/samsoir/adapterpattern.com.git was selected as the most technically complex repository on GitHub.
