In [3]:
import requests
import json
import os
import base64

In [4]:
class repo:
    def __init__(self, name, url, languages, topics, readme):
        self.name = name
        self.url = url
        self.languages = languages
        self.topics = topics
        self.readme = readme

In [5]:
GITHUB_USERNAME = "prakhargaming"
GITHUB_TOKEN = os.getenv("REPO")

headers = {
    "Authorization": f"token {GITHUB_TOKEN}",
    "Accept": "application/vnd.github.v3+json"
}

In [6]:
def auto_tag(readme_text, languages) -> list[str]:
    tags = []

    # Keywords for different skill areas
    computer_vision_keywords = ["opencv", "cnn", "image", "vision", "detection", "segmentation", "recognition"]
    nlp_keywords = ["bert", "transformer", "token", "nlp", "text classification", "language model"]
    web_dev_keywords = ["react", "flask", "django", "express", "api", "frontend", "backend", "web app"]
    data_science_keywords = ["pandas", "numpy", "dataframe", "analysis", "plot", "visualization"]
    ai_keywords = ["deep learning", "machine learning", "reinforcement learning", "model", "training"]

    text = readme_text.lower()

    # Helper function
    def contains_any(keywords):
        return any(keyword in text for keyword in keywords)

    # Tagging based on content
    if contains_any(computer_vision_keywords) or 'OpenCV' in languages:
        tags.append("computer-vision")
    if contains_any(nlp_keywords):
        tags.append("nlp")
    if contains_any(web_dev_keywords):
        tags.append("web-development")
    if contains_any(data_science_keywords):
        tags.append("data-science")
    if contains_any(ai_keywords):
        tags.append("artificial-intelligence")

    return tags

In [7]:
def fetch_public_repo_information(username: str) -> dict[str, repo]:
    repo_url = f"https://api.github.com/users/{username}/repos"
    request_repo = requests.get(repo_url, headers=headers)
    if request_repo.status_code != 200:
        print(f"Request Failed (request_repo): {request_repo.status_code} \n {repo_url}")
        return request_repo.status_code
    data = request_repo.json()
    repo_info = {}
    for repos in data:
        repo_name = repos["name"]
        repo_url = repos["url"]
        language_url = f"https://api.github.com/repos/{username}/{repo_name}/languages"
        readme_url = f"https://api.github.com/repos/{username}/{repo_name}/readme"

        request_languages = requests.get(language_url, headers=headers)
        if request_languages.status_code == 200:     
            repo_languages = request_languages.json()
        else:
            print(f"Request Failed (request_languages): {request_languages.status_code} \n {language_url}")
            repo_languages = {}

        request_readme = requests.get(readme_url, headers=headers)
        if request_readme.status_code == 200:
            readme_content = request_readme.json()
            repo_readme = base64.b64decode(readme_content["content"]).decode('utf-8')
        else:
            print(f"Request Failed (request_readme): {request_readme.status_code} \n {readme_url}")
            repo_readme = ""
        
        repo_info[repo_name] = repo(
                                    name=repo_name,
                                    url=repo_url,
                                    languages=repo_languages,
                                    topics=auto_tag(repo_readme, repo_languages),
                                    readme=repo_readme
                                )

    return repo_info

In [8]:
repos = fetch_public_repo_information(GITHUB_USERNAME)

Request Failed (request_readme): 404 
 https://api.github.com/repos/prakhargaming/amazonInterview/readme
Request Failed (request_readme): 404 
 https://api.github.com/repos/prakhargaming/Lab-thingy/readme
Request Failed (request_readme): 404 
 https://api.github.com/repos/prakhargaming/musicdiscordplaylistbot/readme
Request Failed (request_readme): 404 
 https://api.github.com/repos/prakhargaming/websocker-react-app/readme


In [9]:
directory = "github_repos_info"

try:
    os.mkdir(directory)
    print(f"Directory '{directory}' created successfully.")
except FileExistsError:
    print(f"Directory '{directory}' already exists.")
except FileNotFoundError:
    print("Parent directory does not exist.")

Directory 'github_repos_info' already exists.


In [13]:
for repo_name, repo in repos.items():
    file_path = f"github_repos_info\\REPO_INFO_{repo.name}.txt"
    file_contents = f"""
# METADATA
Repository name: {repo.name}
Repository URL: {repo.url}
Repository languages: {repo.languages}
Repository topics: {repo.topics}

# README:
{repo.readme}"""

    try:
        with open(file_path, "w") as file:
            file.write(file_contents)
        print(f"File '{file_path}' created successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")

File 'github_repos_info\REPO_INFO_amazonInterview.txt' created successfully.
File 'github_repos_info\REPO_INFO_Data-Visualization-Web-Dev-Project.txt' created successfully.
File 'github_repos_info\REPO_INFO_FastSAM-needle-biopsy.txt' created successfully.
An error occurred: 'charmap' codec can't encode character '\u0259' in position 1143: character maps to <undefined>
File 'github_repos_info\REPO_INFO_flask-react-template.txt' created successfully.
File 'github_repos_info\REPO_INFO_GenAI_Catagorization_Engine.txt' created successfully.
File 'github_repos_info\REPO_INFO_Lab-thingy.txt' created successfully.
File 'github_repos_info\REPO_INFO_musicdiscordplaylistbot.txt' created successfully.
File 'github_repos_info\REPO_INFO_Neurotech-Davis2022.txt' created successfully.
File 'github_repos_info\REPO_INFO_Neurotech_at_Davis_Robotic_Arm.txt' created successfully.
File 'github_repos_info\REPO_INFO_ntx22-ui.txt' created successfully.
File 'github_repos_info\REPO_INFO_prakhar-website.txt' cre