Importing libraries

In [None]:
import csv
import os
import time

import requests

API access token

In [None]:
# Read the personal access token from the GITHUB_TOKEN environment variable so
# the secret never has to be stored in the notebook itself.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "").strip()
# With no token configured, send no Authorization header at all: requests then
# go out unauthenticated instead of presenting a credential that is known bad.
headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

Fetch user data

In [None]:
def fetch_detailed_user_data(username, timeout=10):
    """Fetch the full profile of a single GitHub user.

    Args:
        username: GitHub login of the user to look up.
        timeout: Seconds to wait for the API before giving up on the request.

    Returns:
        The decoded JSON body of the response, or None when the request
        fails or the API answers with a status other than 200.
    """
    url = f"https://api.github.com/users/{username}"
    try:
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        # A network failure for one user must not abort the whole run.
        print(f"Error fetching details for {username}: {error}")
        return None

    if response.status_code != 200:
        print(f"Error fetching details for {username}: {response.status_code}")
        return None
    return response.json()

def fetch_users(city='Delhi', min_followers=100):
    """Collect detailed profiles of GitHub users located in a city.

    Pages through the user search endpoint and, for every hit, fetches the
    full profile with fetch_detailed_user_data.

    Args:
        city: Value for the ``location:`` search qualifier.
        min_followers: Only users with strictly more followers are matched.

    Returns:
        A list of detailed user profiles. The list is partial when a request
        fails midway, because paging stops at the first error.
    """
    per_page = 100
    # The search API only serves the first 1,000 matches of any query.
    max_results = 1000

    # Quote multi-word cities so the whole name stays attached to the
    # qualifier instead of the trailing words becoming free-text terms.
    location = str(city)
    if any(ch.isspace() for ch in location) and not location.startswith('"'):
        location = f'"{location}"'

    # Passing params lets requests handle the URL encoding of the query.
    params = {
        "q": f"location:{location} followers:>{min_followers}",
        "per_page": per_page,
    }
    users = []
    page = 1

    while True:
        params["page"] = page
        try:
            response = requests.get(
                "https://api.github.com/search/users",
                params=params,
                headers=headers,
                timeout=10,
            )
        except requests.RequestException as error:
            print(f"Error: {error}")
            break
        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            break

        data = response.json().get('items', [])
        if not data:
            print("No more users found.")
            break

        for user in data:
            detailed_user = fetch_detailed_user_data(user["login"])
            if detailed_user:
                users.append(detailed_user)
            # Pause between profile lookups to stay gentle on the rate limit.
            time.sleep(1)

        # A short page is the last one; skip the extra empty-page request.
        if len(data) < per_page:
            print("No more users found.")
            break
        # Asking for results beyond the cap would only produce an API error.
        if page * per_page >= max_results:
            print(f"Reached the search API limit of {max_results} results.")
            break

        page += 1
        time.sleep(1)

    print(f"Total users fetched: {len(users)}")
    return users

# Run the user search with fetch_users' default arguments and keep the
# resulting list of profiles for the cells below.
users_data = fetch_users()

Clean data

In [None]:
def clean_company_name(company_name):
    """Normalise a company string taken from a GitHub profile.

    Surrounding whitespace is trimmed, one leading '@' is dropped (only the
    first), and the result is upper-cased. Falsy input such as None or an
    empty string yields an empty string.
    """
    if not company_name:
        return ""

    trimmed = company_name.strip()
    # Slice off a single '@' so that "@@org" keeps its second '@'.
    without_at = trimmed[1:] if trimmed.startswith('@') else trimmed
    return without_at.upper()


Write user data

In [None]:
def write_users_csv(users):
    """Write user profiles to users.csv in the current working directory.

    Args:
        users: Iterable of dict-like profiles. Missing keys fall back to the
            per-column defaults listed below; the company value is passed
            through clean_company_name before it is written.
    """
    # (column name, default when the key is absent), in output order.
    columns = (
        ("login", ""),
        ("name", ""),
        ("company", ""),
        ("location", ""),
        ("email", ""),
        ("hireable", False),
        ("bio", ""),
        ("public_repos", 0),
        ("followers", 0),
        ("following", 0),
        ("created_at", ""),
    )
    column_names = [column for column, _ in columns]

    with open('users.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(column_names)

        for user in users:
            record = {column: user.get(column, default) for column, default in columns}
            record["company"] = clean_company_name(record["company"])

            print(f"Writing user: {record['login']}, Name: {record['name']}, Company: {record['company']}")

            writer.writerow([record[column] for column in column_names])

# Export the fetched profiles to users.csv.
write_users_csv(users_data)


Fetch repository data

In [None]:
def fetch_user_repos(username, max_repos=500):
    """Fetch up to ``max_repos`` repositories of a GitHub user.

    Args:
        username: GitHub login whose repositories are listed.
        max_repos: Upper bound on the number of repositories returned.

    Returns:
        A list of repository dicts as returned by the API, at most
        ``max_repos`` long. The list is partial when a request fails midway,
        because paging stops at the first error.
    """
    per_page = 100
    url = f"https://api.github.com/users/{username}/repos"
    repos = []
    page = 1

    # Stop paging as soon as the cap is reached instead of downloading every
    # page and throwing the surplus away afterwards.
    while len(repos) < max_repos:
        try:
            response = requests.get(
                url,
                params={"per_page": per_page, "page": page},
                headers=headers,
                timeout=10,
            )
        except requests.RequestException as error:
            print(f"Error fetching repos for {username}: {error}")
            break
        if response.status_code != 200:
            print(f"Error fetching repos for {username}: {response.status_code}")
            break

        data = response.json()
        if not data:
            break

        repos.extend(data)
        # A short page is the last one; a full cap needs no further request.
        if len(data) < per_page or len(repos) >= max_repos:
            break
        page += 1
        time.sleep(1)

    return repos[:max_repos]

# Map every fetched user's login to the list of that user's repositories.
user_repos_data = {
    login: fetch_user_repos(login)
    for login in (account["login"] for account in users_data)
}


Write repository data

In [None]:
def _repo_row(login, repo):
    """Build one CSV row for a repository owned by ``login``."""
    # The API reports "license" as None when a repository has no licence.
    license_info = repo.get("license")
    return [
        login,
        repo.get("full_name", ""),
        repo.get("created_at", ""),
        repo.get("stargazers_count", ""),
        repo.get("watchers_count", ""),
        repo.get("language", ""),
        repo.get("has_projects", False),
        repo.get("has_wiki", False),
        license_info["key"] if license_info else "",
    ]


def write_repos_csv(user_repos):
    """Write repositories to repositories.csv in the current working directory.

    Args:
        user_repos: Mapping of login to a list of repository dicts. One row
            is written per repository; the ``license_name`` column holds the
            licence's ``key`` field, or an empty string when there is none.
    """
    header = [
        "login", "full_name", "created_at", "stargazers_count",
        "watchers_count", "language", "has_projects", "has_wiki",
        "license_name",
    ]

    with open('repositories.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(
            _repo_row(login, repo)
            for login, repos in user_repos.items()
            for repo in repos
        )

# Export the collected repositories to repositories.csv.
write_repos_csv(user_repos_data)
