# In [None]:
import os
import time

import pandas as pd
import requests

# Personal access token (PAT) for the GitHub API. It is read from the
# GITHUB_TOKEN environment variable so the secret never lives in source
# control or in a shared notebook.
# NOTE: a token used to be hard-coded on this line. Any credential that has
# been committed or shared must be treated as leaked and revoked on GitHub.
TOKEN = os.environ.get("GITHUB_TOKEN", "").strip()

# Only send an Authorization header when a token is configured; otherwise the
# requests go out unauthenticated (GitHub applies a much lower rate limit).
headers = {'Authorization': f'token {TOKEN}'} if TOKEN else {}

# CSV file paths
USER_CSV = "/content/users.csv"
REPO_CSV = "/content/repositories.csv"

def _get_with_rate_limit_retry(url, max_retries=3):
    """GET *url* with the module-level ``headers``, waiting out rate limits.

    A 403 or 429 response is treated as a rate limit: the function sleeps and
    retries, at most ``max_retries`` times, so a permanent 403 cannot loop
    forever. The last response is returned whatever its status; callers must
    still check ``status_code``.
    """
    response = None
    for attempt in range(max_retries + 1):
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code not in (403, 429) or attempt == max_retries:
            break

        retry_after = response.headers.get('Retry-After')
        reset_at = response.headers.get('X-RateLimit-Reset')
        try:
            if retry_after is not None:
                wait_time = int(retry_after)
            elif reset_at is not None:
                wait_time = int(reset_at) - int(time.time()) + 1
            else:
                wait_time = 60
        except ValueError:
            # Header was not a plain integer; fall back to a fixed wait.
            wait_time = 60

        # time.sleep() raises ValueError on a negative argument, which happens
        # when the advertised reset time is already in the past.
        wait_time = max(wait_time, 1)
        print(f"Rate limit hit. Waiting for {wait_time} seconds...")
        time.sleep(wait_time)

    return response


def fetch_all_users():
    """Fetch all users from Hyderabad with more than 50 followers.

    Pages through the GitHub user-search endpoint, then requests each
    matching user's full profile. Search results are abbreviated user objects
    that lack fields such as ``name``, ``company``, ``followers`` and
    ``created_at``, so without the profile lookup those fields would all be
    missing downstream.

    Returns:
        A list of dicts, one per user. Each dict is the search item merged
        with the full profile (profile values win). If a profile request
        fails, the bare search item is kept so the user is not dropped.
    """
    users = []
    page = 1

    print("Fetching users...")
    while True:
        # GitHub API search endpoint with pagination
        url = f"https://api.github.com/search/users?q=location:Hyderabad+followers:>50&page={page}&per_page=100"
        response = _get_with_rate_limit_retry(url)

        # Any non-200 ends pagination (this includes the error GitHub returns
        # once a search is paged past its result cap).
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            break

        # .get() guards against a 200 body without an 'items' key.
        items = response.json().get('items', [])

        for item in items:
            login = item['login']
            profile = _get_with_rate_limit_retry(f"https://api.github.com/users/{login}")
            if profile.status_code == 200:
                users.append({**item, **profile.json()})
            else:
                print(f"Error fetching profile for {login}: {profile.status_code}")
                users.append(item)

        if len(items) < 100:
            break  # No more pages left

        page += 1  # Move to the next page

    print(f"Total users fetched: {len(users)}")
    return users

def fetch_repositories(user_login):
    """Fetch repositories for a specific user.

    Pages through ``/users/{user_login}/repos`` 100 at a time.

    Args:
        user_login: GitHub login whose repositories are listed.

    Returns:
        A list of repository dicts as returned by the API. If a request fails
        with a non-200 status, the repositories collected so far are returned.
    """
    max_retries = 3
    repos = []
    page = 1
    retries_left = max_retries

    while True:
        url = f"https://api.github.com/users/{user_login}/repos?page={page}&per_page=100"
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=30)

        # Rate limit hit. Retries are bounded so a permanent 403 falls through
        # to the error branch below instead of looping forever.
        if response.status_code in (403, 429) and retries_left > 0:
            retries_left -= 1
            retry_after = response.headers.get('Retry-After')
            reset_at = response.headers.get('X-RateLimit-Reset')
            try:
                if retry_after is not None:
                    wait_time = int(retry_after)
                elif reset_at is not None:
                    wait_time = int(reset_at) - int(time.time()) + 1
                else:
                    wait_time = 60
            except ValueError:
                # Header was not a plain integer; fall back to a fixed wait.
                wait_time = 60
            # time.sleep() raises ValueError on a negative argument, which
            # happens when the reset time is already in the past.
            wait_time = max(wait_time, 1)
            print(f"Rate limit hit. Waiting for {wait_time} seconds...")
            time.sleep(wait_time)
            continue

        if response.status_code != 200:
            print(f"Error fetching repos for {user_login}: {response.status_code}")
            break

        retries_left = max_retries  # A successful page resets the retry budget.

        data = response.json()
        if not data:
            break  # No more repositories

        repos.extend(data)

        # A short page is the last one; stopping here avoids one extra request
        # per user just to receive an empty list.
        if len(data) < 100:
            break

        page += 1

    return repos

def write_users_to_csv(users):
    """Write users to CSV.

    Builds one row per user with a fixed set of profile columns and writes
    the table to ``USER_CSV`` without the DataFrame index.

    Args:
        users: Iterable of user dicts. Missing keys fall back to an empty
            string (or 0 for the numeric counters).
    """
    user_data = []
    for user in users:
        # The API reports an unset company as an explicit null, so the key is
        # present with value None and a .get() default would not apply; `or ""`
        # keeps .strip() from raising AttributeError.
        company = user.get("company") or ""
        user_data.append({
            "login": user.get("login", ""),
            "name": user.get("name", ""),
            # Normalise: trim whitespace, drop leading '@', upper-case.
            "company": company.strip().lstrip('@').upper(),
            "location": user.get("location", ""),
            "email": user.get("email", ""),
            "hireable": str(user.get("hireable", "")),
            "bio": user.get("bio", ""),
            "public_repos": user.get("public_repos", 0),
            "followers": user.get("followers", 0),
            "following": user.get("following", 0),
            "created_at": user.get("created_at", "")
        })

    df = pd.DataFrame(user_data)
    df.to_csv(USER_CSV, index=False)
    print(f"Users data written to {USER_CSV}")


def write_repositories_to_csv(users):
    """Write repositories to CSV.

    For every user, fetches their repositories via ``fetch_repositories`` and
    emits one row per repository to ``REPO_CSV`` (index column omitted).
    """
    rows = []

    for owner in users:
        login = owner['login']
        for entry in fetch_repositories(login):
            # 'license' may be absent or null; only read its name when set.
            license_info = entry.get("license")
            if license_info is not None:
                license_name = license_info.get("name", "")
            else:
                license_name = ""

            row = {
                "login": login,
                "full_name": entry.get("full_name", ""),
                "created_at": entry.get("created_at", ""),
                "stargazers_count": entry.get("stargazers_count", 0),
                "watchers_count": entry.get("watchers_count", 0),
                "language": entry.get("language", ""),
                "has_projects": str(entry.get("has_projects", "")),
                "has_wiki": str(entry.get("has_wiki", "")),
                "license_name": license_name,
            }
            rows.append(row)

    pd.DataFrame(rows).to_csv(REPO_CSV, index=False)
    print(f"Repositories data written to {REPO_CSV}")

# Main script: fetch the user list once, then hand it to each CSV writer in turn.
users = fetch_all_users()
for _write_csv in (write_users_to_csv, write_repositories_to_csv):
    _write_csv(users)


# Cell output:
# Fetching users...
# Total users fetched: 515
# Users data written to /content/users.csv
