In [None]:
# Useful example queries for the GitHub Search API (note: per_page is capped at 100 by the API)
# "https://api.github.com/search/repositories?q=hackathon&per_page=5&page=1"
# "https://api.github.com/search/repositories?q=topic:Hackathon&per_page=1000&page=1"

In [None]:
# Notebook-wide settings.
# required_requests: presumably the estimated number of API requests for a
# full run -- it is not referenced by the cells shown here; TODO confirm.
required_requests = 2810
# hide_Token: while True, the token-loading cell does not print the API token.
hide_Token = True

In [None]:
import requests
import json
from dotenv import load_dotenv
import os

# Read the GitHub API token from a local env file so the secret never has to
# be written into the notebook itself.
load_dotenv("./token.env")
api_token = os.getenv("API_TOKEN")

# Fail fast: without a token the Authorization header built later would be the
# literal string "token None", and the long-running crawl would only discover
# the problem one failed request at a time.
if not api_token:
    raise RuntimeError(
        "API_TOKEN is not set; define it in ./token.env or in the environment "
        "before running the crawl."
    )

# Opt-in debugging aid. Printed output is stored in the notebook file, so keep
# hide_Token set to True before saving or sharing the notebook.
if not hide_Token:
    print(f"API Token: {api_token}")


In [None]:
from datetime import datetime, timedelta, timezone
import time
import pandas as pd

# Accumulator for the crawl: the loop cell appends one DataFrame per fetched
# result page to this list.
dataframes = list()

# Authorization header sent with every GitHub API request.
headers = dict(Authorization="token {}".format(api_token))

# Crawl window in UTC: five 365-day years back from now, up to now.
lookback = timedelta(days=365 * 5)
start_date = datetime.now(timezone.utc) - lookback
end_date = datetime.now(timezone.utc)

# Day cursor advanced by the crawl loop. It is initialised in this cell, so
# re-running only the loop cell continues from the cursor's current value
# instead of starting over at start_date.
current_date = start_date

In [None]:

# Crawl GitHub for repositories matching "hackathon", one search query per
# creation day, paging through each day's results. Reads and updates the
# notebook-level `current_date` cursor and `dataframes` accumulator, so the
# cell can be re-run to resume after an interruption.

SEARCH_URL = "https://api.github.com/search/repositories"
PER_PAGE = 100                    # maximum page size accepted by the API
MAX_PAGES = 10                    # GitHub Search API max 10 pages (1000 results per query)
REQUEST_PAUSE_S = 2               # pause between requests to respect rate limits
REQUEST_TIMEOUT_S = 30            # never let a single request hang the crawl
MAX_ATTEMPTS = 5                  # tries per page before giving up on it
RATE_LIMIT_FALLBACK_WAIT_S = 60   # wait used when a throttled response carries no hint


def _seconds_until_retry(response):
    """Return how many seconds to wait before retrying a throttled response.

    Prefers the `Retry-After` header, then the `X-RateLimit-Reset` epoch when
    the remaining quota is reported as 0, and otherwise falls back to
    RATE_LIMIT_FALLBACK_WAIT_S.
    """
    retry_after = response.headers.get("Retry-After", "")
    if retry_after.isdigit():
        return int(retry_after) + 1
    if response.headers.get("X-RateLimit-Remaining") == "0":
        reset_epoch = response.headers.get("X-RateLimit-Reset", "")
        if reset_epoch.isdigit():
            # +1 second of slack so the retry lands after the window resets.
            return max(int(reset_epoch) - time.time(), 0) + 1
    return RATE_LIMIT_FALLBACK_WAIT_S


def _fetch_search_page(query, page, label):
    """Fetch one page of repository search results.

    Args:
        query: Search expression sent as the `q` parameter.
        page: 1-based page number.
        label: Short text identifying the request in log messages.

    Returns:
        The list under the response's "items" key (possibly empty), or None
        when the page could not be fetched after MAX_ATTEMPTS tries or the
        server answered with a status that is not worth retrying.
    """
    # Let requests build and encode the query string instead of splicing the
    # raw search expression (which contains a space and a colon) into the URL.
    params = {"q": query, "per_page": PER_PAGE, "page": page}
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(
                SEARCH_URL, headers=headers, params=params, timeout=REQUEST_TIMEOUT_S
            )
        except requests.RequestException as exc:
            # Connection problems and timeouts are retried with backoff.
            print(f"Request error for {label} page {page}: {exc}")
            wait = REQUEST_PAUSE_S * 2 ** attempt
        else:
            if response.status_code == 200:
                return response.json().get("items", [])
            print(f"Request failed for {label} page {page} with status code: {response.status_code}")
            if response.status_code in (403, 429):
                # Throttled: wait as long as the response headers ask for.
                wait = _seconds_until_retry(response)
            elif response.status_code >= 500:
                wait = REQUEST_PAUSE_S * 2 ** attempt
            else:
                # Other client errors will not succeed on retry.
                return None
        if attempt < MAX_ATTEMPTS:
            print(f"Retrying in {wait:.0f}s (attempt {attempt + 1} of {MAX_ATTEMPTS}).")
            time.sleep(wait)
    return None


while current_date <= end_date:
    date_str = current_date.strftime("%Y-%m-%d")
    query = f"hackathon created:{date_str}"
    # Pages are collected per day and committed together, so interrupting the
    # cell mid-day and re-running it does not append the same pages twice.
    day_frames = []
    for page in range(1, MAX_PAGES + 1):
        repos = _fetch_search_page(query, page, date_str)
        if repos is None:
            print(f"Giving up on {date_str} at page {page}; results for this day may be incomplete.")
            break
        if not repos:
            break
        day_frames.append(pd.DataFrame(repos))
        print(f"Processed {date_str}, page {page}, found {len(repos)} repos.")
        if len(repos) < PER_PAGE:
            break
        if page == MAX_PAGES:
            print(f"Reached the {MAX_PAGES}-page limit for {date_str}; results for this day may be truncated.")
            break
        time.sleep(REQUEST_PAUSE_S)  # Respect rate limits
    dataframes.extend(day_frames)
    current_date += timedelta(days=1)
    time.sleep(REQUEST_PAUSE_S)  # Respect rate limits

if dataframes:
    all_repos_df = pd.concat(dataframes, ignore_index=True)
    all_repos_df.to_excel("github_repos.xlsx", index=False)
else:
    print("No data collected.")

In [None]:
# Manual re-export of whatever has been collected so far (for example after
# interrupting the crawl). pd.concat raises ValueError on an empty list, so
# guard against the case where nothing has been collected yet.
if dataframes:
    all_repos_df = pd.concat(dataframes, ignore_index=True)
    all_repos_df.to_excel("github_repos2.xlsx", index=False)
else:
    print("No data collected.")

In [None]:
# # Display a small overview of the GitHub API response
# from datetime import datetime
# import json
# overview = {
#     "total_count": response.json().get("total_count"),
#     "incomplete_results": response.json().get("incomplete_results"),
#     "items_count": len(response.json().get("items", [])),
#     "first_repo_name": response.json().get("items", [{}])[0].get("name") if response.json().get("items") else None
# }

# print(overview)

# first_repo = response.json().get("items", [{}])[0]
# # print(json.dumps(first_repo, indent=2))
# print(first_repo.get("created_at"))
# print(first_repo.get("pushed_at"))

# from datetime import datetime

# first_repo = response.json().get("items", [{}])[0]
# created_at = first_repo.get("created_at")
# pushed_at = first_repo.get("pushed_at")

# if created_at and pushed_at:
#     created_dt = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
#     pushed_dt = datetime.strptime(pushed_at, "%Y-%m-%dT%H:%M:%SZ")
#     active_time = pushed_dt - created_dt
#     print(f"Active time: {active_time}")
# else:
#     print("Timestamps not found.")

# from datetime import datetime

# first_repo = response.json().get("items", [{}])[1]
# created_at = first_repo.get("created_at")
# pushed_at = first_repo.get("pushed_at")

# if created_at and pushed_at:
#     created_dt = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
#     pushed_dt = datetime.strptime(pushed_at, "%Y-%m-%dT%H:%M:%SZ")
#     active_time = pushed_dt - created_dt
#     print(f"Active time: {active_time}")
# else:
#     print("Timestamps not found.")

In [None]:
# # ...existing code...
# repos = response.json().get("items", [])
# for repo in repos:
#     print(repo.get("html_url"))
# # ...existing