# Bronze Layer - GitHub Repositories
# Process:
# 1. Import libraries needed
# 2. Set up configuration
# 3. Write a function to fetch repositories for ONE user
# 4. Loop through every user in USERS (51 accounts) and collect their repositories
# 5. Save the data to a JSON file and print a sample

In [None]:
import json
import os
import time
from datetime import datetime

import pandas
import requests

In [None]:
# Folder that receives the raw (bronze) JSON output written further down.
BRONZE_FOLDER = "C:/Users/rjaya/OneDrive/Desktop/hop_dataeng/data/bronze"
# Date of this run (local clock, YYYY-MM-DD); used as the output file name prefix.
COLLECTION_DATE = f"{datetime.now():%Y-%m-%d}"

In [None]:
# Base URL of the GitHub REST API's user resources.
API_ENDPOINT = "https://api.github.com/users"
# Pause, in seconds, between consecutive requests.
DELAY_BETWEEN_REQUESTS = 0.5
# Cap on attempts for a single request; intended for retry handling.
MAX_RETRIES = 3
# GitHub logins whose repositories are collected, in processing order.
USERS = [
    "torvalds", "yyx990803", "DHH", "taylorotwell", "rauchg",
    "sindresorhus", "gaearon", "addyosmani", "jresig", "jashkenas",
    "BrendanEich", "ry", "mitchellh", "visionmedia", "JakeWharton",
    "mbostock", "kennethreitz", "antirez", "hadley", "jeffdean",
    "sanjay", "SimonCropp", "sdras", "douglascrockford", "defunkt",
    "mojombo", "mdo", "paulirish", "mitsuhiko", "LeaVerou",
    "wesbos", "jlord", "fabpot", "wycats", "nzakas",
    "josevalim", "adamwathan", "AndrewNg", "sebastianlague", "karpathy",
    "kamranahmedse", "trungdq88", "squidfunk", "bradfitz", "ggreer",
    "codepo8", "SaraSoueidan", "EvanLi", "zenorocha", "JohnPapa",
    "shanselman",
]



In [None]:
def fetch_data(username):
    """Fetch the repositories of one GitHub user.

    Sends a GET request to ``{API_ENDPOINT}/{username}/repos`` and returns
    the decoded JSON body. Network failures and 5xx responses are retried,
    up to ``MAX_RETRIES`` attempts in total, with a growing pause between
    attempts.

    Only a single page is requested. The page size is raised to GitHub's
    maximum of 100 (the API default is 30), so users with more than 100
    repositories are still truncated.

    Args:
        username: GitHub login whose repositories are requested.

    Returns:
        The decoded JSON payload: a list of repository dicts on success.
        For a non-2xx response the error payload is returned unchanged,
        after a warning is printed, so one bad user does not abort the run.

    Raises:
        requests.RequestException: If the request still fails at the
            network level on the final attempt.
        ValueError: If the response body is not valid JSON.
    """
    url = f"{API_ENDPOINT}/{username}/repos"
    # Ask for the largest page GitHub allows instead of the default 30.
    params = {"per_page": 100}
    # Guard against a zero/negative MAX_RETRIES: always try at least once.
    attempts = max(1, MAX_RETRIES)

    for attempt in range(1, attempts + 1):
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as error:
            if attempt == attempts:
                raise
            print(f"Request for {username} failed ({error}); "
                  f"retrying ({attempt}/{attempts})")
        else:
            # Only server-side errors are worth retrying; 4xx will not change.
            if response.status_code < 500 or attempt == attempts:
                break
            print(f"HTTP {response.status_code} for {username}; "
                  f"retrying ({attempt}/{attempts})")
        # Back off a little longer after every failed attempt.
        time.sleep(DELAY_BETWEEN_REQUESTS * attempt)

    if not response.ok:
        # Surface the problem instead of silently storing an error payload.
        print(f"Warning: HTTP {response.status_code} for {username}; "
              f"returning the error payload")

    return response.json()

# Create the output folder up front so a missing directory fails fast,
# before any time is spent calling the API.
os.makedirs(BRONZE_FOLDER, exist_ok=True)

# One entry per user, in USERS order: whatever fetch_data returned for them.
user_info = []

for user in USERS:
    data = fetch_data(user)
    user_info.append(data)
    # Pause between users to go easy on the API.
    time.sleep(DELAY_BETWEEN_REQUESTS)

# Indexing [0] would raise IndexError if USERS were empty.
if user_info:
    print(f"Sample Data: {user_info[0]}")
print(f"Total users processed: {len(user_info)}")

# Save data file
bronze_file_path = f"{BRONZE_FOLDER}/{COLLECTION_DATE}_github_users.json"

# Save the data to JSON; the encoding is pinned so the file does not depend
# on the platform's default.
with open(bronze_file_path, 'w', encoding="utf-8") as file:
    json.dump(user_info, file)

print(f"Data saved to: {bronze_file_path}")

Sample Data: [{'id': 940929652, 'node_id': 'R_kgDOOBVydA', 'name': '1590A', 'full_name': 'torvalds/1590A', 'private': False, 'owner': {'login': 'torvalds', 'id': 1024025, 'node_id': 'MDQ6VXNlcjEwMjQwMjU=', 'avatar_url': 'https://avatars.githubusercontent.com/u/1024025?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/torvalds', 'html_url': 'https://github.com/torvalds', 'followers_url': 'https://api.github.com/users/torvalds/followers', 'following_url': 'https://api.github.com/users/torvalds/following{/other_user}', 'gists_url': 'https://api.github.com/users/torvalds/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/torvalds/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/torvalds/subscriptions', 'organizations_url': 'https://api.github.com/users/torvalds/orgs', 'repos_url': 'https://api.github.com/users/torvalds/repos', 'events_url': 'https://api.github.com/users/torvalds/events{/privacy}', 'received_events_url': 'https://api.github