# In [None]:
# -*- coding: utf-8 -*-
"""
FINAL SCRIPT FOR THE DATA SOURCE API ANALYST HOMEWORK
------------------------------------------------------
Author: Marco Antonio Esperón Pintos
Date: July 10, 2025

This script connects to the GitHub REST API to perform the following actions:
1. Search for public repositories based on a search term.
2. Fetch the most recent commits from a specific repository.
3. List the content in the root directory of that repository.

The script is designed to be modular and readable, and to handle basic errors.
"""

# ==============================================================================
# 1. LIBRARY IMPORTS
# ==============================================================================
import requests  # To perform HTTP requests to the API.
import json      # To format the output of JSON data.
from google.colab import userdata # To securely access credentials.

# ==============================================================================
# 2. CONFIGURATION AND AUTHENTICATION
# ==============================================================================
print("--- Starting extraction script ---")

# Load the GitHub token from Colab Secrets. A missing secret is reported but
# is not fatal at this point: GITHUB_TOKEN is set to None so later code can
# check for it.
try:
    GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')
    print("Authentication token loaded successfully.")
except userdata.SecretNotFoundError:
    print("ERROR: Secret 'GITHUB_TOKEN' not found.")
    print("Please ensure you have saved it in the Secrets manager (key icon).")
    GITHUB_TOKEN = None

# Define the API base URL and the headers that will be used in all requests.
BASE_URL = "https://api.github.com"
HEADERS = {
    "Accept": "application/vnd.github.v3+json",
    "X-GitHub-Api-Version": "2022-11-28",
}
# Attach credentials only when a token was actually loaded. Formatting a
# missing token into the header would send the literal text "token None",
# which is an invalid credential rather than an anonymous request.
if GITHUB_TOKEN:
    HEADERS["Authorization"] = f"token {GITHUB_TOKEN}"

# ==============================================================================
# 3. MODULAR EXTRACTION FUNCTIONS
# ==============================================================================

def search_repositories(search_term, per_page=5, timeout=10):
    """
    Searches for repositories on GitHub and returns the first result.

    Args:
        search_term: Query string sent as the ``q`` parameter.
        per_page: Number of results requested from the API.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        A ``(owner_login, repo_name)`` tuple for the first item in the
        response, or ``(None, None)`` when there are no items or the
        request/parsing failed (the error is printed, not raised).
    """
    print(f"\n[1] Searching for repositories with term: '{search_term}'...")

    endpoint = "/search/repositories"
    params = {'q': search_term, 'per_page': per_page}

    try:
        response = requests.get(
            f"{BASE_URL}{endpoint}",
            headers=HEADERS,
            params=params,
            timeout=timeout,
        )
        response.raise_for_status()  # Raises for failed requests (4xx or 5xx)
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"ERROR during search: {e}")
        return None, None

    print("Search successful!")

    # Decide on the items actually returned rather than on 'total_count':
    # a positive count with an empty or missing 'items' list would otherwise
    # crash on the [0] lookup.
    items = data.get('items') or []
    if not items:
        print("    -> No repositories found for that term.")
        return None, None

    first_repo = items[0]
    owner = first_repo['owner']['login']
    repo_name = first_repo['name']
    print(f"    -> Repository found: {owner}/{repo_name}")
    return owner, repo_name

def get_commits(owner, repo_name, per_page=5, timeout=10):
    """
    Fetches the most recent commits from a specific repository and prints
    the first line of each commit message.

    Args:
        owner: Login of the repository owner.
        repo_name: Name of the repository.
        per_page: Number of commits requested from the API.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        None. Does nothing when ``owner`` or ``repo_name`` is falsy; request
        and parsing errors are printed rather than raised.
    """
    if not owner or not repo_name:
        return

    print(f"\n[2] Fetching the last {per_page} commits from {owner}/{repo_name}...")
    endpoint = f"/repos/{owner}/{repo_name}/commits"
    params = {'per_page': per_page}

    try:
        response = requests.get(
            f"{BASE_URL}{endpoint}",
            headers=HEADERS,
            params=params,
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"ERROR fetching commits: {e}")
        return

    print("Commits fetched successfully!")
    print("    -> Displaying commit messages:")
    for position, commit in enumerate(data, start=1):
        # Only the first line (the subject) of the commit message is shown.
        message = commit['commit']['message'].split('\n')[0]
        print(f"       {position}. {message}")

def get_contents(owner, repo_name, timeout=10):
    """
    Fetches the list of files and directories from the root of a repository
    and prints each entry's name and type.

    Args:
        owner: Login of the repository owner.
        repo_name: Name of the repository.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        None. Does nothing when ``owner`` or ``repo_name`` is falsy; request
        and parsing errors are printed rather than raised.
    """
    if not owner or not repo_name:
        return

    print(f"\n[3] Fetching root contents from {owner}/{repo_name}...")
    endpoint = f"/repos/{owner}/{repo_name}/contents"

    try:
        response = requests.get(
            f"{BASE_URL}{endpoint}",
            headers=HEADERS,
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"❌ ERROR fetching contents: {e}")
        return

    print("Contents fetched successfully!")
    print("    -> Listing files and directories:")
    for item in data:
        print(f"       - {item['name']} (Type: {item['type']})")

# ==============================================================================
# 4. MAIN EXECUTION BLOCK
# ==============================================================================
if __name__ == "__main__":
    # The extraction sequence only runs when a token was loaded.
    if GITHUB_TOKEN:
        # Search term for the repository lookup; edit to try another query.
        SEARCH_TERM = "pandas"

        # Step 1: find a repository to inspect.
        owner, repo = search_repositories(SEARCH_TERM)

        # Steps 2 and 3 run only when the search produced a repository.
        if all((owner, repo)):
            get_commits(owner, repo)
            get_contents(owner, repo)

        print("\n--- Extraction script finished ---")