# API Mario

## Imports and Configuration

In [None]:
import requests
import time
from tqdm import tqdm
import os
from dotenv import load_dotenv
import pandas as pd


# Load environment variables from a .env file into the process environment.
load_dotenv()
# Fail fast when no '.env' exists. The path is relative, so this checks the
# current working directory (for a notebook, normally the folder it was
# launched from), not necessarily the directory containing this file.
# NOTE(review): the check runs after load_dotenv(); presumably load_dotenv()
# does not raise on a missing file - confirm against python-dotenv docs.
if not os.path.exists('.env'):
    raise FileNotFoundError("The .env file is missing. Please create it with the required API key.")


# --- Configuration ---
# API key read from the GIANT_BOMB_API_KEY environment variable; os.getenv
# returns None when the variable is not set.
API_KEY = os.getenv('GIANT_BOMB_API_KEY')
# Root of the Giant Bomb REST API; endpoint paths are appended to it.
BASE_URL = "https://www.giantbomb.com/api"
# Output CSV path, relative to the current working directory.
CSV_FILENAME = "data/general_character_relationships.csv"
# Headers sent with every request; identifies this client via User-Agent.
HEADERS = {
    'User-Agent': 'PythonMarioCharacterScraperJupyter/1.0'
}
# Delay between API calls to respect rate limits (seconds)
API_CALL_DELAY = 1.0

## Helper Functions

### Fetch API Data

In [3]:
def _redact_api_key(text):
    """Return ``str(text)`` with the configured API key masked for safe printing."""
    text = str(text)
    return text.replace(API_KEY, '***') if API_KEY else text


def fetch_api_data(url, params=None, timeout=30):
    """
    Fetches data from the Giant Bomb API.

    Args:
        url: Full endpoint URL to request.
        params: Optional dict of extra query parameters. It is copied, so the
            caller's dict is never modified.
        timeout: Seconds to wait for the server before giving up (passed
            straight to ``requests.get``).

    Returns:
        The decoded JSON response, or None if any request or decoding error
        occurs. Errors are reported with ``print`` rather than raised.
    """
    # Work on a copy: injecting the key into the caller's dict would leak it
    # into whatever the caller does with that dict afterwards.
    query = dict(params or {})
    query['api_key'] = API_KEY
    query['format'] = 'json'

    try:
        # Without a timeout requests can block forever on a stalled connection.
        response = requests.get(url, headers=HEADERS, params=query, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4XX or 5XX)
    except requests.exceptions.HTTPError as http_err:
        # Both the exception text and response.url contain the query string,
        # which includes the API key, so mask it before printing.
        print(f"HTTP error occurred: {_redact_api_key(http_err)} - URL: {_redact_api_key(response.url)}")
        return None
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred: {_redact_api_key(conn_err)} - URL: {url}")
        return None
    except requests.exceptions.Timeout as timeout_err:
        print(f"Timeout error occurred: {_redact_api_key(timeout_err)} - URL: {url}")
        return None
    except requests.exceptions.RequestException as req_err:
        print(f"An error occurred during the API request: {_redact_api_key(req_err)} - URL: {url}")
        return None

    # Decode separately so a malformed body is always reported as a JSON error
    # and `response` is guaranteed to be bound here.
    try:
        return response.json()
    except ValueError as json_err:  # Includes JSONDecodeError
        print(f"JSON decoding error: {_redact_api_key(json_err)} - URL: {_redact_api_key(response.url)}")
        return None

### Get Mario Franchise Characters

In [4]:
def get_mario_franchise_characters():
    """
    Fetches the list of characters (name and api_detail_url) for the Mario franchise.

    Performs two API calls: a filtered franchise search, then a fetch of the
    matching franchise's detail record. Progress and failures are printed.

    Returns:
        A list of character stubs from the franchise detail record, or an
        empty list if the franchise cannot be found, a request fails, or the
        response does not have the expected shape.
    """
    print("Step 1: Searching for the 'Mario' franchise...")
    franchises_url = f"{BASE_URL}/franchises/"
    params = {'filter': 'name:Mario'}

    data = fetch_api_data(franchises_url, params)
    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        print("Could not find the 'Mario' franchise or API error.")
        return []

    # The filter can match several franchises; keep the one named exactly
    # "Mario". `or ''` guards against a name that is present but null.
    mario_franchise_info = next(
        (
            franchise
            for franchise in results
            if isinstance(franchise, dict)
            and (franchise.get('name') or '').lower() == 'mario'
        ),
        None,
    )

    if not mario_franchise_info or not mario_franchise_info.get('api_detail_url'):
        print("Specific 'Mario' franchise not found or it lacks an API detail URL.")
        return []

    print(f"Found franchise: {mario_franchise_info['name']} (ID: {mario_franchise_info.get('id', 'N/A')})")
    print(f"Step 2: Fetching detailed franchise data to get character list from {mario_franchise_info['api_detail_url']}...")
    time.sleep(API_CALL_DELAY)

    franchise_details_data = fetch_api_data(mario_franchise_info['api_detail_url'])
    details = (
        franchise_details_data.get('results')
        if isinstance(franchise_details_data, dict)
        else None
    )
    # A detail record must be a dict; anything else (missing, or a list)
    # cannot be queried for characters.
    if not isinstance(details, dict):
        print("Could not fetch detailed data for the Mario franchise.")
        return []

    # `or []` also covers the key being present with a null value.
    characters = details.get('characters') or []
    if not characters:
        print("No characters listed under the Mario franchise details.")
    else:
        print(f"Found {len(characters)} characters associated with the Mario franchise.")
    return characters

### Get Character Relationships

In [5]:
def get_character_relationships(character_stub):
    """
    Fetches and returns relationships (friends, enemies) for a single character.

    Args:
        character_stub: dict with at least 'name' and 'api_detail_url'.

    Returns:
        A list of tuples (character_name, related_character_name,
        relationship_type), friends first and then enemies, where
        relationship_type is "Friend" or "Enemy". Returns an empty list when
        the stub has no detail URL, the request fails, or the response lacks
        a usable 'results' record.
    """
    char_name = character_stub.get('name')
    char_api_url = character_stub.get('api_detail_url')

    if not char_api_url:
        return []

    # Throttle before every detail request to respect the API rate limit.
    time.sleep(API_CALL_DELAY)

    # No 'field_list' is passed, so the full character record is requested.
    # Request errors are reported by fetch_api_data, which then returns None.
    char_details_data = fetch_api_data(char_api_url)

    char_info = (
        char_details_data.get('results')
        if isinstance(char_details_data, dict)
        else None
    )
    if not isinstance(char_info, dict):
        return []

    relationships = []
    for field, relationship_type in (('friends', "Friend"), ('enemies', "Enemy")):
        # `or []` handles the field being present but null, which a plain
        # .get(field, []) default does not cover.
        for related in char_info.get(field) or []:
            related_name = related.get('name') if isinstance(related, dict) else None
            if related_name:
                relationships.append((char_name, related_name, relationship_type))

    return relationships

### Fetch Data and Write to CSV

Each run processes up to 195 characters (one batch), so update `BATCH_NUMBER` in the cell below before each run.

In [11]:
# --- Batch Configuration ---
# The character list is processed in fixed-size slices, one slice per run.
# In this case, we have about 200 characters, so we can use 2 batches in total.
# BATCH_NUMBER is 1-based: batch N starts at index
# (N - 1) * MAX_CHARACTERS_PER_BATCH of the full character list.
# CHANGE THIS NUMBER FOR EACH RUN (e.g., 1, 2, ...)
BATCH_NUMBER = 2  # 1 or 2
# Maximum number of characters fetched in a single run.
MAX_CHARACTERS_PER_BATCH = 195

In [12]:
# Select the slice of Mario characters to process in this run.
#
# Always start from an empty selection: the processing cell reads
# `mario_character_stubs_to_process`, and without this reset an error path
# below would leave it undefined (NameError) or, in a live notebook, still
# holding the selection from a previous run.
mario_character_stubs_to_process = []

if API_KEY == "YOUR_API_KEY" or not API_KEY:  # Check if API_KEY is placeholder or empty
    print("ERROR: Please set your actual API_KEY in the configuration cell and re-run it.")
elif BATCH_NUMBER < 1:
    # A batch number below 1 would produce a negative start index and an
    # empty or wrong slice, so reject it explicitly.
    print(f"ERROR: BATCH_NUMBER must be 1 or greater (got {BATCH_NUMBER}).")
else:
    mario_character_stubs_full = get_mario_franchise_characters()

    if not mario_character_stubs_full:
        print("No characters found for the Mario franchise. Exiting.")
    else:
        # Batches are 1-based, contiguous, fixed-size windows over the list.
        start_index = (BATCH_NUMBER - 1) * MAX_CHARACTERS_PER_BATCH
        end_index = start_index + MAX_CHARACTERS_PER_BATCH

        total_characters = len(mario_character_stubs_full)
        print(f"Total characters available: {total_characters}")

        if start_index >= total_characters:
            print(f"Start index ({start_index}) is beyond the total number of characters. No characters to process in this run.")
        else:
            # Slicing clamps end_index, so the last batch may be shorter.
            mario_character_stubs_to_process = mario_character_stubs_full[start_index:end_index]
            print(f"Processing Batch #{BATCH_NUMBER}: Characters from index {start_index} to {min(end_index, total_characters)-1} (up to {len(mario_character_stubs_to_process)} characters).")

Step 1: Searching for the 'Mario' franchise...
Found franchise: Mario (ID: 1)
Step 2: Fetching detailed franchise data to get character list from https://www.giantbomb.com/api/franchise/3025-1/...
Found 209 characters associated with the Mario franchise.
Total characters available: 209
Processing Batch #2: Characters from index 195 to 208 (up to 14 characters).


In [None]:
# Fetch relationships for every character in the selected batch and append
# them to the CSV named by CSV_FILENAME.
# NOTE: rows are appended (mode='a'), so re-running the same batch writes the
# same relationships to the file again.
if not mario_character_stubs_to_process:
    print("No characters selected for processing in this batch. Exiting.")
else:
    # Ensure the output directory from CSV_FILENAME (defined in the config cell) exists.
    output_dir = os.path.dirname(CSV_FILENAME)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"\nStep 3: Fetching relationships for {len(mario_character_stubs_to_process)} characters...")

    all_relationships_data = []

    with tqdm(total=len(mario_character_stubs_to_process), desc="Processing Characters") as pbar:
        for char_stub in mario_character_stubs_to_process:
            char_name_display = char_stub.get('name') or 'Unknown'
            # Truncate long names so the progress bar stays on one line.
            pbar.set_description(f"Processing: {char_name_display[:20]}")

            # List of tuples: (char_name, related_char_name, relationship_type)
            char_relationships_full = get_character_relationships(char_stub)
            all_relationships_data.extend(char_relationships_full)

            pbar.set_postfix_str(f"Found: {len(char_relationships_full)} rels")
            pbar.update(1)

    # Every stub in the batch is visited exactly once by the loop above.
    processed_character_count = len(mario_character_stubs_to_process)
    print(f"\nFinished processing batch of {processed_character_count} characters.")

    if all_relationships_data:
        print(f"Step 4: Writing/Appending {len(all_relationships_data)} total relationships to '{CSV_FILENAME}'...")
        try:
            # Create a pandas DataFrame from the collected data
            df = pd.DataFrame(all_relationships_data, columns=['Character', 'RelatedCharacter', 'RelationshipType'])

            file_exists = os.path.exists(CSV_FILENAME)
            # Write the header when the file is new OR exists but is empty;
            # checking existence alone would leave an empty file headerless.
            write_header = not file_exists or os.path.getsize(CSV_FILENAME) == 0

            # mode='a' appends to the file (creating it if necessary).
            df.to_csv(CSV_FILENAME, mode='a', index=False, header=write_header, encoding='utf-8')

            if not file_exists:
                print(f"Successfully created and saved relationships to '{CSV_FILENAME}'.")
            else:
                print(f"Successfully appended new relationships to '{CSV_FILENAME}'.")

        except OSError as e:
            # File-system failures (permissions, disk full, bad path) are
            # reported instead of aborting the notebook cell.
            print(f"Error writing aggregated CSV file: {e}")
    else:
        print("No new relationships found in this batch to save.")


Step 3: Fetching relationships for 14 characters...


Processing: Whomp King: 100%|██████████| 14/14 [00:24<00:00,  1.76s/it, Found: 0 rels]        


Finished processing batch of 14 characters.
Step 4: Writing/Appending 115 total relationships to 'general_character_relationships.csv'...
Successfully appended new relationships to 'general_character_relationships.csv'.



