In [1]:
import os

# Read the EIA API key from the environment so the secret never has to be
# typed into (and saved with) the notebook. When EIA_API_KEY is unset this
# falls back to the empty string the cell used before.
api_key = os.environ.get("EIA_API_KEY", "")

In [3]:
import requests
import json
import time

# Download hourly fuel-type generation rows from the EIA v2 API one page at a
# time and collect them in `all_data`.
#
# The loop stops when: a request fails, the server answers with a non-200
# status, the body cannot be interpreted, a page comes back empty, or a page
# holds fewer rows than `page_size` (i.e. it was the last one).
base_url = "https://api.eia.gov/v2/electricity/rto/fuel-type-data/data/"

# Pagination settings
page_size = 5000
offset = 0

# Seconds to wait on the server before abandoning a request. Without a
# timeout `requests` waits indefinitely and a stalled connection would hang
# the cell.
request_timeout = 60

# Seconds to pause between pages to avoid overwhelming the API
request_pause = 3

# This list will store all of the retrieved data
all_data = []

# Query-string settings that are the same for every page.
# NOTE(review): rows are ordered by `period` only. If several rows share a
# period, their relative order across offset-based pages may not be
# guaranteed -- confirm, and consider adding tie-breaking sort columns.
# NOTE(review): "end" is 2024-12-31T00; confirm whether the remaining hours
# of 31 December are meant to be excluded.
fixed_params = {
    "api_key": api_key,  # Include the API key here
    "frequency": "hourly",
    "data[0]": "value",
    "start": "2024-01-01T00",
    "end": "2024-12-31T00",
    "sort[0][column]": "period",
    "sort[0][direction]": "desc",
}

# The same settings in the structured form sent through the X-Params header.
fixed_header_params = {
    "frequency": "hourly",
    "data": ["value"],
    "facets": {},
    "start": "2024-01-01T00",
    "end": "2024-12-31T00",
    "sort": [{"column": "period", "direction": "desc"}],
}

while True:
    # Only the paging window changes from one request to the next.
    params = {**fixed_params, "offset": offset, "length": page_size}
    header_params = {**fixed_header_params, "offset": offset, "length": page_size}
    headers = {"X-Params": json.dumps(header_params)}

    print(f"Requesting rows with offset {offset}...")

    # Make the GET request
    try:
        response = requests.get(
            base_url, params=params, headers=headers, timeout=request_timeout
        )
    except requests.RequestException as exc:
        # Report only the exception type: the message can contain the full
        # request URL, which includes the API key.
        print(f"Error: request failed ({type(exc).__name__})")
        break

    # Check for a successful response
    if response.status_code != 200:
        print(f"Error: Received status code {response.status_code}")
        print("Response:", response.text)
        break

    # Parse the JSON response into Python objects
    try:
        json_response = response.json()
    except ValueError:
        print("Error: response body is not valid JSON")
        print("Response:", response.text)
        break

    # The data rows are expected under response -> data. A body without a
    # "response" object is reported as an error rather than being mistaken
    # for the end of the data.
    response_body = (
        json_response.get("response") if isinstance(json_response, dict) else None
    )
    if not isinstance(response_body, dict):
        print('Error: JSON body has no "response" object')
        print("Response:", response.text)
        break

    page_data = response_body.get("data", [])

    # If no data is returned, we've reached the end
    if not page_data:
        print("No more data returned; exiting pagination loop.")
        break

    # Add the page of data to our main list
    all_data.extend(page_data)
    print(f"Retrieved {len(page_data)} rows (total so far: {len(all_data)})")

    # If fewer rows than the page size were returned, this was the last page.
    if len(page_data) < page_size:
        print("Last page reached.")
        break

    # Increase the offset by the page size for the next iteration
    offset += page_size

    # Pause between requests to avoid overwhelming the API
    time.sleep(request_pause)

print("Finished retrieving data.")
print("Total rows fetched:", len(all_data))

Requesting rows with offset 0...
Retrieved 5000 rows (total so far: 5000)
Requesting rows with offset 5000...
Retrieved 5000 rows (total so far: 10000)
Requesting rows with offset 10000...
Retrieved 5000 rows (total so far: 15000)
Requesting rows with offset 15000...
Retrieved 5000 rows (total so far: 20000)
Requesting rows with offset 20000...
Retrieved 5000 rows (total so far: 25000)
Requesting rows with offset 25000...
Retrieved 5000 rows (total so far: 30000)
Requesting rows with offset 30000...
Retrieved 5000 rows (total so far: 35000)
Requesting rows with offset 35000...
Retrieved 5000 rows (total so far: 40000)
Requesting rows with offset 40000...
Retrieved 5000 rows (total so far: 45000)
Requesting rows with offset 45000...
Retrieved 5000 rows (total so far: 50000)
Requesting rows with offset 50000...
Retrieved 5000 rows (total so far: 55000)
Requesting rows with offset 55000...
Retrieved 5000 rows (total so far: 60000)
Requesting rows with offset 60000...
Retrieved 5000 rows (

In [5]:
from pathlib import Path

import pandas as pd

# Persist every fetched row as a single parquet file.
# NOTE(review): the request above covers 2024 ("start"/"end" are 2024 dates)
# while the file name says 2025 -- confirm which is intended. The path is
# left unchanged so existing readers keep working.
output_path = Path("data/eia_hourly_generation_2025.parquet")

# Create the target directory first; writing fails when it does not exist.
output_path.parent.mkdir(parents=True, exist_ok=True)

pd.DataFrame(all_data).to_parquet(output_path)