In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

# === API SETTINGS ===
BASE_URL = "https://api.elhub.no/energy-data/v0/price-areas"
DATASET = "PRODUCTION_PER_GROUP_MBA_HOUR"

#== FUNCTION TO FORMAT DATES ===
def format_date(dt_obj):
    """Formats datetime with timezone offset for Elhub (%2B02:00).""" 
    return dt_obj.strftime("%Y-%m-%dT%H:%M:%S%%2B02:00") # formatted for URL encoding. The time is always in +02:00 for MBA data. Which is summertime all year round.

all_records = []

# === FETCH EACH MONTH OF 2021 ===
for month in range(1, 13):
    start = datetime(2021, month, 1)
    next_month = (start + timedelta(days=32)).replace(day=1)
    end = next_month - timedelta(seconds=1)

    start_str = format_date(start)
    end_str = format_date(end)

    url = f"{BASE_URL}?dataset={DATASET}&startDate={start_str}&endDate={end_str}"
    print(f"=== Fetching {start.date()} → {end.date()} ===")

    response = requests.get(url)
    if response.status_code != 200:
        print(f"❌ Error {response.status_code}")
        continue

    data = response.json()
    month_records = []

    for entry in data.get("data", []):
        attrs = entry.get("attributes", {})
        recs = attrs.get("productionPerGroupMbaHour", [])
        # Filter out placeholders. Filtering them out ensures your DataFrame contains only meaningful production data (real energy values per group and price area).
        recs = [r for r in recs if r.get("productionGroup") != "*"]
        month_records.extend(recs)

    all_records.extend(month_records)
    print(f"✅ {len(month_records)} records added")

    # Be nice to the API
    time.sleep(1)

print(f"\nTotal records collected: {len(all_records)}")


=== Fetching 2021-01-01 → 2021-01-31 ===
✅ 17856 records added
=== Fetching 2021-02-01 → 2021-02-28 ===
✅ 16128 records added
=== Fetching 2021-03-01 → 2021-03-31 ===
✅ 17832 records added
=== Fetching 2021-04-01 → 2021-04-30 ===
✅ 17280 records added
=== Fetching 2021-05-01 → 2021-05-31 ===
✅ 17856 records added
=== Fetching 2021-06-01 → 2021-06-30 ===
✅ 17976 records added
=== Fetching 2021-07-01 → 2021-07-31 ===
✅ 18600 records added
=== Fetching 2021-08-01 → 2021-08-31 ===
✅ 18600 records added
=== Fetching 2021-09-01 → 2021-09-30 ===
✅ 18000 records added
=== Fetching 2021-10-01 → 2021-10-31 ===
✅ 18625 records added
=== Fetching 2021-11-01 → 2021-11-30 ===
✅ 18000 records added
=== Fetching 2021-12-01 → 2021-12-31 ===
✅ 18600 records added

Total records collected: 215353


In [10]:
# === CONVERT TO DATAFRAME ===
df = pd.DataFrame(all_records)

# Convert data types
df['startTime'] = pd.to_datetime(df['startTime'], utc=True) # Ensure UTC timezone
df['endTime'] = pd.to_datetime(df['endTime'], utc=True)
df['quantityKwh'] = pd.to_numeric(df['quantityKwh'], errors='coerce')

# Keep only relevant columns
df = df[['priceArea', 'productionGroup', 'startTime', 'quantityKwh']]

# Sort by time for readability
df.sort_values('startTime', inplace=True)

# Optional: set startTime as index
df.set_index('startTime', inplace=True)

print(df.info())
# Print first 50 rows
print(df.head(50))


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 215353 entries, 2020-12-31 23:00:00+00:00 to 2021-12-31 22:00:00+00:00
Data columns (total 3 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   priceArea        215353 non-null  object 
 1   productionGroup  215353 non-null  object 
 2   quantityKwh      215353 non-null  float64
dtypes: float64(1), object(2)
memory usage: 6.6+ MB
None
                          priceArea productionGroup  quantityKwh
startTime                                                       
2020-12-31 23:00:00+00:00       NO1           hydro  2507716.800
2020-12-31 23:00:00+00:00       NO2           other        4.346
2020-12-31 23:00:00+00:00       NO5           solar        3.720
2020-12-31 23:00:00+00:00       NO2            wind      706.206
2020-12-31 23:00:00+00:00       NO3           hydro  2836774.000
2020-12-31 23:00:00+00:00       NO4            wind   381065.000
2020-12-31 23:00:00+00:00       