In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
import requests

In [3]:
# The PVOutput API key is a secret, so it is read from the environment instead
# of being stored in the notebook.
# NOTE(review): a literal key used to be committed on this line; treat that key
# as compromised and regenerate it if this notebook was ever shared.
API_KEY = os.environ.get("PVOUTPUT_API_KEY", "")
if not API_KEY:
    # Warn up front: without a key every later request is sent with an empty
    # X-Pvoutput-Apikey header.
    print("WARNING: PVOUTPUT_API_KEY is not set; PVOutput requests will not be authenticated.")

In [5]:
# Systems to download, written as (name, system_id, location) rows and expanded
# into the dict shape the rest of the notebook reads.
systems = [
    {"name": name, "system_id": system_id, "location": location}
    for name, system_id, location in (
        ("siris-D-500kW", "90854", "Greece"),
        ("Schipper Power - Three", "67510", "Netherlands"),
        ("Pentaari", "74198", "Greece"),
        ("MAWC CocaCola", "43531", "Maldives"),
        ("Wannon Water Wbool", "63726", "Australia"),
        ("DeCrescent 178kW PV", "29714", "US"),
        ("FRECON-HeatLava", "47085", "Thailand"),
        ("Nugali", "71881", "Brazil"),
        ("HTL1 Klagenfurt", "53361", "Austria"),
        ("Wray Farms", "11542", "UK"),
        ("Feuerwehr", "34873", "Switzerland"),
    )
]

In [7]:
# Length of the download window, in days, counted back from the moment this
# cell runs. Tune this value to fetch a shorter or longer history.
LOOKBACK_DAYS = 30

# The window is relative to "now", so re-running on another day requests
# different dates. Both ends are datetimes carrying the current time of day.
end_date = datetime.today()
start_date = end_date - timedelta(days=LOOKBACK_DAYS)

In [9]:
def _build_record(system, fields):
    """Turn one comma-split response row into a flat record dict.

    Field positions used: 0 -> date, 1 -> energy (int), 3 -> system size (int),
    5 -> temperature and 6 -> voltage (floats, ``None`` when the field is empty).

    Raises:
        IndexError: the row has fewer than seven fields.
        ValueError: a numeric field cannot be parsed.
    """
    # NOTE(review): these positions are carried over unchanged; confirm them
    # against the documented response layout of the PVOutput service being
    # called, since the labels here may not match what the service returns.
    record = {
        "system_name": system["name"],
        "system_id": system["system_id"],
        "location": system["location"],
        "date": fields[0],
        "energy_wh": int(fields[1]),
        "system_size_w": int(fields[3]),
        "temperature_c": float(fields[5]) if fields[5] else None,
        "voltage_v": float(fields[6]) if fields[6] else None
    }
    # Normalize energy by system size; a non-positive size has no meaningful ratio.
    if record["system_size_w"] > 0:
        record["energy_kwh_per_kw"] = (record["energy_wh"] / 1000) / (record["system_size_w"] / 1000)
    else:
        record["energy_kwh_per_kw"] = None
    return record


def fetch_daily_data(system, start, end, timeout=30):
    """Request one record per day for ``system`` from ``start`` through ``end``.

    Args:
        system: dict with "name", "system_id" and "location" keys.
        start: first day to request (anything supporting ``strftime`` and
            ``timedelta`` arithmetic, e.g. a ``datetime``).
        end: last day to request; the range is inclusive.
        timeout: seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        A list of record dicts, one per day that returned a parseable row.
        Days that fail (network error, non-200 status, empty body, unparseable
        row) are reported with ``print`` and skipped. A 401/403 response stops
        the loop early and returns whatever was collected so far.
    """
    headers = {
        "X-Pvoutput-Apikey": API_KEY,
        "X-Pvoutput-SystemId": system["system_id"]
    }
    url = "https://pvoutput.org/service/r2/getoutput.jsp"
    records = []

    # Number of day steps that satisfy start + k days <= end (none if end < start).
    day_count = (end - start).days + 1

    # One session reuses the HTTPS connection across all daily requests.
    with requests.Session() as session:
        session.headers.update(headers)

        for offset in range(day_count):
            date_str = (start + timedelta(days=offset)).strftime('%Y%m%d')

            # NOTE(review): confirm the query parameter name against the PVOutput
            # API spec; the Get Output service may expect a date range (df/dt)
            # rather than `date`, which would also allow one request per system.
            try:
                response = session.get(url, params={"date": date_str}, timeout=timeout)
            except requests.RequestException as exc:
                # A transient network failure should cost one day, not the whole run.
                print(f"Request failed for {system['name']} on {date_str}: {exc}")
                continue

            body = response.text.strip()

            if response.status_code in (401, 403):
                # The server refused the request itself; repeating it for every
                # remaining day would only produce the same refusal.
                print(
                    f"Access denied for {system['name']} on {date_str} "
                    f"(HTTP {response.status_code}): {body}"
                )
                break

            if response.status_code != 200 or not body:
                # Include the status so a server error is not mistaken for a day
                # that simply has no data.
                print(f"No data for {system['name']} on {date_str} (HTTP {response.status_code})")
                continue

            try:
                records.append(_build_record(system, body.split(',')))
            except (IndexError, ValueError) as e:
                print(f"Error parsing data for {system['name']} on {date_str}: {e}")

    return records

In [11]:
# Gather the daily records of every configured system into one flat list.
# Each system is fetched in turn over the same start_date..end_date window.
all_records = []
for system in systems:
    print(f"Fetching data for {system['name']}...")
    all_records += fetch_daily_data(system, start_date, end_date)

Fetching data for siris-D-500kW...
No data for siris-D-500kW on 20250225
No data for siris-D-500kW on 20250226
No data for siris-D-500kW on 20250227
No data for siris-D-500kW on 20250228
No data for siris-D-500kW on 20250301
No data for siris-D-500kW on 20250302
No data for siris-D-500kW on 20250303
No data for siris-D-500kW on 20250304
No data for siris-D-500kW on 20250305
No data for siris-D-500kW on 20250306
No data for siris-D-500kW on 20250307
No data for siris-D-500kW on 20250308
No data for siris-D-500kW on 20250309
No data for siris-D-500kW on 20250310
No data for siris-D-500kW on 20250311
No data for siris-D-500kW on 20250312
No data for siris-D-500kW on 20250313
No data for siris-D-500kW on 20250314
No data for siris-D-500kW on 20250315
No data for siris-D-500kW on 20250316
No data for siris-D-500kW on 20250317
No data for siris-D-500kW on 20250318
No data for siris-D-500kW on 20250319
No data for siris-D-500kW on 20250320
No data for siris-D-500kW on 20250321
No data for sir

KeyboardInterrupt: 

In [None]:
# Assemble every fetched record into one table and persist it as CSV
# (without the index column).
df_all = pd.DataFrame(data=all_records)
df_all.to_csv(path_or_buf="pvoutput_combined_normalized.csv", index=False)
# Single print call; the newline separator yields the banner line followed by
# the first rows of the table.
print("✅ Done! Preview:", df_all.head(), sep="\n")