In [1]:
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError
from tqdm.notebook import tqdm
import pandas as pd
import os
from dotenv import load_dotenv
from retrying import retry

In [2]:
load_dotenv()  # load env file
entsoe_key = os.getenv("ENTSOE_API_KEY")  # Get API keys from .env file

# Instantiate the ENTSOE Client
client = EntsoePandasClient(api_key=entsoe_key)

In [3]:
# Define the time range
start = pd.Timestamp("20240101", tz="UTC")
end = pd.Timestamp("20240103", tz="UTC")

# Define the countries AT BE HR CZ FR DE HU LU NL PL RO SK SI   
# HR causing problems in 2019
# SK problems in July of 2024, installed gen cap also early 2024
countries = ["AT", "BE", "HR", "CZ", "FR", "DE_LU", "HU","NL", "PL", "RO", "SK", "SI", "PL", "CH"]

In [4]:
@retry(stop_max_attempt_number=5, wait_fixed=5000)
def query_and_save(query_func, filename_template, countries=countries, start=start, end=end, **kwargs):
    """
    Query data for multiple countries and save to CSV files.

    Args:
        query_func (callable): The function to use for querying data.
        countries (list): List of country codes to query data for.
        start (datetime): The start date/time for the query.
        end (datetime): The end date/time for the query.
        filename_template (str): Template for the filename to save the CSV data.
        **kwargs: Additional keyword arguments to pass to the query function.
    """
    no_data_countries = []
    error_countries = []
    nan_summary = {}
    successful_queries = 0

    dir_name = filename_template.split('_{}')[0]
    subdir = os.path.join("raw_data", dir_name)
    os.makedirs(subdir, exist_ok=True)

    for country in tqdm(countries, desc=f"Processing {query_func.__name__}"):
        try:
            data = query_func(country_code=country, start=start, end=end, **kwargs)
            
            df_out = pd.DataFrame(data)
            
            nan_count = df_out.isna().sum().sum()
            total_count = df_out.size
            nan_summary[country] = (nan_count, total_count)
            
            filename = filename_template.format(country)
            filepath = os.path.join(subdir, filename)
            df_out.to_csv(filepath, index_label='date')

            successful_queries += 1

        except NoMatchingDataError:
            no_data_countries.append(country)
        except Exception as e:
            tqdm.write(f"Error querying data for {country}: {e}")
            error_countries.append(country)

    print("\n Summary:")
    for country, (nan_count, total_count) in nan_summary.items():
        if nan_count > 0:
            print(f"{country}: {nan_count} NaNs out of {total_count} datapoints")

    print(f"\nSuccessful queries: {successful_queries} / {len(countries)} countries")

    if no_data_countries:
        print(f"\nNo matching data for: {', '.join(no_data_countries)}")
    
    if error_countries:
        print(f"\nErrors occurred for: {', '.join(error_countries)}")

## Data availability test

### Day-ahead prices

In [5]:
query_and_save(
    query_func=client.query_day_ahead_prices,
    filename_template="day_ahead_prices_{}.csv",
)

Processing query_day_ahead_prices:   0%|          | 0/14 [00:00<?, ?it/s]


 Summary:

Successful queries: 14 out of 14 countries


### Load forecast

In [6]:
query_and_save(
    query_func=client.query_load_forecast,
    filename_template="load_forecast_{}.csv",
)

Processing query_load_forecast:   0%|          | 0/14 [00:00<?, ?it/s]

Connection Error, retrying in 0 seconds


Error querying data for PL: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

 Summary:

Successful queries: 13 out of 14 countries

Errors occurred for: PL


### Generation forecast

In [7]:
query_and_save(
    query_func=client.query_generation_forecast,
    filename_template="generation_forecast_{}.csv",
)

Processing query_generation_forecast:   0%|          | 0/14 [00:00<?, ?it/s]


 Summary:

Successful queries: 14 out of 14 countries


### Wind and solar forecast

In [8]:
query_and_save(
    query_func=client.query_wind_and_solar_forecast,
    filename_template="wind_and_solar_forecast_{}.csv",
)

Processing query_wind_and_solar_forecast:   0%|          | 0/14 [00:00<?, ?it/s]


 Summary:

Successful queries: 14 out of 14 countries


### Installed Generation Capacity

In [9]:
query_and_save(
    query_func=client.query_installed_generation_capacity,
    filename_template="installed_generation_capacity_{}.csv",
)

Processing query_installed_generation_capacity:   0%|          | 0/14 [00:00<?, ?it/s]


 Summary:

Successful queries: 13 out of 14 countries

No matching data for: SK
