# Imports

In [1]:
import os

import numpy as np
import pandas as pd
import xarray as xr

# Load data

In [2]:
# Relative path to the NetCDF file holding the hydro profiles checked in this notebook.
fn = "../resources/test-sector-tyndp/profile_hydro_tyndp.nc"

In [3]:
# Open the profile file as an xarray Dataset; later cells read `ds.bus`,
# `ds.hydro_tech` and `ds["profile"]` from it.
ds = xr.open_dataset(fn)

In [4]:
# Directory holding the per-year PEMMDB2 workbooks that `load_capacities` reads.
pemmdb_dir = (
    "../../../data/2024/20250313_ENTSO-E_ENTSOG_TYNDP_2024_Scenarios_Inputs/PEMMDB2/"
)

In [5]:
ds.hydro_tech.values

array(['PS Closed', 'PS Open', 'Pondage', 'Reservoir', 'Run of River'],
      dtype='<U12')

In [6]:
def load_capacities(year, ds, base_dir=None):
    """
    Load the hydro turbining capacities from the PEMMDB workbooks of one year.

    For every bus in ``ds.bus`` (with "GB" replaced by "UK" in the name) the
    workbook ``<base_dir>/<year>/PEMMDB_<bus>_NationalTrends_<year>.xlsx`` is
    looked up. If it exists, its "Hydro" sheet is read (first row skipped),
    transposed and indexed by its "Market Node:" entry.

    Parameters
    ----------
    year : int or str
        Year used for both the sub-folder and the workbook file name.
    ds : object
        Dataset-like object exposing the bus names as ``ds.bus.values``.
    base_dir : str or os.PathLike, optional
        Directory containing the per-year workbook folders. Defaults to the
        notebook-level ``pemmdb_dir``.

    Returns
    -------
    capacities : pandas.DataFrame
        The five "Total turbining capacity (MW)" columns, indexed by the
        (renamed) bus name.
    missing : list of str
        Bus names for which no workbook was found.

    Raises
    ------
    ValueError
        If no workbook at all was found for ``year``.
    """
    # Only touch the notebook global when the caller did not pass a directory.
    root = pemmdb_dir if base_dir is None else base_dir

    capacity_columns = [
        "Run of River - Total turbining capacity (MW)",
        "Pondage - Total turbining capacity (MW)",
        "Reservoir - Total turbining capacity (MW)",
        "Pump Storage (open loop, with natural inflows) - Total turbining capacity (MW)",
        "Pure Pump Storage (closed loop, no natural inflows) - Total turbining capacity (MW)",
    ]

    capacities = {}
    missing = []
    # `.tolist()` yields plain `str` objects, so the printed list stays
    # readable regardless of how NumPy renders its scalar types.
    for bus in np.char.replace(ds.bus.values, "GB", "UK").tolist():
        # os.path.join works with or without a trailing separator on `root`.
        path = os.path.join(
            root, str(year), f"PEMMDB_{bus}_NationalTrends_{year}.xlsx"
        )
        if os.path.exists(path):
            capacities[bus] = pd.read_excel(
                path,
                sheet_name="Hydro",
                skiprows=1,
                index_col=0,
            ).T.set_index("Market Node:")
        else:
            missing.append(bus)

    if missing:
        print(f"No PEMMDB data for nodes {missing} in {year}.")

    if not capacities:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            f"No PEMMDB workbooks found for {year} under {str(root)!r}; "
            f"nodes checked: {missing}."
        )

    # Concatenating a dict yields a (bus, "Market Node:") MultiIndex; drop the
    # inner level so rows are addressed by bus name only.
    capacities = pd.concat(capacities)[capacity_columns].droplevel(1, "index")

    return capacities, missing

In [7]:
def check_consistency(year, tech, cap, missing, dataset=None):
    """
    Compare all-zero profiles of one hydro technology with its capacities.

    Prints three summaries: the nodes whose profile is zero everywhere, the
    nodes without capacity (zero capacity or no PEMMDB workbook), and the nodes
    that have a zero profile although a capacity is specified.

    Parameters
    ----------
    year : int
        Year selected from the ``profile`` variable.
    tech : str
        Value of the ``hydro_tech`` coordinate to check. "PS Closed" and
        "PS Open" are matched against the "closed loop" / "open loop"
        capacity columns; any other value is matched by its own name.
    cap : pandas.DataFrame
        Capacities indexed by node name, as returned by ``load_capacities``.
    missing : list of str
        Nodes without PEMMDB data; they are counted as having no capacity.
    dataset : object, optional
        Dataset providing ``dataset["profile"]``. Defaults to the
        notebook-level ``ds``.

    Returns
    -------
    list of str
        Sorted node names with a zero profile but a specified capacity.
    """
    # Only touch the notebook global when the caller did not pass a dataset.
    source = ds if dataset is None else dataset

    print(f"**{tech}** \n ============")

    # The capacity column labels spell the pumped-storage variants differently
    # from the `hydro_tech` coordinate.
    column_keyword = {"PS Closed": "closed loop", "PS Open": "open loop"}.get(
        tech, tech
    )

    # After the transpose, columns are the nodes.
    profile = source["profile"].sel(year=year, hydro_tech=tech).to_pandas().T
    total_nodes = profile.columns
    missing_profiles = profile.loc[:, (profile == 0).all(axis=0)].columns.str.replace(
        "GB", "UK", regex=True
    )
    print(
        f"Zero-profiles for {len(missing_profiles)} out of {len(total_nodes)} nodes: \n {missing_profiles.values}. \n"
    )

    tech_cap = cap.filter(like=column_keyword, axis="columns")
    # Build an explicit one-dimensional row mask: a node has no capacity when
    # every matching capacity column is zero. (Indexing rows with the 2-D
    # `values == 0` array only works by accident for a single column.)
    zero_rows = (tech_cap == 0).all(axis=1).to_numpy()
    no_cap = [str(node) for node in tech_cap.index[zero_rows]]
    no_cap += [str(node) for node in missing]
    print(
        f"No capacities for {len(no_cap)} out of {len(total_nodes)} nodes: \n {no_cap}. \n"
    )

    # `no_cap` already contains `missing`; sorting makes the printed result
    # reproducible between runs.
    difference = sorted(set(missing_profiles).difference(no_cap))
    print(
        f"{len(difference)} out of {len(total_nodes)} nodes have zero-profiles while having specified turbine capacities: \n {difference} \n"
    )

    return difference

Hydro inflows are only needed for Run of River, Pondage and Reservoir plants.

In [8]:
# `hydro_tech` labels of the technologies that need inflow profiles; iterated
# over by the per-year consistency checks.
techs = [
    "Run of River",
    "Pondage",
    "Reservoir",
]

There are by definition no hydro inflows for closed-loop pumped hydro storage plants. 

However, there can be inflows for open-loop pumped hydro, which are provided within PEMMDB; thus both variants are checked for completeness.

In [9]:
# `hydro_tech` labels of the pumped hydro storage variants; checked separately
# from the inflow technologies above.
phs = [
    "PS Closed",
    "PS Open",
]

# Check data

During pre-processing, missing data (empty fields) are filled with zero values. We therefore filter for all-zero profiles to identify potentially missing hydro inflow profiles. Afterwards, we manually check the raw input files to see whether the data are in fact missing or genuinely contain 0.0 values.

## 2030

In [10]:
year = 2030

In [11]:
# Rebinds the notebook-level `missing`; the check cells of this section read it.
cap_2030, missing = load_capacities(year, ds)

No PEMMDB data for nodes ['ITCO', 'ITVI'] in 2030.


In [12]:
# Check every inflow technology, using the `year`, `cap_2030` and `missing`
# bound in the cells above.
for tech in techs:
    check_consistency(year, tech, cap_2030, missing)

**Run of River** 
Zero-profiles for 19 out of 54 nodes: 
 ['CY00' 'DKE1' 'DKW1' 'EE00' 'FI00' 'UKNI' 'GR03' 'ITCO' 'ITS1' 'ITVI'
 'LUB1' 'LUF1' 'LUV1' 'LV00' 'MT00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 18 out of 54 nodes: 
 ['CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'LUB1', 'LUF1', 'LUV1', 'LV00', 'MT00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

1 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'ITS1'} 

**Pondage** 
Zero-profiles for 46 out of 54 nodes: 
 ['AL00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1' 'DKW1' 'EE00'
 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR03' 'HU00' 'IE00' 'ITCA' 'ITCN'
 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI' 'ITVI' 'LT00' 'LUB1' 'LUF1'
 'LUG1' 'LUV1' 'ME00' 'MK00' 'MT00' 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00'
 'RO00' 'RS00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 46 out of 54 nodes: 
 ['AL00', 'BE00', 'BG00', 'CH00', 'CY00', 'CZ00', 'DE00', 'DKE1', 'DKW1', 'EE00', 'ES00', 'FI00', '

Checking PHS for completeness:

In [13]:
# Check both pumped hydro storage variants, using the `year`, `cap_2030` and
# `missing` bound in the cells above.
for tech in phs:
    check_consistency(year, tech, cap_2030, missing)

**PS Closed** 
Zero-profiles for 54 out of 54 nodes: 
 ['AL00' 'AT00' 'BA00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1'
 'DKW1' 'EE00' 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR00' 'GR03' 'HR00'
 'HU00' 'IE00' 'ITCA' 'ITCN' 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI'
 'ITVI' 'LT00' 'LUB1' 'LUF1' 'LUG1' 'LUV1' 'LV00' 'ME00' 'MK00' 'MT00'
 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00' 'PT00' 'RO00' 'RS00' 'SE01' 'SE02'
 'SE03' 'SE04' 'SI00' 'SK00']. 

No capacities for 33 out of 54 nodes: 
 ['AL00', 'BA00', 'CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'HR00', 'HU00', 'ITCA', 'ITCN', 'ITS1', 'LUB1', 'LUF1', 'LUG1', 'LV00', 'ME00', 'MK00', 'MT00', 'NL00', 'NOM1', 'NON1', 'NOS0', 'PT00', 'RO00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

21 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'LT00', 'SI00', 'BE00', 'SK00', 'ES00', 'IE00', 'FR00', 'AT00', 'CH00', 'LUV1', 'ITN1', 'ITSA', 'PL00', 'BG00', 'ITSI', 'CZ00', 'UK00', 'GR00', 'ITCS', 'DE00', 'RS00

As expected for `closed loop PHS`, no inflows are available.

Manually checking the raw data behind the zero inflow profiles for `open loop PHS`, `Run of River` and `Reservoir` shows that the data are not actually missing (empty fields) but are explicitly filled with 0.0 values.

✅ Thus, no missing data is found for 2030.

## 2040

In [14]:
year = 2040

In [15]:
# Rebinds the notebook-level `missing`; the check cells of this section read it.
cap_2040, missing = load_capacities(year, ds)

No PEMMDB data for nodes ['ITCO', 'ITVI'] in 2040.


In [16]:
# Check every inflow technology, using the `year`, `cap_2040` and `missing`
# bound in the cells above.
for tech in techs:
    check_consistency(year, tech, cap_2040, missing)

**Run of River** 
Zero-profiles for 19 out of 54 nodes: 
 ['CY00' 'DKE1' 'DKW1' 'EE00' 'FI00' 'UKNI' 'GR03' 'ITCO' 'ITS1' 'ITVI'
 'LUB1' 'LUF1' 'LUV1' 'LV00' 'MT00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 18 out of 54 nodes: 
 ['CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'LUB1', 'LUF1', 'LUV1', 'LV00', 'MT00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

1 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'ITS1'} 

**Pondage** 
Zero-profiles for 46 out of 54 nodes: 
 ['AL00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1' 'DKW1' 'EE00'
 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR03' 'HU00' 'IE00' 'ITCA' 'ITCN'
 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI' 'ITVI' 'LT00' 'LUB1' 'LUF1'
 'LUG1' 'LUV1' 'ME00' 'MK00' 'MT00' 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00'
 'RO00' 'RS00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 46 out of 54 nodes: 
 ['AL00', 'BE00', 'BG00', 'CH00', 'CY00', 'CZ00', 'DE00', 'DKE1', 'DKW1', 'EE00', 'ES00', 'FI00', '

Checking PHS for completeness:

In [17]:
# Check both pumped hydro storage variants, using the `year`, `cap_2040` and
# `missing` bound in the cells above.
for tech in phs:
    check_consistency(year, tech, cap_2040, missing)

**PS Closed** 
Zero-profiles for 54 out of 54 nodes: 
 ['AL00' 'AT00' 'BA00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1'
 'DKW1' 'EE00' 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR00' 'GR03' 'HR00'
 'HU00' 'IE00' 'ITCA' 'ITCN' 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI'
 'ITVI' 'LT00' 'LUB1' 'LUF1' 'LUG1' 'LUV1' 'LV00' 'ME00' 'MK00' 'MT00'
 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00' 'PT00' 'RO00' 'RS00' 'SE01' 'SE02'
 'SE03' 'SE04' 'SI00' 'SK00']. 

No capacities for 33 out of 54 nodes: 
 ['AL00', 'BA00', 'CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'HR00', 'HU00', 'ITCA', 'ITCN', 'ITS1', 'LUB1', 'LUF1', 'LUG1', 'LV00', 'ME00', 'MK00', 'MT00', 'NL00', 'NOM1', 'NON1', 'NOS0', 'PT00', 'RO00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

21 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'LT00', 'SI00', 'BE00', 'SK00', 'ES00', 'IE00', 'FR00', 'AT00', 'CH00', 'LUV1', 'ITN1', 'ITSA', 'PL00', 'BG00', 'ITSI', 'CZ00', 'UK00', 'GR00', 'ITCS', 'DE00', 'RS00

As expected for `closed loop PHS`, no inflows are available.

Manually checking the raw data behind the zero inflow profiles for `open loop PHS`, `Run of River` and `Reservoir` shows that the data are not actually missing (empty fields) but are explicitly filled with 0.0 values.

✅ Thus, no missing data is found for 2040.

## 2050

In [18]:
year = 2050

In [19]:
# Rebinds the notebook-level `missing`; the check cells of this section read it.
cap_2050, missing = load_capacities(year, ds)

No PEMMDB data for nodes ['ITCO', 'ITVI'] in 2050.


In [20]:
# Check every inflow technology, using the `year`, `cap_2050` and `missing`
# bound in the cells above.
for tech in techs:
    check_consistency(year, tech, cap_2050, missing)

**Run of River** 
Zero-profiles for 19 out of 54 nodes: 
 ['CY00' 'DKE1' 'DKW1' 'EE00' 'FI00' 'UKNI' 'GR03' 'ITCO' 'ITS1' 'ITVI'
 'LUB1' 'LUF1' 'LUV1' 'LV00' 'MT00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 18 out of 54 nodes: 
 ['CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'LUB1', 'LUF1', 'LUV1', 'LV00', 'MT00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

1 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'ITS1'} 

**Pondage** 
Zero-profiles for 46 out of 54 nodes: 
 ['AL00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1' 'DKW1' 'EE00'
 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR03' 'HU00' 'IE00' 'ITCA' 'ITCN'
 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI' 'ITVI' 'LT00' 'LUB1' 'LUF1'
 'LUG1' 'LUV1' 'ME00' 'MK00' 'MT00' 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00'
 'RO00' 'RS00' 'SE01' 'SE02' 'SE03' 'SE04']. 

No capacities for 46 out of 54 nodes: 
 ['AL00', 'BE00', 'BG00', 'CH00', 'CY00', 'CZ00', 'DE00', 'DKE1', 'DKW1', 'EE00', 'ES00', 'FI00', '

Checking PHS for completeness:

In [21]:
# Check both pumped hydro storage variants, using the `year`, `cap_2050` and
# `missing` bound in the cells above.
for tech in phs:
    check_consistency(year, tech, cap_2050, missing)

**PS Closed** 
Zero-profiles for 54 out of 54 nodes: 
 ['AL00' 'AT00' 'BA00' 'BE00' 'BG00' 'CH00' 'CY00' 'CZ00' 'DE00' 'DKE1'
 'DKW1' 'EE00' 'ES00' 'FI00' 'FR00' 'UK00' 'UKNI' 'GR00' 'GR03' 'HR00'
 'HU00' 'IE00' 'ITCA' 'ITCN' 'ITCO' 'ITCS' 'ITN1' 'ITS1' 'ITSA' 'ITSI'
 'ITVI' 'LT00' 'LUB1' 'LUF1' 'LUG1' 'LUV1' 'LV00' 'ME00' 'MK00' 'MT00'
 'NL00' 'NOM1' 'NON1' 'NOS0' 'PL00' 'PT00' 'RO00' 'RS00' 'SE01' 'SE02'
 'SE03' 'SE04' 'SI00' 'SK00']. 

No capacities for 33 out of 54 nodes: 
 ['AL00', 'BA00', 'CY00', 'DKE1', 'DKW1', 'EE00', 'FI00', 'UKNI', 'GR03', 'HR00', 'HU00', 'ITCA', 'ITCN', 'ITS1', 'LUB1', 'LUF1', 'LUG1', 'LV00', 'ME00', 'MK00', 'MT00', 'NL00', 'NOM1', 'NON1', 'NOS0', 'PT00', 'RO00', 'SE01', 'SE02', 'SE03', 'SE04', 'ITCO', 'ITVI']. 

21 out of 54 nodes have zero-profiles while having specified turbine capacities: 
 {'LT00', 'SI00', 'BE00', 'SK00', 'ES00', 'IE00', 'FR00', 'AT00', 'CH00', 'LUV1', 'ITN1', 'ITSA', 'PL00', 'BG00', 'ITSI', 'CZ00', 'UK00', 'GR00', 'ITCS', 'DE00', 'RS00

As expected for `closed loop PHS`, no inflows are available.

Manually checking the raw data behind the zero inflow profiles for `open loop PHS`, `Run of River` and `Reservoir` shows that the data are not actually missing (empty fields) but are explicitly filled with 0.0 values.

✅ Thus, no missing data is found for 2050.