# Validation of PyPSA-Eur model inputs focusing on installed capacities and demand

## Analysis of PyPSA-Eur Model

This notebook investigates the most important input data in the **PyPSA-Eur** model. 
In particular, publicly available information about the European power system from **Ember** is compared to the data used and the results produced by the PyPSA-Eur model.

The following quantities are reviewed:

### Inputs Used by the PyPSA-Eur Model
- **Installed Generation Capacity** by Technology Type and Country
- **Electricity Demand**

### Reproducing the Results

To properly reproduce the findings presented in this notebook, please run the full **Snakemake** workflow for **PyPSA-Eur** using the config file `configs/validation_config_2023.yaml`.

You can find detailed installation and execution instructions in the official documentation:  
🔗 [PyPSA-Eur Installation Guide](https://pypsa-eur.readthedocs.io/en/stable/installation.html)

Alternatively, if you have access to a pre-built network, you may use that directly.

This validation step is essential for:
- Ensuring the PyPSA-Eur model's data reflects reality.
- Highlighting discrepancies, if any, between model data and real-world statistics.
- Supporting transparency and reliability in energy system modeling.


### Imports and Configuration

In [None]:
import pandas as pd
import pypsa
import pycountry
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import numpy as np
import os
from pathlib import Path
import cartopy.crs as ccrs
from pypsa.plot import add_legend_circles, add_legend_lines, add_legend_patches

# Configuration
# Validation year. It filters the Ember data and selects the
# resources/validation_<year> folder that the model inputs are read from.
year = 2023

# Input files. Relative paths are resolved against the current working directory.
# PyPSA network
network_path = f"../../resources/validation_{year}/networks/base_s_128___2025.nc"
# Ember exports
ember_capacity_data_path = "../ember_data/yearly_full_release_long_format.csv"
ember_demand_data_path = "../ember_data/europe_monthly_full_release_long_format.csv"
# Country shapes
regions_onshore_path = f"../../resources/validation_{year}/country_shapes.geojson"

# Plotting styles
plt.style.use("bmh")
sns.set_style("darkgrid")

# Countries (ISO alpha-2 codes) considered in the comparison
countries = ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES',
             'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV',
             'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']

### Load Data (Common for Capacity and Demand)

In [None]:
# Load data
# Each input is loaded in its own try block: success and failure are both
# reported, and a failure is re-raised so later cells never run on missing data.
try:
    n = pypsa.Network(network_path)
    print(f"Loaded PyPSA network: {network_path}")
except Exception as e:
    print(f"Error loading PyPSA network: {e}")
    raise

try:
    ember_capacity = pd.read_csv(ember_capacity_data_path)
    print(f"Loaded Ember capacity data: {ember_capacity_data_path}")
except Exception as e:
    print(f"Error loading Ember capacity data: {e}")
    raise

try:
    ember_demand = pd.read_csv(ember_demand_data_path)
    print(f"Loaded Ember demand data: {ember_demand_data_path}")
except Exception as e:
    print(f"Error loading Ember demand data: {e}")
    raise

### Process Ember Capacity Data

In [None]:
# Process Ember capacity data
def process_ember_capacity():
    """Build a country-by-fuel table of Ember installed capacities.

    Reads the module-level ``ember_capacity`` DataFrame and ``year``. Keeps the
    rows of that year with Continent 'Europe', Category 'Capacity' and
    Subcategory 'Fuel', drops the areas listed in ``exclude_areas``, converts
    the ISO alpha-3 codes to alpha-2, and pivots the result.

    Returns:
        pandas.DataFrame: indexed by ``ISO`` (alpha-2 code) with one column per
        Ember ``Variable``, filled from the ``Value`` column. Missing
        country/variable combinations are 0. Values are presumably in GW
        (the downstream plots label them so) -- confirm against the ``Unit``
        column.
    """
    print("Processing Ember capacity data")
    exclude_areas = ["Belarus", "Gibraltar", "Iceland", "Kosovo", "Moldova", "Russian Federation (the)", "Malta", "Cyprus"]
    ember_capacity_filtered = ember_capacity.query(
        f"Year == {year} and Continent == 'Europe' and Category == 'Capacity' and Subcategory == 'Fuel'"
    ).copy()
    ember_capacity_filtered = ember_capacity_filtered[~ember_capacity_filtered['Area'].isin(exclude_areas)]

    def iso3_to_iso2(iso3):
        """Return the ISO alpha-2 code for ``iso3``, or None if it cannot be resolved."""
        try:
            return pycountry.countries.get(alpha_3=iso3).alpha_2
        except (AttributeError, LookupError):
            # AttributeError: the lookup yielded None (unknown code) or the
            # value is not a string (e.g. NaN). LookupError (incl. KeyError):
            # raised by pycountry itself, depending on its version.
            return None

    ember_capacity_filtered["ISO"] = ember_capacity_filtered["ISO 3 code"].apply(iso3_to_iso2)
    ember_capacity_filtered = ember_capacity_filtered[["ISO", "Variable", "Value", "Unit"]]
    # pivot_table aggregates duplicate (ISO, Variable) pairs with its default
    # aggregation (mean); pairs absent from the data become 0.
    ember_capacity_pivot = ember_capacity_filtered.pivot_table(index="ISO", columns="Variable", values="Value").fillna(0)
    print(f"Processed Ember capacity data sample:\n{ember_capacity_pivot.head().to_string()}")
    return ember_capacity_pivot

ember_capacity_processed = process_ember_capacity()

### Process PyPSA Capacity Data

In [None]:

def process_pypsa_capacity():
    """Aggregate the network's installed electrical capacity per country and technology.

    Reads the module-level PyPSA network ``n`` and combines four sources, all
    based on the input capacity ``p_nom``:

    * generators whose carrier is in ``vres_tech``;
    * all storage units;
    * conventional links (``conv_techs``) feeding an 'AC' bus, converted to
      electrical capacity as ``p_nom * efficiency`` (output on ``bus1``) or,
      for CHP carriers, ``p_nom * efficiency2`` (output on ``bus2``);
    * solid-biomass CHP links, as ``p_nom * efficiency2`` on ``bus2``.

    Carriers are then merged into the technology groups used for the
    comparison (Wind, Solar, Gas, Coal, Hydro, Other Fossil, Nuclear,
    Bioenergy).

    Returns:
        pandas.DataFrame: indexed by ``country`` (first two characters of the
        bus name), one column per technology group, values divided by 1000 and
        rounded to two decimals (MW -> GW, assuming ``p_nom`` is in MW).
    """
    print("Processing PyPSA capacity data")

    def merge_and_replace(df, new_col, cols_to_merge, drop_original=True):
        """Sum the columns of ``cols_to_merge`` present in ``df`` into ``new_col``.

        If none of the columns exist, ``df`` is returned unchanged. The summed
        source columns are dropped unless ``drop_original`` is False.
        """
        available_cols = [col for col in cols_to_merge if col in df.columns]
        if not available_cols:
            print(f"Skipping merge for {new_col}: No columns {cols_to_merge} found.")
            return df
        df[new_col] = df[available_cols].sum(axis=1)
        if drop_original:
            df = df.drop(columns=available_cols, errors="ignore")
        return df

    def link_electrical_capacity(links, bus_col, efficiency_col, ac_only=True):
        """Return electrical link capacity as a bus-by-carrier table.

        The capacity is ``p_nom * <efficiency_col>`` and is attributed to the
        bus in ``bus_col``. With ``ac_only`` only links whose ``bus_col`` bus
        has carrier 'AC' are counted. An empty selection yields an empty frame.
        """
        if links.empty:
            return pd.DataFrame()
        if ac_only:
            on_ac_bus = (n.buses.loc[links[bus_col], 'carrier'] == 'AC').to_numpy()
            links = links[on_ac_bus]
        return (
            links.assign(electrical_nom=links['p_nom'] * links[efficiency_col])
            .groupby([bus_col, 'carrier'])['electrical_nom']
            .sum().unstack(fill_value=0).reset_index()
            .rename(columns={bus_col: 'bus'})
        )

    conv_techs = ['OCGT', 'CCGT', 'coal', 'lignite', 'nuclear', 'oil', 'urban central gas CHP', 'urban central gas CHP CC']
    vres_tech = ['solar-hsat', 'solar', 'onwind', 'offwind-float', 'offwind-dc', 'offwind-ac', 'ror']

    # Variable renewables are modelled as generators
    pypsa_country_tech = (
        n.generators.query("carrier in @vres_tech")
        .groupby(['bus', 'carrier'])['p_nom']
        .sum().unstack(fill_value=0).reset_index()
    )

    # Storage units (all carriers)
    sto_country_tech = (
        n.storage_units.groupby(['bus', 'carrier'])['p_nom']
        .sum().unstack(fill_value=0).reset_index()
    )

    # Conventional links (non-bio): CHP units deliver electricity on bus2,
    # all other plants on bus1
    conv_links = n.links.query("carrier in @conv_techs")
    is_chp = conv_links.carrier.str.contains('CHP', na=False)
    non_chp_country_tech = link_electrical_capacity(conv_links[~is_chp], 'bus1', 'efficiency')
    chp_country_tech = link_electrical_capacity(conv_links[is_chp], 'bus2', 'efficiency2')
    conv_country_tech = pd.concat([non_chp_country_tech, chp_country_tech], ignore_index=True)

    # Bio from links (CHP electrical). As before, no AC-bus filter is applied here.
    bio_carriers = ['urban central solid biomass CHP', 'urban central solid biomass CHP CC']
    bio_links = n.links[n.links['carrier'].isin(bio_carriers)]
    bio_link_country_tech = link_electrical_capacity(bio_links, 'bus2', 'efficiency2', ac_only=False)

    # Concat all
    pypsa_country_tech = pd.concat([pypsa_country_tech, sto_country_tech, conv_country_tech, bio_link_country_tech], ignore_index=True)
    # The country is taken from the first two characters of the bus name
    pypsa_country_tech['country'] = pypsa_country_tech['bus'].str[:2]

    pypsa_country_tech_merged = pypsa_country_tech.groupby('country').sum(numeric_only=True).reset_index()

    wind_cols = [col for col in pypsa_country_tech_merged.columns if col.startswith("onwind") or col.startswith("offwind")]
    solar_cols = [col for col in pypsa_country_tech_merged.columns if col.startswith("solar")]

    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Wind", wind_cols)
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Solar", solar_cols)
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Gas", ["CCGT", "OCGT", "urban central gas CHP", "urban central gas CHP CC"])
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Coal", ["coal", "lignite"])
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Hydro", ["hydro", "ror", "PHS"])
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Other Fossil", ["oil"])
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Nuclear", ["nuclear"])
    # NOTE(review): no "Other Renewables" group is built; the geothermal merge
    # was disabled by the original author.
    pypsa_country_tech_merged = merge_and_replace(pypsa_country_tech_merged, "Bioenergy", bio_carriers)  # All bio carriers
    pypsa_country_tech_merged = pypsa_country_tech_merged.set_index("country").div(1000).round(2)

    # Print total installed capacity per technology
    total_capacity = pypsa_country_tech_merged.sum()
    print("Total installed capacity per technology:")
    print(total_capacity.to_string())

    print(f"Processed PyPSA capacity data sample:\n{pypsa_country_tech_merged.head().to_string()}")
    return pypsa_country_tech_merged

pypsa_capacity_processed = process_pypsa_capacity()

### Plot Total Capacity Comparison

In [None]:
# Plot total capacity comparison
def plot_total_capacity_comparison(ember_capacity, pypsa_capacity):
    """Draw a grouped bar chart of total capacity per technology for both sources.

    Each table is summed over its rows; a technology missing from one source
    is drawn as 0. The figure is written to
    ``results/validation_<year>/plots/total_capacity_plot.png`` (``year`` is
    the module-level setting) and then shown.

    Args:
        ember_capacity: country-by-technology table of Ember capacities.
        pypsa_capacity: country-by-technology table of PyPSA-Eur capacities.
    """
    print("Generating total capacity comparison plot")
    ember_sum = ember_capacity.sum(axis=0)
    pypsa_sum = pypsa_capacity.sum(axis=0)

    # Union of the technologies known to either source, alphabetically
    technologies = sorted(set(ember_sum.index) | set(pypsa_sum.index))
    bar_width = 0.35
    positions = np.arange(len(technologies))

    fig, ax = plt.subplots(figsize=(12, 6))

    # (horizontal offset, totals, legend label, colour) for each bar series
    series_specs = (
        (-bar_width / 2, ember_sum, 'Ember', '#13ce74'),
        (bar_width / 2, pypsa_sum, 'PyPSA-Eur', '#192238'),
    )
    for offset, totals, label, colour in series_specs:
        heights = [totals.get(tech, 0) for tech in technologies]
        ax.bar(positions + offset, heights, bar_width, label=label, color=colour)

    ax.set_xticks(positions)
    ax.set_xticklabels(technologies, rotation=45, ha='right')
    ax.set_ylabel('Installed Capacity (GW)')
    ax.set_title('Comparison of Installed Capacity by Technology (Summed Across Europe)')
    ax.legend()
    plt.tight_layout()

    output_path = f"results/validation_{year}/plots/total_capacity_plot.png"
    print(f"Saving total capacity comparison plot: {output_path}")
    # Create the target folder on first use
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.show()

plot_total_capacity_comparison(ember_capacity_processed, pypsa_capacity_processed)

### Plot Country-Specific Capacity Comparison

In [None]:
# Plot country-specific capacity comparison
def plot_capacity_comparison_horizontal(countries, ember_capacity, pypsa_capacity):
    """Draw per-country horizontal bar charts comparing Ember and PyPSA-Eur capacity.

    Only the first six entries of ``countries`` are plotted, on a fixed 3x2
    grid. A country missing from either table leaves its panel empty; unused
    panels are switched off. The figure is written to
    ``results/validation_<year>/plots/country_capacity_plot.png`` (``year`` is
    the module-level setting) and then shown.

    Args:
        countries: country codes matching the index of both tables.
        ember_capacity: country-by-technology table of Ember capacities.
        pypsa_capacity: country-by-technology table of PyPSA-Eur capacities.
    """
    print("Generating country-specific capacity comparison plot")
    fig, axes = plt.subplots(3, 2, figsize=(12, 12))
    axes = axes.flatten()
    shown_countries = countries[:6]

    for idx, country in enumerate(shown_countries):
        ax = axes[idx]
        if country not in ember_capacity.index or country not in pypsa_capacity.index:
            continue
        ember_row = ember_capacity.loc[country]
        pypsa_row = pypsa_capacity.loc[country]

        techs = sorted(set(ember_row.index) | set(pypsa_row.index))
        # Shorten the label for display only. The values must be looked up
        # under the original name, otherwise "Other Renewables" is always 0.
        tick_labels = ["Other RES" if t == "Other Renewables" else t for t in techs]

        ember_vals = [ember_row.get(tech, 0) for tech in techs]
        pypsa_vals = [pypsa_row.get(tech, 0) for tech in techs]

        y = np.arange(len(techs))
        height = 0.35

        ax.barh(y - height/2, ember_vals, height, label='Ember', color='#13ce74')
        ax.barh(y + height/2, pypsa_vals, height, label='PyPSA-Eur', color='#192238')
        ax.set_yticks(y)
        ax.set_yticklabels(tick_labels)
        ax.set_title(f"{country} Capacity Comparison")

        # Axis label only on the bottom row of the grid
        if idx in [4, 5]:
            ax.set_xlabel("Capacity (GW)")

        # A single legend, in the first panel
        if idx == 0:
            ax.legend(loc="upper right")

    # Hide the panels that have no country assigned
    for j in range(len(shown_countries), len(axes)):
        axes[j].axis('off')

    fig.suptitle(f"Installed Capacity Comparison {year}", weight="bold")
    plt.tight_layout()

    output_path = f"results/validation_{year}/plots/country_capacity_plot.png"
    print(f"Saving country-specific capacity comparison plot: {output_path}")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.show()

plot_capacity_comparison_horizontal(
    countries=["DE", "NL", "IT", "PL", "CZ", "GR"],
    ember_capacity=ember_capacity_processed,
    pypsa_capacity=pypsa_capacity_processed
)

In [None]:
import pandas as pd

# Inputs for the demand section. Relative paths (the same ones the
# configuration cell uses) replace machine-specific absolute paths, so the
# notebook runs from any checkout.
ember_capacity_data_path = "../ember_data/yearly_full_release_long_format.csv"
ember_demand_data_path = "../ember_data/europe_monthly_full_release_long_format.csv"
ember_demand = pd.read_csv(ember_demand_data_path)

# Country shapes of the validation run for the configured year
regions_onshore_path = f"../../resources/validation_{year}/country_shapes.geojson"

# Plotting styles
plt.style.use("bmh")
sns.set_style("darkgrid")

# Countries (ISO alpha-2 codes) considered in the comparison
countries = ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES',
             'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV',
             'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']

# Process Ember demand data
def process_ember_demand():
    """Sum Ember's monthly electricity demand to one yearly value per country.

    Reads the module-level ``ember_demand`` DataFrame and ``year``. Keeps the
    rows with Continent 'Europe', a ``Date`` starting with the year, Unit
    'TWh' and Subcategory 'Demand', then sums ``Value`` per country.

    Returns:
        pandas.DataFrame: indexed by ``ISO`` (alpha-2 code) with a single
        ``Value`` column holding the yearly demand in TWh.
    """
    print("Processing Ember demand data")
    ember_demand_europe = ember_demand[ember_demand["Continent"] == "Europe"].copy()

    def iso3_to_iso2(iso3):
        """Return the ISO alpha-2 code for ``iso3``, or None if it cannot be resolved."""
        try:
            return pycountry.countries.get(alpha_3=iso3).alpha_2
        except (AttributeError, LookupError):
            # AttributeError: the lookup yielded None (unknown code) or the
            # value is not a string (e.g. NaN). LookupError (incl. KeyError):
            # raised by pycountry itself, depending on its version.
            return None

    ember_demand_europe["ISO"] = ember_demand_europe["ISO 3 code"].apply(iso3_to_iso2)
    ember_demand_europe = ember_demand_europe[ember_demand_europe["Date"].str.startswith(str(year))]
    ember_demand_europe = ember_demand_europe[ember_demand_europe["Unit"] == "TWh"]
    ember_demand_europe = ember_demand_europe[ember_demand_europe["Subcategory"] == "Demand"]
    ember_demand_europe = ember_demand_europe[["ISO", "Date", "Variable", "Value", "Unit"]]

    # Rows without a resolvable ISO code fall out of the groupby (NaN keys are dropped)
    ember_demand_yearly = ember_demand_europe.groupby("ISO")[["Value"]].sum()
    print(f"Processed Ember demand data sample:\n{ember_demand_yearly.head().to_string()}")
    return ember_demand_yearly

ember_demand_processed = process_ember_demand()

# Process PyPSA demand data (electricity only)
def process_pypsa_demand():
    """Sum the network's electricity demand time series to yearly values per country.

    Reads the module-level PyPSA network ``n``. Loads attached to buses with
    carrier 'AC', 'DC' or 'low voltage' are selected, their ``p_set`` series
    are summed per country (first two characters of the bus name), weighted
    with ``n.snapshot_weightings.generators`` and summed over all snapshots.

    Returns:
        pandas.DataFrame: indexed by country code with a single ``Value``
        column: the yearly sum divided by 1e6 and rounded to whole numbers
        (TWh, assuming ``p_set`` is in MW and the weightings in hours).
    """
    print("Processing PyPSA demand data (electricity only)")
    # Identify electricity buses (AC for high-voltage, low voltage for distribution, DC if present)
    elec_buses = n.buses.index[n.buses.carrier.isin(['AC', 'DC', 'low voltage'])]
    # Filter loads attached to electricity buses
    elec_loads = n.loads[n.loads.bus.isin(elec_buses)]
    country_of_load = elec_loads.bus.str[:2]
    # Get the input demand time series (p_set for unsolved/input networks; switch to n.loads_t.p if using a solved network's dispatched values)
    # NOTE(review): this selection raises KeyError if a selected load has no
    # column in n.loads_t.p_set (e.g. a load defined only by a static p_set).
    # Grouping columns is done via a transpose because
    # DataFrame.groupby(axis=1) is deprecated in pandas 2.1+.
    pypsa_loads = n.loads_t.p_set[elec_loads.index].T.groupby(country_of_load).sum().T
    weighted_loads = pypsa_loads.multiply(n.snapshot_weightings.generators, axis=0)
    pypsa_loads_yearly = weighted_loads.sum(axis=0).to_frame(name="Value")
    pypsa_loads_yearly = (pypsa_loads_yearly / 1e6).round()  # Convert to TWh
    print(f"Processed PyPSA electricity demand data sample:\n{pypsa_loads_yearly.head().to_string()}")
    return pypsa_loads_yearly

processed_pypsa_demand = process_pypsa_demand()

# Plot demand comparison
def plot_demand_comparison(ember_demand, pypsa_demand):
    """Draw side-by-side horizontal bar charts of yearly demand per country.

    Only countries present in both tables and in the module-level
    ``countries`` list are shown, split into two panels. The figure is
    written to ``results/validation_<year>/plots/demand_plot.png`` (``year``
    is the module-level setting) and then shown.

    Args:
        ember_demand: table with a ``Value`` column of Ember demand per country.
        pypsa_demand: table with a ``Value`` column of PyPSA-Eur demand per country.
    """
    print("Generating demand comparison plot")
    pypsa_series = pypsa_demand["Value"]
    ember_series = ember_demand["Value"]

    # Countries known to both sources, restricted to the defined country list
    shared = pypsa_series.index.intersection(ember_series.index)
    shared = shared.intersection(countries)

    df_compare = pd.DataFrame(
        {
            "PyPSA-Eur": pypsa_series.loc[shared],
            "Ember": ember_series.loc[shared],
        },
        index=shared,
    )

    # First half of the countries goes to the left panel, the rest to the right
    split_at = len(df_compare) // 2
    panels = (df_compare.index[:split_at], df_compare.index[split_at:])

    fig, axes = plt.subplots(1, 2, figsize=(9, 6.5), sharey=False)

    bar_height = 0.35
    for ax, panel_countries in zip(axes, panels):
        rows = np.arange(len(panel_countries))
        panel_values = df_compare.loc[panel_countries]
        ax.barh(rows - bar_height/2, panel_values["PyPSA-Eur"], height=bar_height, label='PyPSA-Eur', color='#192238')
        ax.barh(rows + bar_height/2, panel_values["Ember"], height=bar_height, label='Ember', color='#13ce74')
        ax.set_yticks(rows)
        ax.set_yticklabels(panel_countries)
        ax.set_xlabel('Yearly Electricity Demand (TWh)')
        ax.legend()

    fig.suptitle(f"Electricity Demand Comparison {year}", weight="bold")
    plt.tight_layout()

    output_path = f"results/validation_{year}/plots/demand_plot.png"
    print(f"Saving demand comparison plot: {output_path}")
    # Create the target folder on first use
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.show()

plot_demand_comparison(ember_demand_processed, processed_pypsa_demand)