# Example of Use

In [115]:
import re                                 # Für Textbereinigung (Kraftwerksnamen)
import pandas as pd
import geopandas as gpd                   # Für räumliche Operationen (z. B. Nearest-Join: kombiniert Daten aus zwei räumlichen Datensätzen auf der Grundlage ihrer Nähe zueinander)

import powerplantmatching as pm
from powerplantmatching.cleaning import set_column_name, get_config, aggregate_units  # Hilfsfunktionen des Tools


In [116]:
# Input 1: Excel workbook that carries the thermal capacity column (CHP_MaxHeat).
# NOTE(review): both paths below are absolute and machine-specific — adjust before running elsewhere.
heatcap_path = r"C:\Users\delic\Desktop\DataUnits_CHP.xlsx"
heatcap = pd.read_excel(heatcap_path)

# Input 2: the existing power plant list exported by powerplantmatching (PPM).
ppmlist_path  = r"C:\Users\delic\powerplantmatching\powerplants.csv"
ppm_list = pd.read_csv(ppmlist_path, sep=",")

# Column correspondence between the two sources (same position = same meaning):
#PPM: id      Name Fueltype Technology Set           Country     Capacity  Efficiency DateIn              DateOut     lat      lon
#EWL: unit_it unit fuel     deftech    operationmode countrycode max_power AvgEff     effective_startdate valid_until latitude longitude CHP_MaxHeat

In [117]:
# Stripping whitespace from the column headers is currently disabled:
#heatcap.columns = heatcap.columns.str.strip()
#ppm_list.columns  = ppm_list.columns.str.strip()

# Coerce the coordinates to numbers (unparseable values become NaN) and round
# them to two decimals. 0.01 degrees of latitude is roughly 1.1 km, so rounding
# can move a point by up to about half a kilometre north-south.
for frame in (heatcap, ppm_list):
    for axis in ("lat", "lon"):
        frame[axis] = pd.to_numeric(frame[axis], errors="coerce").round(2)


In [118]:
# Coerce the thermal capacity to numbers; values that cannot be parsed become NaN.
# `.get` is used, so a missing column yields None instead of raising a KeyError.
heatcap["CHP_MaxHeat"] = pd.to_numeric(
    heatcap.get("CHP_MaxHeat"),
    errors="coerce",
)

In [119]:
# Columns that take part in the matching. Columns a source does not provide
# are created by `reindex` and filled with pd.NA.
match_cols = [
    "Name",
    "Fueltype",
    "Technology",
    "Set",
    "DateIn",
    "DateOut",
    "Country",
    "City",
    "Capacity",
    "Efficiency",
    "lat",
    "lon",
    "CHP_MaxHeat",
]
# Original author's remark on "DateOut": questionable data quality
# (always 1970 in the DataUnits file).

heatcap_match, ppm_list_match = (
    source.reindex(columns=match_cols, fill_value=pd.NA).copy()
    for source in (heatcap, ppm_list)
)

# Default the 'Set' column (e.g. "PP" or "CHP") to "PP" wherever it is empty.
for frame in (heatcap_match, ppm_list_match):
    frame["Set"] = frame["Set"].fillna(value="PP")

In [120]:
# Funktion zur einheitlichen Bereinigung von Strings
def _clean_strings(df, cols):
    skip = {"lat", "lon", "Capacity", "Efficiency", "CHP_MaxHeat"}
    for c in cols:
        if c in skip:
            continue
        df[c] = (
            df[c].astype(str)
                 .str.strip()
                 .replace({"": pd.NA, "nan": pd.NA, "NaN": pd.NA, "None": pd.NA})
        )

def clean_name(s):
    """Normalise a power plant name so that both sources spell it alike.

    Missing values (anything ``pd.isna`` reports as missing) are returned
    unchanged. Otherwise the name is processed in this order:

    1. repair the mojibake form of "ß" and rewrite "Gross" as "Groß";
    2. drop unit/block designators ("Block 5", "Unit IV", "Bl2", ...);
    3. drop generic plant words ("Kraftwerk", "Power Plant", "HKW", "KW");
    4. collapse whitespace, trim, and title-case the result.

    Both regular expressions are applied case-insensitively.
    """
    if pd.isna(s):
        return s

    # "ÃŸ" is what a UTF-8 encoded "ß" looks like after being read as cp1252.
    text = s.replace("ÃŸ", "ß")
    text = text.replace("Gross", "Groß")

    removal_patterns = (
        r"\b(Block|Unit|Bk|Bl|B)\s*[0-9IVX]+\b",
        r"\b(Kraftwerk|Power\s*Plant|HKW|KW)\b",
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, "", text, flags=re.I)

    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip().title()

num_cols = ["Capacity", "Efficiency", "CHP_MaxHeat"]

# Run the same three clean-up steps on both tables.
for frame in (heatcap_match, ppm_list_match):
    # 1) tidy every text column that takes part in the matching
    _clean_strings(frame, match_cols)
    # 2) normalise the plant names
    frame["Name"] = frame["Name"].map(clean_name)
    # 3) make sure the numeric columns really are numeric (unparseable -> NaN)
    frame[num_cols] = frame[num_cols].apply(pd.to_numeric, errors="coerce")

In [121]:
# Label each data source so the matching can tell them apart.
# Original author's note: this used to be one of the main sources of errors,
# because PPM expects exactly one header per column.
heatcap_match, ppm_list_match = [
    set_column_name(frame, name=label)
    for frame, label in (
        (heatcap_match, "EWL-DataUnits"),
        (ppm_list_match, "PPMLIST"),
    )
]

In [122]:
# Set up the matching configuration, starting from the powerplantmatching defaults.
config = get_config()
config.update(
    {
        # columns carried through the matching
        "target_columns": match_cols,
        # per-source settings; EWL-DataUnits is given the higher reliability score
        "EWL-DataUnits": {"reliability_score": 7},
        "PPMLIST": {"reliability_score": 6},
        # presumably keeps every EWL-DataUnits plant in the result — confirm against the PPM docs
        "fully_included_sources": ["EWL-DataUnits"],
    }
)

In [123]:
date_cols = ["DateIn","DateOut"]

# The date columns were turned into strings during clean-up; bring them back
# to numbers (unparseable values become NaN).
for frame in (heatcap_match, ppm_list_match):
    frame[date_cols] = frame[date_cols].apply(
        lambda column: pd.to_numeric(column, errors="coerce")
    )

# Aggregate the units of each source, then re-apply the source label
# (presumably the label does not survive the aggregation — TODO confirm).
heatcap_match = set_column_name(
    aggregate_units(heatcap_match, dataset_name="EWL-DataUnits", config=config),
    name="EWL-DataUnits",
)
ppm_list_match = set_column_name(
    aggregate_units(ppm_list_match, dataset_name="PPMLIST", config=config),
    name="PPMLIST",
)

In [126]:
# Match the two sources against each other.
labels = ["EWL-DataUnits", "PPMLIST"]
dfs = [heatcap_match, ppm_list_match]
intersection = pm.matching.combine_multiple_datasets(dfs, labels=labels, config=config)

# All EWL-DataUnits plants that do not have a partner in PPMLIST yet.
ppm_name = intersection[("Name", "PPMLIST")]
missing = ppm_name.isna()
leftover = intersection.loc[missing].copy()

if len(leftover):
    # Distances must be measured in a projected CRS with metre units.
    # EPSG:3857 (Web Mercator) stretches distances by about 1/cos(latitude),
    # i.e. ~1.5x around 50°N, so a 1000-unit cutoff there is only ~650 m on
    # the ground. EPSG:3035 (ETRS89 / LAEA Europe) keeps the distortion small.
    # NOTE(review): assumes the plants are located in Europe — pick another
    # local projected CRS otherwise.
    metric_crs = "EPSG:3035"

    # a) left geometries = EWL-DataUnits coordinates of the unmatched plants
    g_left = gpd.GeoDataFrame(
        leftover,
        geometry=gpd.points_from_xy(
            leftover[("lon", "EWL-DataUnits")],
            leftover[("lat", "EWL-DataUnits")]
        ),
        crs="EPSG:4326"
    ).to_crs(metric_crs)

    # b) right geometries = all PPMLIST coordinates
    base = ppm_list_match.copy()
    g_right = gpd.GeoDataFrame(
        base,
        geometry=gpd.points_from_xy(base.lon, base.lat),
        crs="EPSG:4326"
    ).to_crs(metric_crs)[["Name", "lat", "lon", "geometry"]]

    # c) nearest-neighbour join, accepting partners at most 1 km away
    hits = gpd.sjoin_nearest(
        g_left, g_right,
        how="left",
        max_distance=1000,          # metres (1 km)
        distance_col="d_m"
    )
    hits = hits[hits["Name"].notna()].copy()  # keep only rows that found a partner

    # d) write the PPMLIST values found by the spatial match back into intersection
    for idx, row in hits.iterrows():
        intersection.loc[idx, ("Name", "PPMLIST")] = row["Name"]
        intersection.loc[idx, ("lat",  "PPMLIST")] = row["lat"]
        intersection.loc[idx, ("lon",  "PPMLIST")] = row["lon"]

# Run the reduction after the spatial join so the values filled in above are included.
# (`reduce_matched_dataframe` comes from the powerplantmatching accessor; presumably it
# collapses the per-source columns into one value per attribute — confirm in the PPM docs.)
combined = intersection.powerplant.reduce_matched_dataframe(config=config)
# NOTE(review): "plans" in the message below looks like a typo for "plants"; left unchanged.
print(f"number of final matched plans: {len(combined)}")  # number of finally matched power plants

number of final matched plans: 189


In [127]:
display(intersection)  # shows the full result with both sources side by side


EWL-DataUnits,Name,Name,Fueltype,Fueltype,Technology,Technology,Set,Set,DateIn,DateIn,...,Capacity,Capacity,Efficiency,Efficiency,lat,lat,lon,lon,CHP_MaxHeat,CHP_MaxHeat
Unnamed: 0_level_1,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,...,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST,EWL-DataUnits,PPMLIST
0,Abfallentsorgungszentrum Asdonkshofmva Kamp Lintf,Dampfturbine,MUNI_WASTE,Waste,WAST_ST_BKP_b1999,Steam Turbine,CHP,CHP,1997,1997.0,...,16.000,16.0,0.210000,0.341931,51.52,51.52,6.52,6.58,,
1,Ads Anlage Industriepark Hochst,Iph Ads Anlage,NAT_GAS,Natural Gas,GAS_CC_EXC_f2000,Steam Turbine,CHP,CHP,2003,2011.0,...,182.500,99.6,0.484603,0.398200,50.09,50.09,8.53,8.53,,
2,Ahkw Neunkirchenlinie,Ahkw Neunkirchen,MUNI_WASTE,Waste,WAST_ST_BKP_b1999,Steam Turbine,CHP,CHP,1999,1977.0,...,11.600,23.2,0.210000,0.330000,49.34,49.34,7.19,7.17,,
3,Altbach Deizisaualt Dt Solob,Altbach Deizisau,COAL,Hard Coal,COAL_ST_EXC_b1999,Steam Turbine,CHP,CHP,1997,1985.0,...,336.000,756.0,0.383000,,48.72,48.72,9.37,9.37,,
4,Amk Iserlohn,Mhkw Iserlohn,MUNI_WASTE,Waste,WAST_ST_BKP_b1989,Steam Turbine,CHP,CHP,1981,1981.0,...,12.610,10.7,0.200000,0.330000,51.39,51.40,7.72,7.70,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184,Wi Biebrichblock Wiesbaden,Wi Biebrich,NAT_GAS,Natural Gas,GAS_ST_EXC_f2000,CCGT,CHP,CHP,2006,2006.0,...,25.015,78.0,0.400000,0.391600,50.04,50.04,8.24,8.25,,
185,Wiesengrund Eisenach,Wiesengrund,NAT_GAS,Natural Gas,GAS_ST_BKP_b1999,Steam Turbine,CHP,CHP,1993,1993.0,...,22.100,9.5,0.270000,0.377300,50.99,50.98,10.30,10.28,,
186,Zielitz,Zielitz,NAT_GAS,Natural Gas,GAS_IC_CH1_b1999,Steam Turbine,CHP,PP,1996,1996.0,...,27.000,27.0,0.350000,0.380600,52.28,52.30,11.62,11.68,,
187,Zolling,Zolling,COAL,Hard Coal,COAL_ST_EXC_b1989,Steam Turbine,CHP,CHP,1986,1985.0,...,472.000,472.0,0.363000,,48.45,48.46,11.80,11.80,,
