## Redispatch data of wind power plants in Langenhorn (Schleswig-Holstein)

In [5]:
# packages
import os
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
# mount your google drive
# (Google Colab only: the CSV files read further down are addressed via
#  paths below '/content/drive/My Drive/')
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**API connection**

Es gibt eine (halb-öffentliche) API. Die entsprechenden URLs haben dieses Format:
redispatch-run.azurewebsites.net/api/operations/get?networkoperator=shn&type=finished&orderDirection=desc&orderBy=start&chunkNr=1

Als Parameter lässt sich der "networkoperator" zu shn, ava, bag oder edi (für SH Netz, Avacon, Bayernwerk und Edis) ändern. Als orderBy lassen sich "start" und "operationId" auswählen. Im Ergebnis werden immer ca. 500 Einträge zurückgegeben. Über Variation des Parameters "chunkNr" kann man weitere Ergebnisse abfragen. Die API ist zwar ohne Schutz verfügbar, allerdings wird diese nur für die Veröffentlichungswebsites genutzt. Daher könnten jederzeit Parameter-Änderungen oder URL-Änderungen vorkommen, ohne dass diese angekündigt werden.

In [9]:
import requests

# base URL of the (semi-public, unannounced-changes-possible) redispatch API
base_url = "https://redispatch-run.azurewebsites.net/api/operations/get"

# query parameters
params = {
    "networkoperator": "shn",          # change as needed (ava (Avacon), bag (Bayernwerk), edi (Edis))
    "type": "finished",
    "orderDirection": "desc",
    "orderBy": "start",                # "start" or "operationId"
    "chunkNr": 1                       # page number; vary it to fetch further chunks
}

# GET request to the API; the timeout keeps the cell from hanging indefinitely
# if the service does not answer
response = requests.get(base_url, params=params, timeout=30)

# Fail loudly with the HTTP status. Merely printing it would leave
# df_operations undefined and the preview below would die with an
# unrelated NameError.
if response.status_code != 200:
    raise RuntimeError(f"Redispatch API request failed with HTTP status {response.status_code}")

data = response.json()
df = pd.DataFrame(data)
# each entry of the 'operations' column is one record; expand them into columns
df_operations = pd.DataFrame(df['operations'].tolist())

df_operations.head()

Unnamed: 0,id,operationId,srId,start,end,duration,location,locationBottleneck,controlStage,reason,Reason,assetKey,requestor,liabilityOfCompensation,DSOKey,inducingNO,KalKID
0,34667062,SHN202410001749,,2024-02-27 08:19:22,2024-02-27 11:53:16,214,UW Wöhrden,STUW46680-T123,60,Netzengpass,Netzengpass,E2079301S120B00000000039719200003,Schleswig-Holstein Netz,EEG,27461,Schleswig-Holstein Netz,SHN202410001749_K00017
1,34667061,SHN202410001749,,2024-02-27 08:19:22,2024-02-27 11:53:16,214,UW Wöhrden,STUW46680-T123,60,Netzengpass,Netzengpass,E2079301S120A00000000039719200002,Schleswig-Holstein Netz,EEG,27461,Schleswig-Holstein Netz,SHN202410001749_K00017
2,34667060,SHN202410001749,,2024-02-27 08:19:22,2024-02-27 11:53:16,214,UW Wöhrden,STUW46680-T123,60,Netzengpass,Netzengpass,E2079301S120F00000000039719200005,Schleswig-Holstein Netz,EEG,27461,Schleswig-Holstein Netz,SHN202410001749_K00017
3,34667059,SHN202410001749,,2024-02-27 08:19:22,2024-02-27 11:53:16,214,UW Wöhrden,STUW46680-T123,60,Netzengpass,Netzengpass,E2079301S120E00000000039719200006,Schleswig-Holstein Netz,EEG,27461,Schleswig-Holstein Netz,SHN202410001749_K00017
4,34667058,SHN202410001749,,2024-02-27 08:19:22,2024-02-27 11:53:16,214,UW Wöhrden,STUW46680-T123,60,Netzengpass,Netzengpass,E2079301S120000000000039719200001,Schleswig-Holstein Netz,EEG,27461,Schleswig-Holstein Netz,SHN202410001749_K00017


**Downloaded redispatch data of the location Langenhorn**

Website Redispatch data SH-Netz (DSO): https://www.sh-netz.com/de/energie-einspeisen/redispatch-2-0/einspeisemanagement/veroeffentlichungen/abgeschlossene-massnahmen.html

In [None]:
# dayfirst=True makes ambiguous dates such as 01.02.2024 parse as 1 February,
# in line with the pd.to_datetime calls below and the other CSV loads of this
# notebook. Without it read_csv may parse such values month-first, and the
# later pd.to_datetime would no longer be able to correct them.
df_redispatch = pd.read_csv("/content/drive/My Drive/ms_wind_curtailment_prediction/Redispatch_Langenhorn.csv",
                            sep = ';', parse_dates=['Start', 'Ende'], dayfirst=True)

# columns used in this analysis
# (not used: 'Ort Engpass', 'Einsatz-ID', 'Gebiet', 'Anforderer', 'Netzbetreiber',
#  'Anlagen-ID', 'Entschädigungspflicht')
columns_to_keep = ['Start', 'Ende', 'Dauer (Min)',
                   'Stufe (%)', 'Ursache', 'Anlagenschlüssel']

# German -> English column names
# (unused candidates: 'Einsatz-ID': 'mission_ID', 'Gebiet': 'location',
#  'Ort Engpass': 'congestion_location', 'Anforderer': 'requester',
#  'Netzbetreiber': 'grid_operator', 'Anlagen-ID': 'plant_ID',
#  'Entschädigungspflicht': 'compensation_obligation')
column_name_mapping = {
    'Start': 'start_redispatch',
    'Ende': 'end_redispatch',
    'Dauer (Min)': 'duration (min)',
    'Stufe (%)': 'level',
    'Ursache': 'cause',
    'Anlagenschlüssel': 'plant_key_eeg',
}
df_redispatch = df_redispatch[columns_to_keep].rename(columns=column_name_mapping)

# German -> English cause labels; labels without a translation are kept as they are
translation_dict = {
    'Netzengpass': 'grid congestion',
    'Funktionsnachweis': 'functional proof',
    'Kundenfunktionstest': 'customer function test',
    'Test': 'test',
    'Direktvermarkter': 'direct marketer',
    'Sonstige': 'other',
    'Vorgelagerter Netzbetreiber': 'upstream grid operator'
    }

df_redispatch['cause'] = df_redispatch['cause'].map(translation_dict).fillna(df_redispatch['cause'])

# change data types of columns (values that cannot be parsed become NaT)
df_redispatch['start_redispatch'] = pd.to_datetime(df_redispatch['start_redispatch'], errors='coerce', format='mixed', dayfirst=True)
df_redispatch['end_redispatch'] = pd.to_datetime(df_redispatch['end_redispatch'], errors='coerce', format='mixed', dayfirst=True)

# only select grid congestion cause
df_redispatch = df_redispatch[df_redispatch['cause'] == 'grid congestion']

**EEG-keys and plant type data of Langenhorn (not only wind)**

Website:
http://www.energymap.info/energieregionen/DE/105/119/477/19932.html

In [None]:
df_eeg = pd.read_csv(
    "/content/drive/My Drive/ms_wind_curtailment_prediction/EEG_Langenhorn.csv",
    sep=';',
    dayfirst=True,
)
# commissioning dates are written as DD.MM.YYYY; anything else becomes NaT
df_eeg['Inbetriebnahme'] = pd.to_datetime(df_eeg['Inbetriebnahme'], format='%d.%m.%Y', errors='coerce')

# columns used in this analysis
# (not used: 'Nennleistung(kWp_el)', 'kWh(2013)', 'kWh(average)', 'kWh/kW')
columns_to_keep = [
    'Anlagenschluessel', 'Anlagentyp', 'Inbetriebnahme',
    'DSO', 'TSO',
    'GPS-Lat', 'GPS-Lon',
]

# German -> English column names
column_name_mapping = {
    'Inbetriebnahme': 'commissioning_date',
    'Anlagenschluessel': 'plant_key_eeg',
    'Anlagentyp': 'plant_type',
    'DSO': 'dso',
    'TSO': 'tso',
    'GPS-Lat': 'lat',
    'GPS-Lon': 'long',
}

df_eeg = df_eeg[columns_to_keep].rename(columns=column_name_mapping)

# change data types of columns: the coordinates use a decimal comma,
# so swap it for a dot first and convert to float afterwards
df_eeg['lat'], df_eeg['long'] = (
    df_eeg['lat'].str.replace(',', '.'),
    df_eeg['long'].str.replace(',', '.'),
)
df_eeg = df_eeg.astype({'lat': float, 'long': float})

# German -> English plant types; untranslated types are kept as they are
translation_dict = {
    'Solarstrom': 'solar',
    'Biomasse': 'biomass',
    'Windkraft': 'wind',
}

plant_type_english = df_eeg['plant_type'].map(translation_dict)
df_eeg['plant_type'] = plant_type_english.fillna(df_eeg['plant_type'])

# drop every plant that has a missing value in any of the kept columns
df_eeg = df_eeg.dropna()

**Filter wind and solar power plants in df_redispatch by mapping EEG-keys from df_eeg**

In [None]:
# keep only the redispatch events whose EEG key belongs to a wind plant
df_redispatch_wind = df_redispatch.merge(
    df_eeg.loc[df_eeg['plant_type'] == 'wind'],
    how='inner',
    on='plant_key_eeg',
)
df_redispatch_wind['plant_key_eeg'].unique() # 12 unique wind plants

array(['E20793012S12X00000000002414080001',
       'E20793012S12Z00000000002414080001',
       'E20793012S12000000000002414080001',
       'E20793012S12Y00000000002414080001',
       'E20793012S12V00000000002414080001',
       'E20793012S12W00000000002414080001',
       'E2079301EA01000000000070577400001',
       'E2079301EA01000000000070577400002',
       'E2079301EA01000000000070577400003',
       'E2079301EA01000000000070577400004',
       'E2079301EA01000000000070577400005',
       'E2079301EA01000000000070577400006'], dtype=object)

In [None]:
# keep only the redispatch events whose EEG key belongs to a solar plant
df_redispatch_solar = df_redispatch.merge(
    df_eeg.loc[df_eeg['plant_type'] == 'solar'],
    how='inner',
    on='plant_key_eeg',
)
df_redispatch_solar['plant_key_eeg'].unique() # 22 unique solar plants

array(['E20793012000000000000021348820001',
       'E20793014000000000000021354070001',
       'E20793012000000000000021309760001',
       'E20793012000000000000021246340001',
       'E20793012000000000000021466730001',
       'E20793012000000000000021402130001',
       'E20793012000000000000021470410001',
       'E20793012Z00000000000021470410002',
       'E20793012000000000000021345390002',
       'E2079301SZP0000000000040435100001',
       'E2079301S120000000000034798600001',
       'E2079301IZP0000000000039181100001',
       'E2079301IZP3000000000040435100001',
       'E20793012000000000000021046260001',
       'E20793012000000000000017784460001',
       'E20793012000000000000017784460002',
       'E20793012000000000000021272340001',
       'E2079302S120000000000027677700001',
       'E20793012000000000000021258820001',
       'E2079301S120000000000032103900002',
       'E20793012000000000000021345390001',
       'E20793012000000000000001345150001'], dtype=object)

**Look up whether the plants are registered in the Marktstammdatenregister**

Website:
https://www.marktstammdatenregister.de/MaStR/Einheit/Einheiten/OeffentlicheEinheitenuebersicht

In [None]:
# Marktstammdatenregister export for Langenhorn
df_register = pd.read_csv(
    "/content/drive/My Drive/ms_wind_curtailment_prediction/Marktstammdatenregister_Langenhorn.csv",
    sep=';',
    parse_dates=['Inbetriebnahmedatum der Einheit'],
    dayfirst=True,
)

# relevant features
# (not used: 'Nettonennleistung der Einheit', '\tMaStR-Nr. des Anlagenbetreibers')
columns_to_keep = [
    'Anzeige-Name der Einheit',
    'MaStR-Nr. der Einheit',
    'Inbetriebnahmedatum der Einheit',
    'Energieträger',
    'Bruttoleistung der Einheit',
    'Name des Anlagenbetreibers (nur Org.)',
]

# German -> English column names
# (unused candidates: 'Nettonennleistung der Einheit': 'net_capacity',
#  '\tMaStR-Nr. des Anlagenbetreibers': 'operator_key')
column_name_mapping = {
    'Anzeige-Name der Einheit': 'unit_name',
    'MaStR-Nr. der Einheit': 'plant_key_mastr',
    'Inbetriebnahmedatum der Einheit': 'commissioning_date',
    'Energieträger': 'plant_type',
    'Bruttoleistung der Einheit': 'gross_capacity',
    'Name des Anlagenbetreibers (nur Org.)': 'operator_name',
}

# select + rename, then drop the units whose operator is listed as
# '(natürliche Person)' (i.e. a natural person instead of an organisation)
df_register = (
    df_register[columns_to_keep]
    .rename(columns=column_name_mapping)
    .loc[lambda frame: frame['operator_name'] != '(natürliche Person)']
)


**Wind**

In [None]:
# Merge the DataFrames on 'commissioning_date'
# (the EEG data and the register share no plant key, so the commissioning date
#  serves as the join column; 'plant_type_y' is the register's plant type)
df_redispatch_wind_registered = pd.merge(df_redispatch_wind, df_register[df_register['plant_type'] == 'Wind'],
                                         on='commissioning_date', how='inner').drop('plant_type_y', axis = 1)

# manually looked up latitude and longitude values for each plant_key_mastr
coordinates = {
    'SEE987798529121': {'lat': '54.6727', 'long': '8.87871'},
    'SEE929976813660': {'lat': '54.67238', 'long': '8.86128'},
    'SEE955793159025': {'lat': '54.674777', 'long': '8.885012'},
    'SEE975321353732': {'lat': '54.67212', 'long': '8.86985'},
    'SEE964462351561': {'lat': '54.6594', 'long': '8.85654'},
    'SEE989327012883': {'lat': '54.667118', 'long': '8.862491'},
    'SEE944503500066': {'lat': '54.664171', 'long': '8.86336'},
    'SEE967713858420': {'lat': '54.666857', 'long': '8.869743'},
    'SEE978968638712': {'lat': '54.667329', 'long': '8.87676'},
    'SEE968633677399': {'lat': '54.664481', 'long': '8.877575'},
    'SEE923447900071': {'lat': '54.667751', 'long': '8.883703'}
}

# every merged plant needs a manual coordinate; name the offenders instead of
# failing on the first one somewhere inside a loop
missing_keys = [key for key in df_redispatch_wind_registered['plant_key_mastr'].unique()
                if key not in coordinates]
if missing_keys:
    raise KeyError(f"no manual coordinates for plant_key_mastr: {missing_keys}")

# Replace the EEG coordinates by the manual ones, one whole column at a time.
# The values are stored as floats, matching the dtype the columns already have
# from df_eeg; writing the strings cell by cell into a float column relies on
# implicit upcasting, which pandas has deprecated.
for axis_name in ('lat', 'long'):
    df_redispatch_wind_registered[axis_name] = df_redispatch_wind_registered['plant_key_mastr'].map(
        lambda key, axis_name=axis_name: float(coordinates[key][axis_name])
    )

# gross capacity in kW
unique_wind_plants = df_redispatch_wind_registered[['plant_key_eeg', 'plant_key_mastr', 'lat', 'long', 'operator_name', 'gross_capacity']].drop_duplicates().reset_index(drop = True)

In [None]:
# store csv
# unique_wind_plants.to_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/wind_plants_Langenhorn.csv',sep = ';', index=False)

**Solar**

In [None]:
# Join the solar redispatch events with the register's solar units via the
# shared 'commissioning_date'; 'plant_type_y' (the register's plant type) is dropped
df_redispatch_solar_registered = (
    df_redispatch_solar
    .merge(
        df_register.loc[df_register['plant_type'] == 'Solare Strahlungsenergie'],
        how='inner',
        on='commissioning_date',
    )
    .drop(columns='plant_type_y')
)

# one row per distinct plant; gross capacity in kWp
plant_columns = ['plant_key_eeg', 'plant_key_mastr', 'lat', 'long', 'operator_name', 'gross_capacity']
unique_solar_plants = (
    df_redispatch_solar_registered[plant_columns]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# store csv
# unique_solar_plants.to_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/solar_plants_Langenhorn.csv',sep = ';', index=False)

**Create a redispatch time-series df (15-minute grid by default) of the registered wind and solar plants**

In [None]:
def minbymindf(start, end, redispatch_df, freq='15min'):
  """Build a regular time grid that flags when a redispatch measure was active.

  Parameters
  ----------
  start, end : datetime-like
      First and last timestamp of the grid (both included).
  redispatch_df : pandas.DataFrame
      One row per redispatch measure. Must provide 'start_redispatch',
      'end_redispatch' and the columns 'level', 'plant_key_eeg',
      'plant_key_mastr', 'lat', 'long', 'operator_name', 'dso', 'tso'.
  freq : str, default '15min'
      Step width of the grid, passed to ``pd.date_range``.

  Returns
  -------
  pandas.DataFrame
      Indexed by the grid timestamps. 'redispatch' is 1 for every timestamp
      lying inside a measure (borders included) and 0 otherwise. The other
      columns carry the values of that measure, or 0 where no measure is
      active.

  Notes
  -----
  * Only measures lying completely inside [start, end] are used.
  * Where several measures cover the same timestamp, the row that comes
    later in ``redispatch_df`` overwrites the earlier one.
  * A measure that covers no grid timestamp leaves no trace in the result.
  """
  columns_to_copy = ['level', 'plant_key_eeg', 'plant_key_mastr',
                     'lat', 'long', 'operator_name', 'dso', 'tso']

  time_index = pd.date_range(start=start, end=end, freq=freq)

  # get selected subset of redispatch_df
  inside_window = (redispatch_df['start_redispatch'] >= start) & (redispatch_df['end_redispatch'] <= end)
  subset_df = redispatch_df[inside_window]

  # initiate df: everything starts out as 0 ("no redispatch"). The copied
  # columns are created with object dtype on purpose, as they later hold
  # strings and numbers side by side with the 0 placeholder.
  min_redispatch = pd.DataFrame(index=time_index)
  min_redispatch['redispatch'] = 0
  for column in columns_to_copy:
    min_redispatch[column] = pd.Series(0, index=time_index, dtype=object)

  # iterate over the subset_df and stamp every measure onto the grid
  for _, measure in subset_df.iterrows():
    active = (time_index >= measure['start_redispatch']) & (time_index <= measure['end_redispatch'])
    min_redispatch.loc[active, 'redispatch'] = 1
    for column in columns_to_copy:
      min_redispatch.loc[active, column] = measure[column]

  return min_redispatch

In [None]:
# analysis window: 2020-01-01 00:00 up to 2024-01-01 00:00
start, end = datetime(2020, 1, 1), datetime(2024, 1, 1)

**Wind**

In [None]:
# drop exact duplicate rows first, renumber the index, then remove the
# columns that are not needed on the time grid
columns_to_remove = ['duration (min)', 'commissioning_date']
df_redispatch_wind_registered = (
    df_redispatch_wind_registered
    .drop_duplicates()
    .reset_index(drop=True)
    .drop(columns=columns_to_remove)
)

wind_redispatch = minbymindf(start, end, df_redispatch_wind_registered)

In [None]:
# preview the first rows of the wind redispatch grid
wind_redispatch.head()

Unnamed: 0,redispatch,level,plant_key_eeg,plant_key_mastr,lat,long,operator_name,dso,tso
2020-01-01 00:00:00,0,0,0,0,0,0,0,0,0
2020-01-01 00:15:00,0,0,0,0,0,0,0,0,0
2020-01-01 00:30:00,0,0,0,0,0,0,0,0,0
2020-01-01 00:45:00,0,0,0,0,0,0,0,0,0
2020-01-01 01:00:00,0,0,0,0,0,0,0,0,0


In [None]:
# store csv (semicolon separated, the time index is written as column 'timestamp')
wind_redispatch.to_csv(
    '/content/drive/My Drive/ms_wind_curtailment_prediction/wind_redispatch_2020_24.csv',
    sep=';',
    index_label='timestamp',
)

**Solar**

In [None]:
# drop exact duplicate rows first, renumber the index, then remove the
# columns that are not needed on the time grid
columns_to_remove = ['duration (min)', 'commissioning_date']
df_redispatch_solar_registered = (
    df_redispatch_solar_registered
    .drop_duplicates()
    .reset_index(drop=True)
    .drop(columns=columns_to_remove)
)

solar_redispatch = minbymindf(start, end, df_redispatch_solar_registered)

In [None]:
# solar_redispatch[solar_redispatch['redispatch'] == 1]

# store csv (semicolon separated, the time index is written as column 'timestamp')
solar_redispatch.to_csv(
    '/content/drive/My Drive/ms_wind_curtailment_prediction/solar_redispatch_2020_24.csv',
    sep=';',
    index_label='timestamp',
)