In [1]:
import pandas as pd
import os
import datetime as dt

# Marktstammdatenregisters

### Read data and combined

In [2]:
def read_and_combine_files(start, end, folder_path):
    # Liste zum Speichern der eingelesenen DataFrames
    dfs = []

    # Durchlaufe den Bereich der Nummern
    for i in range(start, end + 1, 5000):
        # Erstelle den Dateinamen basierend auf dem Nummernbereich
        file_name = f"Stromerzeuger_{i}_bis_{i + 4999}.csv" 

        # Erstelle den vollständigen Pfad zur CSV-Datei
        file_path = os.path.join(folder_path, file_name)

        # Lese das CSV in einen DataFrame ein und füge ihn zur Liste hinzu
        df = pd.read_csv(file_path, delimiter=';')
        dfs.append(df)

    # Lese die letzte Datei ein
    last_file_path = os.path.join(folder_path, 'Stromerzeuger_465001_bis_465169.csv')
    last_df = pd.read_csv(last_file_path, delimiter=';')

    # Füge die letzte Datei zum DataFrame hinzu
    dfs.append(last_df)

    # Kombiniere alle DataFrames nach dem Index
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    return combined_df

folder_path = r'C:\Users\mohdr\OneDrive\Desktop\BI specialist\Power BI\Portfolio Porject\Energyspeisung'

# Beispielaufruf der Funktion für den Bereich von 1 bis 189999
combined_df = read_and_combine_files(1, 464999, folder_path)

combined_df

Unnamed: 0,MaStR-Nr. der Einheit,Anzeige-Name der Einheit,Betriebs-Status,Inbetriebnahmedatum der Einheit,Registrierungsdatum der Einheit,Energieträger,Bruttoleistung der Einheit,Nettonennleistung der Einheit,Postleitzahl,Ort,Name des Anlagenbetreibers (nur Org.),\tMaStR-Nr. des Anlagenbetreibers,Letzte Aktualisierung
0,SEE945673330026,Haus,In Betrieb,2.2.2023,7.3.2024,Solare Strahlungsenergie,984,984,92358,Seubersdorf,(natürliche Person),ABR972499841405,7.3.2024
1,SEE954126237928,Amsel_Power_2023,In Betrieb,19.4.2023,7.3.2024,Solare Strahlungsenergie,78,6,85764,Oberschleißheim,(natürliche Person),ABR940716803345,7.3.2024
2,SEE989164765980,Garage,In Betrieb,18.12.2023,7.3.2024,Solare Strahlungsenergie,34,34,85659,Forstern,(natürliche Person),ABR947782345816,7.3.2024
3,SEE920454310947,Axöd,In Betrieb,31.10.2023,7.3.2024,Solare Strahlungsenergie,2772,27,84307,Eggenfelden,(natürliche Person),ABR942303839591,7.3.2024
4,SEE908981253579,Solaranlage,In Betrieb,30.6.2023,7.3.2024,Solare Strahlungsenergie,567,567,86167,Augsburg,(natürliche Person),ABR952078597961,7.3.2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...
465164,SEE948233553866,PV-Anlage,In Betrieb,30.9.2015,31.1.2019,Solare Strahlungsenergie,984,984,89353,Glött,(natürliche Person),ABR962314589896,9.1.2021
465165,SEE970488717847,Beim_Sepp_dahoam,In Betrieb,12.5.2017,31.1.2019,Solare Strahlungsenergie,99,92,82402,Seeshaupt,(natürliche Person),ABR954565102993,3.7.2019
465166,SEE962747377053,Hausdach,In Betrieb,20.3.2017,31.1.2019,Solare Strahlungsenergie,91,82,94327,Bogen,(natürliche Person),ABR950980245154,28.5.2019
465167,SEE935732931448,Moorbad,In Betrieb,13.12.2018,31.1.2019,Solare Strahlungsenergie,912,82,83093,Bad Endorf,(natürliche Person),ABR933410995416,7.6.2019



# Applied mask, grouped by Inbetriebnahmedatum and sum of values

In [3]:
mask_combined_df = (combined_df['Betriebs-Status'] == 'In Betrieb')  # Vergleichsoperator verwendet
df_combined_filtered = combined_df.loc[mask_combined_df, :]

# Display the filtered combined DataFrame

df_combined = pd.DataFrame({'date': pd.date_range(start='1/1/2015', end='12/31/2023', freq='1d')})


#df_combined = pd.DataFrame({'date': pd.date_range(start='1/1/2015', freq='1d', periods=365)})

df = pd.DataFrame(df_combined_filtered)  #um nur den gefilterten DataFrame zu verwenden

df_combined['Bruttoleistung'] = df['Bruttoleistung der Einheit'].str.replace(',', '.').astype(float)
df_combined['Nettonennleistung'] = df['Nettonennleistung der Einheit'].str.replace(',', '.').astype(float)

# Gruppiere nach Inbetriebnahmedatum und summiere die Werte
df_combined = df_combined.groupby('date').agg({'Bruttoleistung': 'sum','Nettonennleistung': 'sum'}).reset_index()

# Ergebnisse anzeigen
df_combined


Unnamed: 0,date,Bruttoleistung,Nettonennleistung
0,2015-01-01,9.84,9.84
1,2015-01-02,7.80,6.00
2,2015-01-03,3.40,3.40
3,2015-01-04,27.72,27.00
4,2015-01-05,5.67,5.67
...,...,...,...
3282,2023-12-27,8.20,8.20
3283,2023-12-28,8.74,8.74
3284,2023-12-29,3.00,3.00
3285,2023-12-30,11.50,10.00


# Bayern
### Tatsächliche und prognostizierte Solarenergieeinspeisung in der Regelzone von TenneT Deutschland - Bayern

In [4]:
# data source: https://netztransparenz.tennet.eu/de/strommarkt/transparenz/transparenz-deutschland/netzkennzahlen/tatsaechliche-und-prognostizierte-solarenergieeinspeisung/bayern/
# filtered for 2023-01-01 to 2023-12-31

df_en_raw = pd.read_table('solarEnergyFeedIn_BY_2015-01-01_2023-12-31.csv',sep=';',parse_dates=['Datum'],decimal=',')

df_en = pd.DataFrame({'date': pd.date_range(start='1/1/2015', end='12/31/2023', freq='1d')})

df_en.loc[:,'Prog_in_MW'] = pd.DataFrame(df_en_raw.groupby(['Datum'], as_index=False)['Prognostiziert in MW'].sum()).iloc[:,1]
df_en.loc[:,'Act_in_MW'] = pd.DataFrame(df_en_raw.groupby(['Datum'], as_index=False)['Tatsaechlich in MW'].sum()).iloc[:,1]

#df = df.merge(df_en,on='date')
df_en

Unnamed: 0,date,Prog_in_MW,Act_in_MW
0,2015-01-01,0.00,2807.7
1,2015-01-02,0.00,2813.0
2,2015-01-03,0.00,6713.3
3,2015-01-04,13422.30,4321.5
4,2015-01-05,18801.30,20532.6
...,...,...,...
3282,2023-12-27,49308.82,57761.5
3283,2023-12-28,36315.35,31177.6
3284,2023-12-29,36246.40,32275.7
3285,2023-12-30,47332.25,49406.9


In [5]:
df_en_com = pd.merge(df_combined,df_en, on = 'date')
df_en_com = pd.DataFrame(df_en_com)
df_en_com

Unnamed: 0,date,Bruttoleistung,Nettonennleistung,Prog_in_MW,Act_in_MW
0,2015-01-01,9.84,9.84,0.00,2807.7
1,2015-01-02,7.80,6.00,0.00,2813.0
2,2015-01-03,3.40,3.40,0.00,6713.3
3,2015-01-04,27.72,27.00,13422.30,4321.5
4,2015-01-05,5.67,5.67,18801.30,20532.6
...,...,...,...,...,...
3282,2023-12-27,8.20,8.20,49308.82,57761.5
3283,2023-12-28,8.74,8.74,36315.35,31177.6
3284,2023-12-29,3.00,3.00,36246.40,32275.7
3285,2023-12-30,11.50,10.00,47332.25,49406.9


# Stationsmessungen der Solarstrahlung

tageswerte_ST_03668 und tageswerte_ST_05792

In [9]:
df = pd.DataFrame({'date': pd.date_range(start='1/1/2015', end= '31/12/2023', freq='1d')})

#'07370' Started 2021, '05404' Ended 2014 and '00867 Started 2021', therefore these stations are excluded

stationsid = ['03668','05792', '05856', '05705', '02290']
for id in stationsid:
    stationsname = 'stationid_'+id+'.txt'
    df_stat = pd.read_csv(stationsname,sep=';',parse_dates=['MESS_DATUM'],usecols=['MESS_DATUM','FD_STRAHL','FG_STRAHL','SD_STRAHL'])
    mask = (df_stat.loc[:,'MESS_DATUM'] >= '2015-01-01') & (df_stat.loc[:,'MESS_DATUM'] <= '2023-12-31')
    df_stat.columns =['date',id+'_FD_STRAHL',id+'_FG_STRAHL',id+'_SD_STRAHL']
    df = df.merge(df_stat.loc[mask,:],on='date')

df

Unnamed: 0,date,03668_FD_STRAHL,03668_FG_STRAHL,03668_SD_STRAHL,05792_FD_STRAHL,05792_FG_STRAHL,05792_SD_STRAHL,05856_FD_STRAHL,05856_FG_STRAHL,05856_SD_STRAHL,05705_FD_STRAHL,05705_FG_STRAHL,05705_SD_STRAHL,02290_FD_STRAHL,02290_FG_STRAHL,02290_SD_STRAHL
0,2015-01-01,188.0,188.0,0.0,0.0,694.0,8.4,214.0,214.0,0.0,129.0,129.0,0.0,184.0,611.0,7.7
1,2015-01-02,76.0,76.0,0.0,0.0,383.0,2.8,251.0,259.0,0.1,62.0,62.0,0.0,320.0,414.0,2.6
2,2015-01-03,108.0,108.0,0.0,0.0,343.0,1.2,297.0,316.0,0.3,92.0,92.0,0.0,253.0,265.0,0.8
3,2015-01-04,232.0,233.0,0.0,0.0,252.0,0.0,206.0,207.0,0.0,195.0,196.0,0.0,252.0,274.0,1.0
4,2015-01-05,157.0,485.0,6.2,0.0,591.0,2.9,223.0,223.0,0.0,129.0,502.0,7.5,279.0,457.0,3.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,2023-12-27,265.0,349.0,2.2,164.0,637.0,7.7,138.0,534.0,6.7,246.0,281.0,0.8,188.0,606.0,6.9
3283,2023-12-28,115.0,115.0,0.0,335.0,449.0,4.0,278.0,465.0,5.0,122.0,124.0,0.1,283.0,350.0,2.9
3284,2023-12-29,183.0,190.0,0.3,407.0,565.0,5.9,227.0,261.0,1.3,190.0,205.0,0.5,306.0,335.0,2.0
3285,2023-12-30,270.0,311.0,1.9,135.0,682.0,8.0,247.0,261.0,0.8,231.0,248.0,1.1,148.0,597.0,7.6


# Final Combination DataFrame from 3 Tables

In [10]:
df_final = df.merge(df_en_com, on = 'date')
df_final

Unnamed: 0,date,03668_FD_STRAHL,03668_FG_STRAHL,03668_SD_STRAHL,05792_FD_STRAHL,05792_FG_STRAHL,05792_SD_STRAHL,05856_FD_STRAHL,05856_FG_STRAHL,05856_SD_STRAHL,05705_FD_STRAHL,05705_FG_STRAHL,05705_SD_STRAHL,02290_FD_STRAHL,02290_FG_STRAHL,02290_SD_STRAHL,Bruttoleistung,Nettonennleistung,Prog_in_MW,Act_in_MW
0,2015-01-01,188.0,188.0,0.0,0.0,694.0,8.4,214.0,214.0,0.0,129.0,129.0,0.0,184.0,611.0,7.7,9.84,9.84,0.00,2807.7
1,2015-01-02,76.0,76.0,0.0,0.0,383.0,2.8,251.0,259.0,0.1,62.0,62.0,0.0,320.0,414.0,2.6,7.80,6.00,0.00,2813.0
2,2015-01-03,108.0,108.0,0.0,0.0,343.0,1.2,297.0,316.0,0.3,92.0,92.0,0.0,253.0,265.0,0.8,3.40,3.40,0.00,6713.3
3,2015-01-04,232.0,233.0,0.0,0.0,252.0,0.0,206.0,207.0,0.0,195.0,196.0,0.0,252.0,274.0,1.0,27.72,27.00,13422.30,4321.5
4,2015-01-05,157.0,485.0,6.2,0.0,591.0,2.9,223.0,223.0,0.0,129.0,502.0,7.5,279.0,457.0,3.5,5.67,5.67,18801.30,20532.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,2023-12-27,265.0,349.0,2.2,164.0,637.0,7.7,138.0,534.0,6.7,246.0,281.0,0.8,188.0,606.0,6.9,8.20,8.20,49308.82,57761.5
3283,2023-12-28,115.0,115.0,0.0,335.0,449.0,4.0,278.0,465.0,5.0,122.0,124.0,0.1,283.0,350.0,2.9,8.74,8.74,36315.35,31177.6
3284,2023-12-29,183.0,190.0,0.3,407.0,565.0,5.9,227.0,261.0,1.3,190.0,205.0,0.5,306.0,335.0,2.0,3.00,3.00,36246.40,32275.7
3285,2023-12-30,270.0,311.0,1.9,135.0,682.0,8.0,247.0,261.0,0.8,231.0,248.0,1.1,148.0,597.0,7.6,11.50,10.00,47332.25,49406.9


In [11]:
df_final.to_csv('df_final_2015_2023.csv')