### Create Power Generation Dataframe

In [12]:
import pandas as pd


# Raw ENTSO-E export for 2025. Judging by the preview below, it is in long
# format: one row per (time interval, area, production type).
RAW_GENERATION_CSV = (
    "new_data/raw_country/"
    "AGGREGATED_GENERATION_PER_TYPE_GENERATION_202412312300-202512312300.csv"
)

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the earlier run emitted a warning at this call, presumably a
# DtypeWarning caused by text placeholders mixed into "Generation (MW)" - confirm.
# The column is converted to numbers explicitly in the cleaning cell.
new_df = pd.read_csv(RAW_GENERATION_CSV, low_memory=False)
print(len(new_df))
new_df.head()

735840


  new_df = pd.read_csv(


Unnamed: 0,MTU (CET/CEST),Area,Production Type,Generation (MW)
0,01/01/2025 00:00:00 - 01/01/2025 00:15:00,Germany (DE),Biomass,3950.69
1,01/01/2025 00:15:00 - 01/01/2025 00:30:00,Germany (DE),Biomass,3822.69
2,01/01/2025 00:30:00 - 01/01/2025 00:45:00,Germany (DE),Biomass,3763.84
3,01/01/2025 00:45:00 - 01/01/2025 01:00:00,Germany (DE),Biomass,3736.3
4,01/01/2025 01:00:00 - 01/01/2025 01:15:00,Germany (DE),Biomass,3799.98


In [14]:
import pandas as pd

# 1. CLEAN TIME COLUMN
# ---------------------------------------------------------
# The raw column looks like "01/01/2025 00:00:00 - 01/01/2025 00:15:00"
# (it may also carry a "(CET)" / "(CEST)" tag).
# We keep only the interval start, e.g. "01/01/2025 00:00:00".
start_str = (
    new_df["MTU (CET/CEST)"]
    .str.split(" - ").str[0]
    .str.replace(r"\s*\(.*\)", "", regex=True)  # removes "(CET)" or "(CEST)"
    .str.strip()
)

# .assign() returns a new frame, so `new_df` is left untouched and this cell
# can be re-run without changing what the load cell displayed.
df = (
    new_df
    .assign(
        Time=pd.to_datetime(
            start_str,
            format="%d/%m/%Y %H:%M:%S",
            errors="raise",
        )
    )
    .set_index("Time")
    .sort_index()
)

# 2. CLEAN VALUES
# ---------------------------------------------------------
# ENTSO-E sometimes uses 'n/e' or '-' for zero/missing.
# Force 'Generation (MW)' to numbers; anything unparseable becomes 0.
df["Generation (MW)"] = pd.to_numeric(df["Generation (MW)"], errors="coerce").fillna(0)

# 3. PIVOT: ONE COLUMN PER PRODUCTION TYPE
# ---------------------------------------------------------
# The timestamps are naive local time (the CET/CEST tag was stripped above), so
# the hour repeated at the autumn clock change lands on the same timestamps
# twice. Values are power (MW), so repeats of the same (Time, Area, Production
# Type) are averaged - summing them would double that hour. Different areas,
# if the export contains more than one, are still summed.
print("Pivoting data to give each type its own column...")
per_area = df.groupby(
    ["Time", "Area", "Production Type"], dropna=False
)["Generation (MW)"].mean()
df_wide = (
    per_area
    .groupby(level=["Time", "Production Type"])
    .sum()
    .unstack("Production Type")
)

# 4. RESAMPLE TO HOURLY (Optional but Recommended)
# ---------------------------------------------------------
# The raw data is in 15-minute intervals.
# Averaging to 1-hour makes the file 4x smaller and easier to chart.
# NOTE(review): an hour with no source rows (e.g. the one skipped at the spring
# clock change) stays NaN in every column - confirm downstream code handles it.
df_hourly = df_wide.resample('60min').mean()

# 5. SAVE
# ---------------------------------------------------------
output_file = 'germany_2025_generation.csv'
df_hourly.to_csv(output_file)

# 6. REPORT
# ---------------------------------------------------------
print("-" * 40)
print(f"SUCCESS: Data saved to '{output_file}'")
print("-" * 40)
print(f"Rows:    {len(df_hourly)}")
print(f"Columns: {len(df_hourly.columns)}")
print("\nNew Columns Created:")
for col in df_hourly.columns:
    print(f" - {col}")

Pivoting data to give each type its own column...
----------------------------------------
SUCCESS: Data saved to 'germany_2025_generation.csv'
----------------------------------------
Rows:    8760
Columns: 21

New Columns Created:
 - Biomass
 - Energy storage
 - Fossil Brown coal/Lignite
 - Fossil Coal-derived gas
 - Fossil Gas
 - Fossil Hard coal
 - Fossil Oil
 - Fossil Oil shale
 - Fossil Peat
 - Geothermal
 - Hydro Pumped Storage
 - Hydro Run-of-river and pondage
 - Hydro Water Reservoir
 - Marine
 - Nuclear
 - Other
 - Other renewable
 - Solar
 - Waste
 - Wind Offshore
 - Wind Onshore


In [16]:
# Stitch the three yearly hourly files into one continuous frame.
yearly_paths = [
    "germany_2023_generation.csv",
    "germany_2024_generation.csv",
    "germany_2025_generation.csv",
]

# Each file is read with its "Time" column parsed into a datetime index.
df_2023, df_2024, df_2025 = [
    pd.read_csv(path, index_col="Time", parse_dates=True) for path in yearly_paths
]

# Stack the years and put the rows in chronological order.
combined_df = pd.concat([df_2023, df_2024, df_2025]).sort_index()

# Drop repeated timestamps (first occurrence wins); the row count is printed
# before and after so any removal is visible.
print(len(combined_df))
is_repeat = combined_df.index.duplicated(keep='first')
combined_df = combined_df.loc[~is_repeat]
print(len(combined_df))

combined_df.to_csv("germany_2325_generation.csv")

26304
26304


In [17]:
# List the column labels (one per production type) of the merged frame.
combined_df.columns

Index(['Biomass', 'Energy storage', 'Fossil Brown coal/Lignite',
       'Fossil Coal-derived gas', 'Fossil Gas', 'Fossil Hard coal',
       'Fossil Oil', 'Fossil Oil shale', 'Fossil Peat', 'Geothermal',
       'Hydro Pumped Storage', 'Hydro Run-of-river and pondage',
       'Hydro Water Reservoir', 'Marine', 'Nuclear', 'Other',
       'Other renewable', 'Solar', 'Waste', 'Wind Offshore', 'Wind Onshore'],
      dtype='str')

In [18]:
# Display the merged frame to eyeball the first and last timestamps;
# pandas truncates the middle rows/columns in the rendered output.
combined_df

Unnamed: 0_level_0,Biomass,Energy storage,Fossil Brown coal/Lignite,Fossil Coal-derived gas,Fossil Gas,Fossil Hard coal,Fossil Oil,Fossil Oil shale,Fossil Peat,Geothermal,...,Hydro Run-of-river and pondage,Hydro Water Reservoir,Marine,Nuclear,Other,Other renewable,Solar,Waste,Wind Offshore,Wind Onshore
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01 00:00:00,4014.0975,0.0,3859.6000,651.3750,1593.8225,2067.6225,306.4125,0.0,0.0,25.2750,...,1203.3775,72.0100,0.0,2459.1700,187.3025,91.3300,1.7925,735.2525,3059.0925,28947.1500
2023-01-01 01:00:00,3993.2700,0.0,3866.3650,629.2750,1436.9025,2051.8300,305.9050,0.0,0.0,25.2750,...,1200.2100,25.9800,0.0,2458.6025,187.2700,92.6150,1.6500,725.1000,3586.2600,29587.5575
2023-01-01 02:00:00,3967.2750,0.0,3860.1350,570.9500,1435.1400,2034.2625,305.7125,0.0,0.0,24.8975,...,1198.9250,23.3025,0.0,2459.6450,187.2525,92.4675,1.7975,718.6700,3842.2825,29514.8475
2023-01-01 03:00:00,3973.1550,0.0,3864.6100,579.3750,1432.6100,2037.0600,306.0000,0.0,0.0,24.7450,...,1196.7950,26.4825,0.0,2460.4750,187.2025,91.7625,1.7550,718.8425,3463.0525,27493.4675
2023-01-01 04:00:00,3996.4200,0.0,3840.8300,604.6000,1430.8500,2039.9775,306.0000,0.0,0.0,24.5950,...,1192.2750,51.4700,0.0,2460.8025,187.2775,91.9700,2.1275,721.3250,3462.1925,26938.7425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-31 19:00:00,4425.6400,0.0,7273.5125,583.5075,8317.2875,3840.8975,397.7275,0.0,0.0,21.7900,...,831.8225,82.3125,0.0,0.0000,226.0675,72.5000,0.0000,721.9950,5418.4900,26899.9525
2025-12-31 20:00:00,4339.9600,0.0,5903.7725,582.7450,8388.6850,4075.6650,397.6950,0.0,0.0,21.7900,...,820.9600,20.6100,0.0,0.0000,224.2550,72.5000,0.0000,726.1125,5015.0525,27956.6075
2025-12-31 21:00:00,4294.9375,0.0,5095.6025,599.3575,8312.4625,4392.1225,397.6975,0.0,0.0,21.7900,...,815.7100,68.3450,0.0,0.0000,224.1875,72.5000,0.0000,706.4775,4390.0575,29863.3450
2025-12-31 22:00:00,4235.0750,0.0,4983.3250,584.6700,8391.1250,4908.1325,397.7625,0.0,0.0,21.7900,...,810.8150,75.7425,0.0,0.0000,224.3150,72.5000,0.0000,708.2850,3363.9350,29854.2150


### Add Carbon Intensity Column

In [1]:
import pandas as pd


# Load the hourly 2023 generation table. No index_col / parse_dates is given,
# so "Time" stays an ordinary column and the frame gets a default integer index.
ci_source = "new_data/germany_2023_generation.csv"
ci_df = pd.read_csv(ci_source)

# Preview the first rows.
ci_df.head()

Unnamed: 0,Time,Biomass,Energy storage,Fossil Brown coal/Lignite,Fossil Coal-derived gas,Fossil Gas,Fossil Hard coal,Fossil Oil,Fossil Oil shale,Fossil Peat,...,Hydro Run-of-river and pondage,Hydro Water Reservoir,Marine,Nuclear,Other,Other renewable,Solar,Waste,Wind Offshore,Wind Onshore
0,2023-01-01 00:00:00,4023.925,0.0,3866.365,629.275,1436.9625,2051.83,305.905,0.0,0.0,...,1212.2125,35.0025,0.0,2458.6025,187.27,92.615,1.65,737.775,3586.26,29630.37
1,2023-01-01 01:00:00,3997.03,0.0,3860.135,570.95,1435.2975,2034.2625,305.7125,0.0,0.0,...,1210.9325,32.33,0.0,2459.645,187.2525,92.4675,1.7975,731.27,3842.2825,29560.1275
2,2023-01-01 02:00:00,4003.315,0.0,3864.61,579.375,1433.1875,2037.06,306.0,0.0,0.0,...,1208.765,35.525,0.0,2460.475,187.2025,91.7625,1.755,731.605,3463.0525,27538.555
3,2023-01-01 03:00:00,4026.8325,0.0,3840.83,604.6,1431.335,2039.9775,306.0,0.0,0.0,...,1204.1925,60.505,0.0,2460.8025,187.2775,91.97,2.1275,734.0125,3462.1925,26981.12
4,2023-01-01 04:00:00,4048.2225,0.0,3804.275,606.25,1426.5325,2036.6875,306.0,0.0,0.0,...,1203.83,44.1575,0.0,2465.0175,187.165,91.49,2.3275,723.5375,3339.85,27402.4025


In [2]:
# Display the frame (pandas shows the first and last rows) to check the time range covered.
ci_df

Unnamed: 0,Time,Biomass,Energy storage,Fossil Brown coal/Lignite,Fossil Coal-derived gas,Fossil Gas,Fossil Hard coal,Fossil Oil,Fossil Oil shale,Fossil Peat,...,Hydro Run-of-river and pondage,Hydro Water Reservoir,Marine,Nuclear,Other,Other renewable,Solar,Waste,Wind Offshore,Wind Onshore
0,2023-01-01 00:00:00,4023.9250,0.0,3866.3650,629.275,1436.9625,2051.8300,305.9050,0.0,0.0,...,1212.2125,35.0025,0.0,2458.6025,187.2700,92.6150,1.6500,737.7750,3586.2600,29630.3700
1,2023-01-01 01:00:00,3997.0300,0.0,3860.1350,570.950,1435.2975,2034.2625,305.7125,0.0,0.0,...,1210.9325,32.3300,0.0,2459.6450,187.2525,92.4675,1.7975,731.2700,3842.2825,29560.1275
2,2023-01-01 02:00:00,4003.3150,0.0,3864.6100,579.375,1433.1875,2037.0600,306.0000,0.0,0.0,...,1208.7650,35.5250,0.0,2460.4750,187.2025,91.7625,1.7550,731.6050,3463.0525,27538.5550
3,2023-01-01 03:00:00,4026.8325,0.0,3840.8300,604.600,1431.3350,2039.9775,306.0000,0.0,0.0,...,1204.1925,60.5050,0.0,2460.8025,187.2775,91.9700,2.1275,734.0125,3462.1925,26981.1200
4,2023-01-01 04:00:00,4048.2225,0.0,3804.2750,606.250,1426.5325,2036.6875,306.0000,0.0,0.0,...,1203.8300,44.1575,0.0,2465.0175,187.1650,91.4900,2.3275,723.5375,3339.8500,27402.4025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2023-12-31 19:00:00,4429.5000,0.0,3368.6475,453.900,2856.3550,1887.3625,393.7800,0.0,0.0,...,1545.4575,154.5375,0.0,0.0000,205.8975,109.5450,2.9850,779.3850,6305.2675,30244.4975
8756,2023-12-31 20:00:00,4328.1750,0.0,3373.9400,454.275,2808.6700,1935.8050,393.3475,0.0,0.0,...,1549.4150,272.2900,0.0,0.0000,205.9525,109.7125,2.9775,775.8625,6361.0025,29856.3600
8757,2023-12-31 21:00:00,4253.3950,0.0,3375.1150,422.075,2765.4400,1902.6275,391.8650,0.0,0.0,...,1546.0475,172.7700,0.0,0.0000,206.1925,109.6875,2.9500,775.4075,6285.7300,29924.8600
8758,2023-12-31 22:00:00,4182.2800,0.0,3380.7325,446.525,2821.9825,1813.3550,393.2475,0.0,0.0,...,1547.8150,122.0350,0.0,0.0000,206.0300,109.7875,2.9225,791.9975,5963.4175,30586.2775
