### Create Power Generation Dataframe

In [50]:
import pandas as pd

name = "den_2025"
path = "full_datasets/energy_generation/den"

# 1. LOAD
# ---------------------------------------------------------
# Raw long-format export: one row per (time interval, production type).
new_df = pd.read_csv(
    f"{path}/{name}.csv")
print(len(new_df))
print(new_df.head())

# Work on a copy so the cleaning steps below never mutate the raw frame
# `new_df` (a plain `df = new_df` would alias it and add columns in place).
df = new_df.copy()

# 2. CLEAN TIME COLUMN
# ---------------------------------------------------------
# Raw values are interval strings "<start> - <end>", e.g.
#   "01/01/2025 00:00:00 - 01/01/2025 01:00:00"
# Only the interval start is kept, e.g. "01/01/2025 00:00:00".
start_str = (
    df["MTU (CET/CEST)"]
    .str.split(" - ").str[0]
    .str.replace(r"\s*\(.*\)", "", regex=True)  # removes "(CET)" or "(CEST)"
    .str.strip()
)

# errors="raise" makes the cell fail loudly if any row does not match the
# expected day/month/year layout instead of silently producing NaT.
df["Time"] = pd.to_datetime(
    start_str,
    format="%d/%m/%Y %H:%M:%S",
    errors="raise"
)

df = df.set_index("Time").sort_index()

# 3. CLEAN VALUES
# ---------------------------------------------------------
# Non-numeric entries (the source sometimes uses markers such as 'n/e' or '-')
# are coerced to NaN and then replaced by 0.
df['Generation (MW)'] = pd.to_numeric(df['Generation (MW)'], errors='coerce').fillna(0)

# 4. PIVOT LONG -> WIDE
# ---------------------------------------------------------
# Each distinct 'Production Type' value becomes its own column.
# aggfunc='sum' adds together rows that share the same (Time, Production Type).
# NOTE(review): timestamps are parsed as naive local time from a CET/CEST
# column, so a repeated clock hour at the autumn DST change would show up as a
# duplicate and be summed here - confirm this is the intended handling.
print("Pivoting data to give each type its own column...")
df_wide = df.pivot_table(
    index='Time',
    columns='Production Type',
    values='Generation (MW)',
    aggfunc='sum'
)

# 5. RESAMPLE TO HOURLY
# ---------------------------------------------------------
# Average every column within 60-minute bins. Sub-hourly input (e.g. 15-minute
# data) is reduced to one row per hour; already-hourly input keeps one value
# per bin. Hours with no source rows come out as NaN.
df_hourly = df_wide.resample('60min').mean()

# 6. SAVE
# ---------------------------------------------------------
# The index ("Time") is written as the first CSV column.
output_file = f'{path}/{name}_generation.csv'
df_hourly.to_csv(output_file)

print("-" * 40)
print(f"SUCCESS: Data saved to '{output_file}'")
print("-" * 40)
print(f"Rows:    {len(df_hourly)}")
print(f"Columns: {len(df_hourly.columns)}")
print("\nNew Columns Created:")
for col in df_hourly.columns:
    print(f" - {col}")

183960
                              MTU (CET/CEST)          Area Production Type  \
0  01/01/2025 00:00:00 - 01/01/2025 01:00:00  Denmark (DK)         Biomass   
1  01/01/2025 01:00:00 - 01/01/2025 02:00:00  Denmark (DK)         Biomass   
2  01/01/2025 02:00:00 - 01/01/2025 03:00:00  Denmark (DK)         Biomass   
3  01/01/2025 03:00:00 - 01/01/2025 04:00:00  Denmark (DK)         Biomass   
4  01/01/2025 04:00:00 - 01/01/2025 05:00:00  Denmark (DK)         Biomass   

  Generation (MW)  
0          144.72  
1          150.87  
2          164.49  
3          157.05  
4          163.10  
Pivoting data to give each type its own column...
----------------------------------------
SUCCESS: Data saved to 'full_datasets/energy_generation/den/den_2025_generation.csv'
----------------------------------------
Rows:    8760
Columns: 21

New Columns Created:
 - Biomass
 - Energy storage
 - Fossil Brown coal/Lignite
 - Fossil Coal-derived gas
 - Fossil Gas
 - Fossil Hard coal
 - Fossil Oil
 - Fos

### Merge multiple energy generation CSVs

In [51]:

# Read the four yearly generation files (den_2022 ... den_2025), using the
# parsed "Time" column as the index of each frame.
df_list = [
    pd.read_csv(
        f"{path}/den_202{year_offset+2}_generation.csv",
        index_col="Time",
        parse_dates=True,
    )
    for year_offset in range(4)
]

# Stack the yearly frames and put the rows in chronological order.
combined_df = pd.concat(df_list).sort_index()

# Report the row count before and after dropping repeated timestamps;
# for each repeated index value only the first row is kept.
print(len(combined_df))
combined_df = combined_df.loc[~combined_df.index.duplicated(keep='first')]
print(len(combined_df))

combined_df.to_csv(f"{path}/den_2225_generation.csv")

35064
35064


In [32]:
# Show the column labels of `combined_df`.
combined_df.columns

Index(['Biomass', 'Energy storage', 'Fossil Brown coal/Lignite',
       'Fossil Coal-derived gas', 'Fossil Gas', 'Fossil Hard coal',
       'Fossil Oil', 'Fossil Oil shale', 'Fossil Peat', 'Geothermal',
       'Hydro Pumped Storage', 'Hydro Run-of-river and pondage',
       'Hydro Water Reservoir', 'Marine', 'Nuclear', 'Other',
       'Other renewable', 'Solar', 'Waste', 'Wind Offshore', 'Wind Onshore'],
      dtype='str')

In [55]:
# Read the four yearly "_ci" files (den_2022 ... den_2025), using the parsed
# "datetime" column as the index of each frame.
df_list = [
    pd.read_csv(
        f"full_datasets/energy_imports_exports/den/den_202{year_offset+2}_ci.csv",
        index_col="datetime",
        parse_dates=True,
    )
    for year_offset in range(4)
]

# Stack the yearly frames and put the rows in chronological order.
combined_df = pd.concat(df_list).sort_index()

# Report the row count before and after dropping repeated timestamps;
# for each repeated index value only the first row is kept.
print(len(combined_df))
combined_df = combined_df.loc[~combined_df.index.duplicated(keep='first')]
print(len(combined_df))

# NOTE(review): the output name says "2125" although the files loaded above
# are 2022-2025 - confirm the intended file name before renaming anything.
combined_df.to_csv("full_datasets/energy_imports_exports/den/den_2125_carbon.csv")

35064
35064


### Merge energy generation/carbon data with weather data

In [58]:
# Load the generation/carbon table (A) and the weather table (B).
A = pd.read_csv("full_datasets/den_2225_generation_carbon_wTarget.csv")
B = pd.read_csv("full_datasets/weather_21_25/weather_denmark.csv")

# Parse both join keys so they are compared as timestamps rather than strings.
A["Time"] = pd.to_datetime(A["Time"])
B["time"] = pd.to_datetime(B["time"])

# Left join: every row of A is kept; rows of A without a matching weather
# timestamp get NaN in the columns coming from B.
out = A.merge(
    B,
    how="left",
    left_on="Time",
    right_on="time"
)

# Drop B's join key; it duplicates A's "Time" for every matched row.
out = out.drop(columns=["time"])

# Fill missing values coming from B with zeros.
# NOTE(review): zero can also be a genuine reading for weather variables, so
# unmatched hours become indistinguishable from real zeros - confirm intended.
b_cols = B.columns.difference(["time"])
out[b_cols] = out[b_cols].fillna(0)

# index=False: `out` only carries the default integer row index, which would
# otherwise be written as an extra unnamed column and come back as
# "Unnamed: 0" the next time the file is read.
out.to_csv("full_datasets/den_2225_generation_carbon_wTarget_wWeather.csv", index=False)

In [42]:
# List every column name of the merged frame `out`.
list(out.columns)

['Time',
 'Unnamed: 0',
 'Biomass',
 'Energy storage',
 'Fossil Brown coal/Lignite',
 'Fossil Coal-derived gas',
 'Fossil Gas',
 'Fossil Hard coal',
 'Fossil Oil',
 'Fossil Oil shale',
 'Fossil Peat',
 'Geothermal',
 'Hydro Pumped Storage',
 'Hydro Run-of-river and pondage',
 'Hydro Water Reservoir',
 'Marine',
 'Nuclear',
 'Other',
 'Other renewable',
 'Solar',
 'Waste',
 'Wind Offshore',
 'Wind Onshore',
 'total_energy_import',
 'total_carbon_import',
 'total_energy_export',
 'total_carbon_export',
 'total_power_mw',
 'emissions_weighted',
 'carbon_intensity',
 'wx_mean__temperature_2m (°C)',
 'wx_mean__precipitation (mm)',
 'wx_mean__cloud_cover (%)',
 'wx_mean__wind_speed_100m (km/h)',
 'wx_mean__wind_direction_100m (°)',
 'wx_mean__soil_moisture_0_to_7cm (m³/m³)',
 'wx_mean__soil_temperature_0_to_7cm (°C)',
 'wx_mean__relative_humidity_2m (%)',
 'wx_mean__shortwave_radiation (W/m²)',
 'wx_mean__shortwave_radiation_instant (W/m²)',
 'wx_std__temperature_2m (°C)',
 'wx_std__precipit

### Add Carbon Intensity Column

In [3]:
import pandas as pd


# Load the CSV into `ci_df` and preview its first rows.
ci_df = pd.read_csv("new_data/germany_2325_ci.csv")

ci_df.head()

Unnamed: 0,Time,Biomass,Energy storage,Fossil Brown coal/Lignite,Fossil Coal-derived gas,Fossil Gas,Fossil Hard coal,Fossil Oil,Fossil Oil shale,Fossil Peat,...,Nuclear,Other,Other renewable,Solar,Waste,Wind Offshore,Wind Onshore,total_power_mw,emissions_weighted,carbon_intensity
0,2023-01-01 00:00:00,4014.0975,0.0,3859.6,651.375,1593.8225,2067.6225,306.4125,0.0,0.0,...,2459.17,187.3025,91.33,1.7925,735.2525,3059.0925,28947.15,49274.6825,8804439.0,178.68078
1,2023-01-01 01:00:00,3993.27,0.0,3866.365,629.275,1436.9025,2051.83,305.905,0.0,0.0,...,2458.6025,187.27,92.615,1.65,725.1,3586.26,29587.5575,50174.0675,8704868.0,173.493363
2,2023-01-01 02:00:00,3967.275,0.0,3860.135,570.95,1435.14,2034.2625,305.7125,0.0,0.0,...,2459.645,187.2525,92.4675,1.7975,718.67,3842.2825,29514.8475,50237.5625,8618429.0,171.553495
3,2023-01-01 03:00:00,3973.155,0.0,3864.61,579.375,1432.61,2037.06,306.0,0.0,0.0,...,2460.475,187.2025,91.7625,1.755,718.8425,3463.0525,27493.4675,47857.39,8604584.0,179.79634
4,2023-01-01 04:00:00,3996.42,0.0,3840.83,604.6,1430.85,2039.9775,306.0,0.0,0.0,...,2460.8025,187.2775,91.97,2.1275,721.325,3462.1925,26938.7425,47351.455,8603948.0,181.703985


In [4]:
# Show the column labels of `ci_df`.
ci_df.columns

Index(['Time', 'Biomass', 'Energy storage', 'Fossil Brown coal/Lignite',
       'Fossil Coal-derived gas', 'Fossil Gas', 'Fossil Hard coal',
       'Fossil Oil', 'Fossil Oil shale', 'Fossil Peat', 'Geothermal',
       'Hydro Pumped Storage', 'Hydro Run-of-river and pondage',
       'Hydro Water Reservoir', 'Marine', 'Nuclear', 'Other',
       'Other renewable', 'Solar', 'Waste', 'Wind Offshore', 'Wind Onshore',
       'total_power_mw', 'emissions_weighted', 'carbon_intensity'],
      dtype='object')