# In [4]:
import pandas as pd

# Paths to the input Excel workbooks.
data_hourly = "/Users/kevineriksson/PycharmProjects/Simulation/data/PV_WIND_PROD_PL.xlsx"
data_yearly = "/Users/kevineriksson/PycharmProjects/Simulation/data/YIELD_PL.xlsx"

# (hourly input column, annual-production column in the yearly table, output column)
_SOURCES = (
    ('Wind', 'WIND - Annual prod MWh, MW', 'wind_profile'),
    ('Solar', 'SOLAR - Annual prod MWh, MW', 'solar_profile'),
)


def normalize_profiles(df_hourly, df_yearly):
    """Scale hourly wind and solar production so each year sums to its annual figure.

    For every year listed in ``df_yearly``, the hourly 'Wind' and 'Solar'
    values of that year are divided by ``hourly_sum / annual_value``, so the
    resulting profile for the year adds up to the annual value.

    Args:
        df_hourly: DataFrame with an 'Hour' column (anything
            ``pd.to_datetime`` accepts) and 'Wind' / 'Solar' columns.
        df_yearly: DataFrame with a 'Year' column and the columns
            'WIND - Annual prod MWh, MW' and 'SOLAR - Annual prod MWh, MW'.
            If a year occurs more than once, its first row is used.

    Returns:
        A copy of ``df_hourly`` with 'Hour' converted to datetime and the
        added columns 'Year', 'wind_profile' and 'solar_profile'. Rows whose
        year is not present in ``df_yearly`` keep a profile value of 0.0.
        The input frames are not modified.
    """
    profiles = df_hourly.copy()
    profiles['Hour'] = pd.to_datetime(profiles['Hour'])
    profiles['Year'] = profiles['Hour'].dt.year

    for _, _, profile_col in _SOURCES:
        profiles[profile_col] = 0.0

    # A blank 'Year' cell survives ffill() when it is in the first row(s).
    # NaN never compares equal to itself, so looking such a "year" up would
    # select no rows and fail on .values[0]; skip those entries instead.
    for year in df_yearly['Year'].dropna().unique():
        in_year = profiles['Year'] == year
        year_rows = df_yearly['Year'] == year

        for hourly_col, annual_col, profile_col in _SOURCES:
            hourly = profiles.loc[in_year, hourly_col]
            annual = df_yearly.loc[year_rows, annual_col].values[0]
            profiles.loc[in_year, profile_col] = hourly / (hourly.sum() / annual)

    return profiles


# Only touch the file system when executed as a script / notebook cell, so the
# normalisation logic can be imported and tested without the Excel files.
if __name__ == "__main__":
    # ffill() fills blank cells with the value from the row above.
    df_hourly = pd.read_excel(data_hourly).ffill()
    df_yearly = pd.read_excel(data_yearly).ffill()

    df_hourly = normalize_profiles(df_hourly, df_yearly)

    df_hourly[['Hour', 'Year', 'wind_profile', 'solar_profile']].to_excel('/Users/kevineriksson/PycharmProjects/Simulation/data/profiles_PL.xlsx', index=False)