# Transform Time Resolution for Labels (HW Indices)

In [4]:
import pandas as pd
import numpy as np

In [32]:
# Load the monthly HW indices table.
hws_chile = pd.read_csv("../data/local_data/monthly/hws_chile_new_agg.csv")

# The unnamed first CSV column carries the "YYYY-MM" period labels: parse it
# into a datetime index called "time" and discard the raw text column.
hws_chile = (
    hws_chile
    .assign(time=pd.to_datetime(hws_chile["Unnamed: 0"], format='%Y-%m'))
    .set_index("time")
    .drop(columns="Unnamed: 0")
)
hws_chile

Unnamed: 0_level_0,hwn,hwf,hwd,hwm,hwa,hwmeand,hwi,hwmaxi,hwmeani
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1971-01-01,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1971-02-01,0.2,0.7,4.0,2.155206,4.498967,3.500000,1.508644,8.398446,7.543220
1971-03-01,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1971-04-01,0.2,0.6,3.0,3.185188,5.672930,3.000000,1.911113,13.762538,9.555565
1971-05-01,0.1,0.3,3.0,5.556710,9.012540,3.000000,1.667013,16.670130,16.670130
...,...,...,...,...,...,...,...,...,...
2023-08-01,0.3,1.0,4.0,2.786501,7.586745,3.333333,2.786501,13.359577,9.288335
2023-09-01,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2023-10-01,0.1,0.3,3.0,0.627372,0.775687,3.000000,0.188212,1.882117,1.882117
2023-11-01,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [33]:
# Load the yearly HW indices table (used below as the reference to compare
# the re-aggregated monthly data against).
hws_chile_yearly = pd.read_csv("../data/local_data/yearly/hws_chile_new_agg.csv")

# The unnamed first CSV column carries the year labels: parse it into a
# datetime index called "time" and discard the raw column.
hws_chile_yearly = (
    hws_chile_yearly
    .assign(time=pd.to_datetime(hws_chile_yearly["Unnamed: 0"], format='%Y'))
    .set_index("time")
    .drop(columns="Unnamed: 0")
)
hws_chile_yearly

Unnamed: 0_level_0,hwn,hwf,hwd,hwm,hwa,hwmeand,hwi,hwmaxi,hwmeani
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1971-01-01,1.1,3.9,6.0,2.36223,9.01254,3.545455,9.212697,16.67013,8.375179
1972-01-01,2.1,7.2,8.0,1.339138,9.911353,3.428571,9.641792,13.845965,4.591329
1973-01-01,1.2,4.0,5.0,2.26872,6.251137,3.333333,9.074878,13.701867,7.562399
1974-01-01,1.9,7.0,7.0,2.070951,5.461437,3.684211,14.496657,19.823121,7.62982
1975-01-01,0.7,2.2,4.0,1.464228,4.937715,3.142857,3.221302,7.012564,4.60186
1976-01-01,1.5,5.0,6.0,2.302896,6.388407,3.333333,11.51448,11.876649,7.67632
1977-01-01,4.7,17.2,6.0,2.430148,10.131121,3.659574,41.798545,31.746775,8.893307
1978-01-01,3.5,13.6,11.0,1.863866,11.014251,3.885714,25.348576,19.065452,7.24245
1979-01-01,3.5,12.6,6.0,1.693547,6.12981,3.6,21.338691,13.330293,6.096769
1980-01-01,3.7,12.8,6.0,1.608845,6.744341,3.459459,20.593221,13.143823,5.565736


In [34]:
def _weighted_group_mean(df, keys, value_col, weight_col):
    """Return the per-group mean of ``value_col`` weighted by ``weight_col``.

    Groups whose weights sum to zero yield 0.0 instead of NaN/inf.
    """
    weights = df[weight_col]
    weighted_total = (df[value_col] * weights).groupby(keys).sum()
    weight_total = weights.groupby(keys).sum()
    # 0/0 produces NaN here; those groups are replaced by 0.0 explicitly.
    return (weighted_total / weight_total).where(weight_total != 0, 0.0)


def transform_time_label(df, new_time):
    """Aggregate a datetime-indexed table of HW indices to a coarser resolution.

    Aggregation rules (applied to numeric columns only):

    * ``hwn``, ``hwf``, ``hwi``     -> sum over the period
    * ``hwd``, ``hwa``, ``hwmaxi``  -> maximum over the period
    * ``hwm``                       -> mean weighted by ``hwf``
    * ``hwmeand``, ``hwmeani``      -> mean weighted by the number of events
      (``hwn``)

    Weighted means are 0.0 for periods whose weights sum to zero. Columns
    without a rule (non-numeric or unknown names), and weighted columns whose
    weight column is missing, are left out of the result instead of raising
    ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; its index must expose ``.year`` (e.g. a DatetimeIndex).
        The caller's frame is not modified.
    new_time : str
        Target resolution, either ``"year"`` or ``"decade"``.

    Returns
    -------
    pandas.DataFrame
        One row per period, columns in the same order as in ``df``.
        For ``"year"`` the index is a datetime index named ``"time"``
        (January 1st of each year); for ``"decade"`` it is the integer
        decade start (e.g. 1970) and is named ``"decade"``.

    Raises
    ------
    ValueError
        If ``new_time`` is neither ``"year"`` nor ``"decade"``.
    """
    if new_time not in ("year", "decade"):
        raise ValueError(f"{new_time} not implemented")

    sum_cols = {"hwn", "hwf", "hwi"}
    max_cols = {"hwd", "hwa", "hwmaxi"}
    # value column -> column providing the weights
    weighted_cols = {"hwm": "hwf", "hwmeand": "hwn", "hwmeani": "hwn"}

    # sort_index returns a new frame, so the caller's data is left untouched.
    df = df.sort_index()

    years = df.index.year
    period = years if new_time == "year" else (years // 10) * 10
    # Group on an external key rather than a helper column: nothing is added
    # to the frame and an existing "year"/"decade" column cannot be clobbered.
    keys = period.rename(new_time)

    numeric = set(df.select_dtypes(include=[np.number]).columns)
    grouped = df.groupby(keys)

    # Walk the columns in their original order so the output keeps that order.
    pieces = {}
    for col in df.columns:
        if col not in numeric:
            continue
        if col in sum_cols:
            pieces[col] = grouped[col].sum()
        elif col in max_cols:
            pieces[col] = grouped[col].max()
        elif col in weighted_cols and weighted_cols[col] in numeric:
            pieces[col] = _weighted_group_mean(df, keys, col, weighted_cols[col])

    result = pd.DataFrame(pieces)
    result.index.name = new_time

    if new_time == "year":
        # Expose yearly results on a datetime index named "time".
        result.index = pd.to_datetime(result.index, format='%Y').rename("time")

    return result

In [35]:
# Re-aggregate the monthly table to yearly and to decadal resolution.
transformed_year = transform_time_label(df=hws_chile, new_time="year")
transformed_decade = transform_time_label(df=hws_chile, new_time="decade")

In [38]:
# Element-wise difference between the re-aggregated monthly data and the
# yearly file loaded above; used as a check on the aggregation rules.
transformed_year.sub(hws_chile_yearly)

Unnamed: 0_level_0,hwn,hwf,hwd,hwm,hwa,hwmeand,hwi,hwmaxi,hwmeani
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1971-01-01,0.0,0.0,0.0,0.0,0.0,4.440892e-16,-1.776357e-15,0.0,0.0
1972-01-01,0.0,-8.881784e-16,0.0,0.0,0.0,4.440892e-16,0.0,0.0,-1.776357e-15
1973-01-01,0.0,0.0,0.0,-8.881784e-16,0.0,4.440892e-16,-3.552714e-15,0.0,-2.664535e-15
1974-01-01,2.220446e-16,0.0,0.0,8.881784e-16,0.0,4.440892e-16,0.0,0.0,0.0
1975-01-01,0.0,0.0,0.0,-6.661338e-16,0.0,4.440892e-16,-8.881784e-16,0.0,0.0
1976-01-01,0.0,0.0,0.0,-4.440892e-16,0.0,-4.440892e-16,1.776357e-15,0.0,-1.776357e-15
1977-01-01,0.0,0.0,0.0,0.0,0.0,1.332268e-15,0.0,0.0,-1.776357e-15
1978-01-01,0.0,0.0,0.0,2.220446e-16,0.0,0.0,3.552714e-15,0.0,0.0
1979-01-01,0.0,0.0,0.0,-2.220446e-16,0.0,8.881784e-16,0.0,0.0,0.0
1980-01-01,0.0,0.0,0.0,0.0,0.0,-8.881784e-16,0.0,0.0,0.0
