In [None]:
import pandas as pd
import numpy as np

## Uncensoring KPIs

In [None]:
def uncensoring_KPI(dataframe, method_name):
    """
    Calculate uncensoring KPIs for one method on the censored rows of a dataframe.

    Only rows with ``Zensiert == 1`` are evaluated; the estimated demand of the
    method is compared against the true demand on those rows.

    Parameters:
    - dataframe: pandas DataFrame containing the columns 'Zensiert' (1 marks a
      censored row), 'Verkauf_MBR' (true demand) and '<method_name>_Demand'
      (demand estimated by the method)
    - method_name: string name of the method being evaluated; used both to
      select the '<method_name>_Demand' column and as prefix of the result keys

    Returns:
    None if the dataframe contains no censored rows, otherwise a dict with the
    keys '<method_name>_<kpi>' for the following KPIs:
    - bias: mean of (estimate - truth)
    - weighted_mae_0 / _0.5 / _1 / _1.5: MAE weighted by truth**alpha
            alpha = 0: standard MAE (no weighting)
            alpha = 1: linear weighting by true demand
            alpha > 1: over-proportional weight on rows with large true demand
            alpha < 1: more emphasis on rows with small true demand
    - censored_fraction: share of censored rows in the dataframe
    - accuracy: share of exact matches
    - overestimation_rate / underestimation_rate: share of rows with
      estimate > truth / estimate < truth
    - gini_coefficient: Gini coefficient of the absolute errors
      (0.0 when all absolute errors are zero)
    - overstock: sum of the positive parts of (estimate - truth)
    """
    censored = dataframe[dataframe['Zensiert'] == 1]
    n = len(censored)
    # Check for "nothing to evaluate" before dividing by the row count, so an
    # empty dataframe yields None instead of a ZeroDivisionError.
    if n == 0:
        return None
    censored_fraction = n / len(dataframe)

    # Extract predicted and true values
    y_pred = censored[method_name + "_Demand"].values  # estimated demand
    y_true = censored['Verkauf_MBR'].values  # true demand

    errors = y_pred - y_true
    abs_errors = np.abs(errors)

    # 1. Bias
    bias = np.sum(errors) / n

    # 2. Accuracy (exact matches)
    accuracy = np.sum(y_pred == y_true) / n

    # 3. Overestimation rate
    overestimation_rate = np.sum(y_pred > y_true) / n

    # 4. Underestimation rate
    underestimation_rate = np.sum(y_pred < y_true) / n

    # 5. Weighted MAE for a fixed set of alpha values
    weighted_maes = {}
    for a in (0, 0.5, 1, 1.5):
        if a == 0:
            # Standard MAE (no weighting)
            weighted_maes[a] = np.mean(abs_errors)
            continue
        # Rows with zero true demand get weight 0
        weights = np.where(y_true == 0, 0, np.power(y_true, a))
        weight_sum = np.sum(weights)
        # If every weight is zero the weighted mean is undefined; report 0
        weighted_maes[a] = (
            np.sum(weights * abs_errors) / weight_sum if weight_sum > 0 else 0
        )

    # 6. Gini coefficient of the absolute errors:
    #    G = 2 * sum(rank_i * e_(i)) / (n * sum(e)) - (n + 1) / n
    sorted_errors = np.sort(abs_errors)
    total_error = np.sum(sorted_errors)
    if total_error > 0:
        ranks = np.arange(1, n + 1)
        gini_coefficient = (
            2 * np.dot(ranks, sorted_errors) / (n * total_error) - (n + 1) / n
        )
    else:
        # All errors are zero: perfectly equal distribution, avoid 0/0 -> NaN
        gini_coefficient = 0.0

    # 7. Overstock, out of curiosity
    overstock = np.sum(np.maximum(0, errors))

    return {
        method_name + '_bias': bias,
        method_name + '_weighted_mae_0': weighted_maes[0],
        method_name + '_weighted_mae_0.5': weighted_maes[0.5],
        method_name + '_weighted_mae_1': weighted_maes[1],
        method_name + '_weighted_mae_1.5': weighted_maes[1.5],
        method_name + '_censored_fraction': censored_fraction,
        method_name + '_accuracy': accuracy,
        method_name + '_overestimation_rate': overestimation_rate,
        method_name + '_underestimation_rate': underestimation_rate,
        method_name + '_gini_coefficient': gini_coefficient,
        method_name + '_overstock': overstock,
    }

def uncensoring_KPI_POS(dataframe, methods, magazines):
    """
    Calculate uncensoring KPIs per magazine and point of sale (POS).

    Parameters:
    - dataframe: DataFrame containing the original data columns ('Magazine',
      'EHASTRA_EH_NUMMER', 'Verkauf', 'Bezug', 'Zensiert', 'Verkauf_MBR') and
      one '<method>_Demand' column per uncensoring method
    - methods: iterable of method names to evaluate
    - magazines: iterable of values of the 'Magazine' column to evaluate

    Rows with missing 'Verkauf' or 'Bezug' are ignored. A POS is skipped for a
    magazine when it has fewer than 3 censored rows (``Zensiert == 1``).
    For every magazine the mean of each KPI over its POS is printed.

    Returns:
    DataFrame with one row per (magazine, POS), the identifier columns
    'Magazine' and 'EHASTRA_EH_NUMMER', and the KPI columns of all methods.
    Rows follow the order of `magazines`, then ascending POS number. If
    nothing qualifies, a DataFrame with only the identifier columns is
    returned.
    """
    id_columns = ['Magazine', 'EHASTRA_EH_NUMMER']
    # dropna returns a new frame, so the caller's dataframe is left untouched
    df = dataframe.dropna(subset=['Verkauf', 'Bezug'])

    magazine_frames = []
    for magazine in magazines:
        df_magazine = df[df['Magazine'] == magazine]

        pos_rows = []
        # Group only this magazine's rows; groupby yields POS in sorted order
        for pos, group in df_magazine.groupby('EHASTRA_EH_NUMMER'):
            # Do not calculate KPIs for POS with fewer than 3 censored points
            if (group['Zensiert'] == 1).sum() < 3:
                continue

            # Collect the KPIs of all methods in a single row per POS
            row = {'Magazine': magazine, 'EHASTRA_EH_NUMMER': pos}
            for method in methods:
                pos_result = uncensoring_KPI(group, method)
                # uncensoring_KPI returns None when nothing is censored
                if pos_result is not None:
                    row.update(pos_result)
            pos_rows.append(row)

        if not pos_rows:
            print(f"No POS with enough censored points for magazine {magazine}.")
            continue

        df_magazine_KPIs = pd.DataFrame(pos_rows)
        print(f"Uncensoring KPIs by POS for magazine {magazine}:")
        for col in df_magazine_KPIs.columns:
            if col in id_columns:
                continue
            print(f'{col}: {df_magazine_KPIs[col].mean():.3f}')

        magazine_frames.append(df_magazine_KPIs)

    if not magazine_frames:
        return pd.DataFrame(columns=id_columns)

    # Concatenate once at the end; identifiers stay as regular columns so they
    # survive a later `to_csv(..., index=False)`.
    return pd.concat(magazine_frames, ignore_index=True)

In [None]:
# Load the uncensoring results, compute the per-POS KPIs for every method and
# magazine, and write them to disk.
df = pd.read_csv('Uncensoring_results.csv')
uncensoring_methods = [
    'N1', 'N2', 'N3', 'EM', 'PD',
    'Nahmias', 'Conrad', 'Baseline', 'Bayesian', 'Agrawal',
]
magazine_letters = list('ABCDEFGHI')
df_KPIs = uncensoring_KPI_POS(df, uncensoring_methods, magazine_letters)
df_KPIs.to_csv('Uncensoring_KPIs.csv', index=False)  # adjust accordingly

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
