In [22]:
import numpy as np
from collections import Counter
import pandas as pd
import CRPS.CRPS as crps
from data_gathering import gather_data_actuals

In [23]:

# Bin edges handed to `ensemble_ignorance_score` (via `compute_metrics`).
# Eleven ascending edges; the first entry is the integer 0, the rest are floats.
bins = [0, 0.5, 2.5, 5.5, 10.5, 25.5, 50.5, 100.5, 250.5, 500.5, 1000.5]

In [24]:

def _ensemble_ignorance_score(predictions, n, observed):
        c = Counter(predictions)
        # n = c.total() : this works from python version 3.10, avoid this for a while.
        prob = c[observed] / n # if counter[observed] is 0, then this returns correctly
        return -np.log2(prob)

def ensemble_ignorance_score(observations, forecasts, bins, low_bin = 0, high_bin = 10000):
    """
    Binned ensemble ignorance score.

    Forecast members and observations are mapped onto bin indices; the
    probability of the observed bin is its relative frequency among the
    (padded) ensemble members, and the score is ``-log2`` of that probability.
    One pseudo-member per category is appended to every ensemble so that no
    category ends up with probability zero.

    According to the original author's note, the function is modelled on the
    ensemble scores of the R ``easyVerification`` package and on
    ``properscoring.crps_ensemble()``, with an interface meant to work with
    ``xskillscore``.

    Parameters
    ----------
    observations : float or array_like
        Observed values. Must be non-negative (checked with an assertion).
    forecasts : array_like
        Ensemble forecasts with the ensemble members along the last axis; the
        leading axes are indexed with the indices of ``observations``.
        Values must be non-negative or NaN.
        NOTE(review): NaN members pass the check but are not removed here;
        ``np.digitize`` appears to sort them into the last category. Filter
        them out before calling if they should be ignored.
    bins : int or list of float
        Either the number of equal-width bins between ``low_bin`` and
        ``high_bin`` or an explicit, ascending list of bin edges.
    low_bin, high_bin : float
        Range used to build the edges when ``bins`` is an integer.

    Returns
    -------
    out : np.ndarray
        Ignorance score per observation, same shape as ``observations``.
    """
    observations = np.asarray(observations)
    forecasts = np.asarray(forecasts)

    assert np.all((forecasts >= 0) | np.isnan(forecasts)), "Forecasts must be non-negative or NaN."
    assert np.all(observations >= 0), "Observations must be non-negative."

    assert isinstance(bins, (int, list)), "bins must be an integer or a list with floats"
    if isinstance(bins, int):
        assert bins > 0, "bins must be an integer above 0 or a list with floats."

    def digitize_minus_one(x, bins, right=False):
        # np.digitize is 1-based for values inside the first bin; shift to 0-based.
        return np.digitize(x, bins, right) - 1

    edges = np.histogram_bin_edges(forecasts, bins=bins, range=(low_bin, high_bin))
    binned_forecasts = digitize_minus_one(forecasts, edges)
    binned_observations = digitize_minus_one(observations, edges)

    # Append one pseudo-member in each category to the forecasts to prevent a
    # probability of 0. The number of categories is taken from `edges`, not
    # from `bins`: for a list the two have the same length, and for an integer
    # `bins` (which has no len()) `edges` holds bins + 1 entries, i.e. one
    # category per bin plus the overflow category at/above the last edge.
    unique_categories = np.arange(len(edges))
    padding = np.tile(unique_categories, binned_forecasts.shape[:-1] + (1,))
    binned_forecasts = np.concatenate((binned_forecasts, padding), axis = -1)

    # Ensemble size including the pseudo-members.
    n = binned_forecasts.shape[-1]

    ign_score = np.empty_like(binned_observations, dtype = float)
    for index in np.ndindex(ign_score.shape):
        ign_score[index] = _ensemble_ignorance_score(binned_forecasts[index], n, binned_observations[index])

    return ign_score

In [25]:
def interval_score(observed: np.array, predictions: np.array, prediction_interval_level: float = 0.90) -> np.array:
    """
    Interval score of a central prediction interval taken from ensemble draws.

    Modelled on the scaled Mean Interval Score of the R tsRNN package
    (https://rdrr.io/github/thfuchs/tsRNN/src/R/metrics_dist.R).

    The score is the width of the interval plus a penalty of ``2 / alpha``
    times the distance by which the observation falls outside the interval,
    where ``alpha = 1 - prediction_interval_level``. Narrow intervals that
    still contain the observation therefore score best (lowest).

    :param observed: observed values
    :type observed: array_like
    :param predictions: draws per observed value along the last axis (-1)
    :type predictions: array_like
    :param prediction_interval_level: nominal coverage, strictly between 0 and 1
    :type prediction_interval_level: float
    :returns array_like with the interval score for each observed value
    :rtype array_like

    Example::

        observed = np.random.negative_binomial(5, 0.8, size = 600)
        forecasts = np.random.negative_binomial(5, 0.8, size = (600, 1000))

        score = interval_score(observed, forecasts)
        print(f'MIS: {score.mean()}')
    """

    assert 0 < prediction_interval_level < 1, f"'prediction_interval_level' must be a number between 0 and 1."

    alpha = 1 - prediction_interval_level

    # Interval bounds: the alpha/2 and 1 - alpha/2 quantiles across the draws.
    lower, upper = (
        np.quantile(predictions, q = tail, axis = -1)
        for tail in (alpha/2, 1 - (alpha/2))
    )

    # The boolean factors switch each penalty on only when the observation
    # lies on that side of the interval.
    below_penalty = (2/alpha)*(lower-observed) * (observed<lower)
    above_penalty = (2/alpha)*(observed-upper) * (observed>upper)

    return ((upper - lower) + below_penalty + above_penalty)

In [26]:
def compute_metrics(pps: pd.DataFrame, export_path: str, model_identifier: str, export = True):
    """
    Compute CRPS, interval score and ignorance score for every
    (country_id, month_id) pair found in the posterior predictive samples.

    Args:
        pps: frame with the columns 'country_id', 'month_id', 'ged_sb' (taken
            as the observed value) and one column per draw whose name starts
            with 'draw_' (taken as the predictions).
        export_path: prefix the output file name is appended to; only used
            when ``export`` is true.
        model_identifier: leading part of the exported file name.
        export: when true, write the result to
            ``export_path + f'{model_identifier}_metrics_by_country_and_month.parquet'``.

    Returns:
        DataFrame with the columns 'country_id', 'month_id', 'crps',
        'interval_score' and 'ignorance_score', one row per
        (country_id, month_id) pair that has rows in ``pps``. Pairs without
        rows are skipped.

    Relies on the module-level ``bins``, ``crps``, ``interval_score`` and
    ``ensemble_ignorance_score``.
    """
    score_columns = ['country_id', 'month_id', 'crps', 'interval_score', 'ignorance_score']
    # Extract column names that start with 'draw_' from pps
    predictions_column_names = [col for col in pps.columns if col.startswith('draw_')]

    # Split the frame once instead of re-masking it for every pair.
    rows_by_pair = {
        pair: rows
        for pair, rows in pps.groupby(['country_id', 'month_id'], sort=False)
    }

    results_list = []
    # Iterate in order of first appearance of each id (country-major).
    for country_id in pps['country_id'].unique():
        for month_id in pps['month_id'].unique():
            rows = rows_by_pair.get((country_id, month_id))
            if rows is None:
                # No data for this combination: nothing to score.
                continue

            # 1. 'ged_sb' is the observed value; all non-NaN draws form one ensemble
            observed = rows['ged_sb'].values
            predictions = rows[predictions_column_names].values.flatten()
            predictions = predictions[~np.isnan(predictions)].reshape(1, -1)

            # 2. Score the ensemble; [0] picks the score of the first observation
            interval_score_value = interval_score(observed, predictions)[0]
            ignorance_score_value = ensemble_ignorance_score(observed, predictions, bins = bins)[0]
            # NOTE(review): `observed` is passed to the CRPS class as an array,
            # not a scalar - confirm this is what the library expects.
            crps_score, _, _ = crps(predictions.flatten(), observed).compute()

            # 3. Collect the result as a record
            results_list.append({
                'country_id': country_id,
                'month_id': month_id,
                'crps': crps_score,
                'interval_score': interval_score_value,
                'ignorance_score': ignorance_score_value
            })

    # Explicit columns keep the frame well-formed even when nothing was scored.
    results_df = pd.DataFrame(results_list, columns=score_columns)
    results_df['crps'] = results_df['crps'].astype('float64')

    if export:
        results_df.to_parquet(export_path + f'{model_identifier}_metrics_by_country_and_month.parquet')
    return results_df

In [27]:
def compute_averages(df, score_type, start_month_id = 457, years = (2018, 2019, 2020, 2021)):
    """
    Tabulate the mean of one score per calendar month and year.

    Args:
        df: frame with a 'month_id' column and a ``score_type`` column.
        score_type: name of the score column to average.
        start_month_id: month_id that corresponds to January of ``years[0]``.
        years: calendar years to tabulate; the month_id of a given month is
            ``start_month_id + (year - years[0]) * 12 + month_offset``.

    Returns:
        DataFrame with one row per year and the columns 'Year', 'Jan' ...
        'Dec' and 'Year Average'. A month without rows in ``df`` is NaN;
        'Year Average' is the mean of that year's non-NaN monthly means.
    """
    year_length = 12
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Year Average']
    years = list(years)

    table_rows = []
    for year in years:
        first_month_id = start_month_id + (year - years[0]) * year_length
        monthly_scores = [
            df.loc[df['month_id'] == first_month_id + offset, score_type].mean()
            for offset in range(year_length)
        ]
        year_average = pd.Series(monthly_scores).mean()
        table_rows.append([year] + monthly_scores + [year_average])

    # Build the table in one go with explicit column labels. The previous
    # `.loc[rows, 1:] = ...` assignment sliced labels positionally, which
    # pandas 2 rejects with a TypeError.
    return pd.DataFrame(table_rows, columns=['Year'] + months)

def compute_averages_by_country(df, score_type, start_month_id = 457, years = (2018, 2019, 2020, 2021)):
    """
    Tabulate the mean of one score per country, calendar month and year.

    Args:
        df: frame with 'country_id' and 'month_id' columns and a
            ``score_type`` column.
        score_type: name of the score column to average.
        start_month_id: month_id that corresponds to January of ``years[0]``.
        years: calendar years to tabulate; the month_id of a given month is
            ``start_month_id + (year - years[0]) * 12 + month_offset``.

    Returns:
        DataFrame with one row per (country, year) - countries in order of
        first appearance in ``df`` - and the columns 'Year', 'Country_ID',
        'Jan' ... 'Dec' and 'Year Average'. A month without rows is NaN;
        'Year Average' is the mean of that year's non-NaN monthly means.
        For a ``df`` without rows the result has these columns and no rows.
    """
    year_length = 12
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Year Average']
    years = list(years)

    table_rows = []
    for country_id in df['country_id'].unique():
        # Restrict to this country once; the month filter below works on the subset.
        country_scores = df.loc[df['country_id'] == country_id]
        for year in years:
            first_month_id = start_month_id + (year - years[0]) * year_length
            monthly_scores = [
                country_scores.loc[country_scores['month_id'] == first_month_id + offset, score_type].mean()
                for offset in range(year_length)
            ]
            year_average = pd.Series(monthly_scores).mean()
            table_rows.append([year, country_id] + monthly_scores + [year_average])

    # Collect plain records and build the frame once: DataFrame.append no
    # longer exists in pandas 2, and the positional `.loc[rows, 2:]`
    # assignment used before is rejected there as well.
    return pd.DataFrame(table_rows, columns=['Year', 'Country_ID'] + months)

In [28]:
def fill_pps_for_zero_obs_countries(df: pd.DataFrame, actual_countries: list, actuals_data: pd.DataFrame, draw_columns = None):
    """
    Add all-zero prediction rows for countries that are absent from ``df``.

    For every country in ``actual_countries`` that does not occur in
    ``df['country_id']`` and every month_id that occurs in ``df``, one row is
    added whose 'ged_sb' is the first matching value in ``actuals_data`` and
    whose draw columns are all 0.0.

    Args:
        df: posterior predictive samples with 'country_id' and 'month_id' columns.
        actual_countries: iterable of all country ids that should be present.
        actuals_data: frame with 'country_id', 'month_id' and 'ged_sb' columns.
        draw_columns: names of the draw columns to fill with 0.0. Defaults to
            'draw_1' ... 'draw_99'.
            NOTE(review): the default covers 99 columns - confirm that this
            matches the draw columns actually present in ``df``.

    Returns:
        New DataFrame holding the rows of ``df`` plus the added rows, sorted
        by 'country_id' and 'month_id' with a fresh index.

    Raises:
        IndexError: if ``actuals_data`` has no row for a required
            (country_id, month_id) pair.
    """
    if draw_columns is None:
        draw_columns = [f'draw_{i}' for i in range(1, 100)]
    else:
        draw_columns = list(draw_columns)

    # Countries already covered by df (a set gives constant-time membership tests)
    present_country_ids = set(df['country_id'].unique())
    # Determine missing country_ids
    missing_country_ids = [country_id for country_id in actual_countries if country_id not in present_country_ids]
    # Unique month_ids in the original DataFrame
    unique_month_ids = df['month_id'].unique()

    # Generate new rows
    new_rows = []
    for month_id in unique_month_ids:
        for country_id in missing_country_ids:
            observation = actuals_data.loc[(actuals_data['country_id'] == country_id) & (actuals_data['month_id'] == month_id), 'ged_sb'].values
            if observation.size == 0:
                raise IndexError(f"No 'ged_sb' value in actuals_data for country_id={country_id}, month_id={month_id}.")
            new_row = {'month_id': month_id, 'country_id': country_id, 'ged_sb': observation[0]}
            new_row.update(dict.fromkeys(draw_columns, 0.0))
            new_rows.append(new_row)

    # Only concatenate when there is something to add; an empty frame would
    # contribute nothing.
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    return df.sort_values(by=['country_id', 'month_id']).reset_index(drop=True)

    

In [29]:
# Load 'Actuals' data
# gather_data_actuals() is unpacked into five values; only the last one is
# kept, the first four are discarded.
_, _, _, _, data_cm_actual_allyears \
    = gather_data_actuals()
# Determine all countries
# (distinct values of the 'country_id' column of the actuals frame)
actual_countries = data_cm_actual_allyears['country_id'].unique()

In [30]:
# Load posterior predictive samples from Bayesian models
model_original_identifier = ['model13_nb_feature_set1', 'model3_zinb_feature_set1', 'model1_zinb_feature_set1', 'model4_zinb_feature_set1', 'model15_zinb_feature_set1']
data_path = 'C:/Users/Uwe Drauz/Documents/bachelor_thesis_local/personal_competition_data/Results/'

# The two baselines share one file-name pattern ...
pps_baseline_f_m, pps_baseline_f = (
    pd.read_parquet(data_path + baseline_name + '_posterior_predictive_samples.parquet')
    for baseline_name in ('baseline_f_m', 'baseline_f')
)

# ... and the five models another one (note the different spelling of the
# suffix, which has to match the files on disk).
pps_model1, pps_model2, pps_model3, pps_model4, pps_model5 = (
    pd.read_parquet(data_path + original_identifier + '_posterior_predicitve_samples.parquet')
    for original_identifier in model_original_identifier
)

# Baselines first, then the models in the order of model_original_identifier.
pps_list = [pps_baseline_f_m, pps_baseline_f, pps_model1, pps_model2, pps_model3, pps_model4, pps_model5]

In [31]:
# Replace every frame in pps_list by its filled counterpart, i.e. the result
# of 'fill_pps_for_zero_obs_countries' for that frame.
for i in range(len(pps_list)):
    pps = pps_list[i]
    pps_list[i] = fill_pps_for_zero_obs_countries(pps, actual_countries, data_cm_actual_allyears)

In [32]:
export_path = r'C:\Users\Uwe Drauz\Documents\bachelor_thesis_local\personal_competition_data\Results\\'
model_identifiers = ['baseline_f_m', 'baseline_f', 'model1', 'model2', 'model3', 'model4', 'model5']
score_types = ['crps', 'interval_score', 'ignorance_score']

# scores_dict is nested as: model identifier -> aggregation name -> score type
scores_dict = {}
for model_identifier, pps in zip(model_identifiers, pps_list):
    # Compute metrics for each model (kept in memory only, nothing is exported here)
    results_df = compute_metrics(pps, export_path, model_identifier, export = False)

    # Dictionaries to store the computed dataframes, keyed by score type
    averages_overall_dict = {}
    averages_dict = {}
    averages_by_country_dict = {}

    for score_type in score_types:
        # Averages per month and year
        averages_dict[score_type] = compute_averages(results_df, score_type)

        # Averages per month, year and country
        averages_by_country_dict[score_type] = compute_averages_by_country(results_df, score_type)

        # Overall average: mean of the yearly averages
        averages_overall_dict[score_type] = averages_dict[score_type]['Year Average'].mean()

    # Store the three dictionaries once per model, after all score types
    # have been filled in.
    scores_dict[model_identifier] = {
        'average_over_all_months': averages_overall_dict,
        'averages_per_month_and_year': averages_dict,
        'averages_per_montg_year_and_country': averages_by_country_dict
    }
        
    

  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.array(delta_fc)
  self.__delta_fc = np.ar

In [35]:
# Export the dataframes in the scores_dict to parquet files with suitable names
# scores_dict is nested as model -> aggregation name -> score type, so the
# middle level is the aggregation name and the innermost key the score type.
# Files are named '<model>_<aggregation name>_<score type>.parquet'.
for model_identifier, model_dict in scores_dict.items():
    for score_name, per_score_type in model_dict.items():
        for score_type, score_df in per_score_type.items():
            # A scalar average (NumPy or plain Python float) has no
            # to_parquet(); wrap it in a dataframe with one row and one column.
            if isinstance(score_df, (float, np.floating)):
                score_df = pd.DataFrame(score_df, index=[0], columns=[score_type])
            score_df.to_parquet(export_path + f'metrics/{model_identifier}_{score_name}_{score_type}.parquet')

In [40]:
# Read the parquet files back into dataframes, and store them in a dictionary
# The result is nested as model -> score type -> aggregation name.
import_path = r'C:\Users\Uwe Drauz\Documents\bachelor_thesis_local\personal_competition_data\Results\\'
scores_dict_imp = {}
for model_identifier in model_identifiers:
    per_model = {}
    scores_dict_imp[model_identifier] = per_model
    for score_type in score_types:
        per_model[score_type] = {
            score_name: pd.read_parquet(import_path + f'metrics/{model_identifier}_{score_name}_{score_type}.parquet')
            for score_name in ['average_over_all_months', 'averages_per_month_and_year', 'averages_per_montg_year_and_country']
        }