# Calculating medians, percentiles and interquartile ranges for ratios

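This notebook loads `results/relative_gwp_contributions.csv` and calculates the mean, median, minimum, maximum, 25th/75th percentiles, interquartile range and 90% highest-density interval (HDI) of the total ratio scores, expressed relative to CM HTO (CM HTO = 1).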

The resulting table is saved to `results/stats_totals_ratios.csv`.

In [4]:
import brightway2 as bw
import bw2calc as bc
import bw2data as bd
import numpy as np
import pandas as pd
import presamples as ps
import arviz as az

In [5]:
# Relative GWP contribution samples; the cells below rely on its
# `scenario`, `energy_scenario`, `iteration` and `score` columns.
samples_combined = pd.read_csv(
    filepath_or_buffer="results/relative_gwp_contributions.csv",
)

## Total GWPs statistical analysis

In [6]:
# Collapse the individual contributions into a single summed score per
# (scenario, energy_scenario, iteration) combination.
total = (
    samples_combined
    .groupby(['scenario', 'energy_scenario', 'iteration'])['score']
    .sum()
    .reset_index()
)

In [7]:
# Group the per-iteration totals by scenario / energy scenario pair so that
# summary statistics can be computed for each pair.
grouped = total.groupby(by=['scenario', 'energy_scenario'])

def calculate_percentiles_and_iqr(group):
    """Summarise the ``score`` column of a single group.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group; only its ``score`` column is read.

    Returns
    -------
    pandas.Series
        Entries, in this order: ``mean``, ``median``, ``minimum``,
        ``maximum``, ``25th Percentile``, ``75th Percentile``, ``IQR``
        (75th minus 25th percentile), ``hdi_90_lower`` and ``hdi_90_upper``
        (elements 0 and 1 of ``az.hdi(..., hdi_prob=.9)``).
    """
    scores = group['score']

    lower_quartile = scores.quantile(0.25)
    upper_quartile = scores.quantile(0.75)

    # The HDI routine is handed a plain NumPy array; element 0 is stored as
    # the lower bound and element 1 as the upper bound.
    hdi_bounds = az.hdi(scores.to_numpy(), hdi_prob=.9)

    summary = {
        'mean': scores.mean(),
        'median': scores.median(),
        'minimum': scores.min(),
        'maximum': scores.max(),
        '25th Percentile': lower_quartile,
        '75th Percentile': upper_quartile,
        'IQR': upper_quartile - lower_quartile,
        'hdi_90_lower': hdi_bounds[0],
        'hdi_90_upper': hdi_bounds[1],
    }
    return pd.Series(summary)

# Restrict each group to its `score` column before applying the summary
# function. The function only reads `score`, and leaving the grouping columns
# in the frame passed to it triggers pandas' "DataFrameGroupBy.apply operated
# on the grouping columns" DeprecationWarning (pandas >= 2.2). Explicit column
# selection behaves the same on older and newer pandas versions.
stats_totals = grouped[['score']].apply(calculate_percentiles_and_iqr).reset_index()
stats_totals

  stats_totals = grouped.apply(calculate_percentiles_and_iqr).reset_index()


Unnamed: 0,scenario,energy_scenario,mean,median,minimum,maximum,25th Percentile,75th Percentile,IQR,hdi_90_lower,hdi_90_upper
0,AM HTO,Current,1.229616,1.216593,0.94787,1.747044,1.139916,1.309603,0.169687,1.03095,1.416335
1,AM HTO,Greener,0.960112,0.953411,0.683905,1.583404,0.881093,1.030259,0.149166,0.78921,1.128263
2,AM HTO (steel jig),Current,1.094752,1.083453,0.845856,1.603272,1.011423,1.168723,0.1573,0.917425,1.274347
3,AM HTO (steel jig),Greener,0.759094,0.749171,0.538181,1.304382,0.693541,0.819717,0.126176,0.617,0.894136
4,CM HTO,Current,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
5,UKR,Current,3.531368,3.502657,2.74474,4.606994,3.273459,3.769835,0.496376,3.042795,4.046183


In [10]:
# Persist the summary table; the positional row index is not written out.
stats_totals.to_csv(
    path_or_buf="results/stats_totals_ratios.csv",
    index=False,
)

In [9]:
# NOTE(review): disabled draft of a 2.5th/97.5th percentile calculation on
# mean-centred scores. It references `mean_scores`, which is not defined in any
# cell visible in this notebook, so it cannot be re-enabled as-is.
#def calculate_percentiles(series, mean_score, lower_percentile, upper_percentile):
#    adjusted_series = series - mean_score
#    lower_value = np.percentile(adjusted_series, lower_percentile)
#    upper_value = np.percentile(adjusted_series, upper_percentile)
#    return lower_value, upper_value

#percentiles = (
#    total
#    .groupby(['scenario', 'energy_scenario'])
#    .apply(lambda x: calculate_percentiles(x['score'], 
#                                           mean_scores.loc[(mean_scores['scenario'] == x.name[0]) & 
#                                                           (mean_scores['energy_scenario'] == x.name[1]), 'mean_score'].values[0],
#                                           2.5, 97.5))
#    .reset_index()
#)
#percentiles[['2.5th_percentile', '97.5th_percentile']] = pd.DataFrame(percentiles[0].tolist(), index=percentiles.index)
#percentiles.drop(columns=[0], inplace=True)
#percentiles