# Calculating medians, percentiles and interquartile ranges

This notebook loads `results/samples_comparative_gwp_contributions.csv` and `results/metal_gwp.csv`, and calculates the mean, median, minimum, maximum, quartiles (25th and 75th percentiles) and interquartile range for total scores and specific contributions.

The resulting tables are saved to `results/stats_totals.csv`, `results/stats_material_manufacture.csv` and `results/stats_metals.csv`.


In [2]:
import brightway2 as bw
import bw2calc as bc
import bw2data as bd
import numpy as np
import pandas as pd
import presamples as ps
import arviz as az

In [3]:
# Read the sampled GWP contributions; later cells use its 'component',
# 'scenario', 'energy_scenario', 'iteration' and 'score' columns.
samples_combined = pd.read_csv(
    "results/samples_comparative_gwp_contributions.csv"
)

## Total GWPs statistical analysis

In [4]:
# Discard the rows whose component is 'Total', then rebuild one total per
# scenario, energy scenario and iteration by summing the remaining component scores.
samples_combined_excl_total = samples_combined.loc[samples_combined['component'].ne('Total')]
total = (
    samples_combined_excl_total
    .groupby(['scenario', 'energy_scenario', 'iteration'])['score']
    .sum()
    .reset_index()
)

In [5]:
# One group of per-iteration totals for every (scenario, energy scenario) pair.
grouped = total.groupby(by=['scenario', 'energy_scenario'])

def calculate_percentiles_and_iqr(group):
    """Summarise the 'score' column of one group.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame (typically one groupby group) that contains a 'score' column.

    Returns
    -------
    pandas.Series
        Entries, in order: 'mean', 'median', 'minimum', 'maximum',
        '25th Percentile', '75th Percentile' and 'IQR' (75th minus 25th
        percentile).
    """
    scores = group['score']
    lower_quartile = scores.quantile(0.25)
    upper_quartile = scores.quantile(0.75)
    # A 90% highest-density interval (arviz `az.hdi`) was drafted here at one
    # point but is intentionally not part of the returned summary.
    summary = {
        'mean': scores.mean(),
        'median': scores.median(),
        'minimum': scores.min(),
        'maximum': scores.max(),
        '25th Percentile': lower_quartile,
        '75th Percentile': upper_quartile,
        'IQR': upper_quartile - lower_quartile,
    }
    return pd.Series(summary)

# Summarise the total scores of every (scenario, energy scenario) group.
# Only the 'score' column is handed to apply(): letting apply() operate on the
# grouping columns is deprecated in pandas, and the summary function reads
# nothing but 'score', so the resulting table is unchanged.
stats_totals = grouped[['score']].apply(calculate_percentiles_and_iqr).reset_index()
stats_totals

  stats_totals = grouped.apply(calculate_percentiles_and_iqr).reset_index()


Unnamed: 0,scenario,energy_scenario,mean,median,minimum,maximum,25th Percentile,75th Percentile,IQR
0,AM HTO,Current,13.174269,13.09296,11.427736,17.100529,12.695985,13.565573,0.869588
1,AM HTO,Greener,10.279125,10.173676,8.869765,13.271432,9.795183,10.628058,0.832874
2,AM HTO (steel jig),Current,11.726691,11.616579,10.077453,15.54236,11.244918,12.106179,0.861261
3,AM HTO (steel jig),Greener,8.126796,8.011777,6.798716,11.301494,7.703239,8.437332,0.734093
4,CM HTO,Current,10.800382,10.797094,8.07794,14.449564,10.045407,11.560788,1.515381
5,UKR,Current,37.850461,37.764603,33.781342,44.179102,36.786865,38.844513,2.057648


In [6]:
# Save the total-score summary table without writing the row index.
stats_totals.to_csv(
    "results/stats_totals.csv",
    index=False,
)

## Material and manufacture GWP statistical analysis

Here only the impacts of the instruments and the implant (material and manufacture) are considered: the `Anesthesia`, `Packaging`, `Transport`, `Sterilisation` and `Argon` components are excluded before summing.

In [7]:
# Components left out of the material-and-manufacture total.
components_to_exclude = ['Anesthesia', 'Packaging', 'Transport', 'Sterilisation', 'Argon']

# Keep every non-'Total' row whose component is not in the exclusion list ...
total_contributions_filtered = samples_combined_excl_total.loc[
    ~samples_combined_excl_total['component'].isin(components_to_exclude)
]

# ... and sum the remaining scores per scenario, energy scenario and iteration.
total_contributions_summed = (
    total_contributions_filtered
    .groupby(['scenario', 'energy_scenario', 'iteration'])['score']
    .sum()
    .reset_index()
)

In [8]:
# Summarise the filtered (material and manufacture) totals per scenario and
# energy scenario. NOTE: `grouped` is rebound here and no longer refers to the
# grouping of the overall totals.
grouped = total_contributions_summed.groupby(['scenario', 'energy_scenario'])

# Only the 'score' column is handed to apply(): letting apply() operate on the
# grouping columns is deprecated in pandas, and the summary function reads
# nothing but 'score', so the resulting table is unchanged.
stats_material_manufacture = grouped[['score']].apply(calculate_percentiles_and_iqr).reset_index()

# Label every row so the table stays self-describing once written to disk.
stats_material_manufacture['component'] = 'instruments and implant (mat and mnf)'
stats_material_manufacture

  stats_material_manufacture = grouped.apply(calculate_percentiles_and_iqr).reset_index()


Unnamed: 0,scenario,energy_scenario,mean,median,minimum,maximum,25th Percentile,75th Percentile,IQR,component
0,AM HTO,Current,9.100207,9.066461,7.878075,11.461153,8.79808,9.369395,0.571315,instruments and implant (mat and mnf)
1,AM HTO,Greener,6.218086,6.191624,5.381732,8.541232,5.958115,6.433412,0.475296,instruments and implant (mat and mnf)
2,AM HTO (steel jig),Current,7.65825,7.639583,6.718192,8.928863,7.414577,7.896568,0.481991,instruments and implant (mat and mnf)
3,AM HTO (steel jig),Greener,4.071448,4.052522,3.490235,5.514103,3.910525,4.203042,0.292516,instruments and implant (mat and mnf)
4,CM HTO,Current,5.684553,5.649361,3.344534,9.269912,4.922462,6.442183,1.519721,instruments and implant (mat and mnf)
5,UKR,Current,29.258974,29.187098,25.615849,34.698177,28.196805,30.189274,1.992469,instruments and implant (mat and mnf)


In [11]:
# Save the material-and-manufacture summary table without writing the row index.
stats_material_manufacture.to_csv(
    "results/stats_material_manufacture.csv",
    index=False,
)

## Metal impacts (workpiece, powder and ingot) GWP statistical analysis

In [12]:
# Read the metal GWP samples; the cells below use its 'material' and 'score' columns.
metal_impacts = pd.read_csv(
    "results/metal_gwp.csv"
)

In [13]:
def calculate_percentiles_and_iqr(group):
    """Summarise the 'score' column of one group (no minimum/maximum).

    NOTE(review): this rebinds the name of the summary function defined
    earlier in the notebook, which also reported 'minimum' and 'maximum'.
    Any earlier cell re-run after this one will use this shorter variant.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame (typically one groupby group) that contains a 'score' column.

    Returns
    -------
    pandas.Series
        Entries, in order: 'mean', 'median', '25th Percentile',
        '75th Percentile' and 'IQR' (75th minus 25th percentile).
    """
    scores = group['score']
    lower_quartile = scores.quantile(0.25)
    upper_quartile = scores.quantile(0.75)
    # A 90% highest-density interval (arviz `az.hdi`) was drafted here at one
    # point but is intentionally not part of the returned summary.
    summary = {
        'mean': scores.mean(),
        'median': scores.median(),
        '25th Percentile': lower_quartile,
        '75th Percentile': upper_quartile,
        'IQR': upper_quartile - lower_quartile,
    }
    return pd.Series(summary)

# Summarise the scores of every material.
# Only the 'score' column is handed to apply(): letting apply() operate on the
# grouping column is deprecated in pandas, and the summary function reads
# nothing but 'score', so the resulting table is unchanged.
stats_metals = (
    metal_impacts
    .groupby('material')[['score']]
    .apply(calculate_percentiles_and_iqr)
    .reset_index()
)

stats_metals

  stats_metals = metal_impacts.groupby('material').apply(calculate_percentiles_and_iqr).reset_index()


Unnamed: 0,material,mean,median,25th Percentile,75th Percentile,IQR
0,CoCr,32.630329,32.477687,31.416099,33.730888,2.314789
1,Ti6Al4V_powder,59.8408,59.39343,56.924139,62.003673,5.079533
2,Ti6Al4V_workpiece,52.023082,51.768341,49.798086,53.939948,4.141861
3,stainless_steel,6.707712,6.49947,5.924961,7.318991,1.39403


In [14]:
# Save the per-material summary table without writing the row index.
stats_metals.to_csv(
    "results/stats_metals.csv",
    index=False,
)

In [15]:
# NOTE(review): disabled draft, kept for reference only. It would compute the
# 2.5th and 97.5th percentiles of the mean-centred scores for each
# (scenario, energy_scenario) group of `total`. It reads a `mean_scores` frame
# that is not defined anywhere in the visible notebook, so it cannot be
# re-enabled as-is.
#def calculate_percentiles(series, mean_score, lower_percentile, upper_percentile):
#    adjusted_series = series - mean_score
#    lower_value = np.percentile(adjusted_series, lower_percentile)
#    upper_value = np.percentile(adjusted_series, upper_percentile)
#    return lower_value, upper_value

#percentiles = (
#    total
#    .groupby(['scenario', 'energy_scenario'])
#    .apply(lambda x: calculate_percentiles(x['score'], 
#                                           mean_scores.loc[(mean_scores['scenario'] == x.name[0]) & 
#                                                           (mean_scores['energy_scenario'] == x.name[1]), 'mean_score'].values[0],
#                                           2.5, 97.5))
#    .reset_index()
#)
#percentiles[['2.5th_percentile', '97.5th_percentile']] = pd.DataFrame(percentiles[0].tolist(), index=percentiles.index)
#percentiles.drop(columns=[0], inplace=True)
#percentiles