# Calculate statistics for an entire database and sectors within the database

## First Step : calculating indicators for each {activity|impact method}

In [5]:
import os

import h5py
import numpy as np
from scipy import stats

#MC_results_dict={act:{ic_name:[MC_results]}} as the output of MC_multi_impact_entire_DB()

#MC results are stored in the HDF5 file as 1-D NumPy arrays whose size equals the number of iterations,
#under paths of the form: Uncertainty LCI 1 LCIA 1/ActKey/impact method name



def calculating_endpoint_sum(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics):
    """Sum, per activity and endpoint, the Monte Carlo results of all impact methods.

    The results file is walked as ``uncertainty level / activity / impact
    method``. For every impact method dataset, its values are added element-wise
    to the dataset ``/<uncertainty level>/<activity>/<endpoint name>/endpoint_sum``
    in the statistics file; that dataset is created on first use.

    The endpoint name is the impact method name with its last comma-separated
    element removed and a closing parenthesis re-appended (i.e. the endpoint is
    assumed to be the second element of a three-element tuple-like name; use
    ``rsplit(',', 2)`` instead if it is the first element).

    Args:
        hdf5_file_MC_LCA_results: open, readable HDF5 file/group (or any mapping
            with the same nesting) holding the Monte Carlo results.
        hdf5_file_MC_statistics: open, writable HDF5 file/group receiving the
            ``endpoint_sum`` datasets.

    Returns:
        None. The statistics file is modified in place.
    """
    for level_name, level_group in hdf5_file_MC_LCA_results.items():

        # Top-level entries whose name contains 'lci_iteration_name_list' are
        # not walked (presumably iteration metadata rather than results).
        if 'lci_iteration_name_list' in level_name:
            continue

        for act_key, act_group in level_group.items():

            for method_name, method_dataset in act_group.items():

                endpoint_name = method_name.rsplit(',', 1)[0] + ')'
                endpoint_sum_path = (
                    '/' + level_name + '/' + act_key + '/' + endpoint_name + '/endpoint_sum'
                )

                contribution_to_add = method_dataset[()]

                # Only a missing dataset means "first contribution". Any other
                # error (e.g. arrays of different lengths) must surface instead
                # of being mistaken for a missing dataset.
                try:
                    endpoint_sum_dataset = hdf5_file_MC_statistics[endpoint_sum_path]
                except KeyError:
                    hdf5_file_MC_statistics.create_dataset(
                        endpoint_sum_path, data=contribution_to_add
                    )
                else:
                    endpoint_sum_dataset[...] = endpoint_sum_dataset[()] + contribution_to_add




#MC_results_dict={act_key:{ic_name:[MC_results]}} as the output of MC_multi_impact_entire_DB()

def _endpoint_name_of(impact_method_name):
    """Return the endpoint name: the method name minus its last comma-separated element."""
    #If endpoint names are the first name in impact method tuples (...,...,...),
    #use rsplit(',', 2) instead.
    return impact_method_name.rsplit(',', 1)[0] + ')'


def _descriptive_indicators(values):
    """Return the dict of descriptive and dispersion indicators for a 1-D sample."""
    indicators = {}

    #Regular stats
    indicators['mean'] = np.mean(values)
    indicators['variance'] = np.var(values)
    indicators['std dev'] = np.std(values)
    indicators['minimum'] = np.min(values)
    indicators['maximum'] = np.max(values)
    indicators['2.5th percentile'] = np.percentile(values, 2.5)
    indicators['25th percentile'] = np.percentile(values, 25)
    indicators['median'] = np.percentile(values, 50)
    indicators['75th percentile'] = np.percentile(values, 75)
    indicators['97.5th percentile'] = np.percentile(values, 97.5)
    indicators['number of iterations'] = len(values)

    #Stats to measure the dispersion
    indicators['MADM'] = np.percentile(np.abs(values - indicators['median']), 50)
    indicators['IQR'] = indicators['75th percentile'] - indicators['25th percentile']
    indicators['Spread'] = indicators['maximum'] - indicators['minimum']
    indicators['CI95'] = indicators['97.5th percentile'] - indicators['2.5th percentile']

    # NumPy scalar division never raises: an undefined ratio is stored as
    # nan/inf. errstate only silences the accompanying RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        indicators['Quartile coeff of dispersion'] = indicators['IQR'] / (
            indicators['75th percentile'] + indicators['25th percentile']
        )
        indicators['CV'] = indicators['std dev'] / indicators['mean']
        indicators['CV modified'] = indicators['std dev'] / np.sqrt(
            (indicators['maximum'] - indicators['mean'])
            * (indicators['mean'] - indicators['minimum'])
        )
        indicators['CV robust'] = indicators['MADM'] / indicators['median']
        indicators['IQR\\spread'] = indicators['IQR'] / indicators['Spread']
        indicators['IQR\\CI95'] = indicators['IQR'] / indicators['CI95']

    return indicators


def _store_indicators(hdf5_file, group_path, indicators):
    """Write each indicator to ``group_path/<indicator>``, creating or overwriting it."""
    for indicator, value in indicators.items():
        dataset_path = group_path + '/' + indicator
        try:
            dataset = hdf5_file[dataset_path]
        except KeyError:
            hdf5_file.create_dataset(dataset_path, data=value)
        else:
            dataset[...] = value


def calculating_endpoint_stats_indicators(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics):
    """Compute and store statistical indicators for every {activity|impact method}.

    For each impact method dataset found under ``uncertainty level / activity``
    in the results file, the following are written as individual datasets under
    ``/<uncertainty level>/<activity>/<impact method>/`` in the statistics file:

    * descriptive statistics (mean, variance, std dev, min, max, percentiles,
      number of iterations);
    * dispersion indicators (MADM, IQR, Spread, CI95 and ratios built on them);
      undefined ratios are stored as nan/inf, as NumPy computes them;
    * the Spearman rank correlation (coefficient and p-value) between the impact
      method results and the ``endpoint_sum`` dataset of its endpoint;
    * ``Spearmann CTV midpoint to endpoint``: the squared coefficient divided by
      the sum of squared coefficients of all impact methods of the same activity
      sharing that endpoint.

    The ``endpoint_sum`` datasets must already exist in the statistics file
    (they are read at ``/<uncertainty level>/<activity>/<endpoint>/endpoint_sum``).

    Args:
        hdf5_file_MC_LCA_results: open, readable HDF5 file/group (or any mapping
            with the same nesting) holding the Monte Carlo results.
        hdf5_file_MC_statistics: open, writable HDF5 file/group holding the
            endpoint sums and receiving the indicators.

    Returns:
        None. The statistics file is modified in place.

    Raises:
        KeyError: if an ``endpoint_sum`` dataset is missing.
    """
    for level_name, level_group in hdf5_file_MC_LCA_results.items():

        # Top-level entries whose name contains 'lci_iteration_name_list' are
        # not walked (presumably iteration metadata rather than results).
        if 'lci_iteration_name_list' in level_name:
            continue

        for act_key, act_group in level_group.items():

            act_path = '/' + level_name + '/' + act_key

            # Both accumulators are scoped to one activity.
            squared_coeff_by_method = {}
            squared_coeff_sum_by_endpoint = {}

            for method_name, method_dataset in act_group.items():

                # Read the samples once instead of once per statistic.
                values = method_dataset[()]

                endpoint_name = _endpoint_name_of(method_name)
                endpoint_sum = hdf5_file_MC_statistics[
                    act_path + '/' + endpoint_name + '/endpoint_sum'
                ][()]

                indicators = _descriptive_indicators(values)

                correlation = stats.spearmanr(values, endpoint_sum)
                indicators['Spearmann rank correlation - coefficient'] = correlation[0]
                indicators['Spearmann rank correlation - pvalue'] = correlation[1]

                squared_coeff = correlation[0] ** 2
                squared_coeff_by_method[method_name] = (endpoint_name, squared_coeff)
                squared_coeff_sum_by_endpoint[endpoint_name] = (
                    squared_coeff_sum_by_endpoint.get(endpoint_name, 0) + squared_coeff
                )

                #Store values
                _store_indicators(
                    hdf5_file_MC_statistics, act_path + '/' + method_name, indicators
                )

            #Calculating Contribution To Variance, once every coefficient of the activity is known
            for method_name, (endpoint_name, squared_coeff) in squared_coeff_by_method.items():
                with np.errstate(divide='ignore', invalid='ignore'):
                    contribution = squared_coeff / squared_coeff_sum_by_endpoint[endpoint_name]

                _store_indicators(
                    hdf5_file_MC_statistics,
                    act_path + '/' + method_name,
                    {'Spearmann CTV midpoint to endpoint': contribution},
                )


def calculating_endpoint_stats_entire_database_aggregated_MC_results(hdf5_file_MC_LCA_results_path, dir_path_for_saving):
    """Compute endpoint sums and statistical indicators for a whole results file.

    Opens the Monte Carlo LCA results file read-only, creates
    ``MC_statistics_aggregated_results.hdf5`` inside ``dir_path_for_saving`` and
    fills it by calling ``calculating_endpoint_sum`` followed by
    ``calculating_endpoint_stats_indicators``. This only makes sense if the
    impact categories in the results file are endpoint-per-midpoint categories.

    Args:
        hdf5_file_MC_LCA_results_path: path of the HDF5 file holding the Monte
            Carlo LCA results.
        dir_path_for_saving: directory in which the statistics file is created.

    Returns:
        None.

    Raises:
        OSError: if the results file cannot be opened, or if the statistics
            file already exists (it is opened in exclusive-create mode ``'w-'``
            so previous results are never overwritten).
    """
    # os.path.join keeps the path valid on every platform, unlike a hard-coded '\\'.
    statistics_path = os.path.join(dir_path_for_saving, 'MC_statistics_aggregated_results.hdf5')

    # The results file is opened first so that a wrong input path does not leave
    # an empty statistics file behind. The with-statement closes both files even
    # if a calculation fails.
    with h5py.File(hdf5_file_MC_LCA_results_path, 'r') as hdf5_file_MC_LCA_results, \
            h5py.File(statistics_path, 'w-') as hdf5_file_MC_statistics:

        calculating_endpoint_sum(hdf5_file_MC_LCA_results, hdf5_file_MC_statistics)
        calculating_endpoint_stats_indicators(hdf5_file_MC_LCA_results, hdf5_file_MC_statistics)
    


In [6]:
# Folder that holds the aggregated Monte Carlo results; it is also passed as
# the directory in which the statistics file is saved.
dir_path_for_saving = r"D:\Dossiers professionnels\Logiciels\Brightway 2\Test Dependant LCA Monte Carlo"

# Aggregated Monte Carlo LCA results file, located inside that folder.
hdf5_file_MC_LCA_results_path = (
    dir_path_for_saving + "\\" + "LCA_Dependant_Monte_Carlo_aggregated_results_ALL.hdf5"
)

calculating_endpoint_stats_entire_database_aggregated_MC_results(
    hdf5_file_MC_LCA_results_path,
    dir_path_for_saving,
)

  c /= stddev[:, None]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


nan with sum nan
nan with sum nan
nan with sum nan
nan with sum nan
nan with sum nan
nan with sum nan
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
1.0 with sum 1.0
