# Calculate statistics for an entire database and for sectors within the database

## First Step : calculating indicators for each {activity|impact method}

In [1]:
import h5py
import numpy as np
from scipy import stats
import os

#MC_results_dict={act:{ic_name:[MC_results]}} as the output of MC_multi_impact_entire_DB()

#MC results are stored in the HDF5 file as 1-D numpy arrays whose size is the number of iterations,
#under paths such as: Uncertainty LCI 1 LCIA 1/ActKey/impact method name



def gini_coefficient(src):
    """Return the Gini coefficient of the sample ``src``.

    The coefficient is computed as the mean absolute difference over all
    distinct pairs (i < j) divided by twice the arithmetic mean of the sample.

    Parameters
    ----------
    src : array_like
        One-dimensional sample of numeric values.

    Returns
    -------
    float
        The Gini coefficient, or ``nan`` when fewer than two values are
        supplied (no pair of values exists).
    """
    values = np.sort(np.asarray(src, dtype=float).ravel())
    n = values.size
    if n < 2:
        return np.nan

    # For values sorted in ascending order x_(0) <= ... <= x_(n-1):
    #     sum_{i<j} |x_i - x_j| = sum_k (2k - n + 1) * x_(k)
    # This avoids building the n*(n-1)/2 pairwise differences in a Python loop.
    weights = 2.0 * np.arange(n) - (n - 1)
    pair_count = n * (n - 1) / 2.0
    avdiff = np.dot(weights, values) / pair_count
    mn = values.mean()
    return avdiff / (2 * mn)


def calculating_endpoint_sum(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics):
    """Sum midpoint Monte Carlo results per endpoint and store sum and variance.

    For every uncertainty level and activity found in
    ``hdf5_file_MC_LCA_results``, the results of all impact methods sharing
    the same endpoint name are added element-wise and written to
    ``/<uncertainty level>/<activity>/<endpoint name>/endpoint_sum`` in
    ``hdf5_file_MC_statistics``. The variance of each ``endpoint_sum`` is
    then stored next to it as ``variance``.

    Parameters
    ----------
    hdf5_file_MC_LCA_results : h5py.File or h5py.Group
        Source of the Monte Carlo results, laid out as
        ``<uncertainty level>/<activity>/<impact method name>``.
    hdf5_file_MC_statistics : h5py.File or h5py.Group
        Writable destination of the aggregated datasets.

    Returns
    -------
    None
    """

    #Calculating sum
    for level_name, level_group in hdf5_file_MC_LCA_results.items():

        # Top-level entries whose name contains this marker are not uncertainty levels
        if 'lci_iteration_name_list' in level_name:
            continue

        for act_key, act_group in level_group.items():

            for method_name, method_dataset in act_group.items():

                #Endpoint names are the second name in impact method tuples (...,...,...)
                name_parts = method_name.split(',', 3)
                endpoint_name = '{},{})'.format(name_parts[0], name_parts[1])

                #If endpoint names were the first name in impact method tuples, use instead:
                #endpoint_name=method_name.rsplit(',', 2)[0]+')'

                endpoint_sum_path = '/{}/{}/{}/endpoint_sum'.format(level_name, act_key, endpoint_name)
                contribution_to_add = method_dataset[()]

                # Explicit existence test instead of a bare ``except``, so that a
                # genuine failure of the addition (e.g. mismatching shapes) is
                # reported as such rather than masked by a create_dataset error.
                if endpoint_sum_path in hdf5_file_MC_statistics:
                    endpoint_sum_dataset = hdf5_file_MC_statistics[endpoint_sum_path]
                    endpoint_sum_dataset[...] = endpoint_sum_dataset[()] + contribution_to_add
                else:
                    hdf5_file_MC_statistics.create_dataset(endpoint_sum_path, data=contribution_to_add)

    #Calculating variance
    for level_name, level_group in hdf5_file_MC_statistics.items():

        for act_key, act_group in level_group.items():

            for endpoint_name, _ in act_group.items():

                endpoint_group_path = '/{}/{}/{}'.format(level_name, act_key, endpoint_name)

                endpoint_sum = hdf5_file_MC_statistics['{}/endpoint_sum'.format(endpoint_group_path)][()]
                hdf5_file_MC_statistics.create_dataset('{}/variance'.format(endpoint_group_path), data=np.var(endpoint_sum))

    return



def sensivity_index_1st_estimate_smooth_curve(Y,X,bin_size=50):
    """Estimate a first-order sensitivity index of ``Y`` with respect to ``X``.

    The (Y, X) pairs are sorted by ascending X and cut into consecutive bins
    of ``bin_size`` pairs. The index is the variance of the per-bin means of
    Y divided by the variance of Y.

    Parameters
    ----------
    Y : array_like
        One-dimensional model output sample.
    X : array_like
        One-dimensional input sample, paired element-wise with ``Y``.
    bin_size : int, optional
        Number of pairs per bin (default 50). Must be a positive integer
        that divides the size of ``Y``.

    Returns
    -------
    float or None
        The estimated index, or ``None`` (after printing a message) when
        ``bin_size`` is not usable.
    """
    y_values = np.asarray(Y)
    x_values = np.asarray(X)

    # Validate bin_size BEFORE it is used as a divisor: a zero or fractional
    # value below 1 would otherwise raise ZeroDivisionError.
    if bin_size <= 0:
        print("bin_size should be a positive integer")
        return

    if bin_size % 1 != 0:
        print("bin_size should be an integer")
        return

    bin_size = int(bin_size)

    if y_values.size % bin_size != 0:
        print("bin_size should be adjusted to be a multiple of Y size")
        return

    #Gathering Y and X, then sorting the pairs by X ascending
    pairs = np.column_stack((y_values, x_values))
    pairs = pairs[pairs[:, 1].argsort()]

    #Calculating the mean of Y for each bin
    bins = y_values.size // bin_size
    bin_means = pairs[:, 0].reshape(bins, bin_size).mean(axis=1)

    #sensivity_index_1st
    si_1st = np.var(bin_means) / np.var(y_values)

    return si_1st


# For models with independent parameters; to be used for sensitivity between mutated models and IC ranking
def sensivity_index_1st_variance_ratio(var_Y,var_X):
    """Return the first-order sensitivity index ``var_X / var_Y``.

    Parameters
    ----------
    var_Y : float
        Variance of the model output.
    var_X : float
        Variance attributed to the input of interest.

    Returns
    -------
    float or int
        ``var_X / var_Y``, or ``0`` when ``var_Y`` is a scalar equal to zero.
    """
    # NumPy scalars (as returned by np.var or read from an HDF5 dataset) do
    # not raise ZeroDivisionError on division by zero: they yield inf/nan with
    # a RuntimeWarning. The zero case is therefore tested explicitly.
    if np.ndim(var_Y) == 0 and var_Y == 0:
        return 0

    return var_X / var_Y



#MC_results_dict={act_key:{ic_name:[MC_results]}} as the output of MC_multi_impact_entire_DB()

def calculating_endpoint_stats_indicators(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics,bin_size,regular_stats=1, dispersion_stats=1, sensitivity_stats=1):
    """Compute statistical indicators for every {activity|impact method}.

    For each uncertainty level, activity and impact method found in
    ``hdf5_file_MC_LCA_results`` the requested indicators are computed from
    the Monte Carlo results and stored as datasets under
    ``/<uncertainty level>/<activity>/<impact method name>/<indicator>`` in
    ``hdf5_file_MC_statistics``.

    The sensitivity indicators read the ``endpoint_sum`` and ``variance``
    datasets of the matching endpoint group, so these must already exist in
    ``hdf5_file_MC_statistics``. The last step additionally reads
    ``variance`` under the 'Uncertainty LCI 1 LCIA 0' and
    'Uncertainty LCI 0 LCIA 1' levels.

    Parameters
    ----------
    hdf5_file_MC_LCA_results : h5py.File or h5py.Group
        Source of the Monte Carlo results.
    hdf5_file_MC_statistics : h5py.File or h5py.Group
        Writable destination of the indicators.
    bin_size : int
        Bin size forwarded to ``sensivity_index_1st_estimate_smooth_curve``.
    regular_stats : int, optional
        ``1`` to store mean, variance, percentiles, etc. (default 1).
    dispersion_stats : int, optional
        ``1`` to store dispersion indicators (default 1).
    sensitivity_stats : int, optional
        ``1`` to store sensitivity indicators; only effective when
        ``regular_stats`` is also ``1`` (default 1).

    Returns
    -------
    None

    Notes
    -----
    Ratios whose denominator is zero are stored as the string 'NA'.
    The Spearman contribution to variance (CTV) of a midpoint is normalised
    by the sum of squared coefficients of the midpoints of the same endpoint
    *within the same activity and uncertainty level*.
    """

    def ratio_or_na(numerator, denominator):
        # NumPy scalars never raise ZeroDivisionError (they return inf/nan),
        # so the zero denominator has to be detected explicitly.
        if denominator == 0:
            return 'NA'
        return numerator / denominator

    def store_indicators(group_path, indicators):
        # Create each indicator dataset, or overwrite its content if it exists.
        for indicator, value in indicators.items():
            dataset_path = '{}/{}'.format(group_path, indicator)
            if dataset_path in hdf5_file_MC_statistics:
                hdf5_file_MC_statistics[dataset_path][...] = value
            else:
                hdf5_file_MC_statistics.create_dataset(dataset_path, data=value)

    compute_sensitivity = (sensitivity_stats == 1 and regular_stats == 1)

    for level_name, level_group in hdf5_file_MC_LCA_results.items():

        # Top-level entries whose name contains this marker are not uncertainty levels
        if 'lci_iteration_name_list' in level_name:
            continue

        for act_key, act_group in level_group.items():

            # Both accumulators are reset for every activity: sharing them
            # across activities would normalise the CTV with squared
            # coefficients belonging to other activities / uncertainty levels.
            sum_spear_corr_endpoint = {}   # endpoint name -> sum of squared coefficients
            squared_corr_by_method = {}    # impact method name -> (endpoint name, squared coefficient)

            for method_name, method_dataset in act_group.items():

                #Endpoint names are the second name in impact method tuples (...,...,...)
                name_parts = method_name.split(',', 3)
                endpoint_name = '{},{})'.format(name_parts[0], name_parts[1])

                #If endpoint names were the first name in impact method tuples, use instead:
                #endpoint_name=method_name.rsplit(',', 2)[0]+')'

                endpoint_group_path = '/{}/{}/{}'.format(level_name, act_key, endpoint_name)
                impact_method_group_path = '/{}/{}/{}'.format(level_name, act_key, method_name)

                # Read the Monte Carlo results once instead of once per statistic
                values = method_dataset[()]

                stats_dict = {}

                #Regular stats (also the basis of the dispersion stats)
                base = {}
                if regular_stats == 1 or dispersion_stats == 1:
                    base['mean'] = np.mean(values)
                    base['variance'] = np.var(values)
                    base['std dev'] = np.std(values)
                    base['minimum'] = np.min(values)
                    base['maximum'] = np.max(values)
                    base['2.5th percentile'] = np.percentile(values, 2.5)
                    base['25th percentile'] = np.percentile(values, 25)
                    base['median'] = np.percentile(values, 50)
                    base['75th percentile'] = np.percentile(values, 75)
                    base['97.5th percentile'] = np.percentile(values, 97.5)
                    base['number of iterations'] = len(values)

                if regular_stats == 1:
                    stats_dict.update(base)

                #Stats to measure the dispersion
                if dispersion_stats == 1:
                    madm = np.percentile(abs(values - base['median']), 50)
                    iqr = base['75th percentile'] - base['25th percentile']
                    spread = base['maximum'] - base['minimum']
                    ci95 = base['97.5th percentile'] - base['2.5th percentile']

                    stats_dict['MADM'] = madm
                    stats_dict['IQR'] = iqr
                    stats_dict['Spread'] = spread
                    stats_dict['CI95'] = ci95
                    stats_dict['Quartile coeff of dispersion'] = ratio_or_na(
                        iqr, base['75th percentile'] + base['25th percentile'])
                    stats_dict['CV'] = ratio_or_na(base['std dev'], base['mean'])
                    stats_dict['CV modified'] = ratio_or_na(
                        base['std dev'],
                        np.sqrt((base['maximum'] - base['mean']) * (base['mean'] - base['minimum'])))
                    stats_dict['CV robust'] = ratio_or_na(madm, base['median'])
                    # The backslash is part of the indicator name
                    stats_dict['IQR\\spread'] = ratio_or_na(iqr, spread)
                    stats_dict['IQR\\CI95'] = ratio_or_na(iqr, ci95)

                #Statistics of the sensitivity based on endpoint_sum
                if compute_sensitivity:
                    endpoint_sum = hdf5_file_MC_statistics['{}/endpoint_sum'.format(endpoint_group_path)][()]

                    spearman = stats.spearmanr(values, endpoint_sum)
                    coefficient = spearman[0]
                    if np.isnan(coefficient):
                        coefficient = 0

                    stats_dict['Spearmann rank correlation - coefficient'] = coefficient
                    stats_dict['Spearmann rank correlation - pvalue'] = spearman[1]

                    squared_coefficient = coefficient ** 2
                    sum_spear_corr_endpoint[endpoint_name] = (
                        sum_spear_corr_endpoint.get(endpoint_name, 0) + squared_coefficient)
                    squared_corr_by_method[method_name] = (endpoint_name, squared_coefficient)

                    if 'LCI 0 LCIA 1' in level_name:
                        var_Y = hdf5_file_MC_statistics['{}/variance'.format(endpoint_group_path)][()]
                        var_X = stats_dict['variance']
                        stats_dict['Sensitivity index 1st order - midpoint to endpoint'] = sensivity_index_1st_variance_ratio(var_Y, var_X)
                    else:
                        stats_dict['Sensitivity index 1st order - midpoint to endpoint'] = sensivity_index_1st_estimate_smooth_curve(Y=endpoint_sum, X=values, bin_size=bin_size)

                #Store values
                store_indicators(impact_method_group_path, stats_dict)

            #Calculating Contribution To Variance, once every midpoint of the activity is known
            if compute_sensitivity:
                for method_name, (endpoint_name, squared_coefficient) in squared_corr_by_method.items():
                    impact_method_group_path = '/{}/{}/{}'.format(level_name, act_key, method_name)
                    total = sum_spear_corr_endpoint[endpoint_name]
                    # A zero total means every coefficient is zero: the share is undefined
                    contribution = squared_coefficient / total if total != 0 else np.nan
                    store_indicators(impact_method_group_path,
                                     {'Spearmann CTV midpoint to endpoint': contribution})

    #Calculating Sensitivity index between uncertainty level for model_11 only
    if compute_sensitivity:
        for level_name, level_group in hdf5_file_MC_statistics.items():

            if 'LCI 1 LCIA 1' not in level_name:
                continue

            for act_key, act_group in level_group.items():

                for method_name, _ in act_group.items():

                    impact_method_group_path = '/{}/{}/{}'.format(level_name, act_key, method_name)
                    impact_method_group_path_LCI = '/{}/{}/{}'.format('Uncertainty LCI 1 LCIA 0', act_key, method_name)
                    impact_method_group_path_LCIA = '/{}/{}/{}'.format('Uncertainty LCI 0 LCIA 1', act_key, method_name)

                    var_Y = hdf5_file_MC_statistics['{}/variance'.format(impact_method_group_path)][()]
                    var_X_LCI = hdf5_file_MC_statistics['{}/variance'.format(impact_method_group_path_LCI)][()]
                    var_X_LCIA = hdf5_file_MC_statistics['{}/variance'.format(impact_method_group_path_LCIA)][()]

                    #Store values
                    store_indicators(impact_method_group_path, {
                        'Sensitivity index 1st order - LCI parameters': sensivity_index_1st_variance_ratio(var_Y, var_X_LCI),
                        'Sensitivity index 1st order - LCIA parameters': sensivity_index_1st_variance_ratio(var_Y, var_X_LCIA),
                    })

    return


def calculating_endpoint_stats_entire_database_aggregated_MC_results(hdf5_file_MC_LCA_results_path, dir_path_for_saving,bin_size):
    """Compute all statistics for an aggregated Monte Carlo results file.

    Creates ``MC_statistics_aggregated_results.hdf5`` in
    ``dir_path_for_saving``, fills it with the endpoint sums and then with the
    statistical indicators computed from the results file.

    The calculation only makes sense if the impact categories in the results
    file are endpoint-per-midpoint categories.

    Parameters
    ----------
    hdf5_file_MC_LCA_results_path : str
        Path of the HDF5 file holding the Monte Carlo LCA results (read only).
    dir_path_for_saving : str
        Directory in which the statistics file is created.
    bin_size : int
        Bin size forwarded to ``calculating_endpoint_stats_indicators``.

    Returns
    -------
    None
    """
    statistics_path = os.path.join(dir_path_for_saving,'MC_statistics_aggregated_results.hdf5')

    # 'w-' creates the statistics file and fails if it already exists.
    # The context managers close both files even if a calculation raises.
    with h5py.File(statistics_path,'w-') as hdf5_file_MC_statistics, \
            h5py.File(hdf5_file_MC_LCA_results_path,'r') as hdf5_file_MC_LCA_results:

        calculating_endpoint_sum(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics)
        calculating_endpoint_stats_indicators(hdf5_file_MC_LCA_results,hdf5_file_MC_statistics,bin_size)

    return
    


In [2]:
# Input: HDF5 file holding the aggregated Monte Carlo LCA results.
hdf5_file_MC_LCA_results_path=r"D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results_with correlation\Dependant LCA Monte Carlo - sector 19a\LCA_Dependant_Monte_Carlo_aggregated_results_ALL.hdf5"
# Output directory: MC_statistics_aggregated_results.hdf5 is created here.
dir_path_for_saving=r"D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results_with correlation\Dependant LCA Monte Carlo - sector 19a"

# Number of (Y, X) pairs per bin for the smooth-curve sensitivity estimate.
bin_size=50
calculating_endpoint_stats_entire_database_aggregated_MC_results(hdf5_file_MC_LCA_results_path, dir_path_for_saving,bin_size)

  c /= stddev[:, None]
  c /= stddev[None, :]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


## Second Step : comparing sensitivity indicators for each {sectors|impact method}

### Code for the Page trend test
Retrieved from https://github.com/jwcarr/PageTest

In [3]:
# PageTest v1.0.1
# http://jwcarr.github.io/PageTest/
#
# Copyright (c) 2013-2015 Jon W. Carr
# Licensed under the terms of the MIT License

from scipy import stats

#   Run Page's test and return l, m, n, p, where l = Page's L statistic,
#   m = number of replications, n = number of treatments, and p = p-value.

def page_test(matrix, ascending=False, use_critical_values=False):
  """
  Run Page's (1963) trend test on a data matrix.

  Parameters
  ----------
  matrix : list
      Data matrix (formatted as a list of lists) with treatments along the
      columns and replications along the rows.
  ascending : bool, optional
      True when the hypothesized trend is ascending, False when it is
      descending (default: False).
  use_critical_values : bool, optional
      True to look up the critical values from Page (1963) instead of
      computing a p-value (default: False).

  Returns
  -------
  L : float
      Page's L statistic
  m : int
      Number of replications
  n : int
      Number of treatments
  p : float or str
      P-value
  """
  validate_input(matrix, ascending, use_critical_values)
  # An ascending hypothesis is handled by mirroring the treatment columns.
  data = reverse_matrix(matrix) if ascending else matrix
  replications = len(data)
  treatments = len(data[0])
  l_statistic = page_l(data, replications, treatments)
  p_value = page_p(l_statistic, replications, treatments, data, use_critical_values)
  return l_statistic, replications, treatments, p_value

#   Calculate Page's L statistic.

def page_l(matrix, m, n):
  """
  Compute Page's L statistic for the first m rows and n columns of matrix.

  Each row is ranked, the ranks are mirrored (last treatment first), the
  mirrored ranks are summed per column, and each column total is weighted by
  its 1-based position before everything is added up.
  """
  mirrored_ranks = []
  for row_index in range(m):
    row_ranks = stats.rankdata(matrix[row_index])
    mirrored_ranks.append([row_ranks[n - 1 - col] for col in range(n)])
  weighted_totals = []
  for col in range(n):
    column_total = sum([ranked_row[col] for ranked_row in mirrored_ranks])
    weighted_totals.append(column_total * (col + 1))
  return sum(weighted_totals)

#   Calculate a p-value for L using the appropriate method.

def page_p(l, m, n, matrix, use_critical_values):
  """
  Return the p-value for L, from the critical-value table when requested.

  Falls back to page_exact_p when the table lookup is not requested, or when
  the lookup raises IndexError (matrix too large for the table).
  """
  wants_table_lookup = use_critical_values == True
  if wants_table_lookup:
    try:
      tabulated = page_critical_p(l, m, n)
    except IndexError:
      print('Large data matrix, so calculating exact p-value instead')
    else:
      return tabulated
  return page_exact_p(l, m, n, matrix)

#   For small m and n, the exact p-value won't always agree with the critical
#   values given in Page (1963, p. 220). If you prefer, you can use Page's critical
#   values instead. This function looks up the critical values for m and n, and
#   finds the significance level for L.

def page_critical_p(l, m, n):
  """
  Look up the significance level of L in the critical-value table.

  Parameters
  ----------
  l : float
      Page's L statistic.
  m : int
      Number of replications (at least 2).
  n : int
      Number of treatments (at least 3).

  Returns
  -------
  str
      '< 0.001', '< 0.01' or '< 0.05' for the strictest level reached,
      otherwise 'n.s.'.

  Raises
  ------
  IndexError
      If the table has no entry for this combination of m and n.
  """
  # Negative indices would silently wrap around to the wrong table entry.
  if n < 3 or m < 2:
    raise IndexError('No critical values tabulated for m=%s, n=%s' % (m, n))
  values = critical_values[n-3][m-2]
  significance_levels = ['< 0.001', '< 0.01', '< 0.05']
  for threshold, level in zip(values, significance_levels):
    # None means that no critical value is tabulated for this level. It must
    # be tested first: comparing a number with None raises TypeError.
    if threshold is not None and l >= threshold:
      return level
  return 'n.s.'

#   Calculate the exact p-value using Equation 4 in Page (1963)

def page_exact_p(l, m, n, matrix):
  """
  Compute the one-tailed p-value of L from its chi-squared statistic.

  Returns the string 'n.s.' when the L statistic of the opposite trend is
  larger than l, since the p-value could then be misleading.
  """
  # Calculate L for the opposite trend
  alt_l = page_l(reverse_matrix(matrix), m, n)
  # If L for the opposite trend > L for the hypothesized trend, then the trend
  # can't be significant ...
  if alt_l > l:
    # ... so return 'n.s.', otherwise the exact p-value could be misleading
    # if the opposite trend happens to be significant
    return 'n.s.'
  chi_squared = ((12.0*l-3.0*m*n*(n+1.0)**2.0)**2.0)/(m*n**2.0*(n**2.0-1.0)*(n+1.0))
  # The survival function equals 1 - cdf but keeps its precision for very
  # small p-values, where 1 - cdf rounds to zero.
  p_two_tailed = stats.chi2.sf(chi_squared, 1)
  # Return one-tailed p-value, since this is a one-tailed test
  return p_two_tailed / 2.0

#   Reverses the columns of a matrix

def reverse_matrix(matrix):
  """
  Return a copy of matrix with its columns in reverse order.

  The number of columns is taken from the first row.
  """
  flipped = []
  for row in matrix:
    last_col = len(matrix[0]) - 1
    flipped.append([row[col] for col in range(last_col, -1, -1)])
  return flipped

#   Validates the input arguments to catch common problems

def validate_input(matrix, ascending, use_critical_values):
  """
  Check the arguments of page_test and raise on common mistakes.

  Raises TypeError when matrix or one of its rows is not exactly a list, or
  when one of the two flags is not a bool. Raises ValueError when the rows
  differ in length, when there are fewer than 2 rows, or fewer than 3 columns.
  """
  if type(matrix) != list:
    raise TypeError('Matrix should be represented as Python lists')
  if any(type(row) != list for row in matrix):
    raise TypeError('Rows of the matrix should be represented as Python lists')
  distinct_lengths = {len(row) for row in matrix}
  if len(distinct_lengths) != 1:
    raise ValueError('Rows in matrix should have same length')
  replications = len(matrix)
  if replications < 2:
    raise ValueError('Page\'s test requires at least 2 replications')
  treatments = len(matrix[0])
  if treatments < 3:
    raise ValueError('Page\'s test requires at least 3 treatments')
  if type(ascending) != bool:
    raise TypeError('The ascending argument should be set to True or False')
  if type(use_critical_values) != bool:
    raise TypeError('The use_critical_values argument should be set to True or False')

# Critical values of Page's L, read by page_critical_p as
# critical_values[n-3][m-2] (n = number of treatments, m = number of
# replications). Each entry holds three thresholds matching the significance
# levels '< 0.001', '< 0.01' and '< 0.05', in that order; None marks a level
# without a tabulated value. The table has 6 rows (n = 3 to 8); the first row
# has 19 entries (m = 2 to 20) and the others 11 entries (m = 2 to 12).
critical_values = [[[None, None, 28], [None, 42, 41], [56, 55, 54], [70, 68, 66], [83, 81, 79], [96, 93, 91], [109, 106, 104], [121, 119, 116], [134, 131, 128], [147, 144, 141], [160, 156, 153], [172, 169, 165], [185, 181, 178], [197, 194, 190], [210, 206, 202], [223, 218, 215], [235, 231, 227], [248, 243, 239], [260, 256, 251]], [[None, 60, 58], [89, 87, 84], [117, 114, 111], [145, 141, 137], [172, 167, 163], [198, 193, 189], [225, 220, 214], [252, 246, 240], [278, 272, 266], [305, 298, 292], [331, 324, 317]], [[109, 106, 103], [160, 155, 150], [210, 204, 197], [259, 251, 244], [307, 299, 291], [355, 346, 338], [403, 393, 384], [451, 441, 431], [499, 487, 477], [546, 534, 523], [593, 581, 570]], [[178, 173, 166], [260, 252, 244], [341, 331, 321], [420, 409, 397], [499, 486, 474], [577, 563, 550], [655, 640, 625], [733, 717, 701], [811, 793, 777], [888, 869, 852], [965, 946, 928]], [[269, 261, 252], [394, 382, 370], [516, 501, 487], [637, 620, 603], [757, 737, 719], [876, 855, 835], [994, 972, 950], [1113, 1088, 1065], [1230, 1205, 1180], [1348, 1321, 1295], [1465, 1437, 1410]], [[388, 376, 362], [567, 549, 532], [743, 722, 701], [917, 893, 869], [1090, 1063, 1037], [1262, 1232, 1204], [1433, 1401, 1371], [1603, 1569, 1537], [1773, 1736, 1703], [1943, 1905, 1868], [2112, 2072, 2035]]]


### Code for Wilcoxon signed-rank test one tailed

Original code from scipy: https://github.com/scipy/scipy/blob/v0.19.1/scipy/stats/morestats.py#L2328-L2425

In [4]:
#from __future__ import division, print_function, absolute_import

import math
import warnings #
#from collections import namedtuple

import numpy as np #
from numpy import (isscalar, r_, log, around, unique, asarray,
                   zeros, arange, sort, amin, amax, any, atleast_1d,
                   sqrt, ceil, floor, array, poly1d, compress,
                   pi, exp, ravel, count_nonzero, sin, cos, arctan2, hypot) #
#from numpy.testing.decorators import setastest

#from scipy._lib.six import string_types
#from scipy import optimize
#from scipy import special
#from . import statlib
from scipy import stats #from . import stats #
from scipy.stats import find_repeats #from .stats import find_repeats, _contains_nan #
#from .contingency import chi2_contingency
from scipy.stats import distributions #from . import distributions #
#from ._distn_infrastructure import rv_generic


# NOTE(review): this list appears to be carried over from scipy's
# morestats.py; none of these names seem to be defined in this notebook
# (the function defined below is wilcoxon_one_tailed) - confirm whether the
# list is still needed.
__all__ = ['mvsdist',
           'bayes_mvs', 'kstat', 'kstatvar', 'probplot', 'ppcc_max', 'ppcc_plot',
           'boxcox_llf', 'boxcox', 'boxcox_normmax', 'boxcox_normplot',
           'shapiro', 'anderson', 'ansari', 'bartlett', 'levene', 'binom_test',
           'fligner', 'mood', 'wilcoxon', 'median_test',
           'pdf_fromgamma', 'circmean', 'circvar', 'circstd', 'anderson_ksamp'
           ]


#Mean = namedtuple('Mean', ('statistic', 'minmax'))
#Variance = namedtuple('Variance', ('statistic', 'minmax'))
#Std_dev = namedtuple('Std_dev', ('statistic', 'minmax'))


def wilcoxon_one_tailed(x, y=None, zero_method="pratt", correction=False):
    """
    Calculate a one-tailed Wilcoxon signed-rank test.

    The Wilcoxon signed-rank test tests the null hypothesis that two
    related paired samples come from the same distribution. In particular,
    it tests whether the distribution of the differences x - y is symmetric
    about zero. It is a non-parametric version of the paired T-test.

    Parameters
    ----------
    x : array_like
        The first set of measurements.
    y : array_like, optional
        The second set of measurements.  If `y` is not given, then the `x`
        array is considered to be the differences between the two sets of
        measurements.
    zero_method : string, {"pratt", "wilcox", "zsplit"}, optional
        "pratt":
            Pratt treatment: includes zero-differences in the ranking process
            (more conservative)
        "wilcox":
            Wilcox treatment: discards all zero-differences
        "zsplit":
            Zero rank split: just like Pratt, but splitting the zero rank
            between positive and negative ones
    correction : bool, optional
        If True, apply continuity correction by adjusting the Wilcoxon rank
        statistic by 0.5 towards the mean value when computing the
        z-statistic.  Default is False.

    Returns
    -------
    statistic : float
        The sum of the ranks of the differences above or below zero, whichever
        is smaller.
    test_sign : str
        Direction of the differences: '>0' when the positive rank sum is the
        larger one, '<0' when the negative rank sum is the larger one, and
        '=0' when both rank sums are equal.
    pvalue : float
        The one-sided p-value for the test.

    Notes
    -----
    Because the normal approximation is used for the calculations, the
    samples used should be large.  A typical rule is to require that
    n > 20.

    References
    ----------
    .. [1] http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
    """

    if zero_method not in ["wilcox", "pratt", "zsplit"]:
        raise ValueError("Zero method should be either 'wilcox' "
                         "or 'pratt' or 'zsplit'")

    if y is None:
        d = np.asarray(x)
    else:
        x, y = map(np.asarray, (x, y))
        if len(x) != len(y):
            raise ValueError('Unequal N in wilcoxon.  Aborting.')
        d = x - y

    if zero_method == "wilcox":
        # Keep all non-zero differences
        d = np.compress(np.not_equal(d, 0), d, axis=-1)

    count = len(d)
    if count < 10:
        warnings.warn("Warning: sample size too small for normal approximation.")

    r = stats.rankdata(abs(d))
    r_plus = np.sum((d > 0) * r, axis=0)
    r_minus = np.sum((d < 0) * r, axis=0)

    if zero_method == "zsplit":
        r_zero = np.sum((d == 0) * r, axis=0)
        r_plus += r_zero / 2.
        r_minus += r_zero / 2.

    T = min(r_plus, r_minus)

    # Direction of the test. The equality case is given its own label so that
    # test_sign is always defined.
    if r_plus > r_minus:
        test_sign = '>0'
    elif r_plus < r_minus:
        test_sign = '<0'
    else:
        test_sign = '=0'

    mn = count * (count + 1.) * 0.25
    se = count * (count + 1.) * (2. * count + 1.)

    if zero_method == "pratt":
        # The zero differences take the lowest ranks but contribute to neither
        # rank sum, so the mean and variance of the normal approximation are
        # reduced by their share (Cureton adjustment).
        n_zero = np.sum(d == 0, axis=0)
        r = r[d != 0]
        mn -= n_zero * (n_zero + 1.) * 0.25
        se -= n_zero * (n_zero + 1.) * (2. * n_zero + 1.)

    # Correction for repeated elements: sizes of the groups of tied ranks.
    _, rank_counts = np.unique(r, return_counts=True)
    repnum = rank_counts[rank_counts > 1]
    if repnum.size != 0:
        se -= 0.5 * (repnum * (repnum * repnum - 1)).sum()

    se = np.sqrt(se / 24)
    continuity = 0.5 * int(bool(correction)) * np.sign(T - mn)
    z = (T - mn - continuity) / se
    # One tail of the normal distribution: this is a one-tailed test.
    prob = stats.norm.sf(abs(z))

    return (T, test_sign, prob)

Testing the code below

In [6]:
X=np.random.normal(1,1.06,100)
Y=np.random.lognormal(2,5,100)

In [7]:
wilcoxon_one_tailed(X,Y)

(549.0, '<0', 5.4484088077383994e-12)

### Code for comparing sensitivity index by sectors

In [5]:
import h5py
import numpy as np
from scipy import stats
import os
import collections
#import statistics


#for a specific impact category with stats indicator for all activities of a sector
def test_LCI_LCIA_ranking(stats_LCI,stats_LCIA):
    """Compare paired LCI and LCIA indicators with a one-tailed Wilcoxon test.

    Parameters
    ----------
    stats_LCI : array_like
        Indicator values for the LCI parameters, one per activity.
    stats_LCIA : array_like
        Indicator values for the LCIA parameters, paired with ``stats_LCI``.

    Returns
    -------
    tuple of (str, str)
        The direction ('LCI < LCIA', 'LCI > LCIA', or 'LCI = LCIA' when the
        test reports no direction) and the significance level ('<0.001',
        '<0.01', '<0.05' or 'Not significant').
    """
    T,test_sign,prob=wilcoxon_one_tailed(stats_LCI,stats_LCIA)

    if prob<0.001:
        test_significance='<0.001'

    elif prob<0.01:
        test_significance='<0.01'

    elif prob<0.05:
        test_significance='<0.05'

    else:
        test_significance='Not significant'

    if test_sign=='<0':
        test_sign_trad='LCI < LCIA'

    elif test_sign=='>0':
        test_sign_trad='LCI > LCIA'

    else:
        # Any other sign means that neither direction dominates; without this
        # branch test_sign_trad would be undefined at the return statement.
        test_sign_trad='LCI = LCIA'

    return (test_sign_trad,test_significance)

#for a specific endpoint impact category with stats indicator for all activities of a sector
def test_IC_ranking(stats_ICs):
    """Test the ranking of impact categories with Page's trend test.

    The impact methods are ordered by descending mean (NaN values ignored)
    of their indicator values, and Page's test is run on that predicted order.

    Parameters
    ----------
    stats_ICs : dict
        ``{impact_method: values of the indicator, one per activity}``.

    Returns
    -------
    tuple of (list, str)
        The impact methods in descending order of mean, and the significance
        level ('<0.001', '<0.01', '<0.05' or 'Not significant').
    """
    # The order prediction is based on the mean value. The methods are sorted
    # directly: indexing a dict by the mean would silently drop every impact
    # method whose mean equals that of another one.
    descending_order=sorted(stats_ICs,key=lambda key: np.nanmean(stats_ICs[key]),reverse=True)

    #Gathering data in the good format: one row per activity, one column per impact method
    data=np.transpose([stats_ICs[key] for key in descending_order]).tolist()

    #Perform test and get the test significance
    l,m,n,prob=page_test(data)

    if prob=='n.s.':
        test_significance='Not significant'

    elif prob<0.001:
        test_significance='<0.001'

    elif prob<0.01:
        test_significance='<0.01'

    elif prob<0.05:
        test_significance='<0.05'

    else:
        test_significance='Not significant'

    return (descending_order,test_significance)



def gathering_stats_from_sector_activities(activity_code_list,hdf5_file_MC_statistics,stats_name):
    """Collect one indicator for all activities of a sector.

    Reads, for every uncertainty level of ``hdf5_file_MC_statistics`` and
    every activity of ``activity_code_list``, the datasets named
    ``<stats_name> - midpoint to endpoint`` (impact methods whose name differs
    from their endpoint name) and, for levels whose name contains
    'LCI 1 LCIA 1', ``<stats_name> - LCI parameters`` and
    ``<stats_name> - LCIA parameters``.

    Returns
    -------
    dict
        ``{'For LC ranking': {impact method: {'LCI': [...], 'LCIA': [...]}},
        'For IC ranking': {uncertainty level: {endpoint: {impact method: [...]}}}}``
        where each list holds one value per activity.
    """
    midpoint_to_endpoint_key='{} - midpoint to endpoint'.format(stats_name)
    lci_key='{} - LCI parameters'.format(stats_name)
    lcia_key='{} - LCIA parameters'.format(stats_name)

    lc_ranking_data={}
    ic_ranking_data={}

    for level_name,_ in hdf5_file_MC_statistics.items():

        level_endpoints=ic_ranking_data[level_name]={}
        is_full_uncertainty_level='LCI 1 LCIA 1' in level_name

        for act in activity_code_list:

            act_group=hdf5_file_MC_statistics['{}/{}'.format(level_name,act)]

            for method_name,_ in act_group.items():

                name_parts=method_name.split(',', 3)
                endpoint_name='{},{}'.format(name_parts[0],name_parts[1])
                method_path='/{}/{}/{}'.format(level_name,act,method_name)

                # Entries whose name equals the endpoint name are not midpoints
                if method_name!=endpoint_name:
                    midpoint_values=level_endpoints.setdefault(endpoint_name,{}).setdefault(method_name,[])
                    midpoint_values.append(hdf5_file_MC_statistics['{}/{}'.format(method_path,midpoint_to_endpoint_key)][()])

                if is_full_uncertainty_level:
                    method_entry=lc_ranking_data.setdefault(method_name,{'LCI':[],'LCIA':[]})
                    lci_value=hdf5_file_MC_statistics['{}/{}'.format(method_path,lci_key)][()]
                    lcia_value=hdf5_file_MC_statistics['{}/{}'.format(method_path,lcia_key)][()]
                    method_entry['LCI'].append(lci_value)
                    method_entry['LCIA'].append(lcia_value)

    return {'For LC ranking':lc_ranking_data,'For IC ranking':ic_ranking_data}



def test_results_from_sector_activities(activity_code_list,hdf5_file_MC_statistics,stats_name):
    """Run the LCI/LCIA and impact-category ranking tests for a sector.

    Gathers the indicator ``stats_name`` for the activities of
    ``activity_code_list`` and applies ``test_LCI_LCIA_ranking`` to every
    impact method and ``test_IC_ranking`` to every endpoint of every
    uncertainty level.

    Returns
    -------
    list
        ``[results_LC_ranking, results_IC_ranking]`` where the first maps
        impact method -> test result and the second maps
        uncertainty level -> endpoint -> test result.
    """
    gathered=gathering_stats_from_sector_activities(activity_code_list,hdf5_file_MC_statistics,stats_name)

    lc_samples=gathered['For LC ranking']
    ic_samples=gathered['For IC ranking']

    lc_results={
        method:test_LCI_LCIA_ranking(samples['LCI'],samples['LCIA'])
        for method,samples in lc_samples.items()
    }

    ic_results={}
    for level_name,endpoints in ic_samples.items():
        ic_results[level_name]={
            endpoint:test_IC_ranking(stats_ICs)
            for endpoint,stats_ICs in endpoints.items()
        }

    return [lc_results,ic_results]
    
    

In [6]:
# Select the Brightway2 project and get a handle on the ecoinvent 3.3 cutoff
# database used by the cells below.
import brightway2 as bw
bw.projects.set_current('iw_integration')
DB_eiv33=bw.Database('ecoinvent 3.3 cutoff')

In [7]:
# Keep every activity whose 'classifications' field, rendered as text,
# contains '19a', then derive code / name / location look-ups from that list.
act_19a=list(filter(lambda activity:'19a' in str(activity['classifications']),DB_eiv33))
code_act_19a=list(map(lambda activity:activity['code'],act_19a))
name_act_19a=list(map(lambda activity:activity['name'],act_19a))
info_act_19a=dict(
    (activity['code'],{'name':activity['name'],'location':activity['location']})
    for activity in act_19a
)

In [8]:
# Statistic to test and the activities (sector 19a) to test it on.
stats_name='Sensitivity index 1st order'
activity_code_list=code_act_19a

filepath=r"D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results_with correlation\Dependant LCA Monte Carlo - sector 19a"

# Open the aggregated statistics read-only; the context manager closes the
# HDF5 file even if the ranking tests raise.
with h5py.File(os.path.join(filepath,'MC_statistics_aggregated_results.hdf5'),'r') as hdf5_file_MC_statistics:
    [results_LC_ranking,results_IC_ranking]=test_results_from_sector_activities(activity_code_list,hdf5_file_MC_statistics,stats_name)

In [9]:
results_LC_ranking

{"('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality')": ('LCI < LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater acidification')": ('LCI < LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater ecotoxicity, long-term')": ('LCI > LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater ecotoxicity, short-term')": ('LCI > LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater eutrophication')": ('LCI < LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Global warming, long-term, ecosystem')": ('LCI > LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Global warming, short-term, ecosystem')": ('LCI > LCIA',
  '<0.001'),
 "('IMPACTWorld+ (Default_Recommended_Endpoin

In [10]:
results_IC_ranking

{'Uncertainty LCI 0 LCIA 1': {"('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality'": (["('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Land transformation, biodiversity')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Land occupation, biodiversity')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Terrestrial acidification')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater eutrophication')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater acidification')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Marine eutrophication')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Global warming, long-term, ecosystem')",
    "('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Global warming, short-term, 

#### Format data for saving

In [11]:
# Gather the raw statistic values behind the tests; the context manager
# closes the HDF5 file even if gathering raises.
with h5py.File(os.path.join(filepath,'MC_statistics_aggregated_results.hdf5'),'r') as hdf5_file_MC_statistics:
    gather_dict=gathering_stats_from_sector_activities(activity_code_list,hdf5_file_MC_statistics,stats_name)

In [12]:
gather_dict['For LC ranking']

{"('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality')": {'LCI': [4.2826724e-07,
   0.87492055,
   3.706136e-05,
   0.016674867,
   2.406985e-05,
   0.44414088,
   0.86697721,
   4.7187348e-07,
   0.16809262,
   0.28886998,
   0.1640183,
   2.406985e-05,
   0.017765772,
   0.016637143,
   0.6955533,
   0.15464118,
   0.22858998,
   3.9365315e-07,
   0.17072459,
   0.017575813,
   0.87791598,
   0.2222278,
   0.61188203,
   0.61622465,
   0.17072459,
   0.080970369,
   0.15988249,
   0.1539204,
   0.28349414,
   4.1820343e-07,
   0.1707246,
   0.16442896,
   0.45448932,
   0.37870327,
   0.30237675,
   0.14157881,
   0.37958217,
   0.15285057,
   0.68157446,
   0.37526584,
   1.3113154e-06,
   0.16451792,
   4.1863038e-07,
   0.40948954,
   0.45221329,
   0.38051271,
   0.06320744,
   0.14079347,
   0.28652796,
   0.41462249,
   0.19417973,
   0.28784823,
   0.07582847,
   0.33274031,
   0.75353205,
   0.70130336,
   4.2770822e-07,
   0.22873294,
   0.37555349,
   0

In [13]:
gather_dict['For IC ranking']

{'Uncertainty LCI 0 LCIA 1': {"('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality'": {"('IMPACTWorld+ (Default_Recommended_Endpoint 1_36)', 'Ecosystem Quality', 'Freshwater acidification')": [1.7133596e-09,
    0.0011987436,
    1.5116552e-07,
    1.6080491e-09,
    4.9041518e-08,
    0.0053937123,
    0.0011987437,
    1.6968861e-09,
    1.7047241e-06,
    1.8698714e-07,
    1.8466953e-06,
    4.9041518e-08,
    1.4629759e-09,
    1.608049e-09,
    0.021106539,
    1.6833203e-06,
    0.00018125815,
    1.7133598e-09,
    2.5275353e-06,
    1.4629759e-09,
    0.0012128528,
    0.0005021331,
    0.066180639,
    0.052383799,
    2.5275353e-06,
    0.00022852041,
    1.7047241e-06,
    2.5624731e-06,
    0.00020449764,
    1.6968864e-09,
    2.5314687e-06,
    1.8459119e-06,
    0.0053930078,
    1.2725089e-08,
    1.4669919e-07,
    0.0068246042,
    1.460355e-08,
    1.6833209e-06,
    0.02021567,
    1.2725088e-08,
    4.7337645e-09,
    1.8466951e-06,
    1.69688

In [14]:
# Split the gathered data into its two parts: values used for the LC-level
# ranking and values used for the impact-category (IC) ranking.
LC_ranking_data_dict=gather_dict['For LC ranking']
IC_ranking_data_dict=gather_dict['For IC ranking']

In [15]:
def simple_dict_LC_ranking_data(data_dict,code_act):
    """Flatten the nested LC-ranking data into one record per value.

    ``data_dict`` maps impact category -> LC level -> sequence of values.
    The value at position ``n`` of a sequence is attributed to
    ``code_act[n]``.

    Returns a dict keyed by '<LC level> - <activity code> - <impact category>'
    whose values are dicts with the keys 'impact category', 'Activity',
    'LC level' and 'Sensitivity index value'.
    """
    flat_records={}

    for impact_category,per_level in data_dict.items():
        for level_name,values in per_level.items():
            for position,value in enumerate(values):
                activity_code=code_act[position]
                record_key=' - '.join((level_name,activity_code,impact_category))
                flat_records[record_key]={
                    'impact category':impact_category,
                    'Activity':activity_code,
                    'LC level':level_name,
                    'Sensitivity index value':value,
                }

    return flat_records

def simple_dict_IC_ranking_data(data_dict,code_act):
    """Flatten the nested IC-ranking data into one record per value.

    ``data_dict`` maps LC level -> endpoint IC -> midpoint IC -> sequence of
    values. The value at position ``n`` of a sequence is attributed to
    ``code_act[n]``.

    Returns a dict keyed by
    '<LC level> - <activity code> - <endpoint IC> - <midpoint IC>' whose
    values are dicts with the keys 'LC level', 'Activity', 'Endpoint IC',
    'Midpoint IC' and 'Sensitivity index value'.
    """
    flat_records={}

    for level_name,per_endpoint in data_dict.items():
        for endpoint_name,per_midpoint in per_endpoint.items():
            for midpoint_name,values in per_midpoint.items():
                for position,value in enumerate(values):
                    activity_code=code_act[position]
                    record_key=' - '.join((level_name,activity_code,endpoint_name,midpoint_name))
                    flat_records[record_key]={
                        'LC level':level_name,
                        'Activity':activity_code,
                        'Endpoint IC':endpoint_name,
                        'Midpoint IC':midpoint_name,
                        'Sensitivity index value':value,
                    }

    return flat_records

In [16]:
# Flatten both nested data dictionaries into one record per value; values are
# matched to activities by their position in code_act_19a.
LC_ranking_data_dict_simple=simple_dict_LC_ranking_data(LC_ranking_data_dict,code_act_19a)
IC_ranking_data_dict_simple=simple_dict_IC_ranking_data(IC_ranking_data_dict,code_act_19a)

#### Format results for saving

In [17]:
def simple_dict_LC_ranking_results(results_LC_ranking):
    """Turn each LC-ranking test result into a labelled record.

    ``results_LC_ranking`` maps impact category -> indexable result whose
    item 0 is the ranking and item 1 the level of significance.

    Returns a dict mapping impact category ->
    {'LC ranking': ..., 'Level of significance': ...}.
    """
    return {
        impact_category:{
            'LC ranking':outcome[0],
            'Level of significance':outcome[1],
        }
        for impact_category,outcome in results_LC_ranking.items()
    }


def simple_dict_IC_ranking_results(results_IC_ranking):
    """Flatten the IC-ranking test results into one record per ranked midpoint.

    ``results_IC_ranking`` maps LC level -> endpoint IC -> indexable result
    whose item 0 is the ordered sequence of midpoint ICs and item 1 the
    level of significance of the test.

    Returns a dict keyed by '<LC level> - <rank> - <endpoint IC>', where rank
    is the 1-based position of the midpoint in the ordered sequence; each
    value holds 'LC level', 'Endpoint IC', 'Midpoint IC',
    'Level of significance' and 'order'.
    """
    flat_records={}

    for level_name,per_endpoint in results_IC_ranking.items():
        for endpoint_name,outcome in per_endpoint.items():
            ranked_midpoints=outcome[0]
            significance=outcome[1]

            for rank,midpoint_name in enumerate(ranked_midpoints,start=1):
                record_key=' - '.join((level_name,str(rank),endpoint_name))
                flat_records[record_key]={
                    'LC level':level_name,
                    'Endpoint IC':endpoint_name,
                    'Midpoint IC':midpoint_name,
                    'Level of significance':significance,
                    'order':rank,
                }

    return flat_records

In [18]:
# Flatten the test results into one record per row.
results_IC_ranking_dict_simple=simple_dict_IC_ranking_results(results_IC_ranking)
results_LC_ranking_dict_simple=simple_dict_LC_ranking_results(results_LC_ranking)

Save and reload data and results with pickle

In [19]:
# Bundle the gathered data and both sets of test results into one object
# so they can be saved together.
results_and_data_dict={'SI1_sector19a_data':gather_dict,'SI1_sector19a_test_results':{'results_LC_ranking':results_LC_ranking,'results_IC_ranking':results_IC_ranking}}

In [20]:
import pickle

In [21]:
def pickle_save(file_path_root,object_name_to_save,file_name):
    """Pickle ``object_name_to_save`` to '<file_path_root>\\<file_name>.p'.

    The path is built with a literal backslash separator and the '.p'
    extension is appended to ``file_name``. An existing file is overwritten.
    Returns None.
    """
    complete_file_path=file_path_root+'\\'+file_name+'.p'

    # The context manager flushes and closes the file even if pickling fails.
    with open(complete_file_path,"wb") as pickle_file:
        pickle.dump(object_name_to_save,pickle_file)

In [22]:
# Destination folder, object to store and base file name; pickle_save appends
# the '.p' extension to file_name.
file_path_root=r'D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results_with correlation\Dependant LCA Monte Carlo - sector 19a\SI1 test results'
object_name_to_save=results_and_data_dict
file_name='results_and_data_dict_sector19a_withoutLandTransfo'

pickle_save(file_path_root,object_name_to_save,file_name)

In [2]:
def pickle_load(file_path_root,file_name):
    """Load and return the object pickled at '<file_path_root>\\<file_name>.p'.

    The path is built with a literal backslash separator and the '.p'
    extension is appended to ``file_name``.
    """
    complete_file_path=file_path_root+'\\'+file_name+'.p'

    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the file even if unpickling fails.
    with open(complete_file_path,"rb") as pickle_file:
        return pickle.load(pickle_file)

In [3]:
# Reload a previously saved bundle (the 'With Land transfo' variant);
# pickle_load appends the '.p' extension to file_name.
file_path_root=r'D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results\Dependant LCA Monte Carlo - sector 19a\SI1 test results\With Land transfo'
file_name='results_and_data_dict_sector19a_withLandTransfo'

results_and_data_dict=pickle_load(file_path_root,file_name)

In [8]:
# Unpack the gathered data from the loaded bundle.
LC_ranking_data_dict=results_and_data_dict['SI1_sector19a_data']['For LC ranking']
IC_ranking_data_dict=results_and_data_dict['SI1_sector19a_data']['For IC ranking']

# Unpack the test results from the loaded bundle.
results_LC_ranking=results_and_data_dict['SI1_sector19a_test_results']['results_LC_ranking']
results_IC_ranking=results_and_data_dict['SI1_sector19a_test_results']['results_IC_ranking']

# Flatten data and results into one-record-per-row dictionaries; data values
# are matched to activities by their position in code_act_19a.
LC_ranking_data_dict_simple=simple_dict_LC_ranking_data(LC_ranking_data_dict,code_act_19a)
IC_ranking_data_dict_simple=simple_dict_IC_ranking_data(IC_ranking_data_dict,code_act_19a)

results_IC_ranking_dict_simple=simple_dict_IC_ranking_results(results_IC_ranking)
results_LC_ranking_dict_simple=simple_dict_LC_ranking_results(results_LC_ranking)


Save data and results to CSV files with pandas

In [23]:
import pandas as pd

In [24]:
# Output folder for the CSV exports. Raw string: the Windows path is full of
# backslashes that would otherwise be read as (invalid) escape sequences.
filepath_root=r'D:\Dropbox (MAGI)\Dossiers professionnels\Logiciels\Brightway 2\Monte Carlo results_with correlation\Dependant LCA Monte Carlo - sector 19a\SI1 test results'

In [25]:
# One row per activity code with its name and location, written as a
# ';'-separated CSV. The file name is a raw string so its leading backslash
# is not read as an escape sequence.
df_name_sector=pd.DataFrame.from_dict(info_act_19a,orient='index')
df_name_sector.to_csv(sep=';',path_or_buf=filepath_root+r'\Activites_sector_c19a.csv')

In [26]:
df_name_sector

Unnamed: 0,name,location
00aab9636c43ed974af887d893d6e73a,ethanol production from sugar beet molasses,RoW
05a8e3c172c7254844cf432cfb265c5a,"treatment of waste cooking oil, purified, este...",CA-QC
0d5f4c63bf6ef81a46717af6c9c421de,"ethyl tert-butyl ether production, from bioeth...",RER
0de776907f41a887a3c34174dda34c33,ethanol production from sweet sorghum,CN
1766c16028cefc7926e0707911ea57ba,ethanol production from wood,CH
191a865d11326a0396e6eee0d7c87934,biogas production from grass,RoW
1c83514926346f4856eaaf6fd70ebe9d,"market for potassium sulfate, as K2O",GLO
1cbe1e1ab2ba012b121bfb59089ab717,"market for ethanol, without water, in 99.7% so...",CH
1d02f531ecdc17b2c977d324ba9cfe5f,ethanol production from wood,CH
1db445dff2924c7c9e868ee89758def5,"dewatering of ethanol from biomass, from 95% t...",BR


In [27]:
# Flattened LC-ranking data, one row per record, written as a ';'-separated
# CSV. The file name is a raw string so its leading backslash is not read as
# an escape sequence.
df_data_LC_ranking=pd.DataFrame.from_dict(LC_ranking_data_dict_simple,orient='index')
df_data_LC_ranking.to_csv(sep=';',path_or_buf=filepath_root+r'\Data_LC_ranking_sector_c19a.csv')

In [28]:
# Flattened IC-ranking data, one row per record, written as a ';'-separated
# CSV. The file name is a raw string so its leading backslash is not read as
# an escape sequence.
df_data_IC_ranking=pd.DataFrame.from_dict(IC_ranking_data_dict_simple,orient='index')
df_data_IC_ranking.to_csv(sep=';',path_or_buf=filepath_root+r'\Data_IC_ranking_sector_c19a.csv')

In [29]:
# Flattened IC-ranking test results, one row per ranked midpoint, written as
# a ';'-separated CSV. The file name is a raw string so its leading backslash
# is not read as an escape sequence.
df_results_IC_ranking=pd.DataFrame.from_dict(results_IC_ranking_dict_simple,orient='index')
df_results_IC_ranking.to_csv(sep=';',path_or_buf=filepath_root+r'\Results_IC_ranking_sector_c19a.csv')

In [30]:
# LC-ranking test results, one row per impact category, written as a
# ';'-separated CSV. The file name is a raw string so its leading backslash
# is not read as an escape sequence.
df_results_LC_ranking=pd.DataFrame.from_dict(results_LC_ranking_dict_simple,orient='index')
df_results_LC_ranking.to_csv(sep=';',path_or_buf=filepath_root+r'\Results_LC_ranking_sector_c19a.csv')

## Exploring sectors in ecoinvent 3.3 (eiv33)

In [65]:
import brightway2 as bw

In [66]:
bw.projects.set_current('iw_integration')

In [67]:
DB_eiv33=bw.Database('ecoinvent 3.3 cutoff')

In [77]:
all_act=[act for act in DB_eiv33]

In [82]:
classif=[act['classifications'] for act in all_act]

In [84]:
classif_19a=[classi for classi in classif if '19a' in str(classi)]

In [87]:
len(classif_19a)

95

In [88]:
act_19a=[act for act in DB_eiv33 if '19a' in str(act['classifications'])]

In [89]:
len(act_19a)

95

In [90]:
act_19a

['ethanol production from grass' (kilogram, RoW, None),
 'dewatering of ethanol from biomass, from 95% to 99.7% solution state' (kilogram, RoW, None),
 'ethanol production from grass' (kilogram, CH, None),
 'biogas production from grass' (kilogram, RoW, None),
 'ethanol production from maize' (kilogram, US, None),
 'market for potassium sulfate, as K2O' (kilogram, GLO, None),
 'treatment of waste cooking oil, purified, esterification' (kilogram, RoW, None),
 'ethanol production from wood' (kilogram, RoW, None),
 'esterification of rape oil' (kilogram, RoW, None),
 'market for used vegetable cooking oil, purified' (kilogram, GLO, None),
 'methanol production, from synthetic gas' (kilogram, RoW, None),
 'ethanol production from rye' (kilogram, RER, None),
 'market for methanol, from biomass' (kilogram, CH, None),
 'treatment of used vegetable cooking oil, purification' (kilogram, RoW, None),
 'ethanol production from grass' (kilogram, CH, None),
 'ethyl tert-butyl ether production, from 

In [93]:
code_act_19a=[act['code'] for act in act_19a]
code_act_19a

['2a5800eca28df7f0fa624cbc51efcd66',
 'c0da0952bdc5722394d3323f09ba2995',
 '42366c72a7b58506656fd59fce0ece87',
 'bcaab3242d83d63c67f4c621392230e0',
 '30b9fb38141e531bba7312a255fa5345',
 '1c83514926346f4856eaaf6fd70ebe9d',
 'ae1f9eeaf7d7b6500176acb396c03953',
 '7b1ea5090884dee2f258ac2478394a6b',
 'aafaf2e111de237a46eb2f5db01d2b0e',
 '3e6db3cc5f0eb9234740cde0ee96c41f',
 '6678bedecd094ef76eba1c1b15b3b8b2',
 'af5e8925a134b6d5dd81ab8adedc4106',
 'ae24315d079068756d47bf686652dfcd',
 '970e2e39add0d5cdcf4db9ddceef63b5',
 '1fef6876ffb8b901b45a6353a38725b4',
 '0d5f4c63bf6ef81a46717af6c9c421de',
 '2ec8d41be64a3ce32143d561627b5d19',
 '42847e8accbd417aec74ee4c6ec4c0b6',
 '74145525fc7e93ddc660248efe9d33d3',
 '1766c16028cefc7926e0707911ea57ba',
 'ebe20260e4862fc82fa4a5fe62d01bab',
 '191a865d11326a0396e6eee0d7c87934',
 'aaf7a39752b82ca4791d585ccc469eeb',
 'eac9b14f58c4748c3a6e698b99175d4d',
 'e7dd8db216c8ee64367553c5a74a2f6e',
 '1db445dff2924c7c9e868ee89758def5',
 'a37071f4e59f0f53efa46f2efe4db078',
 

In [19]:
act_test=act_19a[0]

In [20]:
act_test.as_dict()

{'activity': '105ba3ab-110c-442b-9e99-5f7fd20da03a',
 'activity type': 'market activity',
 'authors': {'data entry': {'email': 'arnaud.dauriat@eners.ch',
   'name': 'Arnaud Dauriat'},
  'data generator': {'email': 'edgard.gnansounou@epfl.ch',
   'name': 'Edgard Gnansounou'}},
 'classifications': [('ISIC rev.4 ecoinvent',
   '19a: Liquid and gaseous fuels from biomass'),
  ('EcoSpold01Categories', 'biomass/fuels')],
 'code': 'ae24315d079068756d47bf686652dfcd',
 'comment': "Bottom-Up estimation based on plant data. Life time is 80 years. Product storage volume of storage tanks is 10'000 m3. The average storage time is 2 months. Thus total through flow in the life time is 4.8 mio. m3. Specific transport estimation for CH. Source:  Final report ecoinvent data v2.0, volume 17.\n\n[This dataset was already contained in the ecoinvent database version 2. It was not individually updated during the transfer to ecoinvent version 3. Life Cycle Impact Assessment results may still have changed, as t

In [18]:
act_test=act_19a[0]

In [22]:
info_act_19a

{'00aab9636c43ed974af887d893d6e73a': {'location': 'RoW',
  'name': 'ethanol production from sugar beet molasses'},
 '05a8e3c172c7254844cf432cfb265c5a': {'location': 'CA-QC',
  'name': 'treatment of waste cooking oil, purified, esterification'},
 '0d5f4c63bf6ef81a46717af6c9c421de': {'location': 'RER',
  'name': 'ethyl tert-butyl ether production, from bioethanol'},
 '0de776907f41a887a3c34174dda34c33': {'location': 'CN',
  'name': 'ethanol production from sweet sorghum'},
 '1766c16028cefc7926e0707911ea57ba': {'location': 'CH',
  'name': 'ethanol production from wood'},
 '191a865d11326a0396e6eee0d7c87934': {'location': 'RoW',
  'name': 'biogas production from grass'},
 '1c83514926346f4856eaaf6fd70ebe9d': {'location': 'GLO',
  'name': 'market for potassium sulfate, as K2O'},
 '1cbe1e1ab2ba012b121bfb59089ab717': {'location': 'CH',
  'name': 'market for ethanol, without water, in 99.7% solution state, from fermentation, at service station'},
 '1d02f531ecdc17b2c977d324ba9cfe5f': {'location': 