In [4]:
import pandas as pd
import numpy as np

import uncertainties as uc
from uncertainties import ufloat
from uncertainties import unumpy

from scipy.stats import gmean

# Utility Functions

## Colors for cell types
We use the color index from the Summary file.
The following code loads the data from the Excel file and translates it to hex format for later use.

In [10]:
# Load the 'Colors' sheet of Summary.xlsx; the first column becomes the index
# and only the first four columns are read.
colors_data = pd.read_excel('Summary.xlsx', sheet_name='Colors', index_col=0, usecols=range(4))

# Parse the colors, stored in Excel as text of the form "...(R,G,B)..."
# (the form used there for the Voronoi diagram), into numeric R, G and B columns.
colors_data['R'] = pd.to_numeric([(x.split(',')[0]).split('(')[1] for x in colors_data['colors']])
colors_data['G'] = pd.to_numeric([x.split(',')[1] for x in colors_data['colors']])
colors_data['B'] = pd.to_numeric([(x.split(',')[2]).split(')')[0] for x in colors_data['colors']])

# Convert the RGB values to a six-character hex string (no leading '#').
# The column is created directly from the list of strings: initialising it with
# the integer 0 and then writing strings into it cell by cell makes pandas
# change the column dtype on assignment, which recent pandas versions deprecate.
colors_data['hex'] = ['{:02x}{:02x}{:02x}'.format(r, g, b)
                      for r, g, b in zip(colors_data['R'], colors_data['G'], colors_data['B'])]

## Rounding the results
We use the following functions to round the results to their significant digits.

In [36]:
'''
    This function help find how many digits needed to take in the round function
    giving number that there first digit is 1 an additional digit 

    inputs:
        values - np array or float
    output:
        np array with the number of digits to round for 
'''

def _sign_digits(values):
    if isinstance(values,float) or isinstance(values,int) :
        log_vals = np.log10(values)
        another_dig = 1*(log_vals %1 < 0.26)
        return int(1 + another_dig - np.ceil(log_vals))
        
    else:
        log_vals = np.log10(values.astype(float))    
        # finding values that are "1.x/1x/1xy.." such that we still want the second digit
        another_dig = 1*(log_vals %1 < 0.26)
        return (1 + another_dig - np.ceil(log_vals)).astype(int)


def round_vec(vec):
    """Round a number or a vector of numbers to its significant digits.

    inputs:
        vec - one of:
              * a ufloat: nominal value and standard deviation are both rounded
                to the digits chosen from the standard deviation;
              * a float/int (Python or NumPy scalar): rounded to one digit more
                than ``_sign_digits`` returns (two significant digits);
              * a one-dimensional sequence whose first element is a ufloat
                (e.g. a uarray): every element is rounded like a single ufloat;
              * a one-dimensional np.ndarray of plain numbers: every element is
                rounded like a single float.
    output:
        the rounded value(s). Sequences are rounded on a copy, so the object
        passed in is left untouched. Any other input is returned unchanged.
    """
    if isinstance(vec, uc.UFloat):
        num_dig = int(_sign_digits(vec.std_dev))
        return ufloat(round(vec.nominal_value, num_dig),
                      round(vec.std_dev, num_dig))

    # np.integer is listed explicitly: unlike np.float64 it is not a subclass
    # of the Python type, and indexing such a scalar below would fail.
    if isinstance(vec, (float, int, np.floating, np.integer)):
        num_dig = int(_sign_digits(vec)) + 1  # +1: we want 2 significant digits for plain values
        return round(vec, num_dig)

    # Nothing to round; also avoids indexing element 0 of an empty array.
    if isinstance(vec, np.ndarray) and vec.size == 0:
        return vec.copy()

    if isinstance(vec[0], uc.UFloat):
        # The significant digits are dictated by the uncertainty term.
        rounded_vec = vec.copy() if hasattr(vec, 'copy') else vec
        sig_vec = _sign_digits(unumpy.std_devs(vec))
        for indx, num_dig in enumerate(sig_vec):
            num_dig = int(num_dig)
            rounded_vec[indx] = ufloat(round(vec[indx].nominal_value, num_dig),
                                       round(vec[indx].std_dev, num_dig))
        return rounded_vec

    if isinstance(vec, np.ndarray):
        rounded_vec = vec.copy()
        sig_vec = _sign_digits(vec) + 1  # +1: we want 2 significant digits for plain values
        for indx, num_dig in enumerate(sig_vec):
            rounded_vec[indx] = round(vec[indx], int(num_dig))
        return rounded_vec

    return vec


'''
    This function round the results to their significant digits
    
    inputs:
        res - a data frame containting the results for a certain cell type.
              each line is a parameter with column for values and uncertianty
        unc_type - either 'add' for addative (regular) or 'mul' for multiplication facor.
                   The default value is addative
        
    output:
        res - the values and uncertainies are rounded to the significant digits
'''
def round_results(res, unc_type = 'add'):
    
    if unc_type =='add':
    #for additive uncertainty we set the significant digits to be 1 for the uncertianty 
    # and we compute the same for the values
         

        sig_unc = _sign_digits(res['Uncertainty']) # np.ceil(np.log10(.astype(float)))
        for indx,num_dig in enumerate(sig_unc):
            res.loc[indx,'Value'] = round(res.loc[indx,'Value'],num_dig)
            res.loc[indx,'Uncertainty'] = round(res.loc[indx,'Uncertainty'],num_dig)
    
    elif unc_type =='mul':
    # for the multiplication factor we set round the muliplication factot to have one digit after the point ("1.5")
    # and the the value significance is 2 digits  
        sig_values = _sign_digits(res['Value'])
        for indx,num_dig in enumerate(sig_values):
            res.loc[indx,'Value'] = round(res.loc[indx,'Value'],num_dig+1)
            res.loc[indx,'Uncertainty'] = round(res.loc[indx,'Uncertainty'],1)

    return res



## Printing results 
We have separate functions for printing results that are saved as DataFrames in which each row is a cell type,
and each column is a parameter.
We also divide our functions into those that handle parameters with an additive error and those that work for parameters whose error is given as a multiplication factor (log-normal variables).


In [None]:
'''
    Printing a certain paramter for given cell type, taken from a results DataFrame containing cell types as rows
    and parameters as columns.
    
    input:
        res - panda's DataFrame containing the results: row are cell types, columns are parameters, given as either
              ufloat for an additive error, or a tuple for a multiplication factor
        paramters  - list of parameters needed to be printed
        formats - an optional variable that may contain specific formats to use for the print
    output:
        the function doesn't return any output
'''

def print_res_celltypes_df(res, parameters, *formats):
    """Unfinished draft of a printing helper.

    The body only builds three local lookup tables and then returns None:
    nothing is printed, and the arguments ``res``, ``parameters`` and
    ``formats`` are never read.

    NOTE(review): this looks superseded by ``CellTypesResDF.print_params``
    defined later in the notebook - confirm and remove, or finish it.
    """
    
    # Parameter names. NOTE(review): 'turnovr' is spelled differently from the
    # 'turnover' used in CellTypesResDF.params - presumably a typo.
    PARAMETERS = ['number','lifespan','cellular turnovr rate','cell mass',
                  'cellular mass turnovr rate','total cellular mass',
                  'lifespan in rodents','extrapolated cellular turnover rate',
                  'extrapolated cellular mass turnover rate']
    
    # Format template and unit label per column of ``lung_results``.
    # NOTE(review): ``lung_results`` is a global that is not defined in the
    # visible part of the notebook; calling this function without it raises
    # NameError. Only six formats/units are listed, so zip() stops after the
    # first six columns.
    FORMATS =  dict(zip(lung_results.columns, ['{:0.1e}','{:0.0f}','{:0.1e}','{:0.0f}','{:0.1f}','{:0.0f}'])) 
    UNITS =dict(zip(lung_results.columns, ['cells','days','cells per day','pg','grams per day', 'grams']))

def PrintMulParmeter(cell_type, param):
    """Print one parameter that carries a multiplicative uncertainty.

    The entry ``lung_results.loc[cell_type, param]`` is indexed with [0] for
    the value and [1] for the multiplication factor. The value is printed
    together with the range value/factor - value*factor, formatted with
    ``FORMATS[param]`` and labelled with ``UNITS[param]``.

    ``FORMATS``, ``UNITS`` and ``lung_results`` are read as globals.
    NOTE(review): none of them is defined at module level in the visible part
    of the notebook - confirm where they come from.

    The function returns nothing.
    """
    headline = param.capitalize() + ' of ' + cell_type + ': '
    template = FORMATS.get(param)

    centre = lung_results.loc[cell_type, param][0]
    headline = headline + template.format(centre)
    unit_with_comma = UNITS.get(param) + ','

    lower = lung_results.loc[cell_type, param][0] / lung_results.loc[cell_type, param][1]
    lower_text = 'SD range: ' + FORMATS.get(param).format(lower)

    upper = lung_results.loc[cell_type, param][0] * lung_results.loc[cell_type, param][1]
    upper_text = '- ' + FORMATS.get(param).format(upper)

    print(headline, unit_with_comma, lower_text, upper_text, ' ' + UNITS.get(param))
    
    

In [201]:
class CellTypesResDF(object):
    """Results table for a set of cell types, with helpers for printing it.

    Attributes (all set in ``__init__``):
        params   - list of parameter names; these are the columns of ``res``
        formats  - dict mapping parameter name to the format template used for printing
        units    - dict mapping parameter name to the unit label printed after a value
        res      - DataFrame with one row per cell type and one column per parameter.
                   Cells hold a ufloat for an additive uncertainty, or a
                   (value, multiplication factor) tuple for a multiplicative one.
        unc_type - 'additive' selects the additive printer; any other value
                   selects the multiplicative printer.
    """

    # Default templates for values with an additive uncertainty. The trailing
    # 'P' is not a standard float format option; presumably it is the
    # pretty-print option of the `uncertainties` package - TODO confirm.
    _ADDITIVE_FORMATS = ('{:0.1eP}', '{:0.0fP}', '{:0.1eP}', '{:0.0fP}', '{:0.1fP}',
                         '{:0.0fP}', '{:0.1fP}', '{:0.1eP}', '{:0.2fP}')
    # Default templates for plain numbers (multiplicative uncertainty).
    _PLAIN_FORMATS = ('{:0.1e}', '{:0.0f}', '{:0.1e}', '{:0.0f}', '{:0.1f}',
                      '{:0.0f}', '{:0.1f}', '{:0.1e}', '{:0.2f}')

    def __init__(self, cell_types, un_type='additive'):
        """Create an empty results table.

        input:
            cell_types - list of cell types, used as the index of ``res``
            un_type - 'additive' (default) for ufloat entries; any other value
                      for (value, multiplication factor) tuples
        """
        self.params = ['number', 'lifespan', 'cellular turnover rate', 'cell mass',
                       'cellular mass turnover rate', 'total cellular mass',
                       'lifespan in rodents', 'extrapolated cellular turnover rate',
                       'extrapolated cellular mass turnover rate']

        # default formats for printing
        if un_type == 'additive':
            self.formats = dict(zip(self.params, self._ADDITIVE_FORMATS))
        else:
            self.formats = dict(zip(self.params, self._PLAIN_FORMATS))

        self.units = dict(zip(self.params, ['cells', 'days', 'cells per day', 'pg', 'grams per day',
                                            'grams', 'days', 'cells per day', 'grams per day']))

        self.res = pd.DataFrame(index=cell_types, columns=self.params)
        self.unc_type = un_type

    @staticmethod
    def _as_list(selection, default):
        """Normalise a user selection (None, a single name or a collection) to a list.

        None, an empty collection or any other falsy value selects everything
        in ``default``. A single name is wrapped in a list.
        """
        if selection is None:
            return list(default)
        # Checked before truth-testing: `not index` / `not array` raises ValueError.
        if isinstance(selection, (list, pd.Index, pd.Series, np.ndarray)):
            chosen = list(selection)
            return chosen if chosen else list(default)
        return [selection] if selection else list(default)

    @staticmethod
    def _print_cell_header(cell, first_cell):
        """Print a separator line (except before the first cell) and the cell name in bold."""
        if cell != first_cell:
            print('_' * 30)
        print('\x1b[1m' + cell + ':' + '\x1b[0m')  # ANSI escape codes for bold

    def print_params(self, parameters=None, cell_types=None, forms=None):
        """Print parameters of the given cell types, taken from ``self.res``.

        input:
            parameters - a parameter name or a list of names to print;
                         by default all columns of ``res``
            cell_types - a cell type or a list of cell types to print;
                         by default all rows of ``res``
            forms - optional formats to use instead of ``self.formats``: a dict
                    mapping parameter name to template, a list of templates
                    matched to ``parameters`` in order, or one template string
                    used for every requested parameter. Parameters without an
                    explicit template fall back to ``self.formats``.
        output:
            the function doesn't return any output
        """
        cell_types = self._as_list(cell_types, self.res.index)
        parameters = self._as_list(parameters, self.res.columns)

        if isinstance(forms, dict):
            forms = {**self.formats, **forms}
        elif isinstance(forms, str):
            if forms:
                forms = {**self.formats, **dict.fromkeys(parameters, forms)}
            else:
                forms = self.formats
        elif forms is None or isinstance(forms, (int, float)):
            # None (and numeric placeholders such as 0) mean "use the defaults"
            forms = self.formats
        else:
            forms = {**self.formats, **dict(zip(parameters, forms))}

        if self.unc_type == 'additive':
            self._add_print_params(cell_types, parameters, forms)
        else:
            self._mul_print_params(cell_types, parameters, forms)

    def _add_print_params(self, cell_types, parameters, forms):
        """Print parameters stored as ufloats (additive uncertainty).

        input:
            cell_types - list of cell types to print
            parameters - list of parameters to print
            forms - dict mapping parameter name to format template
        output:
            the function doesn't return any output
        """
        # with several cell types and several parameters the output is grouped per cell type
        mult_flag = len(cell_types) > 1 and len(parameters) > 1

        for cell in cell_types:
            if mult_flag:
                self._print_cell_header(cell, cell_types[0])
            for param in parameters:
                # round_vec rounds the value to its significant digits
                value = round_vec(self.res.loc[cell, param])
                print('{} of {} is:'.format(param.capitalize(), cell),
                      forms[param].format(value), self.units[param])

    def _mul_print_params(self, cell_types, parameters, forms):
        """Print parameters stored as (value, multiplication factor) tuples.

        Entries of ``res`` that are not tuples are skipped silently.

        input:
            cell_types - list of cell types to print
            parameters - list of parameters to print
            forms - dict mapping parameter name to format template
        output:
            the function doesn't return any output
        """
        # with several cell types and several parameters the output is grouped per cell type
        mult_flag = len(cell_types) > 1 and len(parameters) > 1

        for cell in cell_types:
            if mult_flag:
                self._print_cell_header(cell, cell_types[0])
            for param in parameters:
                entry = self.res.loc[cell, param]
                if isinstance(entry, tuple):
                    value = entry[0]
                    unc_fac = entry[1]
                    # round_vec rounds each printed number to its significant digits
                    print('{} of {} is:'.format(param.capitalize(), cell),
                          forms[param].format(round_vec(value)), self.units[param],
                          'SD range:', forms[param].format(round_vec(value / unc_fac)),
                          '-', forms[param].format(round_vec(value * unc_fac)), self.units[param])

    def change_unc_to_mul(self):
        """Convert the stored uncertainties from additive to multiplication factors.

        Every parameter column for which ``res[param].any()`` is true is
        replaced by (nominal value, factor) tuples, with the factor computed by
        ``uarr_to_mulfac`` and rounded to two decimals. ``unc_type`` becomes
        'mul' and ``formats`` is switched to the plain-number templates.
        Nothing happens when ``unc_type`` is already 'mul'.
        """
        if self.unc_type == 'mul':
            return

        for param in self.params:
            if self.res[param].any():
                self.res[param] = list(zip(unumpy.nominal_values(self.res[param]),
                                           uarr_to_mulfac(self.res[param]).round(2)))
        self.unc_type = 'mul'
        self.formats = dict(zip(self.params, self._PLAIN_FORMATS))
        

In [199]:
# Scratch cell: manual checks of CellTypesResDF.
# NOTE(review): this cell has execution count 199 while the class definition
# above has 201, so the recorded run order differs from the document order.

# --- additive example (disabled) ---
# lungs = CellTypesResDF(['a','b'])
# lungs.formats
# lungs.res.loc['a','number'] = ufloat(10**10,10**9)
# lungs.res.loc['b','number'] = ufloat(10**10,2.2*10**9)
# lungs.res.loc['a','lifespan'] = ufloat(303,13.33)
# lungs.res.loc['b','lifespan'] = ufloat(43,53.33)



# lungs.print_params('number')

# lungs.change_unc_to_mul()

# lungs.print_params('number')

# # # lungs.print_params(['a','b'],['number','lifespan'],0)

# --- multiplicative example ---
# Each entry is a (value, multiplication factor) tuple.
liver = CellTypesResDF(['c','d'], un_type='mul')

liver.res.loc['c','number'] =(2.23*10**10,1.7)
liver.res.loc['d','number'] =(7.23*10**10,1.2)
# A plain number rather than a tuple: _mul_print_params only prints tuple entries.
liver.res.loc['c','lifespan'] = 300
liver.res.loc['d','lifespan'] =(100,2.2)

# liver.print_params()
# not not lungs.res['number'].any()
# round_vec(231)

In [88]:
# Scratch check: a string is not a list (presumably verifying the
# `isinstance(..., list)` tests used in print_params).
isinstance('asd',list)

False

## Uncertainty converter - from additive to multiplicative
Sometimes, in order to propagate errors, we need to approximate an additive error by a multiplication factor.

In [3]:
def AddErr2MulF(value, err=0):
    """Convert an additive error to an uncertainty multiplication factor.

    With r the relative error, the factor is the geometric mean of the upward
    factor (1 + r) and the downward factor 1 / (1 - r).

    inputs:
        value - a ufloat (its own std_dev is used and ``err`` is ignored),
                or a plain number
        err - the additive error of a plain ``value`` (default 0)
    output:
        the multiplication factor, as returned by scipy's gmean
    """
    if isinstance(value, uc.UFloat):
        relative_error = value.std_dev / value.nominal_value
    else:
        relative_error = err / value

    upward = 1 + relative_error
    downward = 1 / (1 - relative_error)
    return gmean([upward, downward])

In [178]:
def ufloat_to_mulfac(value, err=0):
    """Approximate an additive error by a multiplication factor.

    inputs:
        value - a ufloat, whose std_dev / nominal_value gives the relative
                error, or a plain number whose relative error is err / value
        err - additive error, used only when ``value`` is not a ufloat
    output:
        the geometric mean (scipy gmean) of 1 + r and 1 / (1 - r), where r is
        the relative error
    """
    has_own_error = isinstance(value, uc.UFloat)
    rel = (value.std_dev / value.nominal_value) if has_own_error else (err / value)
    return gmean([1 + rel, 1 / (1 - rel)])

Convert a uarray (with uncertainty) to an array of uncertainty multiplication factors.

In [173]:
def Uarr2MulF(uarr):
    """Convert the additive errors of a uarray to multiplication factors.

    For every element, with r = std_dev / nominal value, the factor is the
    geometric mean of (1 + r) and 1 / (1 - r).

    inputs:
        uarr - array of ufloats
    output:
        array of multiplication factors, one per element of ``uarr``
    """
    nominal = unumpy.nominal_values(uarr)
    factor_pair = [1 + unumpy.std_devs(uarr) / nominal,
                   1 / (1 - unumpy.std_devs(uarr) / nominal)]
    return gmean(factor_pair)

In [174]:
def uarr_to_mulfac(uarr):
    """Return the uncertainty multiplication factor of each element of a uarray.

    inputs:
        uarr - array of ufloats
    output:
        array with, per element, the geometric mean of the upward factor
        1 + std/mean and the downward factor 1 / (1 - std/mean)
    """
    centres = unumpy.nominal_values(uarr)
    upward = 1 + unumpy.std_devs(uarr) / centres
    downward = 1 / (1 - unumpy.std_devs(uarr) / centres)
    return gmean([upward, downward])

## Multiplication uncertainty propagation
When we have two uncertainties, each given as a multiplication factor, we can propagate the error of their product by using the properties of the lognormal distribution:
$$ Z_{1} = e^{X} $$ 
$$ Z_{2} = e^{Y} $$ 
where $X,Y$ are normal variables such that $X \sim N(0,\sigma_{1}^{2})$, $Y \sim N(0,\sigma_{2}^{2})$ (so $Z_{1}, Z_{2}$ are lognormal variables).

Then $Z_{1} Z_{2}$ is also a lognormal variable, and if $Z_{1}, Z_{2}$ are independent:
$$Z_{1}Z_{2} = e^{X+Y}$$ 
such that $X+Y$ has a variance of $\sigma_{1}^{2}+\sigma_{2}^{2}$.

So, to propagate the error in a multiplication of two uncertainty factors, we can move to log space and propagate the error easily there.


In [2]:
def FacMulProp(factors):
    """Propagate multiplication-factor uncertainties through a product.

    The factors are combined in log space: the result is
    10 ** sqrt(sum(log10(f) ** 2)), summed over the first axis.

    factors is either:
    1. a numpy array containing all the factors of n lognormal variables that
       are being multiplied (combined along axis 0)
    2. a regular list of factors, flat (``[1.5, 1.1]``) or nested
       (``[[1, 1], [2, 2]]``)
    3. a list mixing vectors (lists, arrays, Series) and scalars; scalars are
       broadcast against the vectors, and vectors of the same size are
       propagated pair by pair

    Every entry of a list takes part in the result, and integer scalars are
    accepted as well as floats.

    Returns a scalar for a flat list of scalars or a 1-D array, otherwise an
    array with one propagated factor per column.
    Raises ValueError when ``factors`` is an empty list.
    """
    if isinstance(factors, np.ndarray):
        stacked = factors
    else:
        if len(factors) == 0:
            raise ValueError('FacMulProp needs at least one factor')
        # Bring every entry to a common shape so scalars pair with vectors.
        entries = [np.asarray(entry, dtype=float) for entry in factors]
        stacked = np.stack(np.broadcast_arrays(*entries))

    log_factors = np.log10(stacked)
    sum_of_squares = np.sum(log_factors * log_factors, axis=0)
    return 10 ** (np.sqrt(sum_of_squares))


In [None]:
def propgate_mul_facs(factors):
    """Propagate multiplication-factor uncertainties through a product.

    The factors are combined in log space: the result is
    10 ** sqrt(sum(log10(f) ** 2)), summed over the first axis.

    factors is either:
    1. a numpy array containing all the factors of n lognormal variables that
       are being multiplied (combined along axis 0)
    2. a regular list of factors, flat (``[1.5, 1.1]``) or nested
       (``[[1, 1], [2, 2]]``)
    3. a list mixing vectors (lists, arrays, Series) and scalars; scalars are
       broadcast against the vectors, and vectors of the same size are
       propagated pair by pair

    Every entry of a list takes part in the result, and integer scalars are
    accepted as well as floats.

    Returns a scalar for a flat list of scalars or a 1-D array, otherwise an
    array with one propagated factor per column.
    Raises ValueError when ``factors`` is an empty list.
    """
    if isinstance(factors, np.ndarray):
        factor_rows = factors
    else:
        if len(factors) == 0:
            raise ValueError('propgate_mul_facs needs at least one factor')
        # Bring every entry to a common shape so scalars pair with vectors.
        as_arrays = [np.asarray(entry, dtype=float) for entry in factors]
        factor_rows = np.stack(np.broadcast_arrays(*as_arrays))

    log_rows = np.log10(factor_rows)
    ssq = np.sum(log_rows * log_rows, axis=0)
    return 10 ** (np.sqrt(ssq))


## Extracting the geometric mean and the errors
The next function takes a vector, possibly with additive errors, and computes the geometric mean and its error (as a multiplication factor).
Using a conversion to log space, the function accounts for both the inter-error, which is derived from the variation between the vector values, and the intra-error, which is given as the average of the individual errors.

In [77]:
def GMeanWithErros(vec):
    """Geometric mean of a vector together with its uncertainty factor.

    vec is either a vector of plain numbers (list, array of any numeric dtype)
    or a uarray of ufloats carrying additive errors.

    The values are moved to log space, where two error terms are combined in
    quadrature:
      * the intra error - the mean of the log10 of the individual errors, after
        converting them to multiplication factors; it is zero when ``vec``
        carries no errors;
      * the inter error - the standard error of the log10 of the values.

    Returns a tuple (gmean, uncertainty multiplication factor), both rounded
    to two decimals.
    Raises ValueError when ``vec`` is empty.
    """
    if len(vec) == 0:
        raise ValueError('GMeanWithErros needs at least one value')

    # Look at the first element by iteration, which also works for containers
    # that are not indexed by position.
    first = next(iter(vec))
    if isinstance(first, uc.UFloat):
        # uarray of ufloats, containing additive errors
        values = unumpy.nominal_values(vec)
        # convert the errors to multiplication factors to allow the move to log space
        errors = Uarr2MulF(vec)
        intra_error = np.log10(errors).mean()
    else:
        # plain numbers: no individual errors were provided
        values = np.asarray(vec, dtype=float)
        intra_error = 0

    mean = gmean(values)
    # standard error of the nominal values, in log space
    log_inter_error = np.log10(values).std() / np.sqrt(values.size)
    ssq = log_inter_error ** 2 + intra_error ** 2  # error propagation in log space
    err_fac = 10 ** (np.sqrt(ssq))
    return round(mean, 2), round(err_fac, 2)

In [96]:
def GMeanWithErrorsFromMul(tups):
    """Geometric mean and overall uncertainty factor of (value, factor) tuples.

    inputs:
        tups - list of tuples; element 0 of each tuple is the value and
               element 1 is its uncertainty multiplication factor
    output:
        tuple (geometric mean of the values, overall multiplication factor),
        both rounded to two decimals

    The overall factor combines, in log space and in quadrature, the intra
    uncertainty (mean of log10 of the given factors) and the inter uncertainty
    (standard error of log10 of the values).
    """
    columns = list(zip(*tups))
    nominal = np.array(columns[0])
    factors = np.array(columns[1])

    # intra error: expected variation of the individual entries, in log space
    log_intra = np.log10(factors).mean()
    # inter error: standard error of the nominal values, in log space
    log_inter = np.log10(nominal).std() / np.sqrt(nominal.size)

    combined = np.sqrt(log_inter ** 2 + log_intra ** 2)  # propagation in log space
    centre = gmean(nominal)
    overall_factor = 10 ** (combined)
    return round(centre, 2), round(overall_factor, 2)


In [79]:
# Scratch cell: type checks and a manual comparison for GMeanWithErros.
# ua carries additive errors (a uarray); ub is a plain float array.
ua = unumpy.uarray([2,2,3],[0.5,1,0.2])

type(ua)
type(ua[1])
# Uarr2MulF(ua)
isinstance(ua,np.ndarray)
ub=np.array([1.2,2,4])
isinstance(ub,np.ndarray)
isinstance(ua[0],uc.UFloat)
# type(ua[0])

# Alternative estimates computed by hand: a factor from the summed std_devs,
# and the intra error taken as the mean of the log10 factors.
AddErr2MulF(gmean(unumpy.nominal_values(ua)),unumpy.std_devs(ua).sum())
errors = Uarr2MulF(ua)
intra_error1 =10**(np.log10(errors).mean())
intra_error1 

GMeanWithErros(ua)
# Only this last expression is displayed as the cell output.
GMeanWithErros(ub)

(2.13, 1.33)

In [185]:
# FacMulProp([[1.1,1.1,1.2],[1,3,2]])

In [187]:
# Scratch cell for FacMulProp inputs: X is the "vector of factors + scalar" case.
X =[[1,2,1.2],1.5]
# Displayed as the cell output: the scalar entry of X.
X[1]
# # ff = [[1.1,1.1],[1,1]]
# # lo = np.log10(ff)
# # 10**np.sqrt(np.sum(lo*lo,axis =0))
# FacMulProp([[1.1,1.1],[1,1]])
# # FacMulProp([1.5,1.1])

# Y=[[1,2],[3,2]]
# type(Y)
# type(Y[-1])

# # np.log10(Y)


# # FacMulProp(X)
# # FacMulProp(Y)
# # FacMulProp(np.array(Y))
# Z = [np.array([1,2,1.2]),1.4]
# FacMulProp(Z)

# # Z[0]

# # FF = np.array([Z[0],Z[-1]*np.ones(len(Z[0]))])
# # FF
# # lf = np.log10(FF)

# # 10**np.sum(lf*lf,axis=0)

1.5

In [87]:
# Scratch check of AddErr2MulF on ufloats; only the second call is displayed.
AddErr2MulF(ufloat(3,1.2))
AddErr2MulF(ufloat(300,100))

1.414213562373095

In [97]:
# Scratch check of GMeanWithErrorsFromMul: each tuple is (value, multiplication factor).
x = 2,1.2
y=3,1.4
z = 2.5, 1.1

tups=[x,y,z]
# zip(*tups) regroups the tuples into (all values, all factors); [0] picks the values.
list(zip(*tups))[0]
GMeanWithErrorsFromMul(tups)

(2.47, 1.25)

In [135]:
# Scratch check: X (defined in an earlier scratch cell) is a plain list.
isinstance(X,list)

True