In [1]:
import pandas as pd
import numpy as np

import uncertainties as uc
from uncertainties import ufloat
from uncertainties import unumpy

from scipy.stats import gmean
from openpyxl import load_workbook

# Utility Functions

## Colors for cell types
We use the color index from the Summary file.
The following code loads the color data from the Excel file and translates it to hex format, so it can be used for coloring the sheet tabs.

In [2]:
colors_data = pd.read_excel('Summary.xlsx','Colors', index_col=0, usecols=range(4))

# Parsing the colors, stored in the Excel sheet as text of the form "...(R,G,B)"
# (the form used there for the Voronoi diagram), into one numeric column per channel
colors_data['R'] = pd.to_numeric([(x.split(',')[0]).split('(')[1] for x in colors_data['colors']])
colors_data['G'] = pd.to_numeric([x.split(',')[1] for x in colors_data['colors']])
colors_data['B'] = pd.to_numeric([(x.split(',')[2]).split(')')[0] for x in colors_data['colors']])

# Converting the RGB values to a 6-digit hex string (without a leading '#').
# The column is built in one pass as strings, instead of pre-filling it with the integer 0 and
# overwriting it cell by cell, which makes pandas upcast an integer column on the first assignment.
colors_data['hex'] = ['{:02x}{:02x}{:02x}'.format(r, g, b)
                      for r, g, b in zip(colors_data['R'], colors_data['G'], colors_data['B'])]

## Rounding the results
We use the following functions to round the results to their significant digits.

In [153]:
'''
    This function help find how many digits needed to take in the round function
    giving number that there first digit is 1 an additional digit 

    inputs:
        values - np array or float
    output:
        np array with the number of digits to round for 
'''

def _sign_digits(values):
#     np.seterr(divide='ignore', invalid='ignore') # we want to be able to ignore nan in the vector
    if isinstance(values,float) or isinstance(values,int) :
        log_vals = np.log10(values)
        another_dig = 1*(log_vals %1 < 0.23)
        return int(1 + another_dig - np.ceil(log_vals))
        
    else:
        log_vals = np.log10(values.astype(float))    
        # finding values that are "1.x/1x/1xy.." such that we still want the second digit
        another_dig = 1*(log_vals %1 < 0.23)
#         return (1 + another_dig - np.ceil(log_vals)).astype(int)
        return 1 + another_dig - np.ceil(log_vals)


'''
    This function get either a ufloat or a float numbers or a np.array or an uncertainty uarray to its significant digits 
    (regarding the uncertainties in case of uarray)
    inputs:
        vec -  a uncertainty.uarray object containing numbers 
    output:
        rounded_vec - the values and uncertainies of the uarray are rounded to the significant digits
'''
def round_vec(vec):
    """
    Round a value, or every entry of a vector, to its significant digits.

    Values with an uncertainty (ufloat) are rounded according to the significant digits of
    the uncertainty; plain numbers are rounded according to their own magnitude, keeping one
    digit more than `_sign_digits` reports.

    inputs:
        vec - a ufloat, a float / int, a np.ndarray (of numbers or of ufloats),
              a tuple / list of numbers or ufloats, or a pd.Series of any of these
    output:
        the rounded value: a scalar for a scalar input, a np.ndarray for an array / tuple / list
        input and a pd.Series (same index and name) for a Series input.
        The input itself is never modified. Entries that are NaN, or whose uncertainty is
        zero or not finite, are returned unrounded. Unsupported inputs are reported with a
        printed message and returned as they are.
    """
    if isinstance(vec, pd.Series):
        # rounding a copy of the values and wrapping them again, so the result is actually returned
        # (and not only produced as a side effect on the underlying array)
        return pd.Series(round_vec(np.array(vec.values)), index=vec.index, name=vec.name)

    if isinstance(vec, uc.UFloat):
        if vec.std_dev == 0 or not np.isfinite(vec.std_dev):
            # no usable uncertainty to derive the significant digits from
            return vec
        num_dig = int(_sign_digits(vec.std_dev))
        return ufloat(round(vec.nominal_value, num_dig),
                      round(vec.std_dev, num_dig))

    if isinstance(vec, (float, int)):
        if vec == 0 or not np.isfinite(vec):
            return vec
        # the digits depend on the magnitude only
        num_dig = _sign_digits(abs(vec)) + 1  # +1 because when looking at the values we want 2 significant digits
        return round(vec, int(num_dig))

    if isinstance(vec, (tuple, list)):
        if any(isinstance(item, uc.UFloat) for item in vec):
            return round_vec(np.array(vec, dtype=object))
        if len(vec) > 0 and isinstance(vec[0], (float, int)):
            return round_vec(np.array(vec))

    if isinstance(vec, np.ndarray):
        rounded_vec = vec.copy()  # never round in place: the caller keeps its own data
        if vec.size == 0:
            return rounded_vec

        if vec.dtype == object and any(isinstance(item, uc.UFloat) for item in vec.flat):
            # uarray: we need to take into account the significant digits of the uncertainty term.
            # Entries that are not ufloats (e.g. NaN place holders) are left as they are.
            for indx, item in enumerate(vec.flat):
                if not isinstance(item, uc.UFloat):
                    continue
                if np.isnan(item.nominal_value) or item.std_dev == 0 or not np.isfinite(item.std_dev):
                    continue
                num_dig = int(_sign_digits(item.std_dev))
                rounded_vec.flat[indx] = ufloat(round(item.nominal_value, num_dig),
                                                round(item.std_dev, num_dig))
            return rounded_vec

        # plain array: rounding according to the magnitude of each value
        sig_vec = _sign_digits(np.abs(vec)) + 1  # +1 because when looking at the values we want 2 significant digits
        for indx, num_dig in enumerate(np.ravel(sig_vec)):
            if np.isfinite(num_dig):
                rounded_vec.flat[indx] = round(vec.flat[indx], int(num_dig))
        return rounded_vec

    print('none')
    print(type(vec))
    return vec

In [7]:
'''
    Our version of capitialize - keeps the existing big letters in the other words 
'''
def _cap_name(st):
    words =  st.split()
    st = ' '.join([words[0].capitalize()]+words[1:])
    return st

## Results for Cell type Object
We create a class to contain the results for cell types.

Its main property is a dataframe containing the values of the different parameters and their uncertainties (always given as 1 standard error).
Other properties include the formats and units of each parameter (given as dictionaries) and the type of uncertainty (additive or multiplicative).

The uncertainty type is constant across all the parameters of a given cell type. Values for cell types whose uncertainties are given as a multiplication factor are stored as tuples, with the value as the first entry and the multiplication factor as the second entry.

The methods are mainly for printing in our desired formats, extracting selected parameters, and exporting to Excel.



In [192]:
class CellTypesResDF(object):
    #initiating using list of cell types
    def __init__(self, cell_types,un_type = 'additive'):
        self.params = ['number','lifespan','cellular turnover rate','cell mass',
                      'cellular mass turnover rate','total cellular mass',
                      'lifespan in rodents','extrapolated cellular turnover rate',
                      'extrapolated cellular mass turnover rate']
        
        #default formats for printing
        if un_type == 'additive': 
            self.formats =  dict(zip(self.params,['{:0.1eP}','{:0.0fP}','{:0.1eP}','{:0.0fP}','{:0.1fP}',
                                                  '{:0.0fP}','{:0.1fP}','{:0.1eP}','{:0.2fP}'])) 
            self.error_types = dict(zip(cell_types,len(cell_types)*['additive']))
        elif un_type == 'mul':
            self.formats =  dict(zip(self.params,['{:0.1e}','{:0.0f}','{:0.1e}','{:0.0f}','{:0.1f}',
                                                  '{:0.0f}','{:0.1f}','{:0.1e}','{:0.2f}'])) 
            self.error_types = dict(zip(cell_types,len(cell_types)*['mul']))
        elif un_type == 'mixed':
            self.formats_add = dict(zip(self.params,['{:0.1eP}','{:0.0fP}','{:0.1eP}','{:0.0fP}','{:0.1fP}',
                                                  '{:0.0fP}','{:0.1fP}','{:0.1eP}','{:0.2fP}'])) 
            self.formats_mul = dict(zip(self.params,['{:0.1e}','{:0.0f}','{:0.1e}','{:0.0f}','{:0.1f}',
                                                  '{:0.0f}','{:0.1f}','{:0.1e}','{:0.2f}'])) 
            self.formats = {}
            self.error_types = {}
            
        self.units = dict(zip(self.params, ['cells','days','cells per day','pg','grams per day',
                                               'grams','days','cells per day','grams per day']))
                
        self.res = pd.DataFrame(index=cell_types, columns=self.params)
        self.unc_type = un_type 
        
        
        
        #saving the error type of each cell type, for cases of mixed uncertainties
        
        if un_type=='mul':
        # we want everything to be in tuple including the nans so we could iterate using zip
            self.res = self.res.applymap(lambda y: (np.nan,np.nan) if y is np.nan else y)
        

    '''
        Printing a certain paramter for given cell type, taken from a results object containing cell types as rows
        and parameters as columns.

        input:
            cell_types - cell type for which we want to printe the paramters results
            paramters  - list of parameters needed to be printed
            formats - an optional variable that may contain specific formats to use for the print
        output:
            the function doesn't return any output
    '''         
    def print_params(self, parameters=None, cell_types=None, forms=None):
        
        #in cases cell_types or parameters contain only one entries we convert them to a list
        if not cell_types:
            cell_types = self.res.index.tolist()
        elif not isinstance(cell_types,list):
                cell_types = [cell_types]
        
        if not parameters:
            parameters = self.res.columns.tolist()
        elif not isinstance(parameters,list):
            parameters = [parameters]
        
        #using none as a deafult value for forms to be replaced with the the existing formats attribute 
        if not forms:
            forms = self.formats
        else:
            if not isinstance(cell_types,dict):
                forms = dict(zip(parameters,forms))
 
        if self.unc_type == 'additive':
            self._add_print_params(cell_types, parameters,forms)
        elif self.unc_type == 'mul':
            self._mul_print_params(cell_types, parameters,forms)
        else:
            self._mix_print_params(cell_types, parameters)


            
    '''
        Printing certain paramters for given cell type, given as an ufloat with an additive term of unncertainty

        input:
            paramters  - list of parameters needed to be printed
            formats - dictionary that contains specific formats to use for the print
        output:
            the function doesn't return any output
    '''
    def _add_print_params(self, cell_types, parameters,forms):
        """
        Print parameters stored with an additive uncertainty term (one entry per cell type
        and parameter); entries that are NaN are skipped.

        inputs:
            cell_types - list of cell types to print
            parameters - list of parameters to print
            forms - dictionary with the print format of each parameter
        output:
            the function doesn't return any output
        """
        # a header per cell type is needed only when several cells and several parameters are printed
        sectioned = len(cell_types) > 1 and len(parameters) > 1

        for cell in cell_types:
            # refreshing the formats according to the values of this cell type
            self._update_formats(parameters,cell)
            if sectioned:
                if cell != cell_types[0]:
                    print('_'*30)
                print('\x1b[1m'+ cell +':' +'\x1b[0m')  #using ANSI to print in bold
            for param in parameters:
                entry = self.res.loc[cell,param]
                if unumpy.isnan(entry):
                    continue
                # rounding to the significant digits before formatting
                rounded = round_vec(entry)
                print('{} of {} is:'.format(param.capitalize(),cell),
                       forms[param].format(rounded),self.units[param])
                    
    '''
        Printing certain paramters for given cell type, given as a tuple: containing the value and the uncertainty as 
        a multiplication factor
        input:
            paramters  - list of parameters needed to be printed
            formats - an optional variable that may contain specific formats to use for the print
        output:
            the function doesn't return any output
    '''                      
    def _mul_print_params(self, cell_types, parameters,forms):
        #using a flag to check whether there enough printing that should be divided by cell types
        mult_flag = len(cell_types) > 1 and len(parameters) > 1 
        
        #going all over the cell_types and parameters and printing according to the formats.
        #using round_vec to round the values to significant digits 
#         print(cell_types,parameters)
        for cell in cell_types:
            self._update_formats(parameters,cell)
            if mult_flag:
                if(cell != cell_types[0]):
                    print('_'*30)
                print('\x1b[1m'+ cell +':' +'\x1b[0m')  #using ANSI to print in bold     
            for param in parameters:
                if not np.isnan(self.res.loc[cell,param][0]):
                    value = self.res.loc[cell,param][0]
                    unc_fac = self.res.loc[cell,param][1]
                    print('{} of {} is:'.format(param.capitalize(),cell),
                          forms[param].format(round_vec(value)),self.units[param],
                          '(SD range:', forms[param].format(round_vec(value/unc_fac)),
                          '-', forms[param].format(round_vec(value*unc_fac)),self.units[param]+')')

                    
    '''
        Printing certain paramters for given cell types, given as a mixture of ufloats (for additive uncertainty) and tuples
        (containing the value and the uncertainty as a multiplication factor)
        input:
            cell_types - list of parameters
            paramters  - list of parameters needed to be printed
        output:
            the function doesn't return any output
    '''                      
    def _mix_print_params(self, cell_types, parameters):
        #using a flag to check whether there enough printing that should be divided by cell types
        mult_flag = len(cell_types) > 1 and len(parameters) > 1 
        
        #going all over the cell_types and parameters and printing according to the formats.
        #using round_vec to round the values to significant digits 
#         print(cell_types,parameters)
        for cell in cell_types:
            if mult_flag:
                if(cell != cell_types[0]):
                    print('_'*30)
                print('\x1b[1m'+ cell +':' +'\x1b[0m')  #using ANSI to print in bold     
            if self.error_types[cell] == 'additive':
                 self._add_print_params([cell], parameters,forms = self.formats_add)
            elif self.error_types[cell] == 'mul':
                self._mul_print_params([cell], parameters,forms = self.formats_mul)   
                    
    '''
        Changing the formats of the uncertainties of the results from additive error to multiplication 
        factor. Thus, enabpling further error propagation 
    
        going over the parameters for which there are results and converting them using the utility function
    '''                      
    def change_unc_to_mul(self):
        if self.unc_type != 'mul':
        
            for param in self.params:
                if self.res[param].any():
                    self.res[param]= list(zip(unumpy.nominal_values(self.res[param]),
                                              uarr_to_mulfac(self.res[param]).round(2)))
            self.unc_type = 'mul'
            self.formats =  dict(zip(self.params,['{:0.1e}','{:0.0f}','{:0.1e}','{:0.0f}','{:0.1f}',
                                                      '{:0.0f}','{:0.1f}','{:0.1e}','{:0.2f}'])) 
            
            # we want everything to be in tuple including the nans so we could iterate using zip
            self.res = self.res.applymap(lambda y: (np.nan,np.nan) if y is np.nan else y)

    '''
         Exporting the values to excel file in a sheet for each cell type
         input: file_name = name of the excel file in which need to save the results
    '''      
    def export_to_excel(self,file_name='Summary.xlsx'):
        """
        Export the results to an existing Excel file, one sheet per cell type.

        Each sheet holds the columns Parameter, Value, Units, Uncertainty and Comments, where the
        comment states whether the uncertainty is additive or a multiplication factor.
        The sheet is named after the cell type (first word capitalized) and its tab is colored
        according to the global `colors_data` table, when the cell type appears there.

        input:
            file_name - name of the Excel file in which the results are saved; it is loaded first,
                        so it has to exist already
        """
        to_excel =pd.DataFrame(index = range(len(self.params)), columns=['Parameter','Value','Units','Uncertainty','Comments'])
        to_excel['Parameter'] = pd.Series(self.params)
        to_excel['Units'] = pd.Series(list(self.units.values()))

        # attaching the existing workbook to the writer, so the sheets already in the file are kept
        # NOTE(review): assigning writer.book and calling writer.save() rely on the pandas API this
        # notebook was written for - confirm against the installed pandas version
        book = load_workbook(file_name)
        writer = pd.ExcelWriter(file_name, engine = 'openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

        # missing results are plain NaN floats; checking by value, as a NaN is not always the np.nan object
        replace_nans = lambda y: ufloat(np.nan,np.nan) if isinstance(y, float) and np.isnan(y) else y

        #we go through the types of cells and for each we arrange the results in a dataframe for extracting to excel.
        for cell_type in self.res.index:
            # using _cap_name - our method that capitalizes the first word and keeps the others as they are
            sheet_name = _cap_name(cell_type)
            if self.unc_type != 'mul':
                rounded_values = round_vec(self.res.loc[cell_type])
                rounded_values = np.array([replace_nans(v) for v in rounded_values])
                to_excel['Value'] = unumpy.nominal_values(rounded_values)
                to_excel['Uncertainty'] = unumpy.std_devs(rounded_values)
                to_excel['Comments'] = 'additive uncertainty'
            else:
                # splitting the (value, factor) tuples into a tuple of values and a tuple of factors
                extracted = list(zip(*self.res.loc[cell_type]))
                to_excel['Value'] = round_vec(extracted[0])
                to_excel['Uncertainty'] = round_vec(extracted[1])
                to_excel['Comments'] = 'uncertainty by multiplication factor'
            to_excel.to_excel(writer, sheet_name=sheet_name ,index=False)
            worksheet = writer.sheets[sheet_name]
            # using the colors defined in the color sheet to color the tabs
            try:
                worksheet.sheet_properties.tabColor= colors_data.loc[sheet_name,'hex']
            except KeyError:
                # only a cell type missing from the colors table is tolerated; other errors surface
                print('no color as {} is not in the cell types list'.format(cell_type))
            writer.save()
    
    '''
         Getting the values of a certain parameter for each of the cell types, as a vector.
         Usually used in case of uncertainties as a multiplication factors. The function use zip* to extracht from the tuples.
    ''' 
    def get_param_vals(self,param):
        if self.unc_type == 'additive':
            return unumpy.nominal_values(self.res[param])
        elif self.unc_type == 'mul':
            extracted = list(zip(*self.res[param]))
            return np.array(extracted[0]) 
        elif self.unc_type == 'mixed':
           #going all obver indices with 'additive' error
            vals = pd.Series(index = self.res.index)
            add_inx = [k for k,v in self.error_types.items() if v == 'additive']
            mul_inx = [k for k,v in self.error_types.items() if v == 'mul'] 
            vals[mul_inx] = list(zip(*self.res.loc[mul_inx,params]))[0]
            vals[add_inx] =  unumpy.nominal_values(self.res.loc[add_inx,params])
            return vals.values  
    '''
         Getting the uncertainties of a certain parameter for each of the cell types, as a vector.
         Usually used in case of uncertainties as a multiplication factors. The function use zip* to extracht from the tuples.
    ''' 
    def get_param_unc(self,param):
        if self.unc_type == 'additive':
            return unumpy.std_devs(self.res[param])
        elif self.unc_type == 'mul':
            extracted = list(zip(*self.res[param]))
            return np.array(extracted[1])
        elif self.unc_type == 'mixed':
           #going all obver indices with 'additive' error
            vals = pd.Series(index = self.res.index)
            add_inx = [k for k,v in self.error_types.items() if v == 'additive']
            mul_inx = [k for k,v in self.error_types.items() if v == 'mul'] 
            vals[mul_inx] = list(zip(*self.res.loc[mul_inx,params]))[1]
            vals[add_inx] =  unumpy.std_devs(self.res.loc[add_inx,params])
            return vals.values      
    
    '''
         Getting the upper and lower bound of the values for a parameter
         Given as 1 SD
    ''' 
    def get_param_vals_and_bounds(self,param):
        vals = pd.DataFrame(index = self.res.index, columns = [param,'std','mul fac',param + ' lower bound',param +' upper bound'])
        if self.unc_type == 'additive':
            vals[param] =  unumpy.nominal_values(self.res[param])
            vals['std'] =  unumpy.std_devs(self.res[param])
            vals[param + ' lower bound'] = round_vec(vals[param]-vals['std'])
            vals[param + ' upper bound'] = round_vec(vals[param]+vals['std'])

        elif self.unc_type == 'mul':
            extracted = list(zip(*self.res[param]))
            vals[param] = np.array(extracted[0])
            vals['mul fac'] = np.array(extracted[1])
            vals[param + ' lower bound'] = round_vec(vals[param]/vals['mul fac'])
            vals[param + ' upper bound'] = round_vec(vals[param]*vals['mul fac'])
            
        elif self.unc_type == 'mixed':
            add_inx = [k for k,v in self.error_types.items() if v == 'additive']
            mul_inx = [k for k,v in self.error_types.items() if v == 'mul'] 

            vals.loc[mul_inx,param] = list(zip(*self.res.loc[mul_inx,param]))[0]
            vals.loc[add_inx,param] =  unumpy.nominal_values(self.res.loc[add_inx,param])

            vals.loc[mul_inx,'mul fac'] = list(zip(*self.res.loc[mul_inx,param]))[1]
            vals.loc[mul_inx,param + ' lower bound'] = round_vec(vals.loc[mul_inx,param]/vals.loc[mul_inx,'mul fac'])
            vals.loc[mul_inx,param + ' upper bound'] = round_vec(vals.loc[mul_inx,param]*vals.loc[mul_inx,'mul fac'])

            vals.loc[add_inx,'std'] =  unumpy.std_devs(self.res.loc[add_inx,param])
            vals.loc[add_inx,param + ' lower bound'] = round_vec(vals.loc[add_inx,param]-vals.loc[add_inx,'std'])
            vals.loc[add_inx,param + ' upper bound'] = round_vec(vals.loc[add_inx,param]+vals.loc[add_inx,'std'])
        vals = vals.drop(['std','mul fac'],axis=1)
        return vals     
       
    
    '''
         Biulding an extended database for the required parameters and cell types, consisting of values, lower and upper bounds
         (given as 1 SD)
    ''' 
    def get_extended_dataframe(self,parameters=None, cell_types=None):
        if not cell_types:
            cell_types = self.res.index.tolist()
        elif not isinstance(cell_types,list):
                cell_types = [cell_types]
      
        if not parameters:
            parameters = self.res.columns.tolist()
        elif not isinstance(parameters,list):
            parameters = [parameters]
        
        results = pd.DataFrame(index = self.res.index)
        for param in parameters:
            results = results.join(self.get_param_vals_and_bounds(param))
       
        return results.loc[cell_types]
        
    '''
        Automatic updating the formats for best printing
        Getting list of parameters to update
        using _sign_digits function to find what is the appropriate format 
    ''' 
    def _update_formats(self, parameters=None, cell_types = None): 
        if not cell_types:
            cell_types = self.res.index.tolist()
        elif not isinstance(cell_types,list):
                cell_types = [cell_types]
        if not parameters:
            parameters = self.res.columns.tolist()
        elif not isinstance(parameters,list):
            parameters = [parameters]
        for p in parameters:
            if self.unc_type == 'additive':
                if not unumpy.isnan(self.res.loc[cell_types,p]):
                    val_for_sign = unumpy.std_devs(self.res.loc[cell_types,p])
                    num_dig = _sign_digits(val_for_sign).astype('int')
                    if min(num_dig)<-3:
                        self.formats[p] = '{:0.1eP}'
                    elif max(num_dig)>=1:
                        self.formats[p] = '{:0.'+str(max(num_dig))+'fP}'
                    else:
                        self.formats[p] = '{:0.0fP}'
            elif self.unc_type == 'mul':
                extracted = list(zip(*self.res.loc[cell_types,p]))
                if not np.isnan(extracted[0]):
                    val_for_sign = np.array(extracted[0])/np.array(extracted[1])
                    num_dig = _sign_digits(val_for_sign).astype('int')
                    if min(num_dig)<-3:
                        self.formats[p] = '{:0.0e}'
                    elif max(num_dig)>=1:
                        self.formats[p] = '{:0.'+str(max(num_dig))+'f}'
                    else:
                        self.formats[p] = '{:0.0f}'
            elif self.unc_type == 'mixed':
                if self.error_types[cell_types[0]] == 'additive':
                    if not unumpy.isnan(self.res.loc[cell_types,p]):
                        val_for_sign = unumpy.std_devs(self.res.loc[cell_types,p])
                        num_dig = _sign_digits(val_for_sign).astype('int')
                        if min(num_dig)<-3:
                            self.formats_add[p] = '{:0.1eP}'
                        elif max(num_dig)>=1:
                            self.formats_add[p] = '{:0.'+str(max(num_dig))+'fP}'
                        else:
                            self.formats_add[p]= '{:0.0fP}'
                elif self.error_types[cell_types[0]] == 'mul':
                    extracted = list(zip(*self.res.loc[cell_types,p]))
                    if not np.isnan(extracted[0]):
                        val_for_sign = np.array(extracted[0])/np.array(extracted[1])
                        num_dig = _sign_digits(val_for_sign).astype('int')
                        if min(num_dig)<-3:
                            self.formats_mul[p] = '{:0.0e}'
                        elif max(num_dig)>=1:
                            self.formats_mul[p] = '{:0.'+str(max(num_dig))+'f}'
                        else:
                            self.formats_mul[p] = '{:0.0f}'
                            

    '''
        Print the parameters in years instead of days
        Get list of parameters and cell types to print in years instead of days 
    '''    
    def print_in_years(self, parameters=None, cell_types=None, forms=None):
        
        #in cases cell_types or parameters contain only one entries we convert them to a list
        if not cell_types:
            cell_types = self.res.index.tolist()
        elif not isinstance(cell_types,list):
                cell_types = [cell_types]
        
        if not parameters:
            parameters = self.res.columns.tolist()
        elif not isinstance(parameters,list):
            parameters = [parameters]
        
        #using a flag to check whether there enough printing that should be divided by cell types
        mult_flag = len(cell_types) > 1 and len(parameters) > 1 
        
        #dict containing the multiplication factor from days to years for each paramater
        D_IN_Y = 365
        days2years_fac = dict(zip(self.params,[1,1/D_IN_Y ,D_IN_Y ,1,D_IN_Y ,1,1/D_IN_Y,D_IN_Y,D_IN_Y])) 
        units_in_years = dict(zip(self.params, ['cells','years','cells per year','pg','grams per year',
                                               'grams','years','cells per year','grams per year'])) 
    
        #going all over the cell_types and parameters and printing according to the formats.
        #using round_vec to round the values to significant digits 
        for cell in cell_types:
            if mult_flag:
                if(cell != cell_types[0]):
                    print('_'*30)
                print('\x1b[1m'+ cell +':' +'\x1b[0m')  #using ANSI to print in bold     
            for param in parameters:
                if self.unc_type == 'mul':
                    if not np.isnan(self.res.loc[cell,param][0]):
                        value = self.res.loc[cell,param][0]*days2years_fac[param]
                        unc_fac = self.res.loc[cell,param][1]
                        
                        if days2years_fac[param]==1:
                            self._update_formats(param,cell)
                            form = self.formats[param]
                        else:
                            forms_years = {'lifespan': '{:0.0f}','cellular turnover rate': '{:0.0e}','cellular mass turnover rate': '{:0.1f}',
                                          'lifespan in rodents': '{:0.0f}','extrapolated cellular turnover rate': '{:0.0e}',
                                           'extrapolated cellular mass turnover rate': '{:0.1f}'}
                            form = forms_years[param]
                        print('{} of {} is:'.format(param.capitalize(),cell),
                              form.format(round_vec(value)), units_in_years[param],
                              '(SD range:', form.format(round_vec(value/unc_fac)),
                              '-', form.format(round_vec(value*unc_fac)), units_in_years[param]+')')
                else:
                    if not unumpy.isnan(self.res.loc[cell,param]):
                        value = round_vec(self.res.loc[cell,param]*days2years_fac[param]) 
                        
                        if days2years_fac[param]==1:
                            self._update_formats(param,cell)
                            form = self.formats[param]
                        else:
                            forms_years = {'lifespan': '{:0.0fP}','cellular turnover rate': '{:0.0eP}','cellular mass turnover rate': '{:0.1fP}',
                                          'lifespan in rodents': '{:0.0fP}','extrapolated cellular turnover rate': '{:0.0eP}',
                                           'extrapolated cellular mass turnover rate': '{:0.1fP}'}
                            form = forms_years[param]
                        print('{} of {} is:'.format(param.capitalize(),cell),form.format(value), units_in_years[param])
                    
        
    '''
    This method takes an excel filename that consist of sheets with cell type data in the specific format designed
    and import them to the object. It check what is the uncertainty type of each of the cells and save it to the 
    error_type dictionary of the object
    '''
    def import_celltypes_from_xlsx(self,  filename, cell_types=None):
        #in cases cell_types contain only one entries we convert them to a list
        if not cell_types:
            cell_types = self.res.index.tolist()
        elif not isinstance(cell_types,list):
                cell_types = [cell_types]
        
       
        for ctype in cell_types:
            ctype_data = pd.read_excel(filename,ctype, index_col=0, usecols=range(5))
            if ctype_data.loc['number','Comments'] == 'additive uncertainty':
                self.error_types[ctype] = 'additive'
                self.res.loc[ctype] = unumpy.uarray(ctype_data['Value'],ctype_data['Uncertainty'])
            else:
                self.error_types[ctype] = 'mul'
                self.res.loc[ctype] = list(zip(ctype_data['Value'],ctype_data['Uncertainty']))
        #checking whether there are more than one types of uncertainties, indcluding existing cell types
        if len(set(self.error_types))==1:
            self.unc_type = self.error_types[ctype]
        else:
            self.unc_type = 'mixed'  
            self.formats_add = dict(zip(self.params,['{:0.1eP}','{:0.0fP}','{:0.1eP}','{:0.0fP}','{:0.1fP}',
                                                  '{:0.0fP}','{:0.1fP}','{:0.1eP}','{:0.2fP}'])) 
            self.formats_mul = dict(zip(self.params,['{:0.1e}','{:0.0f}','{:0.1e}','{:0.0f}','{:0.1f}',
                                                  '{:0.0f}','{:0.1f}','{:0.1e}','{:0.2f}'])) 
        
        

## Uncertainty converter - from additive to multiplicative
Sometimes, in order to propagate errors, we need to approximate an additive error by a multiplication factor.

In [18]:
def ufloat_to_mulfac(value,err=0):
    """
    Approximate an additive error by an uncertainty multiplication factor.

    The factor is the geometric mean of the relative upper ratio (1 + err/value)
    and of the relative lower ratio 1/(1 - err/value).

    inputs:
        value - a ufloat (its own standard deviation is used), or a plain number
        err - the additive error, used only when value is a plain number
    output:
        the multiplication factor
    """
    if isinstance(value,uc.UFloat):
        nominal, spread = value.nominal_value, value.std_dev
        ratios = [1+spread/nominal, 1/(1-spread/nominal)]
    else:
        ratios = [1+err/value, 1/(1-err/value)]
    return gmean(ratios)

Convert a uarray (values with additive uncertainties) to an array of uncertainty multiplication factors.

In [20]:
def uarr_to_mulfac(uarr):
    '''
    Element-wise conversion of additive errors to multiplication factors.

    inputs:
        uarr - uarray holding nominal values together with their additive errors
    output:
        array with, per element, the geometric mean of (1 + err/value) and 1/(1 - err/value)
    '''
    rel_err = unumpy.std_devs(uarr)/unumpy.nominal_values(uarr)
    factor_pair = [1+rel_err, 1/(1-rel_err)]
    # gmean reduces over the first axis, i.e. over the two factors of each element
    return gmean(factor_pair)

## Multiplication uncertainty propagation
When two uncertainties are given as multiplication factors, we can propagate the error of their product by using the properties of the lognormal distribution:
$$ Z_{1} = e^{X} $$ 
$$ Z_{2} = e^{Y} $$ 
where $X,Y$ are normal variables such that $X = N(0,\sigma_{1}^{2}),Y= N(0,\sigma_{2}^{2})$  ($ Z_{1}, Z_{2}$ are lognormal variables)

Then $Z_{1} Z_{2}$ is also a lognormal variable, and if $ Z_{1}, Z_{2}$ are independent:
$$Z_{1}Z_{2} = e^{X+Y}$$ 
Such that $X+Y$ has variance of $\sigma_{1}^{2}+\sigma_{2}^{2}$

So, to propagate the error in a multiplication of two uncertainty factors, we can move to log space and propagate the error easily there.


In [22]:
def propagate_mul_facs(factors):
    '''
    Combine the uncertainty multiplication factors of lognormal variables that are multiplied together.

    The factors are moved to log10 space, summed in quadrature along the first
    axis and moved back, i.e. the result is 10**sqrt(sum(log10(f)**2)).

    factors is either:
    1. a numpy array containing all the factors of n lognormal variables that are being multiplied
       (for a 2D array each column is propagated separately)
    2. a regular list (or nested like [[1,1],[2,2]]) containing all the factors of n lognormal variables that are being multiplied
    3. a list mixing vectors (lists, arrays, Series) and scalars, in any order;
       scalars are broadcast against the vectors so every position gets its own propagated factor
        special case is for two arrays of the same size, in this case we do propagation of each pair

    output:
        a scalar when only scalar factors were given, otherwise an array with one factor per position.
        An empty input returns 1.0 (no factor, hence no uncertainty).
    '''
    if isinstance(factors, np.ndarray):
        stacked = factors
    else:
        # Every element takes part in the propagation: scalars (int or float) and
        # vectors are broadcast to one common shape and stacked along a new first axis.
        parts = [np.asarray(factor, dtype=float) for factor in factors]
        stacked = np.array(np.broadcast_arrays(*parts)) if parts else np.empty(0)
    log_facs = np.log10(stacked)
    ssq = np.sum(log_facs*log_facs, axis=0)
    return 10**(np.sqrt(ssq))


## Extracting the geometric mean and the errors
The next function takes a vector, possibly with additive errors, and computes the geometric mean and its error (as a multiplication factor). 
Using conversion to log space, the function accounts for both the inter-error, which is derived from the variation between the vector values, and the intra-error, which is given as the average of the individual errors.

In [23]:
def gmean_with_errors(vec):
    '''
    Compute the geometric mean of vec and its uncertainty as a multiplication factor.

    vec is either a vector of plain numbers (list, numpy array or Series of ints/floats)
    or a uarray containing additive errors.
    When vec contains no error terms the intra error is zero.
    The function converts the values to log space, where it computes the intra error as the
    average of the individual errors given in vec, and the inter error as the standard error
    of the log values themselves. The two are summed in quadrature.

    output:
        a tuple with the gmean and the uncertainty multiplication factor, both rounded to 2 decimals

    raises:
        IndexError when vec is empty
    '''
    arr = np.asarray(vec)
    if arr.size == 0:
        raise IndexError('gmean_with_errors() requires a non-empty vector')

    # ufloats can only live in object arrays; every numeric dtype is plain numbers.
    # (The removed np.float alias and the np.int32-only check are avoided this way.)
    if arr.dtype == object and isinstance(arr.flat[0], uc.UFloat):
        values = unumpy.nominal_values(arr)
        errors = uarr_to_mulfac(arr) #converting the errors to multiplication factors, to enable transformation to log space
        intra_error = np.log10(errors).mean()   #the intra error is computed in log space, from the expected variation
    else:
        values = arr.astype(float)
        intra_error = 0  #no intra error, because no individual errors were provided

    mean = gmean(values)
    log_inter_error = np.log10(values).std()/ np.sqrt(values.size) #standard error of the log values
    ssq = log_inter_error**2 + intra_error**2 #error propagation in log space
    err_fac = 10**(np.sqrt(ssq))
    return round(mean,2),round(err_fac,2)


In [24]:
def gmean_with_errors_from_mul(tups):
    '''
    Compute the geometric mean of values that come with uncertainty multiplication factors.

    inputs:
        tups - iterable of (value, uncertainty factor) tuples; it is traversed only once,
               so one-shot iterables such as zip(values, factors) are accepted as well
    output:
        a tuple with the geometric mean of the values and the overall uncertainty
        multiplication factor derived from the inter and intra uncertainties in log space,
        both rounded to 2 decimals
    '''
    columns = list(zip(*tups))  # transpose once: columns[0] are the values, columns[1] the factors
    values = np.array(columns[0])
    errors = np.array(columns[1])
    intra_error = np.log10(errors).mean()   #the intra error is the average of the individual errors in log space
    mean = gmean(values)
    log_inter_error = np.log10(values).std()/ np.sqrt(values.size) #standard error of the log values
    ssq = log_inter_error**2 + intra_error**2 #error propagation in log space
    err_fac = 10**(np.sqrt(ssq))
    return round(mean,2),round(err_fac,2)
