In [301]:
# loading all the dependencies
import pandas as pd
import numpy as np
pd.options.display.float_format ='{:,.1f}'.format
import matplotlib.pyplot as plt

%matplotlib inline  
from scipy.stats import gmean
from scipy import integrate
from openpyxl import load_workbook

import uncertainties as uc
from uncertainties import ufloat
from uncertainties import unumpy
%run Utility_Functions.ipynb

# Monocytes
The estimates of monocyte number and turnover overlap, because one of the main sources connecting the different compartments in which monocytes reside uses a model that integrates both the fractions of the subpopulations and their turnover times.

## Number of monocytes
We have measurements for two separate compartments of monocytes:
1. Reference ranges for circulating monocytes in the blood multiplied by the blood volume. 
2. Bone marrow cellularity and its fraction of monocytes taken from [Harrison, 1962](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC480393/)

### Blood monocyte
We use blood volume estimate from: [Snyder,1975](http://www.icrp.org/publication.asp?id=ICRP%20Publication%2023), [Boer, 1984](https://www.ncbi.nlm.nih.gov/pubmed/6496691) and [Nadler et al., 1962](https://www.ncbi.nlm.nih.gov/pubmed/21936146) 

We use reference interval for monocyte concentration from:
[Wakeman et al., 2007](https://www.ncbi.nlm.nih.gov/pubmed/17617078), 
[Pekelharing et al., 2010](https://www.sysmex.dk/fileadmin/media/f100/Diagnostic_Perspectives/Pekelharing_DiagPersp_Vol1_1-11.pdf), 
[Dosoo et al., 2012](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0036308), 
[Ambayya et al., 2014](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0091968)

Some of the sources give only a 95% reference interval. In those cases we use the middle of the range as the reference value. 
We then compute the uncertainty by error propagation of the inter-study and intra-study SEM.


In [115]:
# Load the blood-volume estimates reported by the different sources.
blood_volume = pd.read_excel('Monocytes_data.xlsx', 'Blood_Volume', usecols=range(6))
# Reference blood volume: the nominal value is the mean over the sources. The
# uncertainty combines the mean of the per-source SEMs (intra-study) with the
# SEM of the per-source means (inter-study); adding ufloat(0, s) makes the
# uncertainties package propagate the second term as an independent error.
ref_BV = ufloat(blood_volume['mean [L]'].mean(), blood_volume['SEM [L]'].mean()) + ufloat(0, blood_volume['mean [L]'].sem())

# Load the monocyte concentration data.
mean_col = 'mean [10^9 cells/liter]'
median_col = 'median [10^9 cells/liter]'
mono_conc = pd.read_excel('Monocytes_data.xlsx', 'Blood_concentration', index_col=0, usecols=range(9))

# Where a source reports no mean, assume the median equals the mean.
# fillna is used instead of chained indexing (df.loc[:, col][mask] = ...),
# which may write to a temporary copy and leave the frame unchanged.
mono_conc[mean_col] = mono_conc[mean_col].fillna(mono_conc[median_col])

# Where neither mean nor median is reported, use the middle of the reference range.
range_midpoint = mono_conc[['min reg range', 'max reg range']].mean(axis=1)
mono_conc[mean_col] = mono_conc[mean_col].fillna(range_midpoint)

# Same intra/inter-study error propagation as for the blood volume.
ref_mono_conc = ufloat(mono_conc[mean_col].mean(), mono_conc['SEM'].mean()) + ufloat(0, mono_conc[mean_col].sem())

# The reference estimate for blood monocytes is the product of concentration
# (in 10^9 cells/liter) and blood volume (in liters), converted to cells.
blood_mono = ref_mono_conc*ref_BV*10**9

print('Number of monocytes circulating in the blood: {:0.1eP}'.format(blood_mono))

Number of monocytes circulating in the blood: (2.7±0.4)×10⁹


#### Blood monocytes subpopulations
We use data from [Wong et al., 2011](https://www.ncbi.nlm.nih.gov/pubmed/21653326) to divide the blood monocyte population into three distinct subpopulations, which play different roles in terms of kinetics: classical monocytes, intermediate monocytes and non-classical monocytes.
The basic model is that some of the classical monocytes differentiate into intermediate monocytes, which in turn differentiate once more into non-classical monocytes.

In [132]:
# Load the share of each blood monocyte subpopulation (indexed by pool name).
blood_subp = pd.read_excel('Monocytes_data.xlsx', 'Subpopulations', index_col=0, usecols=range(5))

# Attach the SEM to each subpopulation's fraction and scale by the total number
# of blood monocytes to get the absolute number of cells per subpopulation.
subp_fraction = unumpy.uarray(blood_subp['mean %'], blood_subp['SEM'])
blood_subp['total'] = subp_fraction*blood_mono
blood_subp.head()

Unnamed: 0_level_0,mean %,SD,N,SEM,total
pool,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
classical monocytes,0.8,0.1,16,0.0,(2.30+/-0.31)e+09
intermediate monocytes,0.1,0.0,16,0.0,(1.46+/-0.23)e+08
non-classical monocytes,0.1,0.0,16,0.0,(2.5+/-0.4)e+08


### Bone marrow monocytes
We use bone marrow cellularity data from [Harrison, 1962](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC480393/): the number of nucleated cells per kg of person's weight and the percentage of monocytes out of the cells. 

In [122]:
# Load the bone-marrow cellularity data (indexed by parameter name).
BM_data = pd.read_excel('Monocytes_data.xlsx', 'BM_population', usecols=range(8), index_col=0)

# Keep the 'monocytes nuceleated cells' parameter separately and drop it from the
# data frame, so that the remaining rows can be averaged as cellularity data.
perc_mono = BM_data.loc['monocytes nuceleated cells', 'value']
BM_data = BM_data.drop('monocytes nuceleated cells')

# Trim the index: keep only the first word of each label.
# `n` is passed by keyword because it is keyword-only in pandas >= 2.0.
BM_data.index = BM_data.index.str.split(' ', n=1).str.get(0)

# Nominal value: mean over the rows. Uncertainty: mean of the per-row SEMs
# (intra) combined with the SEM of the row values (inter).
BM_nuc_cells = ufloat(BM_data['value'].mean(), BM_data['SEM'].mean()) + ufloat(0, BM_data['value'].sem())

ref_man_mass = 70 #kg

# Combine the cellularity (per kg, per the accompanying text) with the body mass
# and the monocyte fraction to get the total number of monocytes in the marrow.
BM_mono = perc_mono*BM_nuc_cells*ref_man_mass

print('Number of BM monocytes according to Harrison, 1962: {:0.1eP}'.format(BM_mono))

Number of BM monocytes according to Harrison, 1962: (2.3±0.3)×10⁹


## Monocytes kinetics

We base our analysis on the kinetic model developed by [Patel et al., 2017](http://jem.rupress.org/content/214/7/1913). 
The model divides the monocyte population into 4 compartments.
A. Bone marrow:
    1. proliferating pool - producing the influx of new monocytes,
       post-proliferating (classical monocytes) pool - waiting to be transferred to the blood.
       The model fits a parameter for the delay time in the post-proliferative pool, but as we are interested in the steady state, the only 
       effect it has is on the shape of the age distribution of monocytes when they transfer to the blood; it doesn't affect the mean residence 
       time in the bone marrow pool, which follows from the proliferation rate (as the proliferation rate equals the removal rate in steady state).
B. Blood: 
    2. classical monocytes - circulating in the blood. Only a small fraction of them differentiate into intermediate monocytes;
        the main fraction either die or leave the blood for the tissues (giving rise to the monocyte-derived macrophages in 
        tissues such as the dermis and intestine). 
    3. intermediate monocytes
    4. non-classical monocytes

We know that monocytes are able to differentiate into tissue-resident macrophages and dendritic cells. They migrate out of the blood in response to injury. In some tissues the resident macrophage population consists mostly of monocyte-derived macrophages, such as the intestinal and dermal macrophage populations, but it is not known what fraction of the monocytes leave the blood for these tissues ([Italiani and Boraschi, 2014](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4201108/pdf/fimmu-05-00514.pdf)).

As the model of [Patel et al., 2017](http://jem.rupress.org/content/214/7/1913) doesn't distinguish between monocytes that die and monocytes that leave the blood, we assume that most of the monocytes die and treat their estimated lifespan as representing the time until death. In this manner, our estimates may serve as an upper bound for the total turnover of monocytes (as some of them may differentiate rather than die).

Because the estimates for the number of monocytes in the blood and in the bone marrow do not necessarily agree once the kinetic data are taken into account, we derive two distinct estimates and average them in the end:
1. From the estimate of the number of blood monocytes we derive the number of bone marrow monocytes, assuming that the flux of monocytes into the blood equals the flux of proliferating monocytes in the bone marrow.
2. From the estimate of the number of bone marrow monocytes we derive the number of blood monocytes, assuming that the rate at which monocytes enter the blood equals the production rate in the marrow.


In [309]:
# Load the parameters of the kinetic model (indexed by compartment name).
mono_comp = pd.read_excel('Monocytes_data.xlsx', 'Kinetics_model', usecols=range(9), index_col=0)

# Combine the values and their uncertainties into uarrays.
mono_comp['lifespan'] = unumpy.uarray(mono_comp['mean lifespan [d]'], mono_comp['SEM lifespan [d]'])
mono_comp['proliferation'] = unumpy.uarray(mono_comp['proliferation rate [1/d]'], mono_comp['proliferation SEM [1/d]'])

# Integrate the cell numbers using two separate estimates, one anchored on the
# blood reference value and one on the bone marrow reference value, kept as
# separate columns. Both rely on the relation that the output rate of the bone
# marrow must equal the input rate of the blood.

# Estimate anchored on the blood measurements (aligned on the compartment index;
# compartments absent from blood_subp are left missing until filled in below).
mono_comp['number - blood est'] = blood_subp['total']

# Estimate anchored on the bone marrow measurements. The column is created with
# object dtype because it will hold ufloat objects; starting from an integer
# column would rely on silent upcasting, which newer pandas versions deprecate.
mono_comp['number - BM est'] = pd.Series(0, index=mono_comp.index, dtype=object)
mono_comp.loc['bone marrow', 'number - BM est'] = BM_mono

# New data frame: its index holds the two basic estimates (blood or BM) and its
# columns the relevant parameters. The quantity linking the two estimates is the
# rate at which classical monocytes enter the blood: it is used to derive the
# number of bone marrow monocytes from the blood perspective, and the number of
# classical monocytes from the bone marrow perspective.
mono_est = pd.DataFrame(index=['blood est', 'BM est'], columns=['blood enter', 'number', 'cellular turnover rate', 'lifespan'])

# Blood perspective: the rate at which monocytes reach the blood equals the
# number of classical monocytes divided by their lifespan.
mono_est.loc['blood est', 'blood enter'] = mono_comp.loc['classical monocytes', 'number - blood est']/mono_comp.loc['classical monocytes', 'lifespan']
# BM perspective: the rate at which monocytes reach the blood equals the number
# of BM monocytes multiplied by their proliferation rate.
mono_est.loc['BM est', 'blood enter'] = mono_comp.loc['bone marrow', 'number - BM est']*mono_comp.loc['bone marrow', 'proliferation']

# Following the computation backward gives the missing number for each estimate.
mono_comp.loc['bone marrow', 'number - blood est'] = mono_est.loc['blood est', 'blood enter']/mono_comp.loc['bone marrow', 'proliferation']
mono_comp.loc['classical monocytes', 'number - BM est'] = mono_est.loc['BM est', 'blood enter']*mono_comp.loc['classical monocytes', 'lifespan']

# In the BM estimate, the other blood subpopulations keep the same ratio to the
# classical monocytes as in the blood estimate.
for pool in ['intermediate monocytes', 'non-classical monocytes']:
    mono_comp.loc[pool, 'number - BM est'] = (mono_comp.loc['classical monocytes', 'number - BM est']
                                              * mono_comp.loc[pool, 'number - blood est']
                                              / mono_comp.loc['classical monocytes', 'number - blood est'])

# Total number of monocytes by each estimate: sum over all compartments.
# The column order matches the order of mono_est's index.
mono_est['number'] = mono_comp[['number - blood est', 'number - BM est']].sum(axis=0).tolist()

# Cellular turnover for each estimate: sum of number/lifespan over the compartments.
# np.nansum skips compartments whose ratio is NaN (presumably the bone marrow row,
# which would have no lifespan value; confirm against the data sheet).
for est in mono_est.index:
    mono_est.loc[est, 'cellular turnover rate'] = np.nansum((mono_comp['number - ' + est]/mono_comp['lifespan']))

# Mean lifespan from the total number and the turnover rate.
mono_est['lifespan'] = mono_est['number']/mono_est['cellular turnover rate']

# The blood-entry rate is not needed anymore; remove it for convenience.
mono_est = mono_est.drop('blood enter', axis=1)

# Data frame for the final results: one column for the reference value and one
# for the uncertainty expressed as a multiplication factor.
monocyte_results = pd.DataFrame(index=['number', 'lifespan', 'cellular turnover rate', 'cell mass', 'cellular mass turnovr rate',
                                       'total cellular mass'], columns=['Value', 'Units', 'Uncertainty', 'Formats'])

monocyte_results['Units'] = ['cells', 'days', 'cells per day', 'pg', 'grams per day', 'grams']
monocyte_results['Formats'] = ['{:0.1e}', '{:0.1f}', '{:0.1e}', '{:0.0f}', '{:0.1f}', '{:0.1f}']

# For each parameter, compute the reference value and its uncertainty:
# 1. Take the geometric mean of the two estimates as the reference value.
# 2. Get the intra-estimate uncertainty as a multiplication factor via Uarr2MulF
#    (utility function from Utility_Functions.ipynb, not shown in this notebook).
# 3. Take log10 to express that error as a linear error in log space.
# 4. Get the inter-estimate error as the STD of the log values divided by sqrt(2).
# 5. Combine the intra- and inter-estimate errors in log space as a Euclidean sum.
# 6. Raise 10 to that power to get the multiplication-factor uncertainty.
for param in mono_est.columns:
    nominal = unumpy.nominal_values(mono_est[param])
    monocyte_results.loc[param, 'Value'] = gmean(nominal)
    intra_unc = Uarr2MulF(mono_est[param])
    log_intra_unc = np.log10(intra_unc)
    # NOTE(review): this is numpy's .std() (ddof=0), whereas the pandas .sem()/.std()
    # used elsewhere in the notebook use ddof=1. Confirm which is intended.
    log_inter_unc = np.log10(nominal).std()/np.sqrt(2)
    log_unc = np.sqrt(log_intra_unc.mean()**2 + log_inter_unc**2)
    monocyte_results.loc[param, 'Uncertainty'] = 10**log_unc

# Print the number of monocytes, their lifespan and cellular turnover rate,
# including the uncertainty range. Parameters not computed yet are skipped.
for param in monocyte_results.index:
    value = monocyte_results.loc[param, 'Value']
    if not pd.isna(value):
        fmt = monocyte_results.loc[param, 'Formats']
        units = monocyte_results.loc[param, 'Units']
        unc = monocyte_results.loc[param, 'Uncertainty']
        print(param.capitalize() + ' of monocytes: ' + fmt.format(value) + ' ' + units,
              'SD range: ' + fmt.format(value/unc),
              '- ' + fmt.format(value*unc),
              ' ' + units)

    




Number of monocytes: 5.2e+09 cells SD range: 3.6e+09 - 7.7e+09  cells
Lifespan of monocytes: 3.5 days SD range: 2.9 - 4.1  days
Cellular turnover rate of monocytes: 1.5e+09 cells per day SD range: 1.0e+09 - 2.3e+09  cells per day


## Monocytes mass and cellular mass turnover
We use data on the volume and density of monocytes to derive their mass.
sources: [Zipursky et al., 1976](https://www.ncbi.nlm.nih.gov/pubmed/1066173),
[Chapman et al., 1981](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC494369/pdf/jclinpath00491-0013.pdf),
[Ince et al., 1987](https://www.sciencedirect.com/science/article/pii/000527368790023X?via%3Dihub), 
[Nibbering et al., 1990](https://www.ncbi.nlm.nih.gov/pubmed/2110946) 

Integrating the estimate for the mass of a single cell with the estimates for the total number of monocytes and their lifespan, we can estimate the total cellular mass and the cellular mass turnover.

In [311]:
# Load the cell volume and mass-density data.
mass_data = pd.read_excel('Monocytes_data.xlsx', 'Volume_and_mass', usecols=range(8))

# Store the density in a designated variable. Both the value and its SEM are
# taken as scalars from the first 'monocyte mass density' row by position, so
# the lookup works whatever index label that row happens to have.
density_row = mass_data[mass_data['parameter'] == 'monocyte mass density'].iloc[0]
density = ufloat(density_row['value'], density_row['SEM'])

# Keep only the 'monocyte volume' rows.
mass_data = mass_data[mass_data['parameter'] == 'monocyte volume']

# The reference volume is the average of the reported values. Its uncertainty
# combines the intra-study variability (mean of the studies' SEMs) with the
# inter-study error (standard error of the mean across the studies).
mono_volume = ufloat(mass_data['value'].mean(), mass_data['SEM'].mean()) + ufloat(0, mass_data['value'].std()/np.sqrt(mass_data['value'].size))

# The mass is the product of the volume and the density.
mono_mass = mono_volume*density

# The number and turnover uncertainties are multiplication factors, so the error
# of the mass is converted with AddErr2MulF (utility function from
# Utility_Functions.ipynb, not shown in this notebook).
monocyte_results.loc['cell mass', 'Value'] = mono_mass.nominal_value
monocyte_results.loc['cell mass', 'Uncertainty'] = AddErr2MulF(mono_mass)

# Total cellular mass of monocytes: cell mass times number of cells (pg -> g).
# FacMulProp (also from Utility_Functions.ipynb) is given the two uncertainty factors.
monocyte_results.loc['total cellular mass', 'Value'] = monocyte_results.loc['cell mass', 'Value']*monocyte_results.loc['number', 'Value']/10**12
monocyte_results.loc['total cellular mass', 'Uncertainty'] = FacMulProp([monocyte_results.loc['cell mass', 'Uncertainty'], monocyte_results.loc['number', 'Uncertainty']])

# Cellular mass turnover: cell mass times cellular turnover rate (pg/d -> g/d).
monocyte_results.loc['cellular mass turnovr rate', 'Value'] = monocyte_results.loc['cell mass', 'Value']*monocyte_results.loc['cellular turnover rate', 'Value']/10**12
monocyte_results.loc['cellular mass turnovr rate', 'Uncertainty'] = FacMulProp([monocyte_results.loc['cell mass', 'Uncertainty'], monocyte_results.loc['cellular turnover rate', 'Uncertainty']])

# Print the cell mass, cellular mass turnover rate and total cellular mass
# (the 4th to 6th rows of monocyte_results).
for param in monocyte_results.index[3:6]:
    value = monocyte_results.loc[param, 'Value']
    fmt = monocyte_results.loc[param, 'Formats']
    units = monocyte_results.loc[param, 'Units']
    unc = monocyte_results.loc[param, 'Uncertainty']
    print(param.capitalize() + ' of monocytes: ' + fmt.format(value) + ' ' + units,
          'SD range: ' + fmt.format(value/unc),
          '- ' + fmt.format(value*unc),
          ' ' + units)

    

Cell mass of monocytes: 459 pg SD range: 418 - 503  pg
Cellular mass turnovr rate of monocytes: 0.7 grams per day SD range: 0.5 - 1.1  grams per day
Total cellular mass of monocytes: 2.4 grams SD range: 1.6 - 3.6  grams


## Saving the results to excel


In [307]:
# The data frame monocyte_results is used to save the results.
# Some changes are needed for it to fit the summary format. The steps are
# guarded so that running this cell a second time leaves the frame unchanged
# instead of adding a duplicate column or failing on the missing 'Formats' column.

# The parameters are needed as a column named 'Parameter', not as the index.
if 'Parameter' not in monocyte_results.columns:
    monocyte_results = monocyte_results.reset_index().rename(columns={'index': 'Parameter'})

# The 'Formats' column is useful only for printing in Python.
monocyte_results = monocyte_results.drop(columns=['Formats'], errors='ignore')

# Add a comment about the nature of the uncertainty factor.
monocyte_results['Comments'] = 'uncertainty by multiplication factor'

In [308]:
# Write the results into the 'Monocytes' sheet of the existing summary workbook.
file = 'Summary.xlsx'

# Append mode opens the existing workbook and keeps its other sheets. 'overlay'
# writes into an already existing 'Monocytes' sheet rather than creating a
# renamed copy. The context manager saves and closes the file even if writing
# fails (assigning writer.book / writer.sheets and calling writer.save() are
# not supported by current pandas versions).
with pd.ExcelWriter(file, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
    monocyte_results.to_excel(writer, sheet_name='Monocytes', index=False)
    worksheet = writer.sheets['Monocytes']
    # Tab colour comes from colors_data, which is not defined in this notebook
    # (presumably loaded by Utility_Functions.ipynb; confirm).
    worksheet.sheet_properties.tabColor = colors_data.loc['Monocytes', 'hex']