diff --git a/.ipynb_checkpoints/global_biomass-checkpoint.ipynb b/.ipynb_checkpoints/global_biomass-checkpoint.ipynb deleted file mode 100644 index ee4756f..0000000 --- a/.ipynb_checkpoints/global_biomass-checkpoint.ipynb +++ /dev/null @@ -1,539 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, 'statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass on Earth\n", - "To estimate the total biomass on Earth, we sum all of the contributions from each of the taxa. Here are our estimates of the total biomass of each taxon:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass [Gt C]Rounded biomass [Gt C]UncertaintyTotal biomass [Gt C]Rounded total biomass [Gt C]Total uncertainty
BacteriaTerrestrial deep subsurface58.06929460.00020.55382673.39674270.09.652678
Marine1.3269341.4001.805524NaNNaNNaN
Soil7.3522987.0006.399644NaNNaNNaN
Marine deep subsurface6.6482177.0007.643075NaNNaNNaN
ArchaeaTerrestrial deep subsurface3.7065514.00063.7532737.4032258.013.379259
Marine0.3317340.3002.682953NaNNaNNaN
Soil0.5157050.5003.645371NaNNaNNaN
Marine deep subsurface2.8492363.0007.931545NaNNaNNaN
FungiTerrestrial11.802005NaN3.459751NaN13.03.328541
Marine0.324820NaN10.000000NaNNaNNaN
AnimalsAnnelids0.1985060.200NaN2.4672243.04.833176
Terrestrial arthropods0.2115680.20014.881347NaNNaNNaN
Marine arthropods0.9223401.00010.000000NaNNaNNaN
Cnidarians0.0896170.100NaNNaNNaNNaN
Molluscs0.1819850.200NaNNaNNaNNaN
Nematodes0.0240750.020NaNNaNNaNNaN
Fish0.6676110.7008.261037NaNNaNNaN
Livestock0.1071470.100NaNNaNNaNNaN
Humans0.0553730.050NaNNaNNaNNaN
Wild birds0.0016580.002NaNNaNNaNNaN
Wild mammals0.0073440.0071.810268NaNNaNNaN
ProtistsMarine2.059715NaN10.000000NaN4.05.694338
Terrestrial1.597868NaN8.018254NaNNaNNaN
VirusesViruses0.2202160.20015.201769NaNNaN15.201769
PlantsPlants450.000000450.0001.193955NaNNaN1.193955
Total biomassTotal biomassNaNNaNNaNNaN550.01.664686
\n", - "
" - ], - "text/plain": [ - " Biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 58.069294 \n", - " Marine 1.326934 \n", - " Soil 7.352298 \n", - " Marine deep subsurface 6.648217 \n", - "Archaea Terrestrial deep subsurface 3.706551 \n", - " Marine 0.331734 \n", - " Soil 0.515705 \n", - " Marine deep subsurface 2.849236 \n", - "Fungi Terrestrial 11.802005 \n", - " Marine 0.324820 \n", - "Animals Annelids 0.198506 \n", - " Terrestrial arthropods 0.211568 \n", - " Marine arthropods 0.922340 \n", - " Cnidarians 0.089617 \n", - " Molluscs 0.181985 \n", - " Nematodes 0.024075 \n", - " Fish 0.667611 \n", - " Livestock 0.107147 \n", - " Humans 0.055373 \n", - " Wild birds 0.001658 \n", - " Wild mammals 0.007344 \n", - "Protists Marine 2.059715 \n", - " Terrestrial 1.597868 \n", - "Viruses Viruses 0.220216 \n", - "Plants Plants 450.000000 \n", - "Total biomass Total biomass NaN \n", - "\n", - " Rounded biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 60.000 \n", - " Marine 1.400 \n", - " Soil 7.000 \n", - " Marine deep subsurface 7.000 \n", - "Archaea Terrestrial deep subsurface 4.000 \n", - " Marine 0.300 \n", - " Soil 0.500 \n", - " Marine deep subsurface 3.000 \n", - "Fungi Terrestrial NaN \n", - " Marine NaN \n", - "Animals Annelids 0.200 \n", - " Terrestrial arthropods 0.200 \n", - " Marine arthropods 1.000 \n", - " Cnidarians 0.100 \n", - " Molluscs 0.200 \n", - " Nematodes 0.020 \n", - " Fish 0.700 \n", - " Livestock 0.100 \n", - " Humans 0.050 \n", - " Wild birds 0.002 \n", - " Wild mammals 0.007 \n", - "Protists Marine NaN \n", - " Terrestrial NaN \n", - "Viruses Viruses 0.200 \n", - "Plants Plants 450.000 \n", - "Total biomass Total biomass NaN \n", - "\n", - " Uncertainty Total biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 20.553826 73.396742 \n", - " Marine 1.805524 NaN \n", - " Soil 6.399644 NaN \n", - " Marine deep subsurface 7.643075 NaN \n", - "Archaea Terrestrial deep subsurface 63.753273 7.403225 \n", - " Marine 
2.682953 NaN \n", - " Soil 3.645371 NaN \n", - " Marine deep subsurface 7.931545 NaN \n", - "Fungi Terrestrial 3.459751 NaN \n", - " Marine 10.000000 NaN \n", - "Animals Annelids NaN 2.467224 \n", - " Terrestrial arthropods 14.881347 NaN \n", - " Marine arthropods 10.000000 NaN \n", - " Cnidarians NaN NaN \n", - " Molluscs NaN NaN \n", - " Nematodes NaN NaN \n", - " Fish 8.261037 NaN \n", - " Livestock NaN NaN \n", - " Humans NaN NaN \n", - " Wild birds NaN NaN \n", - " Wild mammals 1.810268 NaN \n", - "Protists Marine 10.000000 NaN \n", - " Terrestrial 8.018254 NaN \n", - "Viruses Viruses 15.201769 NaN \n", - "Plants Plants 1.193955 NaN \n", - "Total biomass Total biomass NaN NaN \n", - "\n", - " Rounded total biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 70.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Archaea Terrestrial deep subsurface 8.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Fungi Terrestrial 13.0 \n", - " Marine NaN \n", - "Animals Annelids 3.0 \n", - " Terrestrial arthropods NaN \n", - " Marine arthropods NaN \n", - " Cnidarians NaN \n", - " Molluscs NaN \n", - " Nematodes NaN \n", - " Fish NaN \n", - " Livestock NaN \n", - " Humans NaN \n", - " Wild birds NaN \n", - " Wild mammals NaN \n", - "Protists Marine 4.0 \n", - " Terrestrial NaN \n", - "Viruses Viruses NaN \n", - "Plants Plants NaN \n", - "Total biomass Total biomass 550.0 \n", - "\n", - " Total uncertainty \n", - "Bacteria Terrestrial deep subsurface 9.652678 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Archaea Terrestrial deep subsurface 13.379259 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Fungi Terrestrial 3.328541 \n", - " Marine NaN \n", - "Animals Annelids 4.833176 \n", - " Terrestrial arthropods NaN \n", - " Marine arthropods NaN \n", - " Cnidarians NaN \n", - " Molluscs NaN \n", - " Nematodes NaN \n", - " Fish NaN \n", 
- " Livestock NaN \n", - " Humans NaN \n", - " Wild birds NaN \n", - " Wild mammals NaN \n", - "Protists Marine 5.694338 \n", - " Terrestrial NaN \n", - "Viruses Viruses 15.201769 \n", - "Plants Plants 1.193955 \n", - "Total biomass Total biomass 1.664686 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_excel('results.xlsx','Table1 & Fig1', index_col=[0,1])\n", - "results" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the global biomass on Earth is ≈550 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = results['Biomass [Gt C]'].sum()\n", - "print('Our best estimate for the global biomass on Earth is ≈%.d Gt C' %round(best_estimate,-1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimate of the total biomass on Earth, we sum the biomass of the different kingdoms of life and take into account the uncertainty in our estimates of their biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total biomass on Earth is ≈1.7-fold\n" - ] - } - ], - "source": [ - "kingdoms = results.groupby(level=0).apply(sum).drop('Total biomass')\n", - "\n", - "mul_CI = CI_sum_prop(estimates=kingdoms['Biomass [Gt C]'], mul_CIs=kingdoms['Total uncertainty'])\n", - "print('Our best projection for the uncertainty associated with our estimate of the total biomass on Earth is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "update_results(path='results.xlsx', sheet='Table1 & Fig1',row = ('Total 
biomass','Total biomass'), col='Total uncertainty', values=mul_CI)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 615aafb..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "/usr/bin/python3" -} \ No newline at end of file diff --git a/.~lock.results.xlsx# b/.~lock.results.xlsx# deleted file mode 100644 index 889c657..0000000 --- a/.~lock.results.xlsx# +++ /dev/null @@ -1 +0,0 @@ -,yinonbaron,yinonPC,10.04.2018 18:22,file:///home/yinonbaron/.config/libreoffice/4; \ No newline at end of file diff --git a/.~lock.results20180325.xlsx# b/.~lock.results20180325.xlsx# deleted file mode 100644 index 8083ec6..0000000 --- a/.~lock.results20180325.xlsx# +++ /dev/null @@ -1 +0,0 @@ -,yinonbaron,yinonPC,25.03.2018 16:53,file:///home/yinonbaron/.config/libreoffice/4; \ No newline at end of file diff --git a/MAREDAT_consistency_check/.ipynb_checkpoints/consistency_with_tara_oceans-checkpoint.ipynb b/MAREDAT_consistency_check/.ipynb_checkpoints/consistency_with_tara_oceans-checkpoint.ipynb deleted file mode 100644 index c99b472..0000000 --- a/MAREDAT_consistency_check/.ipynb_checkpoints/consistency_with_tara_oceans-checkpoint.ipynb +++ /dev/null @@ -1,453 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Consistency check between 
the MAREDAT data and *Tara* Oceans data\n", - "Our estimates of the global biomass of several marine taxa are based on data from the MAREDAT database. As stated in the specific sections relying on data from the database, there are many sources of uncertainty associated with the estimates stemming from the MAREDAT data. Many of those sources of uncertainty are hard to quantify, and no uncertainty estimate is provided in the literature on estimates based on the MAREDAT database.\n", - "\n", - "Thus, we perform consistency checks for the MAREDAT data against independent sources of data, to increase our confidence in our estimates and to provide a measure of the uncertainty associated with our estimates.\n", - "\n", - "In this document we conduct a comparison between the estimates of biomass based on the MAREDAT database and data from [de Vargas et al.](http://dx.doi.org/10.1126/science.1261605). The data in de Vargas et al. is based on 18S rDNA sequencing of different populations of plankton collected by the *Tara* Oceans campaign. \n", - "\n", - "The dataset in de Vargas et al. divides the plankton community based on size ranges (pico-nano-, nano-, micro- and meso-plankton). de Vargas et al. provides only the number of reads for each taxon. The fraction of reads that a taxon has out of the total number of reads can be used as a proxy for the biomass fraction of the taxon, but not as a proxy of its absolute biomass. Relying on 18S rDNA sequence abundance as a proxy for biomass is not a well established practice, and has its own biases, but we chose to use it for the sake of comparing it to independent approaches such as the MAREDAT database. Each plankton size fraction sampled in the study was sequenced to approximately the same sequencing depth (≈120 million reads). 
This means that the 18S read data can provide a possible proxy for the biomass fraction of a certain taxon within a size fraction, but not across size fractions.\n", - "\n", - "We focus on comparing the MAREDAT and de Vargas et al. data in two case studies: the biomass of diatoms and the total biomass of nanoplankton and microplankton.\n", - "\n", - "## Diatoms\n", - "We begin by describing how to compare the biomass estimates of diatoms based on de Vargas et al. and the MAREDAT dataset. Our aim is to calculate the relative fraction of diatoms out of the total biomass of organisms in the same size range as diatoms. As the data in those two datasets is structured differently, we first need to make corrections to the data so that a valid comparison can be made.\n", - "\n", - "### MAREDAT\n", - "In de Vargas et al., diatoms appear mainly in the nanoplankton (5-20 µm in diameter) and microplankton (20-180 µm) size fractions. In order to make a comparison to the MAREDAT database we need to find the corresponding groups in the MAREDAT database. The corresponding groups in the MAREDAT database are the microzooplankton and the diatom groups (zooplankton between 5 and 200 µm in diameter). As calculated in the marine protists sections, our estimates for the respective biomass of microzooplankton and diatoms are ≈0.6 Gt C and ≈0.3 Gt C. Thus, according to the MAREDAT data diatoms account for about 30% of the total biomass of plankton in the 5-200 µm size fraction.\n", - "\n", - "### de Vargas et al.\n", - "We use data on the total number of reads of different taxa in each size fraction. The data originates from de Vargas et al. from Database W6 in the companion website, as well as from Figure 3 in the main text. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GroupTotal # of readsPiconano fractionNano fractionMicro fractionMeso fractionRhizaria
0Metazoa2469626460.0502020.2421050.3708500.336032False
1Bacillariophyta145862500.1236220.5409450.2929130.039370False
2Collodaria970850640.0520610.2386120.1605210.548807True
3Nassellaria & Eucyrtidium6569610.4295010.1193060.2342730.216920True
4Spumellaria51993770.7917570.0542300.0715840.080260True
\n", - "
" - ], - "text/plain": [ - " Group Total # of reads Piconano fraction \\\n", - "0 Metazoa 246962646 0.050202 \n", - "1 Bacillariophyta 14586250 0.123622 \n", - "2 Collodaria 97085064 0.052061 \n", - "3 Nassellaria & Eucyrtidium 656961 0.429501 \n", - "4 Spumellaria 5199377 0.791757 \n", - "\n", - " Nano fraction Micro fraction Meso fraction Rhizaria \n", - "0 0.242105 0.370850 0.336032 False \n", - "1 0.540945 0.292913 0.039370 False \n", - "2 0.238612 0.160521 0.548807 True \n", - "3 0.119306 0.234273 0.216920 True \n", - "4 0.054230 0.071584 0.080260 True " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data on the total number of reads of each taxon from de Vargas et al.\n", - "data = pd.read_excel('tara_oceans_data.xlsx','de Vargas W6',skiprows=1)\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We also use data on the total number of reads from each size fraction from Figure 2 in de Vargas et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Piconano readsNano readsMicro readsMeso reads
0114000000135000000121000000135000000
\n", - "
" - ], - "text/plain": [ - " Piconano reads Nano reads Micro reads Meso reads\n", - "0 114000000 135000000 121000000 135000000" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Load data on the total number of reads in each size fraction\n", - "tot_reads = pd.read_excel('tara_oceans_data.xlsx','Total number of reads', skiprows=1)\n", - "tot_reads" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In de Vargas et al., diatoms appear mainly in the nanoplankton (5-20 µm in diameter) and microplankton (20-180 µm) size fractions. However, the microzooplankton biomass estimates in the MAREDAT database do not include copepods, which were moved to the mesozooplankton group. Fragile protists such as Rhizaria, are probably also undersampled in the MAREDAT database. Therefore, to correct for these effects such that we could compare the MAREDAT and Tara Oceans datasets, we remove metazoa (dominated by arthropods) and Rhizaria reads from the relevant size fractions in the Tara Oceans dataset (nano and microplakton):\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of diatoms out of the total number of reads in nanoplankton and microplankton\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Nano readsMicro reads
00.1563850.337967
\n", - "
" - ], - "text/plain": [ - " Nano reads Micro reads\n", - "0 0.156385 0.337967" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the total number of reads for the Nano and Micro fractions\n", - "read_data = pd.DataFrame()\n", - "read_data['Nano reads'] = data['Total # of reads']*data['Nano fraction']\n", - "read_data['Micro reads'] = data['Total # of reads']*data['Micro fraction']\n", - "\n", - "# Subtract the total sum of rhizaria and metazoa from the total number of reads\n", - "corrected_total_reads = tot_reads[['Nano reads','Micro reads']] - read_data[data['Group'] == 'Metazoa'] - read_data[data['Rhizaria'] == True].sum()\n", - "\n", - "print('The fraction of diatoms out of the total number of reads in nanoplankton and microplankton')\n", - "read_data.loc[1]/corrected_total_reads" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After correcting for those biases, the biomass fraction of diatoms in microplankton in the Tara Oceans dataset is between 16%-33%, which fits well with the estimate from the MAREDAT database of ≈30%.\n", - "\n", - "## Nanoplankton and Microplankton biomass\n", - "In this section we generate an independent estimate of the total biomass of nanoplankton and microplankton, based on several data sources. We begin with the independently measured biomass of Rhizaria. The independent measurement using microscopy by [Biard et al.](http://dx.doi.org/10.1038/nature17652) has estimated ≈0.2 Gt C of Rhizaria above 600 µm in diameter.\n", - "\n", - "We assume that this biomass represents the biomass of Rhizaria in mesozooplankton. 
As we calculated in the marine arthropod section, Rhizaria represent ≈40% of the total mesoplankton biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is 40%\n", - "The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is 35%\n" - ] - } - ], - "source": [ - "# Load 18S sequecing data of mesozooplankton\n", - "seq_data = pd.read_excel('../animals/arthropods/marine_arthropods/marine_arthropods_data.xlsx',sheet_name='de Vargas',skiprows=1)\n", - "\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is ' + '{:,.0f}%'.format(seq_data['Rhizaria surface'].mean()*100))\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is ' + '{:,.0f}%'.format(seq_data['Rhizaria DCM'].mean()*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The remaining 60% are made up mainly of arthropods. This would put the total mesozooplankton arthropod biomass at ≈0.3 Gt C. Our estimate for the total biomass of arthropods in the nano, micro and mesozooplankton size fraction is ≈0.56 Gt C (see the marine arthropod section for details). Subtracting the mesozooplankton arthropod biomass leaves ≈0.2 Gt C of nano and microzooplankton arthropod biomass." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# The estimate of the biomass of rhizaria based on Biard et al.\n", - "rhizaria_biomass = 0.2e15\n", - "\n", - "# Calculate the average fraction of rhizaria out of the biomass\n", - "# of mesozooplankton\n", - "rhizaria_fraction = np.mean([seq_data['Rhizaria surface'].mean(),seq_data['Rhizaria DCM'].mean()])\n", - "\n", - "# Calculate the biomass of mesozooplankton arthropods\n", - "meso_arth = rhizaria_biomass/(1-rhizaria_fraction)\n", - "\n", - "# Our estimate for the total biomass of arthropods in nano\n", - "# micro and mezozooplankton size fractions\n", - "nano_micro_mezo_arthropod = 0.56e15\n", - "\n", - "# Subtract the mesozooplankton arthropod biomass to estimate\n", - "# The nanozooplankton and microzooplankton arthropod biomass\n", - "nano_micro_arth = nano_micro_mezo_arthropod - meso_arth" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Based on the Tara Oceans data, the nano and microzooplankton arthropod biomass accounts for ≈40-75% of the total nano and microplankton biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of arthropods out of the total number of reads in nanoplankton and microplankton\n", - "The mean fraction of arthropods out of the total number of reads in nanoplankton and microplankton is ≈60%\n" - ] - } - ], - "source": [ - "print('The fraction of arthropods out of the total number of reads in nanoplankton and microplankton')\n", - "metazoa_frac = read_data[data['Group'] == 'Metazoa']/tot_reads[['Nano reads','Micro reads']]\n", - "\n", - "print('The mean fraction of arthropods out of the total number of reads in nanoplankton and microplankton is ≈' + '{:,.0f}%'.format(float(metazoa_frac.mean(axis=1)*100)))" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "We use the estimate we just calculated of ≈0.2 Gt C of arthropods in the nano and microplankton size fractions, and combine it with the estimate of the biomass fraction of arthropods in the nano and microplankton size fractions from the Tara Oceans dataset. This yields an estimate for the total nano and microplankton biomass which is about ≈0.4 Gt C:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The toal biomass of nano and microplankton we estimate is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "tot_nano_micro_biomass = nano_micro_arth/metazoa_frac.mean(axis=1)\n", - "\n", - "print('The toal biomass of nano and microplankton we estimate is ≈%.1f Gt C' %(tot_nano_micro_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As we stated in the section regarding the biomass of diatoms, the biomass of nano and microplankton is estimated at ≈1 Gt C based on the MAREDAT database, which is about 2-fold larger than the estimate we got based on combination of information from Biard et al. the Tara Oceans dataset and the MAREDAT database." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/MAREDAT_consistency_check/.ipynb_checkpoints/cyanobacteria_consistency_check-checkpoint.ipynb b/MAREDAT_consistency_check/.ipynb_checkpoints/cyanobacteria_consistency_check-checkpoint.ipynb deleted file mode 100644 index 3057930..0000000 --- a/MAREDAT_consistency_check/.ipynb_checkpoints/cyanobacteria_consistency_check-checkpoint.ipynb +++ /dev/null @@ -1,139 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Consistency check between the MAREDAT data and cyanobacteria abundance data\n", - "We use a recent study by [Flombaum et al.](http://dx.doi.org/10.1073/pnas.1307701110) which estimated the total number of cyanobacteria worldwide. Flombaum et al. estimate ≈$3×10^{27}$ Prochlorococcus cells and ≈$7×10^{26}$ Synechococcus cells.\n", - "\n", - "In order to estimate the total biomass of cyanobacteria, we use data from [Buitenhuis et al](https://ueaeprints.uea.ac.uk/40778/), to estimate the carbon content of Prochlorococcus and Synechococcus. Buitenhuis et al. reports values from the literature on the carbon content of Prochlorococcus and Synechococcus. 
We use the geometric mean of the estimates from different studies as our best estimate of the carbon content of Prochlorococcus and Synechococcus:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load data from Buitenhuis et al.\n", - "carbon_content = pd.read_excel('cyanobacteria_data.xlsx',skiprows=1)\n", - "\n", - "# Calculate the geometric mean of the carbon content of Prochlorococcus and Synechococcus\n", - "pro_cc = gmean(carbon_content['Prochlorococcus [fg C cell^-1]'].dropna())*1e-15\n", - "syn_cc = gmean(carbon_content['Synechococcus [fg C cell^-1]'].dropna())*1e-15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply the total number of cells of Prochlorococcus and Synechococcus by the carbon content of Prochlorococcus and Synechococcus to estimate their total biomass. The total biomass of cyanobacteria is the sum of the total biomass of Prochlorococcus and Synechococcus:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total biomass of cyanobacteria is ≈0.3 Gt C\n" - ] - } - ], - "source": [ - "# The total number of Prochlorococcus and Synechococcus from Flombaum et al.\n", - "pro_cell_num = 3e27\n", - "syn_cell_num = 7e26\n", - "\n", - "# Calculate the total biomass of Prochlorococcus and Synechococcus\n", - "pro_tot_biomass = pro_cc*pro_cell_num\n", - "syn_tot_biomass = syn_cc*syn_cell_num\n", - "\n", - "# Calculate the total biomass of cyanobacteria\n", - "cyano_biomass = pro_tot_biomass + syn_tot_biomass\n", - "print('The total biomass of cyanobacteria is ≈%.1f Gt C' %(cyano_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We note in the section detailing our estimate of the total biomass of marine protists that the total biomass of picophytoplankton based on the MAREDAT 
database is ≈0.42 Gt C. Buitenhuis et al. estimate, based on data from the MAREDAT database, that cyanobacteria account for 31-51% of the total biomass of picophytoplankton, which is equivalent to:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate of the biomass of cyanobacteria based on the MAREDAT database is 0.2 Gt C\n" - ] - } - ], - "source": [ - "# The estimate of the biomass of picophytoplankton based on MAREDAT data\n", - "picophyto_biomass = 0.42e15\n", - "\n", - "# The fraction of cyanobacteria out of the total biomass of picophytoplankton based\n", - "# on MAREDAT data\n", - "cyano_fraction = [0.31,0.51]\n", - "\n", - "# The estimate of the total biomass of cyanobacteria\n", - "cyano_maredat = picophyto_biomass*np.mean(cyano_fraction)\n", - "print('The estimate of the biomass of cyanobacteria based on the MAREDAT database is %.1f Gt C' %(cyano_maredat/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The estimate based on the data from Flombaum et al. and the estimate based on the MAREDAT database are less than 2-fold apart." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/MAREDAT_consistency_check/.ipynb_checkpoints/remote_sensing_consistency_check-checkpoint.ipynb b/MAREDAT_consistency_check/.ipynb_checkpoints/remote_sensing_consistency_check-checkpoint.ipynb deleted file mode 100644 index b7b4e1e..0000000 --- a/MAREDAT_consistency_check/.ipynb_checkpoints/remote_sensing_consistency_check-checkpoint.ipynb +++ /dev/null @@ -1,67 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Comparing MAREDAT estimates to remote sensing measurements\n", - "As a consistency check for the estimates in the MAREDAT database, we calculate the total biomass of phytoplankton, and compare it to estimates of the total biomass of phytoplankton from remote sensing measurements.\n", - "\n", - "The groups of phytoplankton for which we estimate the total biomass are picophytoplankton, diatoms and *Phaeocystis*. Our best estimates for the total biomass of these groups based on data from the MAREDAT database are 0.42, 0.3 and 0.27 Gt C, respectively (for more details on our estimates, see the marine protist section in the Supplementary Information). 
To estimate the total biomass of phytoplankton, we sum up our estimates for the biomass of all the phytoplankton groups:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of phytoplankton based on the MAREDAT database is ≈1 Gt C\n" - ] - } - ], - "source": [ - "# The estimates of the total biomass of the different phytoplankton groups, based on the MAREDAT database\n", - "picophyoto_biomass = 0.42e15\n", - "diatom_biomass = 0.3e15\n", - "phaeocystis_biomass = 0.27e15\n", - "\n", - "# Calculate our best estimate of the total biomass of phytoplankton based on the MAREDAT database\n", - "phyoplankton_biomass = picophyoto_biomass + diatom_biomass + phaeocystis_biomass\n", - "\n", - "print('Our best estimate of the total biomass of phytoplankton based on the MAREDAT database is ≈%.0f Gt C' %(phyoplankton_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We compare this estimate with the estimates of the total biomass of phytoplankton made by [Antoine et al.](http://dx.doi.org/10.1029/95GB02832) and [Behrenfeld & Falkowski](http://dx.doi.org/10.4319/lo.1997.42.1.0001). Antoine et al. and Behrenfeld & Falkowski use remote sensing data to estimate ≈0.3-0.75 Gt C of phytoplankton. This means that our estimate based on the MAREDAT database is 1.3-3-fold higher than the estimate based on remote sensing."
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/__pycache__/run_pipline.cpython-35.pyc b/__pycache__/run_pipline.cpython-35.pyc deleted file mode 100644 index 8582375..0000000 Binary files a/__pycache__/run_pipline.cpython-35.pyc and /dev/null differ diff --git a/animals/.ipynb_checkpoints/animal_biomass-checkpoint.ipynb b/animals/.ipynb_checkpoints/animal_biomass-checkpoint.ipynb deleted file mode 100644 index 8b9ed27..0000000 --- a/animals/.ipynb_checkpoints/animal_biomass-checkpoint.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of animals\n", - "To estimate the total biomass of animals, we combine our estimates for the biomass of each animal taxon, which we calculated in each subdirectory. Our estimates for the biomass of each animal taxon are:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TaxonBiomass [Gt C]Uncertainty
0Annelids0.198506NaN
1Cnidarians0.089617NaN
2Molluscs0.181985NaN
3Marine arthropods0.92234010.000000
4Terrestrial arthropods0.21156814.881347
5Fish0.6676118.197784
6Livestock0.107147NaN
7Wild mammals0.0073442.266285
8Nematodes0.024075NaN
9Humans0.050000NaN
\n", - "
" - ], - "text/plain": [ - " Taxon Biomass [Gt C] Uncertainty\n", - "0 Annelids 0.198506 NaN\n", - "1 Cnidarians 0.089617 NaN\n", - "2 Molluscs 0.181985 NaN\n", - "3 Marine arthropods 0.922340 10.000000\n", - "4 Terrestrial arthropods 0.211568 14.881347\n", - "5 Fish 0.667611 8.197784\n", - "6 Livestock 0.107147 NaN\n", - "7 Wild mammals 0.007344 2.266285\n", - "8 Nematodes 0.024075 NaN\n", - "9 Humans 0.050000 NaN" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('animal_biomass_estimate.xlsx')\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum all these different contributions to produce our best estimate for the biomass of animals:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of animals is ≈2.5 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = data['Biomass [Gt C]'].sum()\n", - "\n", - "print('Our best estimate for the biomass of animals is ≈%.1f Gt C' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimate of the total biomass of animals, we combine the uncertainties of the estimates for which we have uncertainty projections, namely arthropods (marine and terrestrial), fish and wild mammals."
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty of our estimate of the total biomass of animals is ≈5-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(estimates=data.loc[~np.isnan(data['Uncertainty']),'Biomass [Gt C]'].values, mul_CIs = data.loc[~np.isnan(data['Uncertainty']),'Uncertainty'].values)\n", - "\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of animals is ≈%.0f-fold' %mul_CI)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Annelids'), \n", - " col=['Total biomass [Gt C]', 'Total uncertainty'],\n", - " values=[best_estimate,mul_CI],\n", - " path='../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/animal_biomass_estimate_OLD.xlsx b/animals/animal_biomass_estimate_OLD.xlsx deleted file mode 100644 index ba3fa31..0000000 Binary files a/animals/animal_biomass_estimate_OLD.xlsx and /dev/null differ diff --git a/animals/annelids/.ipynb_checkpoints/annelids-checkpoint.ipynb b/animals/annelids/.ipynb_checkpoints/annelids-checkpoint.ipynb deleted file mode 100644 index 6cbb83e..0000000 --- a/animals/annelids/.ipynb_checkpoints/annelids-checkpoint.ipynb +++ /dev/null @@ -1,849 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - 
"collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of Annelids\n", - "To estimate the total biomass of annelids, we rely on data collected in a recent study by [Fierer et al.](http://dx.doi.org/10.1111/j.1461-0248.2009.01360.x). Fierer et al. collected data on the biomass density of two major groups of annelids (Enchytraeids & Earthworms) in different biomes. Here is a sample from the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BiomeAverage biomass density [g C m^-2]Median biomass density [g C m^-2]Taxon
0Boreal forests0.320.28Enchytraeids
1Desert0.000.00Enchytraeids
2Temperate coniferous forest0.800.56Enchytraeids
3Temperate deciduous forest0.640.30Enchytraeids
4Temperate grassland0.310.26Enchytraeids
5Tropical forest0.000.00Enchytraeids
6Tundra0.990.83Enchytraeids
7Boreal forests0.280.10Earthworms
8Desert0.000.00Earthworms
9Temperate coniferous forest1.200.13Earthworms
10Temperate deciduous forest2.001.19Earthworms
11Temperate grassland3.800.79Earthworms
12Tropical forest4.900.48Earthworms
13Tundra1.400.09Earthworms
\n", - "
" - ], - "text/plain": [ - " Biome Average biomass density [g C m^-2] \\\n", - "0 Boreal forests 0.32 \n", - "1 Desert 0.00 \n", - "2 Temperate coniferous forest 0.80 \n", - "3 Temperate deciduous forest 0.64 \n", - "4 Temperate grassland 0.31 \n", - "5 Tropical forest 0.00 \n", - "6 Tundra 0.99 \n", - "7 Boreal forests 0.28 \n", - "8 Desert 0.00 \n", - "9 Temperate coniferous forest 1.20 \n", - "10 Temperate deciduous forest 2.00 \n", - "11 Temperate grassland 3.80 \n", - "12 Tropical forest 4.90 \n", - "13 Tundra 1.40 \n", - "\n", - " Median biomass density [g C m^-2] Taxon \n", - "0 0.28 Enchytraeids \n", - "1 0.00 Enchytraeids \n", - "2 0.56 Enchytraeids \n", - "3 0.30 Enchytraeids \n", - "4 0.26 Enchytraeids \n", - "5 0.00 Enchytraeids \n", - "6 0.83 Enchytraeids \n", - "7 0.10 Earthworms \n", - "8 0.00 Earthworms \n", - "9 0.13 Earthworms \n", - "10 1.19 Earthworms \n", - "11 0.79 Earthworms \n", - "12 0.48 Earthworms \n", - "13 0.09 Earthworms " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data taken from Fierer et al.\n", - "data = pd.read_excel('annelid_biomass_data.xlsx','Fierer',skiprows=1)\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For each biome, Fierer et al. provides an estimate of the average biomass density and the median biomass density. We generate two estimates for the total biomass of annelids, one based on average biomass densities and one based on median biomass densities. The estimate based on the average biomass density is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are in non-natural conditions, or samples which have some technical biases associated with them) might shift the average biomass density significantly. 
On the other hand, the estimate based on median biomass densities might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies.\n", - "\n", - "For each biome, we multiply the sum of the biomass density of Enchytraeids and Earthworms by the total area of that biome taken from the book [Biogeochemistry: An analysis of Global Change](https://www.sciencedirect.com/science/book/9780123858740) by Schlesinger & Bernhardt.:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total biomass of annelids based on Fierer et al. based on average biomass densities is 0.2 Gt C\n", - "The total biomass of annelids based on Fierer et al. based on median biomass densities is 0.05 Gt C\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Average biomass density [g C m^-2]Median biomass density [g C m^-2]
Biome
Boreal forests0.600.38
Desert0.000.00
Temperate coniferous forest2.000.69
Temperate deciduous forest2.641.49
Temperate grassland4.111.05
Tropical forest4.900.48
Tundra2.390.92
\n", - "
" - ], - "text/plain": [ - " Average biomass density [g C m^-2] \\\n", - "Biome \n", - "Boreal forests 0.60 \n", - "Desert 0.00 \n", - "Temperate coniferous forest 2.00 \n", - "Temperate deciduous forest 2.64 \n", - "Temperate grassland 4.11 \n", - "Tropical forest 4.90 \n", - "Tundra 2.39 \n", - "\n", - " Median biomass density [g C m^-2] \n", - "Biome \n", - "Boreal forests 0.38 \n", - "Desert 0.00 \n", - "Temperate coniferous forest 0.69 \n", - "Temperate deciduous forest 1.49 \n", - "Temperate grassland 1.05 \n", - "Tropical forest 0.48 \n", - "Tundra 0.92 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load biome area data\n", - "area = pd.read_excel('annelid_biomass_data.xlsx','Biome area', skiprows=1, index_col='Biome')\n", - "\n", - "# For each biome sum the total biomass density of annelids\n", - "total_biomass_density = data.groupby('Biome').sum()\n", - "\n", - "# Calculate the total biomass of annelids based on average or median biomass densities\n", - "total_biomass_mean = (total_biomass_density['Average biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "total_biomass_median = (total_biomass_density['Median biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "\n", - "print('The total biomass of annelids based on Fierer et al. based on average biomass densities is %.1f Gt C' %(total_biomass_mean/1e15))\n", - "print('The total biomass of annelids based on Fierer et al. based on median biomass densities is %.2f Gt C' %(total_biomass_median/1e15))\n", - "total_biomass_density" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The data in Fierer et al. does not account for two biomes - croplands and tropical savannas. To estimate the biomass contribution of annelids from those biomes, we collected data from the literature on the biomass density of annelids (mostly earthworms) from these biomes. 
The data we collected is provided below:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Original valueOriginal unitsBiomass density [g C m^-2]SiteBiomeReferenceLinkRemarksUnnamed: 8
04516.846514g DW m-22.258423Ivory Coast, Lamto, “mean savanna”Native tropical savannaPetersen, H., & Luxton, M. (1982). A comparati...http://dx.doi.org/10.2307/3544689NaNNaN
12977.568617g DW m-21.488784Ivory Coast, Lamto, unburnt savanna/bare soilNative tropical savannaBlock, W. (1970). Micro-arthropods in some Uga...NaNNaNNaN
248.793261g DW m-20.024397Uganda, Kabanyolo, elephant grassNative tropical savannaBlock, W. (1970). Micro-arthropods in some Uga...NaNNaNNaN
358.792141g DW m-20.029396Uganda, Kabanyolo, natural bushNative tropical savannaBlock, W. (1970). Micro-arthropods in some Uga...NaNNaNNaN
479.062971g DW m-20.039531Uganda, Kabanyolo, pastureNative tropical savannaBlock, W. (1970). Micro-arthropods in some Uga...NaNNaNNaN
520.000000g FW m-23.0000001 Site in Mexico and 6 in Ivory CoastNative tropical savannaFragoso, C., Kanyonyo, J., Moreno, A., Senapat...http://horizon.documentation.ird.fr/exl-doc/pl...total 32.06 and earthworms are 60%NaN
667.000000g FW m-210.05000067 sites in america africa and asiaTropical pasturesFragoso, C., Kanyonyo, J., Moreno, A., Senapat...http://horizon.documentation.ird.fr/exl-doc/pl...73.2 g FW m-2 and ≈90% is earthwormsNaN
70.700000g FW m-20.10500040 sites in america africa and asiaCropsFragoso, C., Kanyonyo, J., Moreno, A., Senapat...http://horizon.documentation.ird.fr/exl-doc/pl...NaN5.12 g FW and 13% of the biomass is earthworms
84.774000g FW m-20.716100Carimagua, columbiaNative tropical savannaDecaëns, T., Jiménez, J. J., Barros, E., Chauv...http://dx.doi.org/10.1016/j.agee.2003.12.005NaN15.3 g m-2 and 31% earthworms
959.600000g FW m-28.940000Carimagua, columbiaTropical pasturesDecaëns, T., Jiménez, J. J., Barros, E., Chauv...http://dx.doi.org/10.1016/j.agee.2003.12.005NaNNaN
103.900000g FW m-20.585000Manaus, BrazilTropical pasturesDecaëns, T., Jiménez, J. J., Barros, E., Chauv...http://dx.doi.org/10.1016/j.agee.2003.12.005NaN5.6 g FW m-2 and 65% earthworms
1145.100000g FW m-26.765000Manaus, BrazilTropical pasturesDecaëns, T., Jiménez, J. J., Barros, E., Chauv...http://dx.doi.org/10.1016/j.agee.2003.12.005NaN60.8 g FW m-2 and 79% earthworms
1239.700000g FW m-25.955000Manaus, BrazilTropical pasturesDecaëns, T., Jiménez, J. J., Barros, E., Chauv...http://dx.doi.org/10.1016/j.agee.2003.12.005NaN57 g FW m-2 and 90% earthworms
135.000000g FW m-20.750000Carimagua, columbiaNative tropical savannaJiménez, J. J., Moreno, A. G., Decaëns, T., La...http://dx.doi.org/10.1007/s003740050469NaNNaN
1462.000000g FW m-29.300000Carimagua, columbiaTropical pasturesJiménez, J. J., Moreno, A. G., Decaëns, T., La...http://dx.doi.org/10.1007/s003740050469NaNNaN
\n", - "
" - ], - "text/plain": [ - " Original value Original units Biomass density [g C m^-2] \\\n", - "0 4516.846514 g DW m-2 2.258423 \n", - "1 2977.568617 g DW m-2 1.488784 \n", - "2 48.793261 g DW m-2 0.024397 \n", - "3 58.792141 g DW m-2 0.029396 \n", - "4 79.062971 g DW m-2 0.039531 \n", - "5 20.000000 g FW m-2 3.000000 \n", - "6 67.000000 g FW m-2 10.050000 \n", - "7 0.700000 g FW m-2 0.105000 \n", - "8 4.774000 g FW m-2 0.716100 \n", - "9 59.600000 g FW m-2 8.940000 \n", - "10 3.900000 g FW m-2 0.585000 \n", - "11 45.100000 g FW m-2 6.765000 \n", - "12 39.700000 g FW m-2 5.955000 \n", - "13 5.000000 g FW m-2 0.750000 \n", - "14 62.000000 g FW m-2 9.300000 \n", - "\n", - " Site Biome \\\n", - "0 Ivory Coast, Lamto, “mean savanna” Native tropical savanna \n", - "1 Ivory Coast, Lamto, unburnt savanna/bare soil Native tropical savanna \n", - "2 Uganda, Kabanyolo, elephant grass Native tropical savanna \n", - "3 Uganda, Kabanyolo, natural bush Native tropical savanna \n", - "4 Uganda, Kabanyolo, pasture Native tropical savanna \n", - "5 1 Site in Mexico and 6 in Ivory Coast Native tropical savanna \n", - "6 67 sites in america africa and asia Tropical pastures \n", - "7 40 sites in america africa and asia Crops \n", - "8 Carimagua, columbia Native tropical savanna \n", - "9 Carimagua, columbia Tropical pastures \n", - "10 Manaus, Brazil Tropical pastures \n", - "11 Manaus, Brazil Tropical pastures \n", - "12 Manaus, Brazil Tropical pastures \n", - "13 Carimagua, columbia Native tropical savanna \n", - "14 Carimagua, columbia Tropical pastures \n", - "\n", - " Reference \\\n", - "0 Petersen, H., & Luxton, M. (1982). A comparati... \n", - "1 Block, W. (1970). Micro-arthropods in some Uga... \n", - "2 Block, W. (1970). Micro-arthropods in some Uga... \n", - "3 Block, W. (1970). Micro-arthropods in some Uga... \n", - "4 Block, W. (1970). Micro-arthropods in some Uga... \n", - "5 Fragoso, C., Kanyonyo, J., Moreno, A., Senapat... 
\n", - "6 Fragoso, C., Kanyonyo, J., Moreno, A., Senapat... \n", - "7 Fragoso, C., Kanyonyo, J., Moreno, A., Senapat... \n", - "8 Decaëns, T., Jiménez, J. J., Barros, E., Chauv... \n", - "9 Decaëns, T., Jiménez, J. J., Barros, E., Chauv... \n", - "10 Decaëns, T., Jiménez, J. J., Barros, E., Chauv... \n", - "11 Decaëns, T., Jiménez, J. J., Barros, E., Chauv... \n", - "12 Decaëns, T., Jiménez, J. J., Barros, E., Chauv... \n", - "13 Jiménez, J. J., Moreno, A. G., Decaëns, T., La... \n", - "14 Jiménez, J. J., Moreno, A. G., Decaëns, T., La... \n", - "\n", - " Link \\\n", - "0 http://dx.doi.org/10.2307/3544689 \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "5 http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "6 http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "7 http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "8 http://dx.doi.org/10.1016/j.agee.2003.12.005 \n", - "9 http://dx.doi.org/10.1016/j.agee.2003.12.005 \n", - "10 http://dx.doi.org/10.1016/j.agee.2003.12.005 \n", - "11 http://dx.doi.org/10.1016/j.agee.2003.12.005 \n", - "12 http://dx.doi.org/10.1016/j.agee.2003.12.005 \n", - "13 http://dx.doi.org/10.1007/s003740050469 \n", - "14 http://dx.doi.org/10.1007/s003740050469 \n", - "\n", - " Remarks \\\n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "5 total 32.06 and earthworms are 60% \n", - "6 73.2 g FW m-2 and ≈90% is earthworms \n", - "7 NaN \n", - "8 NaN \n", - "9 NaN \n", - "10 NaN \n", - "11 NaN \n", - "12 NaN \n", - "13 NaN \n", - "14 NaN \n", - "\n", - " Unnamed: 8 \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "5 NaN \n", - "6 NaN \n", - "7 5.12 g FW and 13% of the biomass is earthworms \n", - "8 15.3 g m-2 and 31% earthworms \n", - "9 NaN \n", - "10 5.6 g FW m-2 and 65% earthworms \n", - "11 60.8 g FW m-2 and 79% earthworms \n", - "12 57 g FW m-2 and 90% earthworms \n", - "13 NaN \n", - "14 NaN " - ] - }, - "execution_count": 4, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "supp_biome_data = pd.read_excel('annelid_biomass_data.xlsx','Supplementary biomes')\n", - "supp_biome_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For each biome, we calculate the average and median annelid biomass density, and multiply by the total area of the biome:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate average and median biomass densities for each additional biome\n", - "mean_supp_biome_biomass_density = supp_biome_data.groupby('Biome').mean()['Biomass density [g C m^-2]']\n", - "median_supp_biome_biomass_density = supp_biome_data.groupby('Biome').median()['Biomass density [g C m^-2]']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We do not know the specific division in terms of area between pastures and savanna. We thus make two estimates - one assumes the entire area of tropical savannas is filled with savanna, and the second assumes the entire area is pastures. This gives four estimates in total - median-based and mean-based estimates, each considering either only savanna or only pastures. 
As our best estimate for the total biomass of soil annelids, we use the geometric mean of those four estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of annelids is 0.2 Gt C\n" - ] - }, - { - "data": { - "text/plain": [ - "0.13401468278872883" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Consider only savanna\n", - "all_savanna_area = area.copy()\n", - "all_savanna_area.loc['Native tropical savanna', 'Area [m^2]'] *=2\n", - "all_savanna_area.loc['Tropical pastures', 'Area [m^2]'] =0\n", - "all_savanna_mean = total_biomass_mean + (mean_supp_biome_biomass_density*all_savanna_area['Area [m^2]']).sum()\n", - "all_savanna_median = total_biomass_median + (median_supp_biome_biomass_density*all_savanna_area['Area [m^2]']).sum()\n", - "\n", - "# Consider only pastures\n", - "all_pastures_area = area.copy()\n", - "all_pastures_area.loc['Native tropical savanna', 'Area [m^2]'] =0\n", - "all_pastures_area.loc['Tropical pastures', 'Area [m^2]'] *=2\n", - "all_pastures_mean = total_biomass_mean + (mean_supp_biome_biomass_density*all_pastures_area['Area [m^2]']).sum()\n", - "all_pastures_median = total_biomass_median + (median_supp_biome_biomass_density*all_pastures_area['Area [m^2]']).sum()\n", - "\n", - "# Calculate the geometric mean of the average-based and median-based estimates\n", - "best_estimate = gmean([all_pastures_median,all_pastures_mean,all_savanna_mean,all_savanna_median])\n", - "\n", - "\n", - "print('Our best estimate for the biomass of annelids is %.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of annelids\n", - "We consider only the Enchytraeids as they are ≈200-fold smaller than earthworms (Fierer et al.). 
We calculate the total biomass of Enchytraeids and divide it by the carbon content of Enchytraeids, which is ≈25 µg C (Fierer et al.):" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of Enchytraeids is ≈8e+17\n" - ] - } - ], - "source": [ - "num_data = data.set_index('Biome')\n", - "# Calculate the total biomasss of Enchytraeids based on mean and median biomass densities\n", - "mean_ench_biomass = (num_data[num_data['Taxon'] == 'Enchytraeids']['Average biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "median_ench_biomass = (num_data[num_data['Taxon'] == 'Enchytraeids']['Median biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "\n", - "# Calculate the geometric mean of both biomass estimates\n", - "ench_biomass = gmean([mean_ench_biomass, median_ench_biomass])\n", - "\n", - "# The carbon content of Enchytraeids from Fierer et al.\n", - "ench_carbon_content = 25e-6\n", - "\n", - "# Calculate the total number of Enchytraeids\n", - "tot_ench_num = ench_biomass/ench_carbon_content\n", - "\n", - "print('Our best estimate for the total number of Enchytraeids is ≈%.0e' % tot_ench_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the animal biomass data\n", - "old_results = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Annelids',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,np.nan)\n", - "result.to_excel('../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Annelids'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,None],\n", - " path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Annelids'), \n", - " col=['Number of individuals'],\n", - " values=tot_ench_num,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/.ipynb_checkpoints/arthropod_biomass-checkpoint.ipynb b/animals/arthropods/.ipynb_checkpoints/arthropod_biomass-checkpoint.ipynb deleted file mode 100644 index 0293c51..0000000 --- a/animals/arthropods/.ipynb_checkpoints/arthropod_biomass-checkpoint.ipynb +++ /dev/null @@ -1,187 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of arthropods\n", - "To estimate the total biomass of arthropods, we combine our estimates for the biomass of marine and terrestrial arthropods. 
Our estimates for the biomass of terrestrial and marine arthropods are:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass [Gt C]Uncertainty
Taxon
Marine arthropods0.92234010.000000
Terrestrial arthropods0.21156814.881347
\n", - "
" - ], - "text/plain": [ - " Biomass [Gt C] Uncertainty\n", - "Taxon \n", - "Marine arthropods 0.922340 10.000000\n", - "Terrestrial arthropods 0.211568 14.881347" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "arth_biomass = data.loc[['Marine arthropods','Terrestrial arthropods']]\n", - "arth_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum these two contributions to produce our best estimate for the biomass of arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of arthropods is ≈1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = arth_biomass['Biomass [Gt C]'].sum()\n", - "\n", - "print('Our best estimate for the biomass of arthropods is ≈%.0f Gt C' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimate of the total biomass of arthropods, we combine the uncertainties of our estimates for the biomass of marine and terrestrial arthropods."
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty of our estimate of the total biomass of animals is ≈8-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(estimates=arth_biomass['Biomass [Gt C]'].values, mul_CIs = arth_biomass['Uncertainty'].values)\n", - "\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of animals is ≈%.0f-fold' %mul_CI)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to Fig. S2-S3\n", - "update_figs2s3(row='Arthropods', \n", - " col='Uncertainty',\n", - " values=mul_CI,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/.ipynb_checkpoints/arthropods-checkpoint.ipynb b/animals/arthropods/.ipynb_checkpoints/arthropods-checkpoint.ipynb deleted file mode 100644 index 2fd6442..0000000 --- a/animals/arthropods/.ipynb_checkpoints/arthropods-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb b/animals/arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb deleted file mode 100644 index 66d22ef..0000000 --- a/animals/arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb +++ /dev/null @@ -1,282 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "# Estimating the biomass of marine arthropods\n", - "To estimate the biomass of marine arthropods, we rely on data from the marine ecosystem biomass data (MAREDAT) initiative. The MAREDAT database contains measurements of the biomass concentration for each plankton group. From this database [Buitenhuis et al.](https://doi.org/10.5194/essd-5-227-2013) generates estimates for the global biomass of each plankton group by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth.\n", - "Buitenhuis et al. reports two estimates, one based on the arithmetic mean of concentrations at each depth, and one based on the median concentrations at each depth. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are in coastal waters, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the median might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies.\n", - "\n", - "The data in the MAREDAT database is divided into plankton size classes: microzooplankton (zooplankton between 5 and 200 µm in diameter), mesozooplankton (zooplankton between 200 and 2000 µm in diameter) and macrozooplankton (zooplankton between 2 and 10 mm). 
We are interested in the biomass of arthropods in each class.\n", - "\n", - "## Microzooplankton\n", - "Microzooplankton was defined in the MAREDAT database so as to exclude copepod biomass, and thus its contribution to the total biomass of marine arthropods is negligible.\n", - "\n", - "## Mesozooplankton\n", - "Mesozooplankton might contain several different kinds of animal and protist taxa. We argue that the main contribution to the mesozooplankton category in the MAREDAT database comes from arthropods (mainly copepods). To substantiate this claim, we rely on 18S rDNA sequencing data from the *Tara* Oceans campaign reported in [de Vargas et al.](http://dx.doi.org/10.1126/science.1261605). In figure W10A in the companion [website](http://taraoceans.sb-roscoff.fr/EukDiv/#figW10) to the paper the authors detail abundance of 18S rDNA reads for different animal and protist taxa:\n", - "\n", - "\n", - "### Can 18S rDNA data be used to estimate biomass contribution?\n", - "It is not obvious that data on the relative abundance of 18S sequences can be used in order to estimate the relative biomass contribution of different taxa. We provide two independent lines of evidence of the legitimacy of using the 18S data in de Vargas et al. in order to claim that arthropods dominate the biomass of mesozooplankton in the MAREDAT data.\n", - "\n", - "The first line of evidence is in figure W3 in the companion [website](http://taraoceans.sb-roscoff.fr/EukDiv/#figureW3): \n", - "\n", - "This figure shows good correlations between the rDNA content in a cell and the cell size, as well as correlation between microscopy data and 18S rDNA sequencing data.\n", - "\n", - "The second line of evidence is a comparison of the relative fraction of Rhizaria in the 18S rDNA data with \n", - "data from in-situ imaging ([Biard et al.](http://dx.doi.org/10.1038/nature17652)). 
We estimate the average relative fraction of Rhizaria in the 18S rDNA sequencing data:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is 40%\n", - "The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is 35%\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from fraction_helper import *\n", - "\n", - "\n", - "# Load 18S sequecing data\n", - "seq_data = pd.read_excel('mesozooplankton_data.xlsx',sheet_name='de Vargas',skiprows=1)\n", - "\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is ' + '{:,.0f}%'.format(seq_data['Rhizaria surface'].mean()*100))\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is ' + '{:,.0f}%'.format(seq_data['Rhizaria DCM'].mean()*100))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These fractions of 35-40% are very close to the average fraction reported by Biard et al. using imaging data of ≈33%.\n", - "\n", - "### The biomass contribution of arthropods to mesozooplankton data in MAREDAT\n", - "To calculate the contribution of arthropods to the biomass data in the MAREDAT database, we assume the representation of Rhizaria in the MAREDAT data is limited, as Biard et al. indicated that they are usually undersampled because many of them are delicate and are severely damaged by plankton nets or fixatives used in surveys such as the ones used to build the MAREDAT. 
Therefore, we calculate the relative contribution of arthropods to the total population of mesozooplankton excluding Rhizaria:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average fraction of arthropods out of the total biomass of mesozooplankton in surface waters excluding Rhizaria is 79%\n", - "The average fraction of arthropods out of the total biomass of mesozooplankton in the deep chlorophyll maximum excluding Rhizaria is 79%\n" - ] - } - ], - "source": [ - "# Define the relative fraction of arthropods out of the total mesozooplankton excluding Rhizaria\n", - "arth_frac_surf = seq_data['Arthropod surface']/(1-seq_data['Rhizaria surface'])\n", - "arth_frac_dcm = seq_data['Arthropod DCM']/(1-seq_data['Rhizaria DCM'])\n", - "\n", - "# Calculate the mean fraction of arthropods in surface waters and the DCM\n", - "mean_arth_frac_surf = frac_mean(arth_frac_surf)\n", - "mean_arth_frac_dcm = frac_mean(arth_frac_dcm)\n", - "print('The average fraction of arthropods out of the total biomass of mesozooplankton in surface waters excluding Rhizaria is '+'{:,.0f}%'.format(mean_arth_frac_surf*100))\n", - "print('The average fraction of arthropods out of the total biomass of mesozooplankton in the deep chlorophyll maximum excluding Rhizaria is '+'{:,.0f}%'.format(mean_arth_frac_dcm*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Overall, we use ≈80% for the fraction of arthropods out of the total biomass of mesozooplankton in the MAREDAT database.\n", - "\n", - "To estimate the biomass of mesozooplankton arthropods, we rely on the global estimates made by [Buitenhuis et al.](http://search.proquest.com/openview/0e8e5672fa28111df473268e13f2f757/1?pq-origsite=gscholar&cbl=105729) based on the MAREDAT data. Buitenhuis et al. 
generated two estimates for the global biomass of mesozooplankton by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth. This approach results in two types of estimates for the global biomass of mesozooplankton: a so called “minimum” estimate which uses the median concentration of biomass from the database, and a so called “maximum” estimate which uses the average biomass concentration. Because the distributions of values in the database are usually highly skewed by asymmetrically high values, the median and mean are loosely associated by the MAREDAT authors with a minimum and maximum estimate. The estimate based on the average value is more susceptible to biases in oversampling singular locations such as blooms of plankton species, or of coastal areas in which biomass concentrations are especially high, which might lead to an overestimate. On the other hand, the estimate based on the median biomass concentration might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. Therefore, our best estimate of the biomass of mesozooplankton is the geometric mean of the “minimum” and “maximum” estimates. Buitenhuis et al. reports a \"minimum\" estimate of 0.33 Gt C and a \"maximum\" estimate of 0.59 Gt C. 
We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "buitenhuis_estimate = gmean([0.33e15,0.59e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then use 80% of the geometric mean as an estimate for the biomass of mesozooplankton arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the mean fraction of arthropods between surface water and DCM\n", - "arth_frac = frac_mean(np.array([mean_arth_frac_dcm,mean_arth_frac_surf]))\n", - "\n", - "# Calculate the fraction of mesozooplankton biomass that is arthropod biomass\n", - "meso_arth_biomass = buitenhuis_estimate*arth_frac" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Most of the data in the MAREDAT database was collected using 300 µm nets, and thus some of the lower size fraction of mesozooplankton was not collected. To correct for this fact, we use a relation between biomass estimated using 200 µm nets and 300 µm nets [O'Brien 2005](https://www.st.nmfs.noaa.gov/copepod/2005/documents/fspo73_abbreviated.pdf). The relation is: $$ B_{300} = 0.619× B_{200}$$ Where $B_{300}$ is the biomass sampled with 300 µm nets and $B_{200}$ is the biomass sampled with 200 µm nets. 
We correct for this factor to get our best estimate for the biomass of mesozooplankton arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of mesozooplankton arthropods is ≈0.56 Gt C\n" - ] - } - ], - "source": [ - "# Correct for the use of 300 µm nets when sampling mesozooplankton biomass\n", - "meso_arth_biomass /= 0.619\n", - "\n", - "print('Our best estimate for the biomass of mesozooplankton arthropods is ≈%.2f Gt C' % (meso_arth_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Macrozooplankton\n", - "Some arthropods are also included in the macrozooplankton size category (zooplankton between 2 and 10 mm). Macrozooplankton contains organisms from many phyla such as arthropods, cnidarians, chordates, annelids, molluscs, ctenophores and representatives from Chaetognatha (a phylum of pelagic worms). To estimate the biomass of macrozooplankton arthropods, we first estimate the total biomass of macrozooplankton, and then estimate the fraction of this total biomass that is contributed by arthropods.\n", - "\n", - "To estimate the total biomass of macrozooplankton, we rely on data from the MAREDAT database. We use the estimates of macrozooplankton biomass Buitenhuis et al. generated from the MAREDAT database. To generate these estimates, Buitenhuis et al. followed the same procedure as we detailed in the mesozooplankton section above. Buitenhuis et al. provides “minimum” and “maximum” estimates of the total biomass of macrozooplankton, which are 0.2 Gt C and 1.5 Gt C, respectively. 
We use the geometric mean of those estimates as our best estimate for the biomass of macrozooplankton:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of macrozooplankton is ≈0.5 Gt C\n" - ] - } - ], - "source": [ - "macro_biomass = gmean([0.2e15,1.5e15])\n", - "print('Our best estimate for the biomass of macrozooplankton is ≈%.1f Gt C' %(macro_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From this total biomass we subtract our estimates for the biomass of pteropods, which are in the same size range as macrozooplankton. For details on the estimate of the biomass of pteropods see the molluscs section in the Supplementary Information. We estimate the total biomass of pteropods at 0.15 Gt C. \n", - "\n", - "We also subtract from the total biomass of macrozooplankton the contribution by gelatinous zooplankton which also contains some species in the same size range as macrozooplankton. We estimate a global biomass of ≈0.04 Gt C (for details on the estimate of the biomass of gelatinous plankton see the cnidarians section in the Supplementary Information)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "our best estimate for the total biomass of macrozooplankton arthropods is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of macrozooplankton arthropods by\n", - "# subtacting the biomass of pteropods and gelatinous zooplankton\n", - "# from the total biomass of macrozooplankton\n", - "macro_arth_biomass = macro_biomass -0.15e15 -0.04e15\n", - "print('our best estimate for the total biomass of macrozooplankton arthropods is ≈%.1f Gt C' %(macro_arth_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum up the biomass of arthropods in the mesozooplankton and macrozooplankton size fractions as our best estimate for the biomass of marine arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of marine arthropods is 0.92 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = meso_arth_biomass+macro_arth_biomass\n", - "print('Our best estimate for the biomass of marine arthropods is %.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "We discuss the uncertainty of estimates based on the MAREDAT database in a dedicated section in the Supplementary Information. We crudely project an uncertainty of about an order of magnitude." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb b/animals/arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb deleted file mode 100644 index cb1ba33..0000000 --- a/animals/arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb +++ /dev/null @@ -1,1604 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of terrestrial arthropods\n", - "To estimate the biomass of terrestrial arthropods, we rely on two parallel methods - a method based on average biomass densities of arthropods extrapolated to the global ice-free land surface, and a method based on estimates of the average carbon content of a characteristic arthropod and the total number of terrestrial arthropods.\n", - "\n", - "## Average biomass densities method\n", - "We collected values from the literature on the biomass densities of arthropods per unit area. We assume, based on [Stork et al.](http://dx.doi.org/10.1007/978-94-009-1685-2_1), most of the biomass is located in the soil, litter or in the canopy of trees. We thus estimate a mean biomass density of arthropods in soil, litter and in canopies, sum those biomass densities and apply them across the entire ice-free land surface.\n", - "\n", - "### Litter arthropod biomass\n", - "We compiled a list of values from several different habitats. Most of the measurements are from forests and savannas. 
For some of the older studies, we did not have access to the original data, but to a summary of the data made by two main studies: [Gist & Crossley](http://dx.doi.org/10.2307/2424109) and [Brockie & Moeed](http://dx.doi.org/10.1007/BF00377108). Here is a sample of the data from Gist & Crossley:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupDensity of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]StudySiteHabitatLink
0Diplopoda14.06.13.2Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
1Orthoptera0.90.20.1Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
2Medium Araneida0.50.10.0Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
3Coleoptera1.11.00.3Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
4Cryptostigmata22,433.01.50.7Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "0 Diplopoda 14.0 6.1 \n", - "1 Orthoptera 0.9 0.2 \n", - "2 Medium Araneida 0.5 0.1 \n", - "3 Coleoptera 1.1 1.0 \n", - "4 Cryptostigmata 22,433.0 1.5 \n", - "\n", - " Dry weight [g m^-2] Study Site Habitat \\\n", - "0 3.2 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "1 0.1 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "2 0.0 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "3 0.3 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "4 0.7 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.2307/2424109 \n", - "1 http://dx.doi.org/10.2307/2424109 \n", - "2 http://dx.doi.org/10.2307/2424109 \n", - "3 http://dx.doi.org/10.2307/2424109 \n", - "4 http://dx.doi.org/10.2307/2424109 " - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# Load global stocks data\n", - "gc_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Gist & Crossley',skiprows=1)\n", - "gc_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is a sample from Brockie & Moeed:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupWet weight [g m^-2]SiteHabitatStudyLink
0Collembola0.2Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
1Coleoptera1.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
2Acarina3.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
3Arachnida0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
4Chilopoda0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Wet weight [g m^-2] Site \\\n", - "0 Collembola 0.2 Orongorongo Valley, New Zealand \n", - "1 Coleoptera 1.0 Orongorongo Valley, New Zealand \n", - "2 Acarina 3.0 Orongorongo Valley, New Zealand \n", - "3 Arachnida 0.3 Orongorongo Valley, New Zealand \n", - "4 Chilopoda 0.3 Orongorongo Valley, New Zealand \n", - "\n", - " Habitat Study \\\n", - "0 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "1 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "2 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "3 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "4 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.1007/BF00377108 \n", - "1 http://dx.doi.org/10.1007/BF00377108 \n", - "2 http://dx.doi.org/10.1007/BF00377108 \n", - "3 http://dx.doi.org/10.1007/BF00377108 \n", - "4 http://dx.doi.org/10.1007/BF00377108 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bm_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Brockie & Moeed',skiprows=1)\n", - "bm_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the sum of biomass of all the groups of arthropods in each study to provide an estimate for the total biomass density of arthropods in litter:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Brockie & Moeed:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Wet weight [g m^-2]
Study
Bornebusch (1930) 16.2
Bornebusch (1930) 21.9
Bornebusch (1930) 31.9
Brockie & Moeed (1986)14.5
Kitazawa (1967) 114.2
Kitazawa (1967) 27.1
\n", - "
" - ], - "text/plain": [ - " Wet weight [g m^-2]\n", - "Study \n", - "Bornebusch (1930) 1 6.2\n", - "Bornebusch (1930) 2 1.9\n", - "Bornebusch (1930) 3 1.9\n", - "Brockie & Moeed (1986) 14.5\n", - "Kitazawa (1967) 1 14.2\n", - "Kitazawa (1967) 2 7.1" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gc_study = gc_data.groupby('Study').sum()\n", - "bm_study = bm_data.groupby('Study').sum()\n", - "\n", - "print('The estimate from Brockie & Moeed:')\n", - "bm_study" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Gist & Crossley:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]
Study
Bornebusch (1930) 12,145.56.31.9
Bornebusch (1930) 2265.01.30.4
Bornebusch (1930) 34,547.52.50.7
Gist & Crossley (1975)30,580.512.04.6
Kitazawa (1967) 1870,214.014.54.4
Moulder et al. (1970)40,887.01.60.5
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 6.3 \n", - "Bornebusch (1930) 2 265.0 1.3 \n", - "Bornebusch (1930) 3 4,547.5 2.5 \n", - "Gist & Crossley (1975) 30,580.5 12.0 \n", - "Kitazawa (1967) 1 870,214.0 14.5 \n", - "Moulder et al. (1970) 40,887.0 1.6 \n", - "\n", - " Dry weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 1.9 \n", - "Bornebusch (1930) 2 0.4 \n", - "Bornebusch (1930) 3 0.7 \n", - "Gist & Crossley (1975) 4.6 \n", - "Kitazawa (1967) 1 4.4 \n", - "Moulder et al. (1970) 0.5 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print('The estimate from Gist & Crossley:')\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In cases where data is conflicting between the two studies, we calculate the mean. We merge the data from the papers to generate a list of estimates on the total biomass density of arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Dry weight [g m^-2]Wet weight [g m^-2]
Study
Bornebusch (1930) 12,145.51.96.2
Bornebusch (1930) 2265.00.41.6
Bornebusch (1930) 34,547.50.72.2
Brockie & Moeed (1986)nannan14.5
Gist & Crossley (1975)30,580.54.612.0
Kitazawa (1967) 1870,214.04.414.4
Kitazawa (1967) 2nannan7.1
Moulder et al. (1970)40,887.00.51.6
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 1.9 \n", - "Bornebusch (1930) 2 265.0 0.4 \n", - "Bornebusch (1930) 3 4,547.5 0.7 \n", - "Brockie & Moeed (1986) nan nan \n", - "Gist & Crossley (1975) 30,580.5 4.6 \n", - "Kitazawa (1967) 1 870,214.0 4.4 \n", - "Kitazawa (1967) 2 nan nan \n", - "Moulder et al. (1970) 40,887.0 0.5 \n", - "\n", - " Wet weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 6.2 \n", - "Bornebusch (1930) 2 1.6 \n", - "Bornebusch (1930) 3 2.2 \n", - "Brockie & Moeed (1986) 14.5 \n", - "Gist & Crossley (1975) 12.0 \n", - "Kitazawa (1967) 1 14.4 \n", - "Kitazawa (1967) 2 7.1 \n", - "Moulder et al. (1970) 1.6 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concat the data from the two studies\n", - "conc = pd.concat([gc_study,bm_study])\n", - "conc_mean = conc.groupby(conc.index).mean()\n", - "conc_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate from the dry weight and wet weight estimates the biomass density in g C $m^{-2}$ by assuming 70% water content and 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Bornebusch (1930) 1 0.9\n", - "Bornebusch (1930) 2 0.2\n", - "Bornebusch (1930) 3 0.4\n", - "Brockie & Moeed (1986) 2.2\n", - "Gist & Crossley (1975) 2.3\n", - "Kitazawa (1967) 1 2.2\n", - "Kitazawa (1967) 2 1.1\n", - "Moulder et al. 
(1970) 0.2\n", - "Name: Biomass density [g C m^-2], dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fill places with no dry weight estimate with 30% of the wet weight estimate \n", - "conc_mean['Dry weight [g m^-2]'].fillna(conc_mean['Wet weight [g m^-2]']*0.3,inplace=True)\n", - "\n", - "# Calculate carbon biomass as 50% of dry weight\n", - "conc_mean['Biomass density [g C m^-2]'] = conc_mean['Dry weight [g m^-2]']/2\n", - "conc_mean['Biomass density [g C m^-2]']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the different studies as our best estimate of the biomass density of litter arthropods." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in litter is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "litter_biomass_density = gmean(conc_mean.iloc[0:5,3])\n", - "print('Our best estimate for the biomass density of arthropods in litter is ≈%.0f g C m^-2' %litter_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Soil arthropod biomass\n", - "As our source for estimating the biomass of soil arthropods, we use these data collected from the literature, which are detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Fragoso et al. (1999)1.8Several locationsForesthttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Fragoso et al. (1999)1.8Several locationsSavannahttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Stork (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Stork (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Fragoso et al. (1999) Forest \n", - "Fragoso et al. (1999) Savanna \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Fragoso et al. (1999) Soil \n", - "Fragoso et al. (1999) Soil \n", - "Stork (1996) Soil \n", - "\n", - " Remarks \n", - "Reference \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Stork (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data\n", - "soil_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Soil',index_col='Reference')\n", - "soil_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimate for the biomass density of arthropods in soils:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in soils is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of the biomass density of soil arthropods\n", - "soil_biomass_density = gmean(soil_data['Biomass density [g C m^-2]'])\n", - "\n", - "print('Our best estimate for the biomass density of arthropods in soils is ≈%.0f g C m^-2' %soil_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we sum the biomass density of soil and litter arthropods, we arrive at an estimate of ≈2 g C m^-2, which is inline with the data from Kitazawa et al. of 1-2 g C m^-2." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Canopy arthropod biomass\n", - "Data on the biomass density of canopy arthropods is much less abundant. We extracted from the literature the following values:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Ellwood & Foster (2004)0.4Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 3776 g ha^-1 dry bioma...
Dial et al. (2006)1.2BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 23.6 kg dry weight per ha.\\n
Stork, N. E. (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1CanopyFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 0.4 Borneo \n", - "Dial et al. (2006) 1.2 Borneo \n", - "Stork, N. E. (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "Stork, N. E. (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "Stork, N. E. (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Dial et al. (2006) Canopy \n", - "Stork, N. E. (1996) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 3776 g ha^-1 dry bioma... \n", - "Dial et al. (2006) Dial et al. Estimate 23.6 kg dry weight per ha.\\n \n", - "Stork, N. E. (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data on the biomass density of canopy arthropods\n", - "canopy_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Canopy',index_col='Reference')\n", - "canopy_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates for the biomass density of arthropods in canopies:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in canopies is ≈0.7 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of biomass densitiy of canopy arthropods\n", - "canopy_biomass_density = gmean(canopy_data['Biomass density [g C m^-2]'])\n", - "print('Our best estimate for the biomass density of arthropods in canopies is ≈%.1f g C m^-2' %canopy_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate for the biomass of arthropods using estimates of biomass densities, we sum the estimates for the biomass density of arthropods in soils and in canopies, and apply this density over the entire ice-free land surface of $1.3×10^{14} \\: m^2$:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Sum the biomass densities of arthropods in soils and in canopies\n", - "total_denisty = litter_biomass_density+soil_biomass_density+canopy_biomass_density\n", - "\n", - "# Apply the average biomass density across the entire ice-free land surface\n", - "method1_estimate = total_denisty*1.3e14\n", - "\n", 
- "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method1_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "In this method, in order to estimate the total biomass of arthropods, we calculate the carbon content of a characteristic arthropod, and multiply this carbon content by an estimate for the total number of arthropods.\n", - "We rely both on data from Gist & Crossley which detail the total number of arthropods per unit area as well as the total biomass of arthropods per unit area for serveal studies. Form this data we can calculate the characteristic carbon content of a single arthropod assuming 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]Carbon content [g C per individual]
Study
Bornebusch (1930) 12.1e+036.3e+001.9e+004.4e-04
Bornebusch (1930) 22.6e+021.3e+004.0e-017.6e-04
Bornebusch (1930) 34.5e+032.5e+007.4e-018.1e-05
Gist & Crossley (1975)3.1e+041.2e+014.6e+007.5e-05
Kitazawa (1967) 18.7e+051.5e+014.4e+002.5e-06
Moulder et al. (1970)4.1e+041.6e+004.7e-015.8e-06
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2.1e+03 6.3e+00 \n", - "Bornebusch (1930) 2 2.6e+02 1.3e+00 \n", - "Bornebusch (1930) 3 4.5e+03 2.5e+00 \n", - "Gist & Crossley (1975) 3.1e+04 1.2e+01 \n", - "Kitazawa (1967) 1 8.7e+05 1.5e+01 \n", - "Moulder et al. (1970) 4.1e+04 1.6e+00 \n", - "\n", - " Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 1.9e+00 \n", - "Bornebusch (1930) 2 4.0e-01 \n", - "Bornebusch (1930) 3 7.4e-01 \n", - "Gist & Crossley (1975) 4.6e+00 \n", - "Kitazawa (1967) 1 4.4e+00 \n", - "Moulder et al. (1970) 4.7e-01 \n", - "\n", - " Carbon content [g C per individual] \n", - "Study \n", - "Bornebusch (1930) 1 4.4e-04 \n", - "Bornebusch (1930) 2 7.6e-04 \n", - "Bornebusch (1930) 3 8.1e-05 \n", - "Gist & Crossley (1975) 7.5e-05 \n", - "Kitazawa (1967) 1 2.5e-06 \n", - "Moulder et al. (1970) 5.8e-06 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.1e}'.format\n", - "\n", - "# Calculate the carbon content of a single arthropod by dividing the dry weight by 2 (assuming 50% carbon in\n", - "# dry weight) and dividing the result by the total number of individuals\n", - "gc_study['Carbon content [g C per individual]'] = gc_study['Dry weight [g m^-2]']/2/gc_study['Density of individuals [N m^-2]']\n", - "\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the data from these studies with data from additional sources detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Carbon content [g C per individual]SiteHabitatLinkCommentsRemarks
Reference
Ellwood & Foster (2004)1.0e-03Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 88 g dry weight per fe...
Stork (1996)3.5e-04SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 - The results of this study indica...
Dial et al. (2006)5.0e-04BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 2.4*10^4 g dry weight in ...
\n", - "
" - ], - "text/plain": [ - " Carbon content [g C per individual] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 1.0e-03 Borneo \n", - "Stork (1996) 3.5e-04 Seram \n", - "Dial et al. (2006) 5.0e-04 Borneo \n", - "\n", - " Habitat \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Stork (1996) Soil \n", - "Dial et al. (2006) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 88 g dry weight per fe... \n", - "Stork (1996) From page 6 - The results of this study indica... \n", - "Dial et al. (2006) Dial et al. Estimate 2.4*10^4 g dry weight in ... 
" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data sources\n", - "other_carbon_content_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Carbon content',index_col='Reference')\n", - "\n", - "other_carbon_content_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the difference studies and use it as our best estimate for the carbon content of a characteristic arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of a characteristic arthropod is 1.2e-04 g C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates from the different studies on the average carbon content of a single arthropod.\n", - "average_carbon_content = gmean(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('Our best estimate for the carbon content of a characteristic arthropod is %.1e g C' % average_carbon_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of arthropods using the characteristic carbon content method, we multiply our best estiamte of the carbon content of a single arthropod by an estimate of the total number of arthropods made by [Williams](http://dx.doi.org/10.1086/282115). Williams estiamted a total of $~10^{18}$ individual insects in soils. We assume this estimate of the total number of insects is close to the total number of arthropods (noting that in this estimate Williams also included collembola which back in 1960 were considered insects, and are usually very numerous because of their small size). 
To estimate the total biomass of arthropods, we multiply the carbon content of a single arthropod by the estimate for the total number of arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "# Total number of insects estimated by Williams\n", - "tot_num_arthropods = 1e18\n", - "\n", - "# Calculate the total biomass of arthropods\n", - "method2_estimate = average_carbon_content*tot_num_arthropods\n", - "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method2_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the biomass of arthropods is the geometric mean of the estimates from the two methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates using the two methods\n", - "best_estimate = gmean([method1_estimate,method2_estimate])\n", - "print('Our best estimate for the biomass of terrestrial arthropods is ≈%.1f Gt C' %(best_estimate/1e15)) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we compile a collection of the different sources of uncertainty, and combine them to project the total uncertainty. 
We survey the interstudy uncertainty for estimates within each method, the total uncertainty of each method and the uncertainty of the geometric mean of the values from the two methods.\n", - "\n", - "## Average biomass densities method\n", - "We calculate the 95% confidence interval for the geometric mean of the biomass densities reported for soil and canopy arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈2.0-fold\n", - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈1.8-fold\n", - "The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈1.9-fold\n" - ] - } - ], - "source": [ - "litter_CI = geo_CI_calc(conc_mean['Biomass density [g C m^-2]'])\n", - "soil_CI = geo_CI_calc(soil_data['Biomass density [g C m^-2]'])\n", - "canopy_CI = geo_CI_calc(canopy_data['Biomass density [g C m^-2]'])\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %litter_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %soil_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈%.1f-fold' %canopy_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the uncertainty of the global biomass estimate using the average biomass density method, we propagate the uncertainties of the soil and canopy biomass density:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval biomass of arthropods using the biomass densities method is ≈1.5-fold\n" - ] - } - 
], - "source": [ - "method1_CI = CI_sum_prop(estimates=np.array([litter_biomass_density,soil_biomass_density,canopy_biomass_density]),mul_CIs=np.array([litter_CI,soil_CI,canopy_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the biomass densities method is ≈%.1f-fold' %method1_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "As a measure of the uncertainty of the estimate of the total biomass of arthropods using the average carbon content method, we calculate the 95% confidence interval of the geometric mean of the estimates from different studies of the carbon content of a single arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval of the carbon content of a single arthropod is ≈4.1-fold\n" - ] - } - ], - "source": [ - "carbon_content_CI = geo_CI_calc(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('The 95 percent confidence interval of the carbon content of a single arthropod is ≈%.1f-fold' %carbon_content_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine this uncertainty of the average carbon content of a single arthropod with the uncertainty reported in Williams on the total number of insects of about one order of magnitude. This provides us with a measure of the uncertainty of the estimate of the biomass of arthropods using the average carbon content method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈14.9-fold\n" - ] - } - ], - "source": [ - "# The uncertainty of the total number of insects from Williams\n", - "tot_num_arthropods_CI = 10\n", - "\n", - "# Combine the uncertainties of the average carbon content of a single arthropod and the uncertainty of \n", - "# the total number of arthropods\n", - "method2_CI = CI_prod_prop(np.array([carbon_content_CI,tot_num_arthropods_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈%.1f-fold' %method2_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the 95% confidence interval of the geometric mean of the estimates of the biomass of arthropods using the average biomass density or the average carbon content:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈3.0\n" - ] - } - ], - "source": [ - "inter_CI = geo_CI_calc(np.array([method1_estimate,method2_estimate]))\n", - "print('The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈%.1f' % inter_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we take the highest uncertainty among the collection of uncertainties we generate, which is the ≈15-fold uncertainty of the average carbon content method. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈14.9-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([inter_CI,method1_CI,method2_CI])\n", - "print('Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The biomass of termites\n", - "As we state in the Supplementary Information, there are some groups of terrestrial arthropods for which better estimates are available. An example is the biomass of termites. We use the data in [Sanderson](http://dx.doi.org/10.1029/96GB01893) to estimate the global biomass of termites:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate of the total biomass of termites based on Sanderson is ≈0.07 Gt C\n" - ] - } - ], - "source": [ - "# Load termite data\n", - "termite_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Sanderson', skiprows=1, index_col=0)\n", - "\n", - "# Multiply biomass density by biome area and sum over biomes\n", - "termite_biomass = (termite_data['Area [m^2]']* termite_data['Biomass density [g wet weight m^-2]']).sum()\n", - "\n", - "# Calculate carbon mass assuming carbon is 15% of wet weight\n", - "termite_biomass *= 0.15\n", - "\n", - "print('The estimate of the total biomass of termites based on Sanderson is ≈%.2f Gt C' %(termite_biomass/1e15))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/marine_arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb b/animals/arthropods/marine_arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb deleted file mode 100644 index c0c7cf2..0000000 --- a/animals/arthropods/marine_arthropods/.ipynb_checkpoints/marine_arthropods-checkpoint.ipynb +++ /dev/null @@ -1,384 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from excel_utils import *\n", - "\n", - "# Define function that runs a jypyter notebook and saves the results to the same file\n", - "def run_nb(path):\n", - " import nbformat\n", - " from nbconvert.preprocessors import ExecutePreprocessor\n", - " import os\n", - " with open(path) as f:\n", - " nb = nbformat.read(f, as_version=4)\n", - " ep = ExecutePreprocessor(timeout=6000, kernel_name='python3')\n", - " ep.preprocess(nb, {'metadata': {'path': os.path.dirname(path)}})\n", - " with open(path, 'wt') as f:\n", - " nbformat.write(nb, f)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of marine arthropods\n", - "To estimate the biomass of marine arthropods, we rely on data from the marine ecosystem biomass data (MAREDAT) initiative. The MAREDAT database contains measurements of the biomass concentration for each plankton group. 
From this database [Buitenhuis et al.](https://doi.org/10.5194/essd-5-227-2013) generate estimates for the global biomass of each plankton group by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth.\n", - "Buitenhuis et al. report two estimates, one based on the arithmetic mean of concentrations at each depth, and one based on the median concentrations at each depth. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are in coastal waters, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the median might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies.\n", - "\n", - "The data in the MAREDAT database is divided into plankton size classes: microzooplankton (zooplankton between 5 and 200 µm in diameter), mesozooplankton (zooplankton between 200 and 2000 µm in diameter) and macrozooplankton (zooplankton between 2 and 10 mm). We are interested in the biomass of arthropods in each class.\n", - "\n", - "## Microzooplankton\n", - "Microzooplankton was defined in the MAREDAT database so as to exclude copepod biomass, and thus its contribution to the total biomass of marine arthropods is negligible.\n", - "\n", - "## Mesozooplankton\n", - "Mesozooplankton might contain several different kinds of animal and protist taxa. We argue that the main contribution to the mesozooplankton category in the MAREDAT database comes from arthropods (mainly copepods). 
To substantiate this claim, we rely on 18S rDNA sequencing data from the *Tara* Oceans campaign reported in [de Vargas et al.](http://dx.doi.org/10.1126/science.1261605). In figure W10A in the companion [website](http://taraoceans.sb-roscoff.fr/EukDiv/#figW10) to the paper the authors detail the abundance of 18S rDNA reads for different animal and protist taxa:\n", - "\n", - "\n", - "### Can 18S rDNA data be used to estimate biomass contribution?\n", - "It is not obvious that data on the relative abundance of 18S sequences can be used in order to estimate the relative biomass contribution of different taxa. We provide two independent lines of evidence for the legitimacy of using the 18S data in de Vargas et al. in order to claim that arthropods dominate the biomass of mesozooplankton in the MAREDAT data.\n", - "\n", - "The first line of evidence is in figure W3 in the companion [website](http://taraoceans.sb-roscoff.fr/EukDiv/#figureW3): \n", - "\n", - "This figure shows good correlations between the rDNA content in a cell and the cell size, as well as correlation between microscopy data and 18S rDNA sequencing data.\n", - "\n", - "The second line of evidence is a comparison of the relative fraction of Rhizaria in the 18S rDNA data with \n", - "data from in-situ imaging ([Biard et al.](http://dx.doi.org/10.1038/nature17652)). 
We estimate the average relative fraction of Rhizaria in the 18S rDNA sequencing data:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is 40%\n", - "The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is 35%\n" - ] - } - ], - "source": [ - "# Load 18S sequecing data\n", - "seq_data = pd.read_excel('marine_arthropods_data.xlsx',sheet_name='de Vargas',skiprows=1)\n", - "\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in surface waters is ' + '{:,.0f}%'.format(seq_data['Rhizaria surface'].mean()*100))\n", - "print('The average fraction of Rhizaria in 18S rDNA sequencing data in the deep chlorophyll maximum is ' + '{:,.0f}%'.format(seq_data['Rhizaria DCM'].mean()*100))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These fractions of 35-40% are very close to the average fraction reported by Biard et al. using imaging data of ≈33%.\n", - "\n", - "### The biomass contribution of arthropods to mesozooplankton data in MAREDAT\n", - "To calculate the contribution of arthropods to the biomass data in the MAREDAT database, we assume the representation of Rhizaria in the MAREDAT data is limited, as Biard et al. indicated that they are usually undersampled because many of them are delicate and are severely damaged by plankton nets or fixatives used in surveys such as the ones used to build the MAREDAT. 
Therefore, we calculate the relative contribution of arthropods to the total population of mesozooplankton excluding Rhizaria:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average fraction of arthropods out of the total biomass of mesozooplankton in surface waters excluding Rhizaria is 79%\n", - "The average fraction of arthropods out of the total biomass of mesozooplankton in the deep chlorophyll maximum excluding Rhizaria is 79%\n" - ] - } - ], - "source": [ - "# Define the relative fraction of arthropods out of the total mesozooplankton excluding Rhizaria\n", - "arth_frac_surf = seq_data['Arthropod surface']/(1-seq_data['Rhizaria surface'])\n", - "arth_frac_dcm = seq_data['Arthropod DCM']/(1-seq_data['Rhizaria DCM'])\n", - "\n", - "# Calculate the mean fraction of arthropods in surface waters and the DCM\n", - "mean_arth_frac_surf = frac_mean(arth_frac_surf)\n", - "mean_arth_frac_dcm = frac_mean(arth_frac_dcm)\n", - "print('The average fraction of arthropods out of the total biomass of mesozooplankton in surface waters excluding Rhizaria is '+'{:,.0f}%'.format(mean_arth_frac_surf*100))\n", - "print('The average fraction of arthropods out of the total biomass of mesozooplankton in the deep chlorophyll maximum excluding Rhizaria is '+'{:,.0f}%'.format(mean_arth_frac_dcm*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Overall, we use ≈80% for the fraction of arthropods out of the total biomass of mesozooplankton in the MAREDAT database.\n", - "\n", - "To estimate the biomass of mesozooplankton arthropods, we rely on the global estimates made by [Buitenhuis et al.](http://search.proquest.com/openview/0e8e5672fa28111df473268e13f2f757/1?pq-origsite=gscholar&cbl=105729) based on the MAREDAT data. Buitenhuis et al. 
generated two estimates for the global biomass of mesozooplankton by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth. This approach results in two types of estimates for the global biomass of mesozooplankton: a so-called “minimum” estimate which uses the median concentration of biomass from the database, and a so-called “maximum” estimate which uses the average biomass concentration. Because the distributions of values in the database are usually highly skewed by asymmetrically high values, the median and mean are loosely associated by the MAREDAT authors with a minimum and maximum estimate. The estimate based on the average value is more susceptible to biases in oversampling singular locations such as blooms of plankton species, or of coastal areas in which biomass concentrations are especially high, which might lead to an overestimate. On the other hand, the estimate based on the median biomass concentration might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. Therefore, our best estimate of the biomass of mesozooplankton is the geometric mean of the “minimum” and “maximum” estimates. Buitenhuis et al. report a \"minimum\" estimate of 0.33 Gt C and a \"maximum\" estimate of 0.59 Gt C. 
We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "buitenhuis_estimate = gmean([0.33e15,0.59e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then use 80% of the geometric mean as an estimate for the biomass of mesozooplankton arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the mean fraction of arthropods between surface water and DCM\n", - "arth_frac = frac_mean(np.array([mean_arth_frac_dcm,mean_arth_frac_surf]))\n", - "\n", - "# Calculate the fraction of mesozooplankton biomass that is arthropod biomass\n", - "meso_arth_biomass = buitenhuis_estimate*arth_frac" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Most of the data in the MAREDAT database was collected using 300 µm nets, and thus some of the lower size fraction of mesozooplankton was not collected. To correct for this fact, we use a relation between biomass estimated using 200 µm nets and 300 µm nets [O'Brien 2005](https://www.st.nmfs.noaa.gov/copepod/2005/documents/fspo73_abbreviated.pdf). The relation is: $$ B_{300} = 0.619× B_{200}$$ Where $B_{300}$ is the biomass sampled with 300 µm nets and $B_{200}$ is the biomass sampled with 200 µm nets. 
We correct for this factor to get our best estimate for the biomass of mesozooplankton arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of mesozooplankton arthropods is ≈0.56 Gt C\n" - ] - } - ], - "source": [ - "# Correct for the use of 300 µm nets when sampling mesozooplankton biomass\n", - "meso_arth_biomass /= 0.619\n", - "\n", - "print('Our best estimate for the biomass of mesozooplankton arthropods is ≈%.2f Gt C' % (meso_arth_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Macrozooplankton\n", - "Some arthropods are also included in the macrozooplankton size category (zooplankton between 2 and 10 mm). Macrozooplankton contains organisms from many phyla such as arthropods, cnidarians, chordates, annelids, molluscs, ctenophores and representatives from Chaetognatha (a phylum of pelagic worms). To estimate the biomass of macrozooplankton arthropods, we first estimate the total biomass of macrozooplankton, and then estimate the fraction of this total biomass that is contributed by arthropods.\n", - "\n", - "To estimate the total biomass of macrozooplankton, we rely on data from the MAREDAT database. We use the estimates of macrozooplankton biomass Buitenhuis et al. generated from the MAREDAT database. To generate these estimates, Buitenhuis et al. followed the same procedure as we detailed in the mesozooplankton section above. Buitenhuis et al. provide “minimum” and “maximum” estimates of the total biomass of macrozooplankton, which are 0.2 Gt C and 1.5 Gt C, respectively. 
We use the geometric mean of those estimates as our best estimate for the biomass of macrozooplankton:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of macrozooplankton is ≈0.5 Gt C\n" - ] - } - ], - "source": [ - "macro_biomass = gmean([0.2e15,1.5e15])\n", - "print('Our best estimate for the biomass of macrozooplankton is ≈%.1f Gt C' %(macro_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From this total biomass we subtract our estimates for the biomass of pteropods, which are in the same size range as macrozooplankton. For details on the estimate of the biomass of pteropods see the molluscs section in the Supplementary Information. We estimate the total biomass of pteropods at 0.15 Gt C. \n", - "\n", - "We also subtract from the total biomass of macrozooplankton the contribution by gelatinous zooplankton which also contains some species in the same size range as macrozooplankton. We estimate a global biomass of ≈0.04 Gt C (for details on the estimate of the biomass of gelatinous plankton see the cnidarians section in the Supplementary Information)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "our best estimate for the total biomass of macrozooplankton arthropods is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of macrozooplankton arthropods by\n", - "# subtracting the biomass of pteropods and gelatinous zooplankton\n", - "# from the total biomass of macrozooplankton\n", - "\n", - "# Load biomass estimates for pteropods and gelatinous zooplankton\n", - "other_macrozooplankton = pd.read_excel('marine_arthropods_data.xlsx','Other macrozooplankton')\n", - "\n", - "# If the other zooplankton biomass estimate is empty, run the scripts \n", - "if(other_macrozooplankton.shape[0]<2):\n", - " run_nb('../../cnidarians/cnidarians.ipynb')\n", - " run_nb('../../molluscs/molluscs.ipynb')\n", - " other_macrozooplankton = pd.read_excel('marine_arthropods_data.xlsx','Other macrozooplankton')\n", - "\n", - "macro_arth_biomass = macro_biomass - other_macrozooplankton['Value'].sum()\n", - "print('our best estimate for the total biomass of macrozooplankton arthropods is ≈%.1f Gt C' %(macro_arth_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum up the biomass of arthropods in the mesozooplankton and macrozooplankton size fractions as our best estimate for the biomass of marine arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of marine arthropods is 0.9 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = meso_arth_biomass+macro_arth_biomass\n", - "print('Our best estimate for the biomass of marine arthropods is %.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "We discuss the uncertainty of estimates based on 
the MAREDAT database in a dedicated section in the Supplementary Information. We crudely project an uncertainty of about an order of magnitude." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# We project an uncertainty of an order of magnitude (see MAREDAT consistency check section)\n", - "mul_CI = 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of marine arthropods\n", - "We consider only the mesozooplankton as they are the smallest group of marine arthropods (by the definitions of the MAREDAT database they also contain microzooplankton). To estimate the total number of marine arthropods, we divide our estimate for the total biomass of mesozooplankton by an estimate for the characteristic carbon content of an individual copepod, as copepods dominate the mesozooplankton biomass. As the basis of our estimate for the characteristic carbon content of a single copepod, we rely on data from [Viñas et al.](http://dx.doi.org/10.1590/S1679-87592010000200008) and [Dai et al.](https://doi.org/10.1016/j.jmarsys.2015.11.004), which range from 0.15 µg C to 100 µg C per individual. We use the geometric mean of this range, which is ≈4 µg C per individual, as our best estimate of the carbon content of a single copepod." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of marine arthropods is ≈1e+20\n" - ] - } - ], - "source": [ - "# The carbon content of copepods\n", - "copepod_carbon_content = 4e-6\n", - "\n", - "# Calculate the total number of marine arthropods\n", - "marine_arth_num = meso_arth_biomass/copepod_carbon_content\n", - "\n", - "print('Our best estimate for the total number of marine arthropods is ≈%.0e' % marine_arth_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the animal biomass data\n", - "old_results = pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Marine arthropods',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,mul_CI)\n", - "\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Marine arthropods'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Marine arthropods'), \n", - " col=['Number of individuals'],\n", - " values=marine_arth_num,\n", - " path='../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb b/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb deleted file mode 100644 index ee59dc8..0000000 --- a/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial arthropods-checkpoint.ipynb +++ /dev/null @@ -1,1605 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of terrestrial arthropods\n", - "To estimate the biomass of terrestrial arthropods, we rely on two parallel methods - a method based on average biomass densities of arthropods extrapolated to the global ice-free land surface, and a method based on estimates of the average carbon content of a characteristic arthropod and the total number of terrestrial arthropods.\n", - "\n", - "## Average biomass densities method\n", - "We collected values from the literature on the biomass densities of arthropods per unit area. 
We assume, based on [Stork et al.](http://dx.doi.org/10.1007/978-94-009-1685-2_1), that most of the biomass is located in the soil, litter or in the canopy of trees. We thus estimate a mean biomass density of arthropods in soil, litter and in canopies, sum those biomass densities and apply them across the entire ice-free land surface.\n", - "\n", - "### Litter arthropod biomass\n", - "We compiled a list of values from several different habitats. Most of the measurements are from forests and savannas. For some of the older studies, we did not have access to the original data, but to a summary of the data made by two main studies: [Gist & Crossley](http://dx.doi.org/10.2307/2424109) and [Brockie & Moeed](http://dx.doi.org/10.1007/BF00377108). Here is a sample of the data from Gist & Crossley:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupDensity of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]StudySiteHabitatLink
0Diplopoda14.06.13.2Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
1Orthoptera0.90.20.1Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
2Medium Araneida0.50.10.0Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
3Coleoptera1.11.00.3Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
4Cryptostigmata22,433.01.50.7Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "0 Diplopoda 14.0 6.1 \n", - "1 Orthoptera 0.9 0.2 \n", - "2 Medium Araneida 0.5 0.1 \n", - "3 Coleoptera 1.1 1.0 \n", - "4 Cryptostigmata 22,433.0 1.5 \n", - "\n", - " Dry weight [g m^-2] Study Site Habitat \\\n", - "0 3.2 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "1 0.1 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "2 0.0 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "3 0.3 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "4 0.7 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.2307/2424109 \n", - "1 http://dx.doi.org/10.2307/2424109 \n", - "2 http://dx.doi.org/10.2307/2424109 \n", - "3 http://dx.doi.org/10.2307/2424109 \n", - "4 http://dx.doi.org/10.2307/2424109 " - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper/')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# Load global stocks data\n", - "gc_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Gist & Crossley',skiprows=1)\n", - "gc_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is a sample from Brockie & Moeed:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupWet weight [g m^-2]SiteHabitatStudyLink
0Collembola0.2Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
1Coleoptera1.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
2Acarina3.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
3Arachnida0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
4Chilopoda0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Wet weight [g m^-2] Site \\\n", - "0 Collembola 0.2 Orongorongo Valley, New Zealand \n", - "1 Coleoptera 1.0 Orongorongo Valley, New Zealand \n", - "2 Acarina 3.0 Orongorongo Valley, New Zealand \n", - "3 Arachnida 0.3 Orongorongo Valley, New Zealand \n", - "4 Chilopoda 0.3 Orongorongo Valley, New Zealand \n", - "\n", - " Habitat Study \\\n", - "0 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "1 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "2 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "3 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "4 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.1007/BF00377108 \n", - "1 http://dx.doi.org/10.1007/BF00377108 \n", - "2 http://dx.doi.org/10.1007/BF00377108 \n", - "3 http://dx.doi.org/10.1007/BF00377108 \n", - "4 http://dx.doi.org/10.1007/BF00377108 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bm_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Brockie & Moeed',skiprows=1)\n", - "bm_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the sum of biomass of all the groups of arthropods in each study to provide an estimate for the total biomass density of arthropods in litter:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Brockie & Moeed:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Wet weight [g m^-2]
Study
Bornebusch (1930) 16.2
Bornebusch (1930) 21.9
Bornebusch (1930) 31.9
Brockie & Moeed (1986)14.5
Kitazawa (1967) 114.2
Kitazawa (1967) 27.1
\n", - "
" - ], - "text/plain": [ - " Wet weight [g m^-2]\n", - "Study \n", - "Bornebusch (1930) 1 6.2\n", - "Bornebusch (1930) 2 1.9\n", - "Bornebusch (1930) 3 1.9\n", - "Brockie & Moeed (1986) 14.5\n", - "Kitazawa (1967) 1 14.2\n", - "Kitazawa (1967) 2 7.1" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gc_study = gc_data.groupby('Study').sum()\n", - "bm_study = bm_data.groupby('Study').sum()\n", - "\n", - "print('The estimate from Brockie & Moeed:')\n", - "bm_study" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Gist & Crossley:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]
Study
Bornebusch (1930) 12,145.56.31.9
Bornebusch (1930) 2265.01.30.4
Bornebusch (1930) 34,547.52.50.7
Gist & Crossley (1975)30,580.512.04.6
Kitazawa (1967) 1870,214.014.54.4
Moulder et al. (1970)40,887.01.60.5
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 6.3 \n", - "Bornebusch (1930) 2 265.0 1.3 \n", - "Bornebusch (1930) 3 4,547.5 2.5 \n", - "Gist & Crossley (1975) 30,580.5 12.0 \n", - "Kitazawa (1967) 1 870,214.0 14.5 \n", - "Moulder et al. (1970) 40,887.0 1.6 \n", - "\n", - " Dry weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 1.9 \n", - "Bornebusch (1930) 2 0.4 \n", - "Bornebusch (1930) 3 0.7 \n", - "Gist & Crossley (1975) 4.6 \n", - "Kitazawa (1967) 1 4.4 \n", - "Moulder et al. (1970) 0.5 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print('The estimate from Gist & Crossley:')\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In cases where data is coflicting between the two studies, we calculate the mean. We merge the data from the papers to generate a list of estimates on the total biomass density of arhtropods" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Dry weight [g m^-2]Wet weight [g m^-2]
Study
Bornebusch (1930) 12,145.51.96.2
Bornebusch (1930) 2265.00.41.6
Bornebusch (1930) 34,547.50.72.2
Brockie & Moeed (1986)nannan14.5
Gist & Crossley (1975)30,580.54.612.0
Kitazawa (1967) 1870,214.04.414.4
Kitazawa (1967) 2nannan7.1
Moulder et al. (1970)40,887.00.51.6
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 1.9 \n", - "Bornebusch (1930) 2 265.0 0.4 \n", - "Bornebusch (1930) 3 4,547.5 0.7 \n", - "Brockie & Moeed (1986) nan nan \n", - "Gist & Crossley (1975) 30,580.5 4.6 \n", - "Kitazawa (1967) 1 870,214.0 4.4 \n", - "Kitazawa (1967) 2 nan nan \n", - "Moulder et al. (1970) 40,887.0 0.5 \n", - "\n", - " Wet weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 6.2 \n", - "Bornebusch (1930) 2 1.6 \n", - "Bornebusch (1930) 3 2.2 \n", - "Brockie & Moeed (1986) 14.5 \n", - "Gist & Crossley (1975) 12.0 \n", - "Kitazawa (1967) 1 14.4 \n", - "Kitazawa (1967) 2 7.1 \n", - "Moulder et al. (1970) 1.6 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concat the data from the two studies\n", - "conc = pd.concat([gc_study,bm_study])\n", - "conc_mean = conc.groupby(conc.index).mean()\n", - "conc_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate from the dry weight and wet weight estimates the biomass density in g C $m^{-2}$ by assuming 70% water content and 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Bornebusch (1930) 1 0.9\n", - "Bornebusch (1930) 2 0.2\n", - "Bornebusch (1930) 3 0.4\n", - "Brockie & Moeed (1986) 2.2\n", - "Gist & Crossley (1975) 2.3\n", - "Kitazawa (1967) 1 2.2\n", - "Kitazawa (1967) 2 1.1\n", - "Moulder et al. 
(1970) 0.2\n", - "Name: Biomass density [g C m^-2], dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fill places with no dry weight estimate with 30% of the wet weight estimate \n", - "conc_mean['Dry weight [g m^-2]'].fillna(conc_mean['Wet weight [g m^-2]']*0.3,inplace=True)\n", - "\n", - "# Calculate carbon biomass as 50% of dry weight\n", - "conc_mean['Biomass density [g C m^-2]'] = conc_mean['Dry weight [g m^-2]']/2\n", - "conc_mean['Biomass density [g C m^-2]']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the different studies as our best estimate of the biomass density of litter arthropods." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in litter is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "litter_biomass_density = gmean(conc_mean.iloc[0:5,3])\n", - "print('Our best estimate for the biomass density of arthropods in litter is ≈%.0f g C m^-2' %litter_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Soil arthropod biomass\n", - "As our source for estimating the biomass of soil arthropods, we use these data collected from the literature, which are detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Fragoso et al. (1999)1.8Several locationsForesthttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Fragoso et al. (1999)1.8Several locationsSavannahttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Stork (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Stork (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Fragoso et al. (1999) Forest \n", - "Fragoso et al. (1999) Savanna \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Fragoso et al. (1999) Soil \n", - "Fragoso et al. (1999) Soil \n", - "Stork (1996) Soil \n", - "\n", - " Remarks \n", - "Reference \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Stork (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data\n", - "soil_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Soil',index_col='Reference')\n", - "soil_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimate for the biomass density of arthropods in soils:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in soils is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of the biomass density of soil arthropods\n", - "soil_biomass_density = gmean(soil_data['Biomass density [g C m^-2]'])\n", - "\n", - "print('Our best estimate for the biomass density of arthropods in soils is ≈%.0f g C m^-2' %soil_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we sum the biomass density of soil and litter arthropods, we arrive at an estimate of ≈2 g C m^-2, which is inline with the data from Kitazawa et al. of 1-2 g C m^-2." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Canopy arthropod biomass\n", - "Data on the biomass density of canopy arthropods is much less abundant. We extracted from the literature the following values:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Ellwood & Foster (2004)0.4Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 3776 g ha^-1 dry bioma...
Dial et al. (2006)1.2BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 23.6 kg dry weight per ha.\\n
Stork, N. E. (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1CanopyFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 0.4 Borneo \n", - "Dial et al. (2006) 1.2 Borneo \n", - "Stork, N. E. (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "Stork, N. E. (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "Stork, N. E. (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Dial et al. (2006) Canopy \n", - "Stork, N. E. (1996) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 3776 g ha^-1 dry bioma... \n", - "Dial et al. (2006) Dial et al. Estimate 23.6 kg dry weight per ha.\\n \n", - "Stork, N. E. (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data on the biomass density of canopy arthropods\n", - "canopy_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Canopy',index_col='Reference')\n", - "canopy_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates for the biomass density of arthropods in canopies:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in canopies is ≈0.7 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of biomass densitiy of canopy arthropods\n", - "canopy_biomass_density = gmean(canopy_data['Biomass density [g C m^-2]'])\n", - "print('Our best estimate for the biomass density of arthropods in canopies is ≈%.1f g C m^-2' %canopy_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate for the biomass of arthropods using estimates of biomass densities, we sum the estimates for the biomass density of arthropods in soils and in canopies, and apply this density over the entire ice-free land surface of $1.3×10^{14} \\: m^2$:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Sum the biomass densities of arthropods in soils and in canopies\n", - "total_denisty = litter_biomass_density+soil_biomass_density+canopy_biomass_density\n", - "\n", - "# Apply the average biomass density across the entire ice-free land surface\n", - "method1_estimate = total_denisty*1.3e14\n", - "\n", 
- "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method1_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "In this method, in order to estimate the total biomass of arthropods, we calculate the carbon content of a characteristic arthropod, and multiply this carbon content by an estimate for the total number of arthropods.\n", - "We rely both on data from Gist & Crossley which detail the total number of arthropods per unit area as well as the total biomass of arthropods per unit area for several studies. From this data we can calculate the characteristic carbon content of a single arthropod assuming 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]Carbon content [g C per individual]
Study
Bornebusch (1930) 12.1e+036.3e+001.9e+004.4e-04
Bornebusch (1930) 22.6e+021.3e+004.0e-017.6e-04
Bornebusch (1930) 34.5e+032.5e+007.4e-018.1e-05
Gist & Crossley (1975)3.1e+041.2e+014.6e+007.5e-05
Kitazawa (1967) 18.7e+051.5e+014.4e+002.5e-06
Moulder et al. (1970)4.1e+041.6e+004.7e-015.8e-06
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2.1e+03 6.3e+00 \n", - "Bornebusch (1930) 2 2.6e+02 1.3e+00 \n", - "Bornebusch (1930) 3 4.5e+03 2.5e+00 \n", - "Gist & Crossley (1975) 3.1e+04 1.2e+01 \n", - "Kitazawa (1967) 1 8.7e+05 1.5e+01 \n", - "Moulder et al. (1970) 4.1e+04 1.6e+00 \n", - "\n", - " Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 1.9e+00 \n", - "Bornebusch (1930) 2 4.0e-01 \n", - "Bornebusch (1930) 3 7.4e-01 \n", - "Gist & Crossley (1975) 4.6e+00 \n", - "Kitazawa (1967) 1 4.4e+00 \n", - "Moulder et al. (1970) 4.7e-01 \n", - "\n", - " Carbon content [g C per individual] \n", - "Study \n", - "Bornebusch (1930) 1 4.4e-04 \n", - "Bornebusch (1930) 2 7.6e-04 \n", - "Bornebusch (1930) 3 8.1e-05 \n", - "Gist & Crossley (1975) 7.5e-05 \n", - "Kitazawa (1967) 1 2.5e-06 \n", - "Moulder et al. (1970) 5.8e-06 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.1e}'.format\n", - "\n", - "# Calculate the carbon content of a single arthropod by dividing the dry weight by 2 (assuming 50% carbon in\n", - "# dry weight) and dividing the result by the total number of individuals\n", - "gc_study['Carbon content [g C per individual]'] = gc_study['Dry weight [g m^-2]']/2/gc_study['Density of individuals [N m^-2]']\n", - "\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the data from these studies with data from additional sources detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Carbon content [g C per individual]SiteHabitatLinkCommentsRemarks
Reference
Ellwood & Foster (2004)1.0e-03Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 88 g dry weight per fe...
Stork (1996)3.5e-04SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 - The results of this study indica...
Dial et al. (2006)5.0e-04BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 2.4*10^4 g dry weight in ...
\n", - "
" - ], - "text/plain": [ - " Carbon content [g C per individual] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 1.0e-03 Borneo \n", - "Stork (1996) 3.5e-04 Seram \n", - "Dial et al. (2006) 5.0e-04 Borneo \n", - "\n", - " Habitat \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Stork (1996) Soil \n", - "Dial et al. (2006) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 88 g dry weight per fe... \n", - "Stork (1996) From page 6 - The results of this study indica... \n", - "Dial et al. (2006) Dial et al. Estimate 2.4*10^4 g dry weight in ... 
" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data sources\n", - "other_carbon_content_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Carbon content',index_col='Reference')\n", - "\n", - "other_carbon_content_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the difference studies and use it as our best estimate for the carbon content of a characteristic arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of a characteristic arthropod is 1.2e-04 g C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates from the different studies on the average carbon content of a single arthropod.\n", - "average_carbon_content = gmean(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('Our best estimate for the carbon content of a characteristic arthropod is %.1e g C' % average_carbon_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of arthropods using the characteristic carbon content method, we multiply our best estiamte of the carbon content of a single arthropod by an estimate of the total number of arthropods made by [Williams](http://dx.doi.org/10.1086/282115). Williams estiamted a total of $~10^{18}$ individual insects in soils. We assume this estimate of the total number of insects is close to the total number of arthropods (noting that in this estimate Williams also included collembola which back in 1960 were considered insects, and are usually very numerous because of their small size). 
To estimate the total biomass of arthropods, we multiply the carbon content of a single arthropod by the estimate for the total number of arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "# Total number of insects estimated by Williams\n", - "tot_num_arthropods = 1e18\n", - "\n", - "# Calculate the total biomass of arthropods\n", - "method2_estimate = average_carbon_content*tot_num_arthropods\n", - "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method2_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the biomass of arthropods is the geometric mean of the estimates from the two methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates using the two methods\n", - "best_estimate = gmean([method1_estimate,method2_estimate])\n", - "print('Our best estimate for the biomass of terrestrial arthropods is ≈%.1f Gt C' %(best_estimate/1e15)) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we compile a collection of the different sources of uncertainty, and combine them to project the total uncertainty. 
We survey the interstudy uncertainty for estimates within each method, the total uncertainty of each method and the uncertainty of the geometric mean of the values from the two methods.\n", - "\n", - "## Average biomass densities method\n", - "We calculate the 95% confidence interval for the geometric mean of the biomass densities reported for soil and canopy arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈2.0-fold\n", - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈1.8-fold\n", - "The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈1.9-fold\n" - ] - } - ], - "source": [ - "litter_CI = geo_CI_calc(conc_mean['Biomass density [g C m^-2]'])\n", - "soil_CI = geo_CI_calc(soil_data['Biomass density [g C m^-2]'])\n", - "canopy_CI = geo_CI_calc(canopy_data['Biomass density [g C m^-2]'])\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %litter_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %soil_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈%.1f-fold' %canopy_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the uncertainty of the global biomass estimate using the average biomass density method, we propagate the uncertainties of the soil and canopy biomass density:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "100000\n", - "The 95 percent confidence interval biomass of arthropods using the biomass densities method is 
≈1.5-fold\n" - ] - } - ], - "source": [ - "method1_CI = CI_sum_prop(estimates=np.array([litter_biomass_density,soil_biomass_density,canopy_biomass_density]),mul_CIs=np.array([litter_CI,soil_CI,canopy_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the biomass densities method is ≈%.1f-fold' %method1_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "As a measure of the uncertainty of the estimate of the total biomass of arthropods using the average carbon content method, we calculate the 95% confidence interval of the geometric mean of the estimates from different studies of the carbon content of a single arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval of the carbon content of a single arthropod is ≈4.1-fold\n" - ] - } - ], - "source": [ - "carbon_content_CI = geo_CI_calc(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('The 95 percent confidence interval of the carbon content of a single arthropod is ≈%.1f-fold' %carbon_content_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine this uncertainty of the average carbon content of a single arthropod with the uncertainty reported in Williams on the total number of insects of about one order of magnitude. This provides us with a measure of the uncertainty of the estimate of the biomass of arthropods using the average carbon content method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈14.9-fold\n" - ] - } - ], - "source": [ - "# The uncertainty of the total number of insects from Williams\n", - "tot_num_arthropods_CI = 10\n", - "\n", - "# Combine the uncertainties of the average carbon content of a single arthropod and the uncertainty of \n", - "# the total number of arthropods\n", - "method2_CI = CI_prod_prop(np.array([carbon_content_CI,tot_num_arthropods_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈%.1f-fold' %method2_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the 95% confidence interval of the geometric mean of the estimates of the biomass of arthropods using the average biomass density or the average carbon content:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈3.0\n" - ] - } - ], - "source": [ - "inter_CI = geo_CI_calc(np.array([method1_estimate,method2_estimate]))\n", - "print('The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈%.1f' % inter_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we take the highest uncertainty among the collection of uncertainties we generate, which is the ≈15-fold uncertainty of the average carbon content method. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈14.9-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([inter_CI,method1_CI,method2_CI])\n", - "print('Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The biomass of termites\n", - "As we state in the Supplementary Information, there are some groups of terrestrial arthropods for which better estimates are available. An example is the biomass of termites. We use the data in [Sanderson](http://dx.doi.org/10.1029/96GB01893) to estimate the global biomass of termites:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate of the total biomass of termites based on Sanderson is ≈0.07 Gt C\n" - ] - } - ], - "source": [ - "# Load termite data\n", - "termite_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Sanderson', skiprows=1, index_col=0)\n", - "\n", - "# Multiply biomass density by biome area and sum over biomes\n", - "termite_biomass = (termite_data['Area [m^2]']* termite_data['Biomass density [g wet weight m^-2]']).sum()\n", - "\n", - "# Calculate carbon mass assuming carbon is 15% of wet weight\n", - "termite_biomass *= 0.15\n", - "\n", - "print('The estimate of the total biomass of termites based on Sanderson is ≈%.2f Gt C' %(termite_biomass/1e15))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial_arthropods-checkpoint.ipynb b/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial_arthropods-checkpoint.ipynb deleted file mode 100644 index 884afb2..0000000 --- a/animals/arthropods/terrestrial_arthropods/.ipynb_checkpoints/terrestrial_arthropods-checkpoint.ipynb +++ /dev/null @@ -1,1652 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of terrestrial arthropods\n", - "To estimate the biomass of terrestrial arthropods, we rely on two parallel methods - a method based on average biomass densities of arthropods extrapolated to the global ice-free land surface, and a method based on estimates of the average carbon content of a characteristic arthropod and the total number of terrestrial arthropods.\n", - "\n", - "## Average biomass densities method\n", - "We collected values from the literature on the biomass densities of arthropods per unit area. We assume, based on [Stork et al.](http://dx.doi.org/10.1007/978-94-009-1685-2_1), most of the biomass is located in the soil, litter or in the canopy of trees. 
We thus estimate a mean biomass density of arthropods in soil, litter and in canopies, sum those biomass densities and apply them across the entire ice-free land surface.\n", - "\n", - "### Litter arthropod biomass\n", - "We compiled a list of values from several different habitats. Most of the measurements are from forests and savannas. For some of the older studies, we did not have access to the original data, but to a summary of the data made by two main studies: [Gist & Crossley](http://dx.doi.org/10.2307/2424109) and [Brockie & Moeed](http://dx.doi.org/10.1007/BF00377108). Here is a sample of the data from Gist & Crossley:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupDensity of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]StudySiteHabitatLink
0Diplopoda14.06.13.2Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
1Orthoptera0.90.20.1Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
2Medium Araneida0.50.10.0Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
3Coleoptera1.11.00.3Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
4Cryptostigmata22,433.01.50.7Gist & Crossley (1975)USAMixed hardwood foresthttp://dx.doi.org/10.2307/2424109
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "0 Diplopoda 14.0 6.1 \n", - "1 Orthoptera 0.9 0.2 \n", - "2 Medium Araneida 0.5 0.1 \n", - "3 Coleoptera 1.1 1.0 \n", - "4 Cryptostigmata 22,433.0 1.5 \n", - "\n", - " Dry weight [g m^-2] Study Site Habitat \\\n", - "0 3.2 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "1 0.1 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "2 0.0 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "3 0.3 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "4 0.7 Gist & Crossley (1975) USA Mixed hardwood forest \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.2307/2424109 \n", - "1 http://dx.doi.org/10.2307/2424109 \n", - "2 http://dx.doi.org/10.2307/2424109 \n", - "3 http://dx.doi.org/10.2307/2424109 \n", - "4 http://dx.doi.org/10.2307/2424109 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load global stocks data\n", - "gc_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Gist & Crossley',skiprows=1)\n", - "gc_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is a sample from Brockie & Moeed:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Taxonomic groupWet weight [g m^-2]SiteHabitatStudyLink
0Collembola0.2Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
1Coleoptera1.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
2Acarina3.0Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
3Arachnida0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
4Chilopoda0.3Orongorongo Valley, New ZealandBroadleaf podocarp forestBrockie & Moeed (1986)http://dx.doi.org/10.1007/BF00377108
\n", - "
" - ], - "text/plain": [ - " Taxonomic group Wet weight [g m^-2] Site \\\n", - "0 Collembola 0.2 Orongorongo Valley, New Zealand \n", - "1 Coleoptera 1.0 Orongorongo Valley, New Zealand \n", - "2 Acarina 3.0 Orongorongo Valley, New Zealand \n", - "3 Arachnida 0.3 Orongorongo Valley, New Zealand \n", - "4 Chilopoda 0.3 Orongorongo Valley, New Zealand \n", - "\n", - " Habitat Study \\\n", - "0 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "1 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "2 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "3 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "4 Broadleaf podocarp forest Brockie & Moeed (1986) \n", - "\n", - " Link \n", - "0 http://dx.doi.org/10.1007/BF00377108 \n", - "1 http://dx.doi.org/10.1007/BF00377108 \n", - "2 http://dx.doi.org/10.1007/BF00377108 \n", - "3 http://dx.doi.org/10.1007/BF00377108 \n", - "4 http://dx.doi.org/10.1007/BF00377108 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bm_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Brockie & Moeed',skiprows=1)\n", - "bm_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the sum of biomass of all the groups of arthropods in each study to provide an estimate for the total biomass density of arthropods in litter:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Brockie & Moeed:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Wet weight [g m^-2]
Study
Bornebusch (1930) 16.2
Bornebusch (1930) 21.9
Bornebusch (1930) 31.9
Brockie & Moeed (1986)14.5
Kitazawa (1967) 114.2
Kitazawa (1967) 27.1
\n", - "
" - ], - "text/plain": [ - " Wet weight [g m^-2]\n", - "Study \n", - "Bornebusch (1930) 1 6.2\n", - "Bornebusch (1930) 2 1.9\n", - "Bornebusch (1930) 3 1.9\n", - "Brockie & Moeed (1986) 14.5\n", - "Kitazawa (1967) 1 14.2\n", - "Kitazawa (1967) 2 7.1" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gc_study = gc_data.groupby('Study').sum()\n", - "bm_study = bm_data.groupby('Study').sum()\n", - "\n", - "print('The estimate from Brockie & Moeed:')\n", - "bm_study" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate from Gist & Crossley:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]
Study
Bornebusch (1930) 12,145.56.31.9
Bornebusch (1930) 2265.01.30.4
Bornebusch (1930) 34,547.52.50.7
Gist & Crossley (1975)30,580.512.04.6
Kitazawa (1967) 1870,214.014.54.4
Moulder et al. (1970)40,887.01.60.5
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 6.3 \n", - "Bornebusch (1930) 2 265.0 1.3 \n", - "Bornebusch (1930) 3 4,547.5 2.5 \n", - "Gist & Crossley (1975) 30,580.5 12.0 \n", - "Kitazawa (1967) 1 870,214.0 14.5 \n", - "Moulder et al. (1970) 40,887.0 1.6 \n", - "\n", - " Dry weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 1.9 \n", - "Bornebusch (1930) 2 0.4 \n", - "Bornebusch (1930) 3 0.7 \n", - "Gist & Crossley (1975) 4.6 \n", - "Kitazawa (1967) 1 4.4 \n", - "Moulder et al. (1970) 0.5 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print('The estimate from Gist & Crossley:')\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In cases where data is coflicting between the two studies, we calculate the mean. We merge the data from the papers to generate a list of estimates on the total biomass density of arhtropods" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Dry weight [g m^-2]Wet weight [g m^-2]
Study
Bornebusch (1930) 12,145.51.96.2
Bornebusch (1930) 2265.00.41.6
Bornebusch (1930) 34,547.50.72.2
Brockie & Moeed (1986)nannan14.5
Gist & Crossley (1975)30,580.54.612.0
Kitazawa (1967) 1870,214.04.414.4
Kitazawa (1967) 2nannan7.1
Moulder et al. (1970)40,887.00.51.6
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2,145.5 1.9 \n", - "Bornebusch (1930) 2 265.0 0.4 \n", - "Bornebusch (1930) 3 4,547.5 0.7 \n", - "Brockie & Moeed (1986) nan nan \n", - "Gist & Crossley (1975) 30,580.5 4.6 \n", - "Kitazawa (1967) 1 870,214.0 4.4 \n", - "Kitazawa (1967) 2 nan nan \n", - "Moulder et al. (1970) 40,887.0 0.5 \n", - "\n", - " Wet weight [g m^-2] \n", - "Study \n", - "Bornebusch (1930) 1 6.2 \n", - "Bornebusch (1930) 2 1.6 \n", - "Bornebusch (1930) 3 2.2 \n", - "Brockie & Moeed (1986) 14.5 \n", - "Gist & Crossley (1975) 12.0 \n", - "Kitazawa (1967) 1 14.4 \n", - "Kitazawa (1967) 2 7.1 \n", - "Moulder et al. (1970) 1.6 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Concat the data from the two studies\n", - "conc = pd.concat([gc_study,bm_study])\n", - "conc_mean = conc.groupby(conc.index).mean()\n", - "conc_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate from the dry weight and wet weight estimates the biomass density in g C $m^{-2}$ by assuming 70% water content and 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Bornebusch (1930) 1 0.9\n", - "Bornebusch (1930) 2 0.2\n", - "Bornebusch (1930) 3 0.4\n", - "Brockie & Moeed (1986) 2.2\n", - "Gist & Crossley (1975) 2.3\n", - "Kitazawa (1967) 1 2.2\n", - "Kitazawa (1967) 2 1.1\n", - "Moulder et al. 
(1970) 0.2\n", - "Name: Biomass density [g C m^-2], dtype: float64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fill places with no dry weight estimate with 30% of the wet weight estimate \n", - "conc_mean['Dry weight [g m^-2]'].fillna(conc_mean['Wet weight [g m^-2]']*0.3,inplace=True)\n", - "\n", - "# Calculate carbon biomass as 50% of dry weight\n", - "conc_mean['Biomass density [g C m^-2]'] = conc_mean['Dry weight [g m^-2]']/2\n", - "conc_mean['Biomass density [g C m^-2]']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the different studies as our best estimate of the biomass density of litter arthropods." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in litter is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "litter_biomass_density = gmean(conc_mean.iloc[0:5,3])\n", - "print('Our best estimate for the biomass density of arthropods in litter is ≈%.0f g C m^-2' %litter_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Soil arthropod biomass\n", - "As our source for estimating the biomass of soil arthropods, we use these data collected from the literature, which are detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Fragoso et al. (1999)1.8Several locationsForesthttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Fragoso et al. (1999)1.8Several locationsSavannahttp://horizon.documentation.ird.fr/exl-doc/pl...SoilTaken from Figure 2.1 in Fragoso et al. - we c...
Stork (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Fragoso et al. (1999) 1.8 Several locations \n", - "Stork (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Fragoso et al. (1999) Forest \n", - "Fragoso et al. (1999) Savanna \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Fragoso et al. (1999) http://horizon.documentation.ird.fr/exl-doc/pl... \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Fragoso et al. (1999) Soil \n", - "Fragoso et al. (1999) Soil \n", - "Stork (1996) Soil \n", - "\n", - " Remarks \n", - "Reference \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Fragoso et al. (1999) Taken from Figure 2.1 in Fragoso et al. - we c... \n", - "Stork (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data\n", - "soil_data = pd.read_excel('terrestrial_arthropods_data.xlsx','Soil',index_col='Reference')\n", - "soil_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimate for the biomass density of arthropods in soils:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in soils is ≈1 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of the biomass density of soil arthropods\n", - "soil_biomass_density = gmean(soil_data['Biomass density [g C m^-2]'])\n", - "\n", - "print('Our best estimate for the biomass density of arthropods in soils is ≈%.0f g C m^-2' %soil_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we sum the biomass density of soil and litter arthropods, we arrive at an estimate of ≈2 g C m^-2, which is inline with the data from Kitazawa et al. of 1-2 g C m^-2." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Canopy arthropod biomass\n", - "Data on the biomass density of canopy arthropods is much less abundant. We extracted from the literature the following values:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass density [g C m^-2]SiteBiomeLinkCommentsRemarks
Reference
Ellwood & Foster (2004)0.4Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 3776 g ha^-1 dry bioma...
Dial et al. (2006)1.2BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 23.6 kg dry weight per ha.\\n
Stork, N. E. (1996)0.8SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1CanopyFrom page 6 – 30 kg dry weight per ha. Assumin...
\n", - "
" - ], - "text/plain": [ - " Biomass density [g C m^-2] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 0.4 Borneo \n", - "Dial et al. (2006) 1.2 Borneo \n", - "Stork, N. E. (1996) 0.8 Seram \n", - "\n", - " Biome \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "Stork, N. E. (1996) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "Stork, N. E. (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Dial et al. (2006) Canopy \n", - "Stork, N. E. (1996) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 3776 g ha^-1 dry bioma... \n", - "Dial et al. (2006) Dial et al. Estimate 23.6 kg dry weight per ha.\\n \n", - "Stork, N. E. (1996) From page 6 – 30 kg dry weight per ha. Assumin... 
" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data on the biomass density of canopy arthropods\n", - "canopy_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Canopy',index_col='Reference')\n", - "canopy_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates for the biomass density of arthropods in canopies:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of arthropods in canopies is ≈0.7 g C m^-2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates of biomass densitiy of canopy arthropods\n", - "canopy_biomass_density = gmean(canopy_data['Biomass density [g C m^-2]'])\n", - "print('Our best estimate for the biomass density of arthropods in canopies is ≈%.1f g C m^-2' %canopy_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate for the biomass of arthropods using estimates of biomass densities, we sum the estimates for the biomass density of arthropods in soils and in canopies, and apply this density over the entire ice-free land surface of $1.3×10^{14} \\: m^2$:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Sum the biomass densities of arthropods in soils and in canopies\n", - "total_denisty = litter_biomass_density+soil_biomass_density+canopy_biomass_density\n", - "\n", - "# Apply the average biomass density across the entire ice-free land surface\n", - "method1_estimate = total_denisty*1.3e14\n", - "\n", 
- "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method1_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "In this method, in order to estimate the total biomass of arthropods, we calculate the carbon content of a characteristic arthropod, and multiply this carbon content by an estimate for the total number of arthropods.\n", - "We rely both on data from Gist & Crossley which detail the total number of arthropods per unit area as well as the total biomass of arthropods per unit area for serveal studies. Form this data we can calculate the characteristic carbon content of a single arthropod assuming 50% carbon in dry mass:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Density of individuals [N m^-2]Wet weight [g m^-2]Dry weight [g m^-2]Carbon content [g C per individual]
Study
Bornebusch (1930) 12.1e+036.3e+001.9e+004.4e-04
Bornebusch (1930) 22.6e+021.3e+004.0e-017.6e-04
Bornebusch (1930) 34.5e+032.5e+007.4e-018.1e-05
Gist & Crossley (1975)3.1e+041.2e+014.6e+007.5e-05
Kitazawa (1967) 18.7e+051.5e+014.4e+002.5e-06
Moulder et al. (1970)4.1e+041.6e+004.7e-015.8e-06
\n", - "
" - ], - "text/plain": [ - " Density of individuals [N m^-2] Wet weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 2.1e+03 6.3e+00 \n", - "Bornebusch (1930) 2 2.6e+02 1.3e+00 \n", - "Bornebusch (1930) 3 4.5e+03 2.5e+00 \n", - "Gist & Crossley (1975) 3.1e+04 1.2e+01 \n", - "Kitazawa (1967) 1 8.7e+05 1.5e+01 \n", - "Moulder et al. (1970) 4.1e+04 1.6e+00 \n", - "\n", - " Dry weight [g m^-2] \\\n", - "Study \n", - "Bornebusch (1930) 1 1.9e+00 \n", - "Bornebusch (1930) 2 4.0e-01 \n", - "Bornebusch (1930) 3 7.4e-01 \n", - "Gist & Crossley (1975) 4.6e+00 \n", - "Kitazawa (1967) 1 4.4e+00 \n", - "Moulder et al. (1970) 4.7e-01 \n", - "\n", - " Carbon content [g C per individual] \n", - "Study \n", - "Bornebusch (1930) 1 4.4e-04 \n", - "Bornebusch (1930) 2 7.6e-04 \n", - "Bornebusch (1930) 3 8.1e-05 \n", - "Gist & Crossley (1975) 7.5e-05 \n", - "Kitazawa (1967) 1 2.5e-06 \n", - "Moulder et al. (1970) 5.8e-06 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.1e}'.format\n", - "\n", - "# Calculate the carbon content of a single arthropod by dividing the dry weight by 2 (assuming 50% carbon in\n", - "# dry weight) and dividing the result by the total number of individuals\n", - "gc_study['Carbon content [g C per individual]'] = gc_study['Dry weight [g m^-2]']/2/gc_study['Density of individuals [N m^-2]']\n", - "\n", - "gc_study" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the data from these studies with data from additional sources detailed below:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Carbon content [g C per individual]SiteHabitatLinkCommentsRemarks
Reference
Ellwood & Foster (2004)1.0e-03Borneolowland dipterocarp rainforesthttp://dx.doi.org/10.1038/nature02560CanopyEllwood & Foster report 88 g dry weight per fe...
Stork (1996)3.5e-04SeramLowland Tropical Rain Foresthttp://dx.doi.org/10.1007/978-94-009-1685-2_1SoilFrom page 6 - The results of this study indica...
Dial et al. (2006)5.0e-04BorneoLowland Tropical Rain Foresthttp://dx.doi.org/10.1111/j.1744-7429.2006.001...CanopyDial et al. Estimate 2.4*10^4 g dry weight in ...
\n", - "
" - ], - "text/plain": [ - " Carbon content [g C per individual] Site \\\n", - "Reference \n", - "Ellwood & Foster (2004) 1.0e-03 Borneo \n", - "Stork (1996) 3.5e-04 Seram \n", - "Dial et al. (2006) 5.0e-04 Borneo \n", - "\n", - " Habitat \\\n", - "Reference \n", - "Ellwood & Foster (2004) lowland dipterocarp rainforest \n", - "Stork (1996) Lowland Tropical Rain Forest \n", - "Dial et al. (2006) Lowland Tropical Rain Forest \n", - "\n", - " Link \\\n", - "Reference \n", - "Ellwood & Foster (2004) http://dx.doi.org/10.1038/nature02560 \n", - "Stork (1996) http://dx.doi.org/10.1007/978-94-009-1685-2_1 \n", - "Dial et al. (2006) http://dx.doi.org/10.1111/j.1744-7429.2006.001... \n", - "\n", - " Comments \\\n", - "Reference \n", - "Ellwood & Foster (2004) Canopy \n", - "Stork (1996) Soil \n", - "Dial et al. (2006) Canopy \n", - "\n", - " Remarks \n", - "Reference \n", - "Ellwood & Foster (2004) Ellwood & Foster report 88 g dry weight per fe... \n", - "Stork (1996) From page 6 - The results of this study indica... \n", - "Dial et al. (2006) Dial et al. Estimate 2.4*10^4 g dry weight in ... 
" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load additional data sources\n", - "other_carbon_content_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Carbon content',index_col='Reference')\n", - "\n", - "other_carbon_content_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the estimates from the difference studies and use it as our best estimate for the carbon content of a characteristic arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of a characteristic arthropod is 1.2e-04 g C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates from the different studies on the average carbon content of a single arthropod.\n", - "average_carbon_content = gmean(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('Our best estimate for the carbon content of a characteristic arthropod is %.1e g C' % average_carbon_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of arthropods using the characteristic carbon content method, we multiply our best estiamte of the carbon content of a single arthropod by an estimate of the total number of arthropods made by [Williams](http://dx.doi.org/10.1086/282115). Williams estiamted a total of $~10^{18}$ individual insects in soils. We assume this estimate of the total number of insects is close to the total number of arthropods (noting that in this estimate Williams also included collembola which back in 1960 were considered insects, and are usually very numerous because of their small size). 
To estimate the total biomass of arthropods, we multiply the carbon content of a single arthropod by the the estimate for the total number of arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "# Total number of insects estimated by Williams\n", - "tot_num_arthropods = 1e18\n", - "\n", - "# Calculate the total biomass of arthropods\n", - "method2_estimate = average_carbon_content*tot_num_arthropods\n", - "print('Our best estimate for the biomass of terrestrial arthropods using average biomass densities is ≈%.1f Gt C' %(method2_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the biomass of arthropods is the geometric mean of the estimates from the two methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial arthropods is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates using the two methods\n", - "best_estimate = gmean([method1_estimate,method2_estimate])\n", - "print('Our best estimate for the biomass of terrestrial arthropods is ≈%.1f Gt C' %(best_estimate/1e15)) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we compile a collection of the different sources of uncertainty, and combine them to project the total uncertainty. 
We survey the interstudy uncertainty for estimates within each method, the total uncertainty of each method and the uncertainty of the geometric mean of the values from the two methods.\n", - "\n", - "## Average biomass densities method\n", - "We calculate the 95% confidence interval for the geometric mean of the biomass densities reported for soil and canopy arthropods:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈2.0-fold\n", - "The 95 percent confidence interval for the average biomass density of soil arthropods is ≈1.8-fold\n", - "The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈1.9-fold\n" - ] - } - ], - "source": [ - "litter_CI = geo_CI_calc(conc_mean['Biomass density [g C m^-2]'])\n", - "soil_CI = geo_CI_calc(soil_data['Biomass density [g C m^-2]'])\n", - "canopy_CI = geo_CI_calc(canopy_data['Biomass density [g C m^-2]'])\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %litter_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of soil arthropods is ≈%.1f-fold' %soil_CI)\n", - "print('The 95 percent confidence interval for the average biomass density of canopy arthropods is ≈%.1f-fold' %canopy_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the uncertainty of the global biomass estimate using the average biomass density method, we propagate the uncertainties of the soil and canopy biomass density:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval biomass of arthropods using the biomass densities method is ≈1.5-fold\n" - ] - } - 
], - "source": [ - "method1_CI = CI_sum_prop(estimates=np.array([litter_biomass_density,soil_biomass_density,canopy_biomass_density]),mul_CIs=np.array([litter_CI,soil_CI,canopy_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the biomass densities method is ≈%.1f-fold' %method1_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Average carbon content method\n", - "As a measure of the uncertainty of the estimate of the total biomass of arthropods using the average carbon content method, we calculate the 95% confidence interval of the geometric mean of the estimates from different studies of the carbon content of a single arthropod:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval of the carbon content of a single arthropod is ≈4.1-fold\n" - ] - } - ], - "source": [ - "carbon_content_CI = geo_CI_calc(pd.concat([other_carbon_content_data,gc_study])['Carbon content [g C per individual]'])\n", - "print('The 95 percent confidence interval of the carbon content of a single arthropod is ≈%.1f-fold' %carbon_content_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine this uncertainty of the average carbon content of a single arthropod with the uncertainty reported in Williams on the total number of insects of about one order of magnitude. This provides us with a measure of the uncertainty of the estimate of the biomass of arthropods using the average carbon content method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈14.9-fold\n" - ] - } - ], - "source": [ - "# The uncertainty of the total number of insects from Williams\n", - "tot_num_arthropods_CI = 10\n", - "\n", - "# Combine the uncertainties of the average carbon content of a single arthropod and the uncertainty of \n", - "# the total number of arthropods\n", - "method2_CI = CI_prod_prop(np.array([carbon_content_CI,tot_num_arthropods_CI]))\n", - "print('The 95 percent confidence interval biomass of arthropods using the average carbon content method is ≈%.1f-fold' %method2_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the 95% conficence interval of the geometric mean of the estimates of the biomass of arthropods using the average biomass density or the average carbon content:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈3.0\n" - ] - } - ], - "source": [ - "inter_CI = geo_CI_calc(np.array([method1_estimate,method2_estimate]))\n", - "print('The inter-method uncertainty of the geometric mean of the estimates of the biomass of arthropods is ≈%.1f' % inter_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods, we take the highest uncertainty among the collection of uncertainties we generate, which is the ≈15-fold uncertainty of the average carbon content method. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈14.9-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([inter_CI,method1_CI,method2_CI])\n", - "print('Our best projection for the uncertainty associated with the estimate of the biomass of terrestrial arthropods is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The biomass of termites\n", - "As we state in the Supplementary Information, there are some groups of terrestrial arthropods for which better estimates are available. An example is the biomass of termites. We use the data in [Sanderson](http://dx.doi.org/10.1029/96GB01893) to estimate the global biomass of termites:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The estimate of the total biomass of termites based on Sanderson is ≈0.07 Gt C\n" - ] - } - ], - "source": [ - "# Load termite data\n", - "termite_data = pd.read_excel('terrestrial_arthropods_data.xlsx', 'Sanderson', skiprows=1, index_col=0)\n", - "\n", - "# Multiply biomass density by biome area and sum over biomes\n", - "termite_biomass = (termite_data['Area [m^2]']* termite_data['Biomass density [g wet weight m^-2]']).sum()\n", - "\n", - "# Calculate carbon mass assuming carbon is 15% of wet weight\n", - "termite_biomass *= 0.15\n", - "\n", - "print('The estimate of the total biomass of termites based on Sanderson is ≈%.2f Gt C' %(termite_biomass/1e15))" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the animal biomass data\n", - "old_results = 
pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Terrestrial arthropods',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,mul_CI)\n", - "\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Terrestrial arthropods'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Terrestrial arthropods'), \n", - " col=['Number of individuals'],\n", - " values=tot_num_arthropods,\n", - " path='../../../results.xlsx')\n", - "\n", - "# Update the biomass of termites in the MS\n", - "update_MS_data(row ='Biomass of termites',\n", - " values=termite_biomass/1e15,\n", - " path='../../../results.xlsx')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/.ipynb_checkpoints/chordate_biomass-checkpoint.ipynb b/animals/chordates/.ipynb_checkpoints/chordate_biomass-checkpoint.ipynb deleted file mode 100644 index 48cddea..0000000 --- a/animals/chordates/.ipynb_checkpoints/chordate_biomass-checkpoint.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import 
sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of chordates\n", - "To estimate the total biomass of animals, we conbine our estimates for the biomass of marine and terrestrial arthropods. Our estimates for the biomass of terrestrial and marine arthropods are:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:2: FutureWarning: \n", - "Passing list-likes to .loc or [] with any missing label will raise\n", - "KeyError in the future, you can use .reindex() as an alternative.\n", - "\n", - "See the documentation here:\n", - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n", - " \n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass [Gt C]Uncertainty
Taxon
Fish0.6676118.197784
Livestock0.107147NaN
Wild birdsNaNNaN
Wild mammals0.0073442.266285
Humans0.050000NaN
\n", - "
" - ], - "text/plain": [ - " Biomass [Gt C] Uncertainty\n", - "Taxon \n", - "Fish 0.667611 8.197784\n", - "Livestock 0.107147 NaN\n", - "Wild birds NaN NaN\n", - "Wild mammals 0.007344 2.266285\n", - "Humans 0.050000 NaN" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "chord_biomass = data.loc[['Fish','Livestock','Wild birds', 'Wild mammals', 'Humans']]\n", - "chord_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum all these different contributions to produce our best estimate for the biomass of animals:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of arthropods is ≈1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = chord_biomass['Biomass [Gt C]'].sum()\n", - "\n", - "print('Our best estimate for the biomass of arthropods is ≈%.0f Gt C' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimate of the total biomass of animals, we combine the uncertainties of the estimates for which is have uncertainty projections, namely arthropods (marine and terrestrial), fish and wild mammals." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty of our estimate of the total biomass of animals is ≈9-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(estimates=chord_biomass.loc[~np.isnan(chord_biomass['Uncertainty']),'Biomass [Gt C]'].values, mul_CIs = chord_biomass.loc[~np.isnan(chord_biomass['Uncertainty']),'Uncertainty'].values)\n", - "\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of animals is ≈%.0f-fold' %mul_CI)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to Fig. S2-S3\n", - "update_figs2s3(row='Chordates', \n", - " col='Uncertainty',\n", - " values=mul_CI,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/fish/.ipynb_checkpoints/fish_biomass_estimate-checkpoint.ipynb b/animals/chordates/fish/.ipynb_checkpoints/fish_biomass_estimate-checkpoint.ipynb deleted file mode 100644 index 099f9ed..0000000 --- a/animals/chordates/fish/.ipynb_checkpoints/fish_biomass_estimate-checkpoint.ipynb +++ /dev/null @@ -1,1069 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline \n", - 
"pd.options.display.float_format = '{:,.1e}'.format\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of fish\n", - "To estimate the biomass of fish, we first estimate the total biomass of mesopelagic fish, and then add to this estimate the estmimate for the non-mesopelagic fish made by [Wilson et al.](http://dx.doi.org/10.1126/science.1157972). \n", - "\n", - "In order to estimate the biomass of mesopelagic fish, we rely on two independent methods - and estimate based on trawling by [Lam & Pauly](http://www.seaaroundus.org/doc/Researcher+Publications/dpauly/PDF/2005/OtherItems/MappingGlobalBiomassMesopelagicFishes.pdf), and an estimate based on sonar.\n", - "\n", - "## Sonar-based estimate\n", - "We generate the sonar-based estimate relying on data from [Irigoien et al.](http://dx.doi.org/10.1038/ncomms4271) and [Proud et al.](http://dx.doi.org/10.1016/j.cub.2016.11.003).\n", - "\n", - "Estimating the biomass of mesopelagic fish using sonar is a two step process. First we use estimates of the global backscatter of mesopelagic fish. This backscatter is converted to an estimate of the global biomass of mesopelagic fish by using estimates for the target strength of a single mesopelagic fish.\n", - "\n", - "### Total backscatter\n", - "To estimate the total backscatter of mesopelagic fish, we rely on [Irigoien et al.](http://dx.doi.org/10.1038/ncomms4271) and [Proud et al.](http://dx.doi.org/10.1016/j.cub.2016.11.003). Irigoien et al. generates several different estimates for the global nautical area scatter of mesopelagic fish. We use the geometric mean of the estimates of Irigoien et al. as one source for estimating the total backscatter of mesopelagic fish. We note that the units used by Irigoien et al. 
are incorrect, as the nautical area scattering coefficient (NASC) is measured in $\\frac{m^2}{nmi^2}$, but the order of magnitude of the values estimated by Irigoien et al. implies that they multiplied the NASC by the surface area of the ocean in units of $m^2$. This means that the values reported by Irigoien et al. are in fact in units of $\\frac{m^4}{nmi^2}$. We convert the values reported by Irigoien et al. from the total scatter to the total backscatter by using the equation: $$global \\: backscatter \\: [m^2] = \\frac{global \\: scatter \\: [\\frac{m^4}{nmi^2}]}{4\\pi×\\frac{1852^2 m^2}{nmi^2}}$$ " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sA estimateTotal sA [m^4 nmi^-2]Total backscatter [m^2]
0OLS: sA = 2384.4* ln(PP) – 116784.2e+179.8e+09
1OLS: ln (sA) = 1.52* ln(PP) – 1.364.7e+171.1e+10
2GWR: ln(sA) = 1.36*ln (PP) – 0.25.6e+171.3e+10
3GWR different equations for PP above and below...4.4e+171.0e+10
4Cruise average sA × ocean surface deeper than...4.1e+179.6e+09
\n", - "
" - ], - "text/plain": [ - " sA estimate Total sA [m^4 nmi^-2] \\\n", - "0 OLS: sA = 2384.4* ln(PP) – 11678 4.2e+17 \n", - "1 OLS: ln (sA) = 1.52* ln(PP) – 1.36 4.7e+17 \n", - "2 GWR: ln(sA) = 1.36*ln (PP) – 0.2 5.6e+17 \n", - "3 GWR different equations for PP above and below... 4.4e+17 \n", - "4 Cruise average sA × ocean surface deeper than... 4.1e+17 \n", - "\n", - " Total backscatter [m^2] \n", - "0 9.8e+09 \n", - "1 1.1e+10 \n", - "2 1.3e+10 \n", - "3 1.0e+10 \n", - "4 9.6e+09 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load scatter data from Irigoien et al.\n", - "scatter = pd.read_excel('fish_biomass_data.xlsx', 'Total scatter',skiprows=1)\n", - "\n", - "# convert scater to backscatter\n", - "scatter['Total backscatter [m^2]'] = scatter['Total sA [m^4 nmi^-2]']/(4*np.pi*1852**2)\n", - "scatter['Total sA [m^4 nmi^-2]'] = scatter['Total sA [m^4 nmi^-2]'].astype(float)\n", - "scatter" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The geometric mean of global backscatter from Irigoien et al. is ≈1.1e+10 m^2\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of values from Irigoien et al.\n", - "irigoien_backscatter = gmean(scatter['Total backscatter [m^2]'])\n", - "print('The geometric mean of global backscatter from Irigoien et al. is ≈%.1e m^2' %irigoien_backscatter)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the global backscatter of mesopelagic fish, we use the geometric mean of the average value from Irigoien et al. and the value reported in Proud et al." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the global backscatter of mesapelagic fish is 8e+09 m^2\n" - ] - } - ], - "source": [ - "# The global backscatter reported by Proud et al.\n", - "proud_backscatter = 6.02e9\n", - "\n", - "# Our best estimate \n", - "best_backscatter = gmean([irigoien_backscatter,proud_backscatter])\n", - "print('Our best estimate for the global backscatter of mesapelagic fish is %.0e m^2' %best_backscatter)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Target strength\n", - "In order to convert the global backscatter into biomass, we use reported values for the target strength per unit biomass of mesopelagic fish. The target strength is a measure of the backscattering cross-section in dB, which is defined as $TS = 10 \\times log_{10}(\\sigma_{bs})$ with units of dB re 1 $m^2$. By measuring the relation between the target strength and biomass of mesopelagic fish, one can calculate the target strength per unit biomass in units of dB re 1 $\\frac{m^2}{kg}$. We can use the global backscatter to calculate the total biomass of mesopelagic fish based on the equation provided in [MacLennan et al.](https://doi.org/10.1006/jmsc.2001.1158): $$biomass_{fish} \\:[kg]= \\frac{global \\: backscatter \\: [m^2]}{10^{\\frac{TS_{kg}}{10}} [m^2 kg^{-1}]}$$\n", - "Where $TS_{kg}$ is the target strength per kilogram biomass.\n", - "\n", - "The main factor affecting the target strength of mesopelagic fish is their swimbladder, as the swimbladder serves as a strong acoustic reflector at the frequencies used to measure the backscattering of mesopelagic fish. Irigoien et al. provide a list of values from the literature of target strength per unit biomass for mesopelagic fish with or without swimbladder. 
It is clear from the data that the presence or absence of swimbladder segregates the data into two distinct groups:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0,0.5,'Counts')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEQCAYAAACz0c/rAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl4VOXd//H3l8UECKsID4g+EcsP\nEMIaXNie0AWrWK3LT6vg1lpEq1VLa9VWAZ9yWa9SRfxZqe2D1hVR0acqSqs1gIBC0LAGFygqiCKI\nYICAgfv3xzkZJ8nMZCaZkxk8n9d1zcWc7b4/OXP45syZyX3MOYeIiHzzNcl0ABERaRwq+CIiIaGC\nLyISEir4IiIhoYIvIhISKvgiIiGhgi8iEhIq+CIiIaGCLyISEir4IiIh0SzTAaJ17NjR5efnR6b3\n7NlDq1atMhcoDuVKjXKlRrlSk625oHGyrVixYrtz7qikVnbOZc1j8ODBLtprr73mspFypUa5UqNc\nqcnWXM41TjagxCVZY3VJR0QkJFTwRURCQgVfRCQksupD21i++uorNm/eTEVFRaajRLRt25aysrJM\nx6jlm5IrNzeXbt260bx58wBTiYRPoAXfzDYBXwIHgUrnXGGqbWzevJnWrVuTn5+PmaU7Yr18+eWX\ntG7dOtMxavkm5HLOsWPHDjZv3sxxxx0XcDKRcGmMM/xRzrnt9d24oqIiq4q9BMvMOPLII/nss88y\nHUXkG+ewuIavYh8uer1FghF0wXfAP8xshZmND7ivwJgZEydOjEzPmDGDyZMnB97v0KFDU1p/woQJ\nPP3007XmFxcXc8YZZ6TUVlFRESUlJbXmP/TQQ1xzzTUptSUi2SHoSzrDnXNbzKwT8E8zW++cWxi9\ngv+LYDxA586dKS4ujiwrLy+nbdu2fPnll5F5BVOrbd5gq38zss51cnJyeOaZZ7j22ms58sgjOXTo\nEPv376+WKwjz589PqQ/nHPv27au1zd69e6msrEyprYMHD7Jnz55a21RUVHDgwIGk26qsrMTMUt5X\nFRUV1Y6FIJSXlwfeR30oV2rSmmtraXra8ZXndKX4iel1r9hlQFr7jSfQgu+c2+L/u83MngVOBBbW\nWOcB4AGAwsJCV1RUFFlWXFxMbm5uoB9EJtN2s2bNuPLKK/nLX/7C1KlTadKkCTk5ObRu3ZpNmzbx\n4x//mO3bt3PUUUfx4IMPcuyxx1bbfsGCBVx33XWA925h4cKF3HTTTZx66qmceeaZnH322bRv355Z\ns2Yxa9YsNmzYwNSpU8nLy4sczJMmTaJdu3asXr2a888/n4KCAu655x727dvHc889x/HHH4+ZsXjx\nYu655x52797NXXfdxRlnnEHLli1p1qwZrVu3ZtmyZVx33XVUVFTQokULHnzwQXr27Mm+ffu4/PLL\nWblyJb169eLAgQO0atWK1q1b8+CDD3LHHXfQrl07+vfvH/nZP/vsMyZMmMCHH34IwPTp0xk2bBiT\nJ09mw4YNbNy4kWOPPZYHHngg5dcwNzeXgQMHprRNqoqLi4k+3
rKFcqUmrbkmn5WednzFPadQ9M6k\nule8cFda+40nsEs6ZtbKzFpXPQdGA2uC6i9oP/vZz3jsscfYtav6C3Pttddy6aWXsmrVKsaOHcvP\nf/7zWttOmzaN++67j9LSUhYtWkSLFi0YMWIEixYtAmDLli2sW7cOgEWLFjFyZO13HStXrmTmzJmU\nlZXxyCOP8O6777Js2TKuuOIK7r333sh6mzZtYtmyZbz44otMmDCh1tdZe/XqxaJFi3j77be5/fbb\nueWWWwC4//77admyJWVlZUyZMoUVK1YAsHXrViZNmsTixYt5/fXXIzkBrrvuOm644QaWL1/OM888\nwxVXXBFZtm7dOl555RWeeOKJlPaziAQnyDP8zsCz/gdwzYDHnXMvB9hfoNq0acMll1zCjBkzaNLk\n69+TS5cuZe7cuQBcfPHF3HjjjbW2HTZsGL/4xS8YO3Ys55xzDt26dWPEiBFMnz6ddevWccIJJ7Bz\n5062bt3K0qVLmTFjRq02hgwZQpcuXQA4/vjjGT16NAAFBQW89tprkfXOP/98mjRpQo8ePejevTvr\n16+v1s6uXbu49NJLee+99zAzvvrqKwAWLlwY+WXVr18/+vXrB8Cbb75JUVERRx3ljc10wQUX8O67\n7wLwyiuvVPsFsHv3bsrLywE488wzadGiRdL7V0SCF1jBd85tBPoH1X4mXH/99QwaNIiLLrqII444\nIuntbrrpJsaMGcO8efMYNmwY8+fPp1evXnzxxRe8/PLLjBw5ks8//5w5c+aQl5cX8/JHTk5O5HnV\nJaWq55WVlZFlNb/hUnP61ltvZdSoUTz77LNs2rSpQW+FDx06xBtvvEFubm6tZdk6eqFImB0WX8vM\nFh06dOD888/nkUceicwbOnQos2fPBuCxxx5jxIgRtbbbsGEDBQUF/PrXv2bIkCGRs+6TTz6Z6dOn\nM3LkSEaMGMG0adNibp+Kp556ikOHDkWuoffs2bPa8l27dnH00UcD3jduqowcOZLHH38cgDVr1rBq\n1SoATjrpJBYsWMCOHTv46quveOqppyLbjB49utrlpNLS9H7gJSLppYKfookTJ7Jjx47I9L333suD\nDz5Iv379eOSRR7jnnntqbTN9+nT69u1Lv379aN68OaeddhoAI0aMoLKykm9961sMGjSIzz//vMEF\n/9hjj+XEE0/ktNNOY+bMmbXOvm+88UZuvvlmBg4cWO2dwVVXXUV5eTm9e/fmtttuY/DgwQB06dKF\nyZMnc8oppzBs2DB69+4d2WbGjBmUlJTQr18/TjjhBGbOnNmg7CISLPOGU84OhYWFLvq738XFxXTu\n3LlakckG34QhDBpTfXKVlZUF/rqH4lsnaRSKXJPbpqcdX9Lf0plc/2/pmNmKZIet0Rm+iEhIqOCL\niISECr6ISEio4IuIhIQKvohISKjgi4iEhAp+HW644QamT/96tLtTTz212vDAEydO5K677uLjjz/m\nvPPOA7w/QJo3b15kncmTJzNt2rRA8s2cOZOHH3446fU3bdpE3759Yy6LNyRyPImGXc7Pz2f79nrf\n90ZEApD197StJc3fk63r+6/Dhg1jzpw5XH/99Rw6dIjt27ezc+fOyPIlS5Zw991307Vr18hY9KWl\npZSUlHD66aenN2sMEyZMCLyPxnDw4EGaNm2a6Rgi32g6w6/D0KFDWbp0KQBr166lb9++5OXlsXPn\nTvbv309ZWRmDBg2KnDkfOHCA2267jSeffJIBAwbw5JNPAt7okUVFRXTv3j3m4GgHDx7ksssuo2/f\nvhQUFHD33Xezbdu2yF+8rly5EjOLDEV8/PHHs3fv3mrvHk4//XRuuOEGCgsL6d27N8uXL+ecc86h\nR48e/Pa3v430VVlZydixY+nduzfnnXcee/furZXnqquuorCwkD59+jBp0td/OPLyyy/Tq1cvBg0a\nFBk0DmDHjh2MHj2aPn36c
MUVVxD9B32zZ8/mxBNPZMCAAVx55ZUcPHgQgLy8PCZOnEj//v0j+1hE\ngqOCX4euXbvSrFkzPvzwQ5YsWcIpp5xCYWEhS5cupaSkhIKCgmoDqR1xxBHcfvvtXHDBBZSWlnLB\nBRcAsH79eubPn8+yZcuYMmVKZJTKKqWlpWzZsoU1a9awevVqLr/8cjp16kRFRQW7d+9m0aJFFBYW\nsmjRIj744AM6depEy5Yta+U94ogjKCkpYcKECZx11lncd999rFmzhoceeigyJMQ777zD1VdfTVlZ\nGW3atOFPf/pTrXamTp1KSUkJq1atYsGCBaxatYqKigp++tOf8vzzz7NixQo++eSTyPpTpkxh+PDh\nrF27lrPPPjvyi6msrIy5c+eyePFiSktLadq0KY899hgAe/bs4aSTTmLlypUMHz68ga+UiNRFBT8J\nQ4cOZcmSJZGCf+KJJ0amhw0bllQbY8aMIScnh44dO9KpUyc+/fTTasu7d+/Oxo0bufbaa3n55Zdp\n06ZNpO/FixezcOFCbrnlFhYuXMiiRYvijrlz5plnAt6wyX369KFLly7k5OTQvXt3PvroIwCOOeaY\nSO5x48bx+uuv12pnzpw5DBo0iIEDB7J27VrWrVvH+vXrOe644+jRowdmxrhx4yLrL1y4MDI9ZswY\n2rdvD8Crr75KaWkpQ4YMYcCAAbz66qts3LgRgKZNm3Luuecmtf9EpOEOv2v4GTBs2DCWLFnC6tWr\n6du3L+3ateP++++nTZs2XH755Um1ET28cdOmTasNXAbQvn17Vq5cyfz585k5cyZz5sxh1qxZjBw5\nMnJWf9ZZZ3HnnXdiZowZMyZhP9FDKFdNV/VZ1xDK//73v5k2bRrLly+nffv2XHbZZbVupJIs5xwX\nXXQRf/zjH2sty83N1XV7kUakM/wkDB06lBdeeIEOHTrQtGlTOnTowBdffMHSpUtj3mi8devWKd/D\ndfv27Rw6dIhzzz2X3/3ud7z11luAN6Lmo48+So8ePWjSpAkdOnRg3rx5DboE8uGHH0aumT/++OO1\n2tq9ezetWrWibdu2fPrpp7z00kuAd7esTZs2sWHDBoBqd7OKHl75pZdeinyw/Z3vfIfnnnuObdu2\nAfD555/zwQcf1Du7iNSfCn4SCgoK2L59OyeffHK1eW3btqVjx4611h81ahTr1q2r9qFtXbZs2UJR\nUREDBgxg3Lhx3HHHHYD39UbnXOS2h8OHD6ddu3aRSyb10bNnT+677z569+7Nzp07ueqqq6ot79+/\nPwMHDqRXr15cdNFFkcs/ubm5PPDAA4wZM4ZBgwbRqVOnyDaTJk1i4cKF9OnTh7lz50bu63vCCSdw\n6623Mnr0aPr168f3vvc9tm7dWu/sIlJ/Gh65Hr5JwxA3Bg2PnBrlSo2GR9bwyCIiUoMKvohISKjg\ni4iExGFR8LPpcwYJnl5vkWBkfcHPzc1lx44dKgIh4Zxjx44dtW6+LiINl/V/eNWtWzc2b97MZ599\nlukoERUVFVlZkL4puXJzc+nWrVuAiUTCKesLfvPmzTnuuOMyHaOa4uJiBg4cmOkYtSiXiCSS9Zd0\nREQkPVTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJAIv+GbW1MzeNrMXgu5LRETia4wz\n/OuAskboR0REEgi04JtZN2AM8Ncg+xERkboFfYY/HbgROBRwPyIiUofAbnFoZmcApzvnrjazIuCX\nzrkzYqw3HhgP0Llz58GzZ8+OLCsvLycvLy+QfA0RnWv1lvrfmqyhCo6ufju2w2F/ZRPlSk2j5dpa\nmtLq5Tldydv/cUBhGibpbF0G1LuPUaNGJX2LwyAL/h3AxUAlkAu0AeY658bF2ybWPW2z/R6a+Te9\nmLEcm34/ptr04bC/solypabRcqV4X9mk7xubAaG5p61z7mbnXDfnXD7wI+BfiYq9iIgES9/
DFxEJ\niUYZD985VwwUN0ZfIiISm87wRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwRURCQgVf\nRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0Qk\nJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU\n8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREIisIJvZrlmtszMVprZWjObElRfIiJSt2YBtr0f+LZz\nrtzMmgOvm9lLzrk3AuxTRETiCKzgO+ccUO5PNvcfLqj+REQksUCv4ZtZUzMrBbYB/3TOvRlkfyIi\nEp95J+IpbGDWHjjGObcqhW3aAc8C1zrn1tRYNh4YD9C5c+fBs2fPjiwrLy8nLy8vpXyNITrX6i27\nMpaj4Oi21aYPh/2VTZQrNY2Wa2tpSquX53Qlb//HAYVpmKSzdRlQ7z5GjRq1wjlXmMy6SRV8MysG\nzsS7BLQC74x9sXPuF8mGMrPbgL3OuWnx1iksLHQlJSWR6eLiYoqKipLtotFE58q/6cWM5dj0+zHV\npg+H/ZVNlCs1jZZrctu614lS3HMKRe9MCihMwySdbXL9TxzNLOmCn+wlnbbOud3AOcDDzrmTgO/W\nEeIo/8weM2sBfA9Yn2R/IiKSZskW/GZm1gU4H3ghyW26AK+Z2SpgOd41/GS3FRGRNEv2WzpTgPnA\n68655WbWHXgv0Qb+Nf6BDcwnIiJpkmzB3+qc61c14ZzbaGZ3BZRJREQCkOwlnXuTnCciIlkq4Rm+\nmZ0CDAWOMrPob+S0AZoGGUxERNKrrks6RwB5/nqto+bvBs4LKpSIiKRfwoLvnFsALDCzh5xzHzRS\nJhERCUCyH9rmmNkDQH70Ns65bwcRSkRE0i/Zgv8UMBP4K3AwuDgiIhKUZAt+pXPu/kCTiIhIoJL9\nWubzZna1mXUxsw5Vj0CTiYhIWiV7hn+p/++vouY5oHt644iISFCSKvjOueOCDiIiIsFKquCb2SWx\n5jvnHk5vHBERCUqyl3SGRD3PBb4DvAWo4IuIHCaSvaRzbfS0P8797Diri4hIFqrvPW33ALquLyJy\nGEn2Gv7zeN/KAW/QtN7AnKBCiYhI+iV7DT/6PrSVwAfOuc0B5BERkYAkdUnHH0RtPd6Ime2BA0GG\nEhGR9Euq4JvZ+cAy4P/i3df2TTPT8MgiIoeRZC/p/AYY4pzbBmBmRwGvAE8HFUxERNIr2W/pNKkq\n9r4dKWwrIiJZINkz/JfNbD7whD99ATAvmEgiIhKEuu5p+y2gs3PuV2Z2DjDcX7QUeCzocCIikj51\nneFPB24GcM7NBeYCmFmBv+wHgaYTEZG0qes6fGfn3OqaM/15+YEkEhGRQNRV8NslWNYinUFERCRY\ndRX8EjP7ac2ZZnYFsCKYSCIiEoS6ruFfDzxrZmP5usAXAkcAZwcZTERE0ithwXfOfQoMNbNRQF9/\n9ovOuX8FnkxERNIq2fHwXwNeCziLiIgESH8tKyISEir4IiIhEVjBN7NjzOw1M1tnZmvN7Lqg+hIR\nkbolO5ZOfVQCE51zb5lZa2CFmf3TObcuwD5FRCSOwM7wnXNbnXNv+c+/BMqAo4PqT0REEmuUa/hm\nlg8MBN5sjP5ERKQ2c87VvVZDOjDLAxYAU/0B2GouHw+MB+jcufPg2bNnR5aVl5eTl5cXaL76iM61\nesuuDKf5WucW8Om+4PspOLptSusfDq9jNgl9rq2lKa1entOVvP0fBxSmYZLO1mVAvfsYNWrUCudc\nYTLrBlrwzaw58AIw3zl3V13rFxYWupKSksh0cXExRUVFgeWrr+hc+Te9mNkwUSYWVPLH1UF+LOPZ\n9PsxKa1/OLyO2ST0uSandkJR3HMKRe9MCihMwySdbXL
9TxzNLOmCH+S3dAz4H6AsmWIvIiLBCvIa\n/jDgYuDbZlbqP04PsD8REUkgsPf/zrnXAQuqfRERSY3+0lZEJCRU8EVEQkIFX0QkJFTwRURCQgVf\nRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0Qk\nJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU\n8EVEQkIFX0QkJFTwRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJAIr+GY2y8y2mdmaoPoQ\nEZHkBXmG/xDw/QDbFxGRFARW8J1zC4HPg2pfRERSo2v4IiIhYc654Bo3ywdecM71TbDOeGA8QOfO\nnQfPnj07sqy8vJy8vLyk+lq9ZVdDoqakcwv4dF+jdZe0xspVcHTblNZP5XVsTMpVh62l1SbLc7qS\nt//jDIWJL1tzQQrZugyodx+jRo1a4ZwrTGbdjBf8aIWFha6kpCQyXVxcTFFRUVJ95d/0YuoB62li\nQSV/XN2s0fpLVmPl2vT7MSmtn8rr2JiUqw6Tq/9iL+45haJ3JmUoTHzZmgtSyDa5/iesZpZ0wdcl\nHRGRkAjya5lPAEuBnma22cx+ElRfIiJSt8De/zvnLgyqbRERSZ0u6YiIhIQKvohISKjgi4iEhAq+\niEhIqOCLiISECr6ISEio4IuIhIQKvohISKjgi4iEhAq+iEhIqOCLiISECr6ISEio4IuIhIQKvohI\nSKjgi4iEhAq+iEhIqOCLiISECr6ISEio4IuIhIQKvohISKjgi4iEhAq+iEhIqOCLiISECr6ISEio\n4IuIhIQKvohISKjgi4iEhAq+iEhIqOCLiISECr6ISEio4IuIhIQKvohISARa8M3s+2b2jpm9b2Y3\nBdmXiIgkFljBN7OmwH3AacAJwIVmdkJQ/YmISGJBnuGfCLzvnNvonDsAzAbOCrA/ERFJIMiCfzTw\nUdT0Zn+eiIhkgDnngmnY7Dzg+865K/zpi4GTnHPX1FhvPDDen+wJvBO1uCOwPZCADaNcqVGu1ChX\narI1FzROtv90zh2VzIrNAgyxBTgmarqbP68a59wDwAOxGjCzEudcYTDx6k+5UqNcqVGu1GRrLsi+\nbEFe0lkO9DCz48zsCOBHwN8D7E9ERBII7AzfOVdpZtcA84GmwCzn3Nqg+hMRkcSCvKSDc24eMK8B\nTcS81JMFlCs1ypUa5UpNtuaCLMsW2Ie2IiKSXTS0gohISGRNwTeziWbmzKyjP/0rMyv1H2vM7KCZ\ndYix3UNm9u+odQcEnKvIzHZF9XdbnO2OM7M3/WElnvQ/uA4y11gzW2Vmq81siZn1j7NdY+8vM7MZ\n/n5YZWaD4mw32M/+vr++pSnPf/v9lprZP8ysqz8/o8dXglwZPb4S5Mro8ZUgV6aPrz+Y2Xq/72fN\nrJ0/f2zUPig1s0Ox9oWZTTazLVHrnZ6OXHE55zL+wPv65nzgA6BjjOU/AP4VZ9uHgPMaKxdQBLyQ\nxLZzgB/5z2cCVwWcayjQ3n9+GvBmluyv04GXAANOTpBrmb/c/PVPS1OmNlHPfw7MzIbjK16uTB9f\nCXJl9PhKkCvTx9dooJn//E7gzhjrFAAb4mw/GfhluvdXvEe2nOHfDdwIxPtA4ULgicaLE1FXrpj8\ns4dvA0/7s/4G/DDIXM65Jc65nf7kG3h/99DYYu2vs4CHnecNoJ2ZdYneyJ9u45x7w3n/Cx4mTfvL\nObc7arIVsV/LRj++kswVU5DHV7xcmT6+EuyvTB9f/3DOVfqT8fbLhXhDy2Rcxgu+mZ0FbHHOrYyz\nvCXwfeCZBM1M9d9S3W1mOY2Q6xQzW2lmL5lZnxjLjwS+iDoQ0jasRF37y/cTvLOYeBpzfyUzxMbR\n/vxE6zQk21Qz+wg
YC9xWY1lGjq86cmXs+KojV5VGP74S5Mr48RXlx8TeLxeQ+ITiGn9/zTKz9gHk\n+lpjvI0AXgHWxHicBbwJtPXX20SNSzp4O+v5BG13wXubloN3pnNbkLmANkCe+/rt5Hsx2u2IN3Bc\n1fQxwJpG2l+jgDLgyCzZXy8Aw6PaeBUorNFuIfBK1PQIkriskUyuGuvdDEzJhuMrXq5MH19J7K+M\nHF8J9ldWHF/Ab4Bn8b/5GDX/JGB1grY74/2dUhNgKt7fKyWVqz6PwBpOckcWANv8ArEJqAQ+BP4j\nap1ngYuSbK8olReyIbmi1t1E7aJreONnVF3bOwWYH3QuoB+wAfg/2bK/gD8DF0at+w7Qpcb2XYD1\nUdMXAn8O4Hg7lhqFMRPHVzK5MnF81ZUrU8dXolzZcHwBlwFLgZYxlt0N3JJkO/nxjoN0PTJ6Scc5\nt9o518k5l++cy8d7qzXIOfcJgJm1Bf4L+N94bVRdr/Ova/4Q7zdvYLnM7D+qPuE3sxPxfjPvqLG9\nA14DzvNnXZroZ0hTrmOBucDFzrl347XR2PsLbziNS/xvU5wM7HLOba2x/VZgt5md7Oe6hDTsLwAz\n6xE1eRawPmpZRo6vRLkyeXzVkStjx1eiXGT++Po+3udWZzrn9tZY1gQ4nwTX72t83nA2adpfcQX5\n26Qevyk3EXU2g/ebc3aM9eYBXf3n/wJW+zvqUfy3w0HlAq4B1gIr8T6kGRonV3e8bwa8DzwF5ASc\n66/ATqDUf5Rkyf4yvBvhbPD7LYxarzTqeaGfaQPw/6jx1rgBWZ7x210FPA8cnQ3HV7xcmT6+EuTK\n6PGVIFemj6/38T5DqNovM6OWFQFvxNjmr1U5gUf83Kvwfnl1SUeueA/9pa2ISEhk/Fs6IiLSOFTw\nRURCQgVfRCQkVPBFREJCBV9EJCRU8EVEQkIFX0QkJFTwReSwY2bdzex/zOzputeWKir4WcLMjoy6\nCcInNW6KkNabp0T12c7Mrg56m/qq2ZeZ5ZtZsH96nibm3SQk7r6q62cxsyXBpUsP/+Ydv/SfH/SP\n1ZVm9paZDY2zTVpeQ+fcRufcTxraTtio4GcJ59wO59wA59wAvBta3F017Zw7UNf2/lgiqb6e7YBU\ni3fcbeqZoV59BSVdP4NzbigNyO9vfzjZ5x+r/fFGs7wjHY2aWYGZvVDj0SkdbYeRCv5hwsyeM7MV\nZrbWzMb78/LN7B0zexhvnJBjzOxWf97rZvZE1BnYODNb5p+F/dnMmgK/B4735/0hRp+tzOxF/6xt\njZldUHObOBlq9eWvV2Zmf/F/hn+YWQu/n5iZ4+RrGquNqMz55t1y7jG/v6fNG/M+3j6IuR9rtLcm\navqX/plt3J/HX6+8rv0LNIuVM2r7que/8Pf/GjO7vsbP+ZCZveu3810zW2xm75k38FrM4ybeaxvn\n9a55TPzG7+91oGeMnwm8IZ53xlkW3VZ3M3vbzIbEOwacNzDfGTUe2+pqW+IIcqAePeo9INNkatz2\nDOjg/9sCrygdiTec6iHgZH/ZELwBnHKB1sB7wC+B3ngDTjX31/sT3oiB+SQYjhU4F/hL1HTbmtvE\nyJCor0pggD9/DjAuXuaotmv2VauNGpnz8e6GNMyfnpVoH8T6GWK0F53hl/7rkzALUJ5o/8bLGb29\n/+9gvMG1WgF5eAOrDYzqvwDvxG2F34bhjSb5XLzjJsFrW2tejcxVWVriFfX3o16rg/7ruB7YBQxO\n8HOvwftl8TbQP9ExkODYPBJSLkk3AAADTElEQVTvnfAG4OZM/589XB46wz98/NzMqkZQPAaoGi72\nA+fd2g1gGPC/zrkK59yXeAUO4Dt4/1mXm1mpP909iT5XA98zszvNbIRzblec9aIzJOrr3865Uv/5\nCrz//PEyxxOrjZo+cs4t9p8/CgyvI1fNnyFZyWRJJFbOmoYDzzrn9jjnyvGGKB4R1
f9q59whvF8E\nrzqvGq6OyhLvuIn12tb1eo/ws+x13i0H/x61rOqSTi+8O4g9bBb3RuFH4Q1PPNZ5d0hL9RjAeZdA\nJzjnjnfOpeXyURg0y3QAqZuZFQHfBU5xzu01s2K8syGAPck0AfzNOXdzjXbzE23knHvXzAbh3Xnp\nd2b2Kt79QGuKzpCor/1Rsw7inXWmKpk2ag4B6+LlihJvP1ZS/dJnbtTzhv48sXKmIrr/Q1HTh/Au\nFxUR57iJ9do6526PNS/FTDjnlppZR7zCHuvyyy68G+QMB9al2r7Un87wDw9tgZ3+f9pewMlx1lsM\n/MDMcs0sDzjDn/8qcJ75H3aZWQcz+0/gS7y30DGZWVdgr3PuUeAPwKC6tknQVzzxMpNEX/Eca2an\n+M8vAl6vR64qnwKdzPsWVU6NfHWpK3+snDUtAn5oZi3NrBXeTTIWJdl/3OMm1msb5/WOttDP0sLM\nWgM/iNWp31dTaty4JcoB/+e4xMwuIvExIGmkM/zDw8vABDMrw7uFW8xLD8655Wb2d7ybKXyK9xZ9\nl3NunZn9FviHed9A+Qr4mXPuDf9DvjXAS865X9VosgD4g5kd8re5yjm3I3obvJtPRGeI2RfwSSqZ\n/WUJ+0rgHeBnZjYL7wzyfr/oxcr1QaKGnHNfmdnteDcb2ULUHbPqUjN/jP1bK2eMNt4ys4f8/gH+\n6px7u653Z75Ex02t1zbOvJpZnsS7Ocs2YHnU4hb+pTLw3k1d6pw7GC+Yc26PmZ0B/BP4b7zLQ7WO\nAUkv3QDlG8bM8pxz5f43PhYC451zb2U6VyLpzOwXwhecc33TGFECdjget4cjneF/8zxgZifgXav9\n22Hyn+ZwzCzppWOgEegMX0QkJPShrYhISKjgi4iEhAq+iEhIqOCLiISECr6ISEio4IuIhIQKvohI\nSKjgi4iEhAq+iEhI/H8Sc0dAvwCF2gAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Load terget strength data\n", - "ts = pd.read_excel('fish_biomass_data.xlsx', 'Target strength',skiprows=1)\n", - "\n", - "# Plot the distribution of TS for fish with or without swimbladder\n", - "ts[ts['Swimbladder']=='No']['dB kg^-1'].hist(label='No swimbladder', bins=3)\n", - "ts[ts['Swimbladder']=='Yes']['dB kg^-1'].hist(label='With swimbladder', bins=3)\n", - "plt.legend()\n", - "plt.xlabel(r'Target strength per unit biomass dB kg$^{-1}$')\n", - "plt.ylabel('Counts')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the characteristic target strength per unit biomass of mesopelagic fish, we first estiamte the characteristic target strength per unit biomass of fish with or without swimbladder. We assume that fish with and without swimbladder represent an equal portion of the population of mesopelagic fish. 
We test the uncertainty associated with this assumption in the uncertainty analysis section." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Swimbladder\n", - "No -4.3e+01\n", - "Yes -2.9e+01\n", - "Name: dB kg^-1, dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the average TS per kg for fish with and without swimbladder\n", - "TS_bin = ts.groupby('Swimbladder').mean()\n", - "TS_bin['dB kg^-1']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use our best estimate for the target strength per unit biomass to estimate the total biomass of mesopelagic fish. We transform the TS to backscattering cross-section, and then calculate the effective population backscattering cross-section based on the assumption that fish with or without swimbladder represent equal portions of the population." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best sonar-based estimate for the biomass of mesopelagic fish is ≈1.8 Gt C\n" - ] - } - ], - "source": [ - "# The conversion equation from global backscatter and terget strength per unit biomass\n", - "biomass_estimator = lambda TS1,TS2,bs,frac: bs/(frac*10**(TS1/10.) + (1.-frac)*10**(TS2/10.))\n", - "\n", - "# Estimate biomass and convert to g C, assuming fish with or without swimbladder are both 50% of the population\n", - "sonar_biomass = biomass_estimator(*TS_bin['dB kg^-1'],best_backscatter,frac=0.5)*1000*0.15\n", - "print('Our best sonar-based estimate for the biomass of mesopelagic fish is ≈%.1f Gt C' %(sonar_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As noted in the Supplementary Information, there are several caveats which might bias the results. 
We use the geometric mean of estimates based on sonar and earlier estimates based on trawling to generate a robust estimate for the biomass of mesopelagic fish." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of mesopelagic fish is ≈0.5 Gt C\n" - ] - } - ], - "source": [ - "# The estimate of the global biomass of mesopelagic fish based on trawling reported in Lan & Pauly\n", - "trawling_biomass = 1.5e14\n", - "\n", - "# Estimate the biomass of mesopelagic fish based on the geometric mean of sonar-based and trawling-based estimates\n", - "best_mesopelagic_biomass = gmean([sonar_biomass,trawling_biomass])\n", - "print('Our best estimate for the biomass of mesopelagic fish is ≈%.1f Gt C' %(best_mesopelagic_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we add to our estimate of the biomass of mesopelagic fish the estimate of biomass of non-mesopelagic fish made by [Wilson et al.](http://dx.doi.org/10.1126/science.1157972) to generate our estimate for the total biomass of fish." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of fish is ≈0.7 Gt C\n" - ] - } - ], - "source": [ - "# The estimate of non-mesopelagic fish based on Wilson et al.\n", - "non_mesopelagic_fish_biomass = 1.5e14\n", - "\n", - "best_estimate = best_mesopelagic_biomass+non_mesopelagic_fish_biomass\n", - "print('Our best estimate for the biomass of fish is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "In order to assess the uncertainty associated with our estimate for the biomass of fish, we assess the uncertainty associated with the sonar-based estimate of the biomass of mesopelagic fish, as well as for the non-mesopelagic fish biomass.\n", - "\n", - "## Mesopelagic fish uncertainty\n", - "To quantify the uncertainty associated with our estimate of the biomass of mesopelagic fish, we assess the uncertainty associated with the sonar-based estimate, and the inter-method uncertainty between the sonar-based and trawling-based estimates. We do not assess the uncertainty of the trawling-based estimate as no data regarding the uncertainty of the estimate is available.\n", - "\n", - "### Sonar-based estimate uncertainty\n", - "The main parameters influencing the uncertainty of the sonar-based estimates are the global backscatter and the characteristic target-strength per unit biomass. We calculate the uncertainty associated with each one of those parameters, and then combine these uncertainties to quantify the uncertainty of the sonar-based estimate.\n", - "\n", - "#### Global Backscatter\n", - "For calculating the global backscatter, we rely on two sources of data - data from Irigoien et al. and data from Proud et al. 
We survey both the intra-study uncertainty and interstudy uncertainty associated with the global backscatter.\n", - "\n", - "##### Intra-study uncertainty\n", - "Irigoien et al. provide several estimates for the global scatter based on several different types of equations characterizing the relationship between primary productivity and the NASC. We calculate the 95% confidence interval of the geometric mean of these different estimates.\n", - "\n", - "Proud et al. estimate a global backscatter of 6.02×$10^9$ $m^2$ ± 1.4×$10^9$ $m^2$. We thus use this range as a measure of the intra-study uncertainty in the estimate of Proud et al.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the total backscatter estimate of Irigoien et al. is ≈1.1-fold\n", - "The intra-study uncertainty of the total backscatter estimate of Proud et al. is ≈1.2-fold\n" - ] - } - ], - "source": [ - "# Calculate the intra-study uncertainty of Irigoien et al.\n", - "irigoien_CI = geo_CI_calc(scatter['Total backscatter [m^2]'])\n", - "\n", - "# Calculate the intra-study uncertainty of Proud et al.\n", - "proud_CI = (1.4e9+6.02e9)/6.02e9\n", - "\n", - "print('The intra-study uncertainty of the total backscatter estimate of Irigoien et al. is ≈%.1f-fold' %irigoien_CI)\n", - "print('The intra-study uncertainty of the total backscatter estimate of Proud et al. is ≈%.1f-fold' %proud_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Interstudy uncertainty\n", - "As a measure of the interstudy uncertainty of the global backscatter, we calculate the 95% confidence interval of the geometric mean of the estimates from Irigoien et al. 
and Proud et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the total backscatter is ≈1.7-fold\n" - ] - } - ], - "source": [ - "# Calculate the interstudy uncertainty of the global backscatter\n", - "bs_inter_CI = geo_CI_calc([irigoien_backscatter,proud_backscatter])\n", - "\n", - "print('The interstudy uncertainty of the total backscatter is ≈%.1f-fold' %bs_inter_CI)\n", - "\n", - "# Take the highest uncertainty as our best projection of the uncertainty associates with the global backscatter\n", - "bs_CI = np.max([irigoien_CI,proud_CI,bs_inter_CI])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the highest uncertainty among these different kinds of uncertainty measures as our best projection of the uncertainty of the global backscatter, which is ≈1.7-fold.\n", - "\n", - "#### Target strength per unit biomass\n", - "To assess the uncertainty associated with the target strength per unit biomass, we calculate the uncertainty in estimating the characteristic target strength per unit biomass of fish with or without swimbladders, and the uncertainty associated with the fraction of the population that either has or lacks a swimbladder.\n", - "\n", - "##### Uncertainty of characteristic target strength per unit biomass of fish with or without swimbladder\n", - "We calculate the 95% confidence interval of the target strength of fish with or without swimbladder, and propagate this confidence interval to the total estimate of biomass to assess the uncertainty associated with the estimate of the target strength. We calculated an uncertainty of ≈1.3-fold associated with the estimate of the target strength per unit biomass of fish." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the target strength per unit biomass is ≈1.3-fold\n" - ] - } - ], - "source": [ - "# Define the function that will estimate the 95% confidence interval\n", - "def CI_groupby(input):\n", - " return input['dB kg^-1'].std(ddof=1)/np.sqrt(input['dB kg^-1'].shape[0])\n", - "\n", - "# Group target strength values by the presence of absence of swimbladder\n", - "ts_bin = ts.groupby('Swimbladder')\n", - "\n", - "# Calculate sandard error of those values\n", - "ts_bin_CI = ts_bin.apply(CI_groupby)\n", - "ts_CI = []\n", - "\n", - "# For the target strength of fish with or without swimbladder, sample 1000 times from the distribution\n", - "# of target strengths, and calculate the estimate of the total biomass of fish. Then calcualte the 95%\n", - "# confidence interval of the resulting distribution as a measure of the uncertainty in the biomass \n", - "# estimate resulting from the uncertainty in the target strength\n", - "\n", - "for x, instance in enumerate(ts_bin_CI):\n", - " ts_dist = np.random.normal(TS_bin['dB kg^-1'][x],instance,1000)\n", - " biomass_dist = biomass_estimator(ts_dist,TS_bin['dB kg^-1'][1-x],best_backscatter,frac=0.5)*1000*0.15\n", - " upper_CI = np.percentile(biomass_dist,97.5)/np.mean(biomass_dist)\n", - " lower_CI = np.mean(biomass_dist)/np.percentile(biomass_dist,2.5)\n", - " ts_CI.append(np.mean([upper_CI,lower_CI]))\n", - "# Take the maximum uncertainty of the with or with out swimbladder as our best projection\n", - "ts_CI = np.max(ts_CI)\n", - "print('Our best projection for the uncertainty associated with the estimate of the target strength per unit biomass is ≈%.1f-fold' %ts_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Uncertainty of the fraction of the population possessing 
swimbladder\n", - "As a measure of the uncertainty associated with the assumption that fish with or without swimbladder contributed similar portions to the total population of mesopelagic fish, we sample different ratios of fish with and without swimbladder, and calculate the biomass estimate for those fractions." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0,0.5,'Biomass estimate [Gt C]')" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmcHHWd//HXZ+6ZTDKTkMkBSUhC\nAhhuiAgCirJGRAVRRPECF8X7wGPXY394rK4Hwu7qeoEiiMDKIYqIHHIYZAEJAXMQjpADck8ymfue\n+fz+qO8kzTAz3TOZ7urj/Xw8+tHV36rq+lT3TH36+/1WfcvcHRERKVxFcQcgIiLxUiIQESlwSgQi\nIgVOiUBEpMApEYiIFDglAhGRAqdEICJS4JQIREQKnBKBiEiBK4k7gFRMnTrV586dG3cYIiI55fHH\nH9/p7nXJlsuJRDB37lyWLVsWdxgiIjnFzDamspyahkRECpwSgYhIgVMiEBEpcEoEIiIFTolARKTA\nKRGIiBQ4JQIRkQKnRCAikoW2NXVy2d3PsK6+Ne3bUiIQEclCLzS086P71rKlsTPt21IiEBHJQi2d\nPQBMrEj/ABBKBCIiWailsxdQIhARKVh7awSlad+WEoGISBZqzocagZnNNrP7zewpM1ttZp8J5V83\ns81m9mR4nJGuGEREclVLZy9lxUVUlBanfVvpTDW9wOfdfbmZTQQeN7N7wrz/dPcfpHHbIiI5raWz\nJyO1AUhjInD3rcDWMN1iZmuAA9K1PRGRfNLc2ZuxRJCRPgIzmwscAzwaij5pZivM7Cozm5yJGERE\ncklUI0h/RzFkIBGYWTVwC/BZd28GfgocBBxNVGO4bJj1LjKzZWa2rL6+Pt1hiohklZZ8qRGYWSlR\nErjO3X8H4O7b3b3P3fuBK4Hjh1rX3a9w98XuvriuLuktN0VE8kpLZw+Tcr1GYGYG/BJY4+6XJ5TP\nTFjsbGBVumIQEclVmawRpHMrJwHvB1aa2ZOh7CvAeWZ2NODABuAjaYxBRCQnRYkgMzWCdJ419DfA\nhph1R7q2KSKSD/r6ndauPOkjEBGR0WvtytxVxaBEICKSdQbGGcr5zmIRERmbpo4oEdRUKRGIiBSk\nxvYoEdRWKhGIiBSkPYmgqiwj21MiEBHJMo0d3QDUqmlIRKQwDdQIatQ0JCJSmJo6eqgozcy9CECJ\nQEQk6zS2d1NbmZn+AVAiEBHJOo3tPRnrHwAlAhGRrNPY0ZOx/gFQIhARyTpNqhGIiBS2xg71EYiI\nFDT1EYiIFLDOnj66evszNs4QKBGIiGSVveMMqWlIRKQgZXp4CVAiEBHJKpkeeRSUC
EREssqecYZU\nIxARKUy72weahtRHICJSkBraokSw3wQlAhGRgrSztYvq8pKMjTwKSgQiIlllV2s3+1VnrjYASgQi\nIlmloa07o81CoEQgIpJVdrZ2MWVCeUa3qUQgIpJFdrV1MzXDTUMlI800sxUpvEe9u582TvGIiBSs\n/n5nd1vm+whGTARAMXDGCPMNuG38whERKVzNnT309nvGm4aSJYKPuPvGkRYws4+PYzwiIgVrZ2t0\nDUGmm4aS9RE8Y2aLBhea2SIzqwNw97+lJTIRkQKz92Ky7Oos/hEwdYjy/YD/HmlFM5ttZveb2VNm\nttrMPhPKp5jZPWb2XHiePLbQRUTyy67WLoCsu45ggbsvHVzo7g8CRyZZtxf4vLsvAk4APhFqF18C\n7nX3hcC94bWISMHbGcPwEpA8EUwcYd6IQ+O5+1Z3Xx6mW4A1wAHAWcA1YbFrgLelFqqISH5rCH0E\nk7MsEaw1s5edNWRmbwLWpboRM5sLHAM8Ckx3961h1jZgeqrvIyKSz3a2dlFTWUppcWYv8Up21tBn\ngT+Z2bnA46FsMXAi8JZUNmBm1cAtwGfdvdnM9sxzdzczH2a9i4CLAObMmZPKpkREctqOlk6mT8ps\nRzEkqRG4+3PAEcBfgbnh8VfgSHd/Ntmbm1kpURK4zt1/F4q3m9nMMH8msGOYbV/h7ovdfXFdXV1q\neyMiksO2N3cxbWJFxrebrEaAu3cBvxrtG1v00/+XwBp3vzxh1m3A+cB3w/MfRvveIiL5qL6li/l1\nEzK+3aSJYB+cBLwfWGlmT4ayrxAlgBvN7EJgI3BuGmMQEckJ7h6ahrKwRjBW4UIzG2a2xiYSEUmw\nu72Hnj5n2sQs6yMYMHAxWLIyEREZm+3NnQCx1AhSPUfp/CHKLhjHOERECtpAIoijRpBsGOrzgPcA\n88wscZTRiUBDOgMTESkkO1qi4SWysY/g/4CtROMNXZZQ3gKkcq8CERFJwY5QI6jLthpBGIJ6I9EF\nZCIikiY7WqKriitKizO+7RH7CMzsQjP7YsLrTWbWbGYtZvbR9IcnIlIYtjfHc1UxJO8s/ihwVcLr\nenefBNQB56UtKhGRAhPXVcWQPBGYu+9KeH0TgLt3ApVpi0pEpMBsbepgZk12JoLaxBfu/h8AZlbE\n0DesERGRUeru7WdHSxf718bz+zpZIrjbzL41RPk3gbvTEI+ISMHZ3tyJOxwQUyJIdvroF4FfmNla\n4B+h7ChgGfChdAYmIlIotjR2AMRWI0h2+mgbcJ6ZzQcOC8VPufvzaY9MRKRAbGmKEsHM2nj6CJJd\nWTzD3be5+zqGuSPZwDJpiU5EpABsaYwuJtu/Jjv7CO5I4T1SWUZERIaxubGDKRPKqCzL/MVkkLyP\n4Cgzax5hvgEjzRcRkSS2NHawf0zNQpC8jyCe9CQiUkC2NHYwd7/M35lsQKrDUIuISJpsbeyM7Ywh\nUCIQEYlVY3s3LV29zJqsRCAiUpA27moHYM6UqthiSDkRmNnJZvbBMF1nZvPSF5aISGHY2BAlggOz\nvY/AzL4G/Cvw5VBUCvwmXUGJiBSKF3a1AblRIzgbOBNoA3D3LUS3qxQRkX2wYVc70yaWx3YNAaSe\nCLrd3QEHMLP46jAiInnkhV3tHLhffLUBSD0R3GhmPwdqzezDwF+AX6QvLBGRwrCxoY05U+L9bZ3s\nymIA3P0HZvYGoquIDwEucfd70hqZiEie6+zpY3tzV+w1gpQSgZl9z93/FbhniDIRERmDF/acMZQb\nTUNvGKLsTeMZiIhIoRm4hiDOU0ch+TDUHwM+Dsw3sxUJsyYCD6UzMBGRfLcxnDp6YIynjkLypqHr\ngT8D3wG+lFDe4u4NaYtKRKQArN/ZxqSKEmqrSmONI9noo01AE3AegJlNAyqAajOrdvcX0h+iiEh+\nWrujlQXTqjGzWONI9crit5rZc8B64K/ABqKag
oiIjNHz9a0snBb/tbmpdhZ/CzgBeNbd5wGnAY+M\ntIKZXWVmO8xsVULZ181ss5k9GR5njDlyEZEctrutm52t3SyYVh13KCkngh533wUUmVmRu98PLE6y\nztXA6UOU/6e7Hx0eus2liBSktfWtACyYHn8iSOk6AqDRzKqBpcB1ZraDMO7QcNx9qZnN3bfwRETy\n03PbQyKoiz8RpFojOAvoAC4G7gSeB946xm1+0sxWhKajycMtZGYXmdkyM1tWX18/xk2JiGSntTta\nqSwt5oAY70w2IKVE4O5t7t4HVAF/JBqC2sewvZ8CBwFHA1uBy0bY5hXuvtjdF9fV1Y1hUyIi2Wtt\nfSsHTZtAUVG8ZwxB6kNMfAT4BtAJ9ANGlAjmj2Zj7r494T2vBG4fzfoiIvli7fYWjp83Je4wgNT7\nCL4AHO7uO/dlY2Y20923hpdnA6tGWl5EJB+1dvWypakzK84YgtQTwfNA+2je2MxuAE4FpprZJuBr\nwKlmdjRRbWID8JHRvKeISD54emszAK+YOSnmSCKpJoIvA/9nZo8CXQOF7v7p4VZw9/OGKP7l6MIT\nEck/T4VEsGj/3EoEPwfuA1YS9RGIiMgYPbWlmclVpcyYVBF3KEDqiaDU3T+X1khERArEU1ubWbT/\npNjHGBqQ6nUEfw7n9c80sykDj7RGJiKSh3r7+nl6WwuLsqR/AFKvEQy09385oWzUp4+KiBS6dTvb\n6O7tz5r+AUj9nsXz0h2IiEghWL2lCYBFM2tijmSvZHcoe72732dmbx9qvrv/Lj1hiYjkp9Wbmykr\nKWJ+Xby3p0yUrEbwWqKzhYYaV8gBJQIRkVH4x6ZGDtt/EqXFqXbRpl+yO5R9LUx+093XJ84zMzUX\niYiMQk9fPys3N3He8XPiDuUlUk1JtwxRdvN4BiIiku+e2dZCZ08/x8wZduDlWCTrIzgUOAyoGdRP\nMIno3sUiIpKiJ15sBOCY2bUxR/JSyfoIDgHeAtTy0n6CFuDD6QpKRCQfPfHCbqZWlzFrcvz3IEiU\nrI/gD8AfzOxEd384QzGJiOSlJ19s5OjZk7PmiuIBqfYRnG1mk8ys1MzuNbN6M3tfWiMTEckjje3d\nrKtv45g52dUsBKkngiXu3kzUTLQBWAB8MV1BiYjkm8c27AZg8YHZ1VEMqSeC0vD8ZuAmd29KUzwi\nInnpkXW7KC8p4qgs6yiG1Mca+qOZPU10A/uPmVkd0W0rRUQkBY+u38Uxc2qpKC2OO5SXSfXm9V8C\nXg0sdvceoruVnZXOwERE8kVTRw9PbWnmVfP2izuUIaWUCMysCvg48NNQtD+wOF1BiYjkk2UbGuh3\neNX87By9P9U+gl8B3US1AoDNwLfSEpGISJ55dH0DZcVFHJtlVxQPSDURHOTu3wd6ANy9HciuE2FF\nRLLU0mfrOfbA7OwfgNQTQbeZVRKNOIqZHUTCTexFRGRo25o6eXpbC6ceMi3uUIaV6llDXwPuBGab\n2XXAScAF6QpKRCRfLH22HoBTD6mLOZLhpXqHsnvMbDlwAlGT0GfcfWdaIxMRyQMPPLuDGZMqOGT6\nxLhDGVaqNQLcfRfwpzTGIiKSV3r7+nnwuZ2ccfjMrBtfKFH23CJHRCTPPPFiIy2dvbw2i5uFQIlA\nRCRt7l2zg5Ii46QFU+MOZUSpXlB2kJmVh+lTzezTZpZ9A2aIiGQJd+fOVVs58aD9qKksTb5CjEZz\nq8o+M1sAXAHMBq5PW1QiIjluzdYWNuxq502Hz4w7lKRSTQT97t4LnA38yN2/CGT/3omIxOTOVVsp\nMlhy2PS4Q0kq1UTQY2bnAecDt4ey7K7riIjE6M+rtnH8vClMrS6PO5SkUk0EHwROBL7t7uvNbB5w\n7UgrmNlVZrbDzFYllE0xs3vM7LnwnJ0Db4iI7IO1O1p4bkdrTjQLQerDUD/l7p929xvCwXuiu38v\nyWpXA6cPK
vsScK+7LwTuDa9FRPLKrU9spsjgTYfPiDuUlKR61tAD4Z7FU4DlwJVmdvlI67j7UqBh\nUPFZwDVh+hrgbaOMV0Qkq/X3O7cu38wpC+uYNqki7nBSkmrTUE24Z/HbgV+7+6uAfxrD9qa7+9Yw\nvQ3I/l4UEZFReGTdLrY0dfKO42bFHUrKUk0EJWY2EziXvZ3F+8TdnTCa6VDM7CIzW2Zmy+rr68dj\nkyIiaXfz8k1MrChhyaLc+Z2baiL4JnAXsNbdHzOz+cBzY9je9pBQCM87hlvQ3a9w98XuvriuLrsv\nzxYRAWjt6uXOVdt4y5Ezs/beA0NJtbP4Jnc/0t0/Hl6vc/d3jGF7txGdgkp4/sMY3kNEJCvd+sRm\n2rv7eOfi2XGHMiopjT5qZhXAhcBhwJ7eD3f/5xHWuQE4FZhqZpuI7mnwXeBGM7sQ2EjU1CQikvPc\nnWsf3sDhB0zimNm5NQJPqsNQXws8DbyRqJnovcCakVZw9/OGmXVaytGJiOSIR9Y18Oz2Vr5/zpFZ\nPeT0UFLtI1jg7v8PaHP3a4A3A69KX1giIrnl2kc2UFtVyplH7R93KKOW8hAT4bnRzA4HaoDsvQGn\niEgGvdjQzl2rt/OuxbNzqpN4QKpNQ1eEK4r/H1GHbzVwSdqiEhHJIT9f+jzFZnzwpHlxhzImqd6z\n+Bdh8q/A/PSFIyKSW3a0dHLjsk2847gDmFGTG1cSD5bqWUO1wAeAuYnruPun0xOWiEhu+OXf1tPb\n189HXnNQ3KGMWapNQ3cAjwArgf70hSMikjt2t3Vz3SMvcMYRM5k7dULc4YxZqomgwt0/l9ZIRERy\nzE8eWEt7dy+fev3CuEPZJ6meNXStmX3YzGaGewpMCSORiogUpM2NHVzz8EbefuwsDpkxMe5w9kmq\nNYJu4FLgq+wdKM5Rx7GIFKj/uudZAC5+w8ExR7LvUk0Enye6qGxnOoMREckFa7Y2c8vyTVx48jwO\nqK2MO5x9lmrT0FqgPZ2BiIjkAnfnkj+soraqjE+8bkHc4YyLVGsEbcCTZnY/0DVQqNNHRaTQ3LJ8\nM49t2M3333EktVVlcYczLlJNBL8PDxGRgtXU3sN37ljDsXNqOSeH7kCWTKpXFl9jZmXAQK/IM+7e\nM9I6IiL55pu3P0VjRw+/ftvhFBXl1gijI0n1yuJTiW42vwEwYLaZnR9uUC8ikvfuXr2NW5Zv4tOv\nX8Bh+9fEHc64SrVp6DJgibs/A2BmBwM3AMelKzARkWzR0NbNV25dyaKZk/hkjl88NpRUzxoqHUgC\nAO7+LFCanpBERLKHu/Pl362gqaOHy991FGUlqR42c0eqNYJlZvYL4Dfh9XuBZekJSUQke1z10Abu\nWr2dr5xxKIfOmBR3OGmRaiL4GPAJYOB00QeBn6QlIhGRLPH4xga+c8caliyazodPyd+BFFI9a6gL\nuDw8RETyXn1LF5+47gkOmFzJpe88KufuQzwaIyYCM7vR3c81s5XsHWNoD3c/Mm2RiYjEpKO7jw/9\nehlNHT3cfMGJ1FTmd5doshrBZ8LzW9IdiIhINujvdy7+7ZOs2NTIz993XN6dKjqUEROBu28NzxsH\nysxsKrDL3V9WQxARyWXuzrf+tIY7V2/j3978CpYcNiPukDJixPOgzOwEM3vAzH5nZseY2SpgFbDd\nzE7PTIgiIplx+T3PctVD67ng1XO58OTcvBH9WCRrGvof4CtADXAf8CZ3f8TMDiW6oOzONMcnIpIR\nP75/LT+6by3vfuVsLnnLorzuHB4s2ZURJe5+t7vfBGxz90cA3P3p9IcmIpJ+7s6P71/LpXc9w9nH\nHMC3zz4ir8YRSkWyGkHijeo7Bs1TH4GI5DR35z/uWMOVD67nbUfvz6XnHElxgSUBSJ4IjjKzZqKB\n5irDNOF1RVojExFJo56+fr5660puXLaJ8088kK+99bCCqwkMSHbWUHGmAhE
RyZTG9m4+cf1yHlq7\ni0+ftpCL/2lhQfUJDJbqEBMiInlh7Y5WPnTNY2xp7OTSc47knYtnxx1S7GJJBGa2AWgB+oBed18c\nRxwiUljuXLWVL968gvKSIq7/8KtYPHdK3CFlhThrBK9z950xbl9ECkRnTx/f/tMarn1kI0fNquHH\n7z2WWZOr4g4ra6hpSETy2nPbW/jUDU/w9LYWPnzKPL74xkPz8p4C+yKuRODA3WbmwM/d/YqY4hCR\nPNXT188VS9fx3395juqKEq66YDGvP3R63GFlpbgSwcnuvtnMpgH3mNnTg+9/bGYXARcBzJkzJ44Y\nRSRHrd7SxL/cvILVW5p58xEz+cZZhzG1ujzusLJWLInA3TeH5x1mditwPLB00DJXAFcALF68WBev\niUhSu9u6ufyeZ7nu0Y1MmVDOz953LKcfPjPusLJexhOBmU0Aity9JUwvAb6Z6ThEJH/09vVzw99f\n4LJ7nqW5o4f3n3AgF7/hYGqryuIOLSfEUSOYDtwaLt4oAa53dw1eJyKj1t/v/HnVNi6/5xmer2/j\nxPn78bUzF+XtvYXTJeOJwN3XAUdlersikj/cnfuf2cEP7nqWp7Y2s2BaNT9737G88bAZBX2F8Fjp\n9FERyRm9ff3cuXobVyxdx4pNTcyZUsXl5x7FWUcfUJCDxY0XJQIRyXod3X3c/PiLXPngel5oaGfe\n1Al85+1HcM5xsygt1jUB+0qJQESy1todrVz/6AvcsnwTTR09HDOnlq+c8QresGi6agDjSIlARLJK\nd28/d63exnWPbuSRdQ2UFhunHz6TD5x4IIsPnKw+gDRQIhCR2PX3O8s27ub3T27mjpVbaWzvYdbk\nSv7l9EM4d/FsXQyWZkoEIhILd+fpbS388R9b+MOTW9jc2EFlaTFLDpvO2cccwGsW1hXsjWIyTYlA\nRDKmr99Z/sJu7lq1jbuf2s4LDe0UGZyysI4vvPFgliyawYRyHZYyTZ+4iKRVc2cPDz+/i/vW7OAv\na7azq62bsuIiXr1gPz762oN4w6Lp1E1U00+clAhEZFz19vXzj01NPPhcPQ8+t5MnX2ykr9+ZWF7C\n6w6dxpLDpvPag+uYWFEad6gSKBGIyD7p7etn9ZZmHtvQwN/XN/Dwul20dPZiBkfOquXjpx7EKQvr\nOGZOrc75z1JKBCIyKu3dvTzxQiOPbWjgsQ0NPPFCI+3dfQDMmVLFm4+YySkL6zhpwX4a9C1HKBGI\nyLC6evt4emsLKzY3seLFRlZubuLZ7S30O5jBK2ZM4p3HzeKV86bwyrlTmD6pIu6QZQyUCEQEgNau\nXp7Z1sIz21pYvaWJlZubWLO1mZ6+6HYgUyaUceSsGpYsms4xB07muAMnM0nt/HlBiUCkwPT29bNh\nVxtPh4P+mq0tPLO9mRcbOvYsM7G8hCNm1XDhyfM5clYNR86q4YDaSl3Vm6eUCETyVGN7N8/Xt7Gu\nvpV1O9tYX9/Gup2tbNjVTndvPwDFRca8qRM4clYt71o8m0NmTOLQGROZNVkH/UKiRCCSo9ydhrZu\nXtzdwYsN7by4u5319W2s39nGup1tNLR171m2pMiYs18V86dW87pDpnHw9IkcMmMiC6ZVU1FaHONe\nSDZQIhDJYq1dvWza3c6LDXsP9i82dISydtrC2ToDplaXMX9qNUsWTWd+3QTmT61mft0EZk+p0qmb\nMiwlApEYuDtNHT1sbepkW3Mn25o6o+mmjvAcPVq6el+yXlVZMbMnVzF7SiUnzN+P2VOqmD25Mnqe\nUkW1hmeQMdBfjcg46u3rp6Gtmx0tXexs7aK+pYudrd17putbutjW3MnWpg46e/pfsq4ZTJtYzoya\nSg6qq+akBVOZUVPBAbWVew74UyaUqe1exp0SgcgI+vud5s4edrf30NDWTWN7Nw1t0aN+4GDf2sXO\nlm7qW7vY3d6N+8vfp6qsmLqJ5dRVl7N
o/0mcdug0ZtRUMLOmMjxXUDexXM03EgslAikYnT19NHf0\n0NzZQ+OeA3sPDe3d7G7vZndbN7vbe8JzNN3Y3k3/EAd2gIrSIuomljO1upwD96viuLmTqasuZ2o4\n4NdNLGNqdTRfI2pKNtNfp+SMrt4+mjt6ae7sCQf03j0H9qaOnmHnDZQPnDI5lLLiIiZPKGVyVRmT\nq8o4dMYkaqtKmTKhjNqqMqZMKI2ew/wp1WVMKCtWM43kBSUCSRt3p727j7auXloTHm1de8vawqNl\nz3TfnvLWrl7aukNZZy/dfcMfyAFKi42aylImVZQysbKUSRUlzJpcyaRQNqmyJJpXUbLngD9w8K/S\nQV0KmBJBgevt66e9p4+O7ujR3t1HR08vHd39tHf30tGTWD54OprfHtYdmD9w8G7r7h2yvXwwM5hQ\nVsKE8mKqy0uoLi9hQnkJsydUheliqstLqS4vjg70A4+KUmrCwX1SZSnlJUU6mIuMgRJBFunrdzp7\n+ujq7U/63NXTR2d4Hvy6s6efrt6hn6MDd++eA/fAODKpMoOq0mIqy0qoLCuiqrSEyrJiKkujg3Rl\nWTFVZcVMCAf0gYP6wHPiwX6grLK0WLckFIlRwSYCd6e7r5/u3n56+pzu3mh6oKy7r5+egenE8t5Q\n3jdM+Z6ygffve8n7d4VluoY4wPcO1yuZoorSIspLiod8riorYcqEougAHl4PHMCryoqpCM97p0te\nUl5ZWkxlWbF+dYvkobxOBD+89zl+/+TmIQ/Uo/0lnExJkVFaXERZSXgU730uLbE9ryeVllBRGh1Q\nB55ffvAO80qLqCh56XPisomvy4p1gBaRscnrRDBtYjmvmDmJ8nAQTjxQlxZHB9yy4iJKi42ykuJQ\nblH5wPLFg5Yf9D7R+kUUq2lDRHJUXieCdx8/h3cfPyfuMEREspouYxQRKXBKBCIiBS6WRGBmp5vZ\nM2a21sy+FEcMIiISyXgiMLNi4MfAm4BFwHlmtijTcYiISCSOGsHxwFp3X+fu3cD/AmfFEIeIiBBP\nIjgAeDHh9aZQ9hJmdpGZLTOzZfX19RkLTkSk0GRtZ7G7X+Hui919cV1dXdzhiIjkrTgSwWZgdsLr\nWaFMRERiYJ7K8JDjuUGzEuBZ4DSiBPAY8B53Xz3COvXAxjFuciqwc4zr5irtc2HQPheGfdnnA909\naZNKxq8sdvdeM/skcBdQDFw1UhII64y5bcjMlrn74rGun4u0z4VB+1wYMrHPsQwx4e53AHfEsW0R\nEXmprO0sFhGRzCiERHBF3AHEQPtcGLTPhSHt+5zxzmIREckuhVAjEBGREeRNIkg2kJ2ZlZvZb8P8\nR81sbuajHF8p7PPnzOwpM1thZvea2YFxxDmeUh2w0MzeYWZuZjl9hkkq+2tm54bvebWZXZ/pGMdb\nCn/Xc8zsfjN7IvxtnxFHnOPJzK4ysx1mtmqY+WZmPwyfyQozO3ZcA3D3nH8QnYb6PDAfKAP+ASwa\ntMzHgZ+F6XcDv4077gzs8+uAqjD9sULY57DcRGAp8AiwOO640/wdLwSeACaH19PijjsD+3wF8LEw\nvQjYEHfc47DfrwGOBVYNM/8M4M+AAScAj47n9vOlRpDKQHZnAdeE6ZuB0yy3b/KbdJ/d/X53bw8v\nHyG6ijuXpTpg4b8D3wM6MxlcGqSyvx8GfuzuuwHcfUeGYxxvqeyzA5PCdA2wJYPxpYW7LwUaRljk\nLODXHnkEqDWzmeO1/XxJBKkMZLdnGXfvBZqA/TISXXqkNHhfgguJflHksqT7HKrMs939T5kMLE1S\n+Y4PBg42s4fM7BEzOz1j0aVHKvv8deB9ZraJ6HqkT2UmtFiN9v99VPL6nsUSMbP3AYuB18YdSzqZ\nWRFwOXBBzKFkUglR89CpRDW+pWZ2hLs3xhpVep0HXO3ul5nZicC1Zna4u/fHHViuypcaQSoD2e1Z\nJox
3VAPsykh06ZHS4H1m9k+NuKy2AAAJ1ElEQVTAV4Ez3b0rQ7GlS7J9nggcDjxgZhuI2lJvy+EO\n41S+403Abe7e4+7ricbxWpih+NIhlX2+ELgRwN0fBiqIxuPJZ2kdrDNfEsFjwEIzm2dmZUSdwbcN\nWuY24PwwfQ5wn4demByVdJ/N7Bjg50RJINfbjiHJPrt7k7tPdfe57j6XqF/kTHdfFk+4+yyVv+vf\nE9UGMLOpRE1F6zIZ5DhLZZ9fIBq0EjN7BVEiyPebltwGfCCcPXQC0OTuW8frzfOiaciHGcjOzL4J\nLHP324BfElUh1xJ1yrw7voj3XYr7fClQDdwU+sVfcPczYwt6H6W4z3kjxf29C1hiZk8BfcAX3T1n\na7op7vPngSvN7GKijuMLcvxHHWZ2A1FCnxr6Pr4GlAK4+8+I+kLOANYC7cAHx3X7Of75iYjIPsqX\npiERERkjJQIRkQKnRCAiUuCUCERECpwSgYhIgVMiGCdm1mdmTyY85o7De77NzBYlvP5muEAsbczs\nhjC64cVJYnkgmy/UMrO5w43kOGiZ9yS8XmxmP0x/dPEys4+a2Qdi3P6ZI40cO8w6rcOUX21m54zi\nfYb9u8j2v+l0yovrCLJEh7sfPdxMMysJYxyNxtuA24GnANz9kn2ILykzmwG80t0XJIslT8wF3gNc\nDxAuPMvVi89SFs5Lj3P7t/Hyi8RyjpkVu3tf3HGMB9UI0sjMLjCz28zsPuBeM6u26L4Ay81spZmd\nlbDsB8Iv8X+Y2bVm9mrgTODSUMM4KPHXj5mdFsZjXxnGMi8P5RvM7BsJ2zh0iLgqzOxXYf4TZva6\nMOtu4ICwvVMSln9ZLGHWO83s72b27MDyZlZsZpea2WNhfz4yxPbnmtnTZnadma0xs5vNrCqF/fp+\nKP+7mS0I5S/5RTjUL8ewvQfDZ7I87A/Ad4FTwj5dbGanmtntYZ0pZvb7sA+PmNmRofzrIa4HzGyd\nmX16mO++1cz+06J7BNxrZnWh/OjwfivM7FYzmxzKP2177x3xv6Hstba3hvmEmU0M5V9M+Hy/Ecom\nmNmfwt/PKjN7Vyj/bsL7/iBhH74Qph8ws+8N8T1WmdmNYd1bLbqHx8t+LQ9+//D9r7dIrUU15deE\nZZea2UKL/i/+J+H7+2n4TNaF7+Cq8Hdx9aBtvezzHDT/kvC5rDKzK8yiqyjN7LjwufwD+ETC8pVm\n9r9hW7cClQnzlpjZw+Hv5SYzqw7lG8LntRx451DffU6KexzufHkQXdX5ZHjcGsouIBoLZkp4XQJM\nCtNTia4SNOAwojFipoZ5A8tfDZyTsI2riYbHqCAaifDgUP5r4LNhegPwqTD9ceAXQ8T6eaIrNgEO\nJbpkv4LoF/Jw46EPjuUB4DLfO1b6X8L0RcC/helyol/Y8wa911yiK0JPCq+vAr6Qwn59NUx/ALh9\nmLhaE7axKkxXARVheiHRFaoQXcl5e8K6pya874+Ar4Xp1wNPhumvA/8X9m0q0XhVpUN8Xg68N0xf\nAvxPmF4BvDZMfxP4rzC9BSgP07Xh+Y8Jn1E10d/PEqLx+I3oh9ztRGPZvwO4MmH7NUSj6z7D3gtH\naxP24QtJvscvAD8P04cDvQy6t8MI738n0d/0W4iGjPhq+LzWJ/xfDHweVxMNNW1EQy03A0eEfXsc\nODrJ53k14fsn/N+E6WuBtyZ85q8J05ey9+/ic+z9PzhyYB/D97oUmBDm/StwScLf4b/EfbwZ74dq\nBOOnw92PDo+zE8rvcfeBccYN+A8zWwH8hWgY2elEB5qb3H0nQMLywzmE6J/q2fD6GqKDwYDfhefH\niQ6Ig50M/CZs62lgI9EYNaM11HaWEI2J8iTwKNHBYqhB0F5094fC9G9CTMn264aE5xNHEWcp0ZAE\nK4GbiG5mkszJRAcT3P0+YD8zGxgD/0/u3hW+rx1E3+Fg/cBvw/Rvg
JPNrIboYPnXUJ64fyuA6ywa\nKXagCfEh4PJQ66j1qGlxSXg8ASwnSuQLgZXAG8Kv1VPcvYloqPVO4Jdm9naioQmGMtT3eDLRARp3\nXxXiG2y4938w7NdrgO+E93olUVIYyh89OsquBLa7+0qPRhJdnRDPyz7PId7ndaHmspLof+owM6sl\n+uyWhmWuTVj+Nez9P1iRsI8nEP2NPBT+js8HEu/u91vyjBJB+rUlTL8XqAOO86g/YTvRr+DxNjDK\naB/p7QcaajtGVCMZSIrz3P3uIdYdPLZJKmOd+BDTvYS/Y4uGoS4bYr2LiT7ro4h+8Q21zGgkjuKa\n6mecbP/eDPyY6C5Vj1nUp/Rd4ENETRYPWdTMZ8B3Ej7fBe7+y5A8jyU6mH7LzC4JieN4ohsxvYXo\nl/pI+zOqv5cR3n8pcEqYdwdQS1TbejDJ9vt56WfbP0I8L/k8zawC+AlR7eAI4ErG/r9lRD/gBj7j\nRe5+YcL8tuFWzFVKBJlVA+xw9x6L2uUHfmXcR9Tevh9E7dOhvIVoaOXBngHmWmgnB94P/HWI5Ybz\nIFFSwswOBuaE9xzJcLEMdhfwMTMrHXh/M5swxHJzLBpLHqIO27+RfL/elfD8cJjeABwXps8kDNQ1\nSA2wNfzKfD/RYGbJ9inxMzoV2OnuzcMsO5QiomY8CPsXfqXvtr39L+8H/hoS2Gx3v5+oGaIGqDaz\ng8Kv4+8R/Zo+lOjz/eeENusDzGyame0PtLv7b4iaP44Ny9S4+x1EyfCoUcT/EHBu2MYiouaalxjh\n/f8OvBrod/dOoubSjxAliLF62ec5aP7AQX9niOscAI/uy9BoZgM1iPcmrLM0vBdmdjhR8xBEo9ae\nZHv7oSaE/5O8pbOGMus64I+h6roMeBrAo9EVv010UOgjqvZfQFQ1vzI0DezpEHX3TjP7INGooiVE\nB4nRnAnyE+CnIY5eotEbu2zkO3cOGcsQfkFUnV8eOuvqic44GuwZ4BNmdhXRmUg/TWG/JodmtS6i\nm5NA9MvvD6Ej8E6G/rX2E+AWi06ZTFxmBdAX1r2a6HMf8HXgqrC9dvYOYZ6qNuB4M/s3ouajgSR2\nPvAzizrH1xGNIlkM/CY0HRnwQ3dvNLN/Dz8YBppJ/hy+p1cAD4fvqxV4H7CAqDO/H+ghukf1xPDZ\nVIT3/dwo4v8JcI1Fo5o+HbbfNGiZId8/xPgi0QEVoqR6HlFtZayG+zwJ22w0syuBVcA2XtoM9UGi\n79KJTogY8FPgV2a2BlhD1DSGu9eb2QXADRZOVgD+jagfLy9p9FHJOIuusbjd3Q8fxTobiDord6Yp\nrHFlZq3uXh13HGNlZsVEneCdFp0l9hfgEI/uIyx5RjUCERlKFXB/aOIz4ONKAvlLNQIRkQKnzmIR\nkQKnRCAiUuCUCERECpwSgYhIgVMiEBEpcEoEIiIF7v8D7XwkXd4wS5cAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Sample different fractions of fish with swimbladder\n", - "ratio_range = np.linspace(0,1,1000)\n", - "\n", - "# Estiamte the biomass of mesopelagic fish using the sampled fraction\n", - "biomass_ratio_dist = biomass_estimator(*TS_bin['dB kg^-1'],best_backscatter,ratio_range)*1000*0.15/1e15\n", - "\n", - "# Plot the results for all fractions\n", - 
"plt.plot(ratio_range,biomass_ratio_dist)\n", - "plt.xlabel('Fraction of the population possessing swimbladder')\n", - "plt.ylabel('Biomass estimate [Gt C]')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We take the 95% range of distribution of fraction of fish with swimbladder account and calculate the uncertainty this fraction introduces into the sonar-based estimate of mesopelagic fish biomass. In this range the confidence interval of the biomass estimate is ≈8.7-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the fraction of fish possessing swimbladder is ≈8.7-fold\n" - ] - } - ], - "source": [ - "# Calculate the upper and lower bounds of the influence of the fraction of fish with swimbladder on biomass estimate\n", - "ratio_upper_CI = (biomass_estimator(*TS_bin['dB kg^-1'],best_backscatter,0.975)*1000*0.15)/sonar_biomass\n", - "ratio_lower_CI = sonar_biomass/(biomass_estimator(*TS_bin['dB kg^-1'],best_backscatter,0)*1000*0.15)\n", - "ratio_CI = np.max([ratio_upper_CI,ratio_lower_CI])\n", - "print('Our best projection for the uncertainty associated with the fraction of fish possessing swimbladder is ≈%.1f-fold' %ratio_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total uncertainty associated with the sonar-based estimate, we propagate the uncertainties associated with the total backscatter, the target strength per unit biomass and the fraction of fish with swimbladder.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the sonar-based estimate for the biomass of mesopelagic fish is ≈9.4-fold\n" - ] - } - ], - "source": [ - "sonar_CI = 
CI_prod_prop(np.array([ratio_CI,ts_CI,bs_CI]))\n", - "print('Our best projection for the uncertainty associated with the sonar-based estimate for the biomass of mesopelagic fish is ≈%.1f-fold' %sonar_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "As a measure of the inter-method uncertainty of our estimate of the biomass of mesopelagic fish, we calculate the 95% confidence interval of the geometric mean of the sonar-based estimate and the trawling-based estimate." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the inter method uncertainty associated with estimate of the biomass of mesopelagic fish is ≈11.3-fold\n" - ] - } - ], - "source": [ - "meso_inter_CI = geo_CI_calc(np.array([sonar_biomass,trawling_biomass]))\n", - "print('Our best projection for the inter method uncertainty associated with estimate of the biomass of mesopelagic fish is ≈%.1f-fold' %meso_inter_CI)\n", - "\n", - "# Take the highest uncertainty as our best projection for the uncertainty associated with the estimate\n", - "# of the biomass of mesopelagic fish\n", - "meso_CI = np.max([meso_inter_CI,sonar_CI])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Comparing our projections for the uncertainty of the sonar-based estimate of the biomass of mesopelagic fish and the inter-method uncertainty, our best projection for the uncertainty associated with the estimate of the biomass of mesopelagic fish is about one order of magnitude.\n", - "\n", - "## Non-mesopelagic fish biomass uncertainty\n", - "For estimating the biomass of non-mesopelagic fish, we rely on estimates by Wilson et al., who do not report an uncertainty range for the biomass of non-mesopelagic fish.
A later study ([Jennings et al.](https://doi.org/10.1371/journal.pone.0133794)) gave an estimate for the total biomass of fish with body weight of 1 g to 1000 kg, based on ecological models. Jennings et al. report a 90% confidence interval of 0.34-26.12 Gt wet weight, with a median estimate of ≈5 Gt wet weight. We take this range as a crude measure of the uncertainty associated with the estimate of the biomass of non-mesopelagic fish." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the biomass of fish is ≈8.3-fold\n" - ] - } - ], - "source": [ - "# Calculate the uncertainty of the non-mesopelagic fish biomass\n", - "non_meso_CI = np.max([26.12/5,5/0.34])\n", - "\n", - "# Propagate the uncertainties of mesopelagic fish biomass and non-mesopelagic fish biomass to the total estimate\n", - "# of fish biomass\n", - "mul_CI = CI_sum_prop(estimates=np.array([best_mesopelagic_biomass,non_mesopelagic_fish_biomass]), mul_CIs=np.array([meso_CI,non_meso_CI]))\n", - "print('Our best projection for the uncertainty associated with the estimate of the biomass of fish is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prehuman fish biomass\n", - "To estimate the prehuman fish biomass, we rely on a study ([Costello et al.](http://dx.doi.org/10.1073/pnas.1520420113)) which states that fish stocks in global fisheries are 1.17 of the Maximal Sustainable Yield biomass, when looking at all fisheries and calculating a catch-weighted average global fishery (Figure S12 in the SI Appendix of Costello et al.). Costello et al. also report the total biomass of present day fisheries at 0.84 Gt wet weight (Table S15 in the SI Appendix of Costello et al.).
Assuming 70% water content and 50% carbon content out of dry weight, this translates to:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Costello et al. estimate ≈0.13 Gt C of current fisheries\n" - ] - } - ], - "source": [ - "costello_ww = 0.84\n", - "wet_to_c = 0.3*0.5\n", - "costello_cc = costello_ww*wet_to_c\n", - "print('Costello et al. estimate ≈%.2f Gt C of current fisheries' %costello_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This number is close to the number reported by Wilson et al. Using a database of published landings data and stock assessment biomass estimates, [Thorson et al.](http://dx.doi.org/10.1139/f2012-077) estimate that the biomass of fish at the maximum sustainable yield represents ≈40% of the biomass the population would have reached in case of no fishing. From these two numbers, we can estimate the prehuman biomass of fish in fisheries. We use the total biomass of fisheries reported in Costello et al., divide it by the ratio reported in Costello et al. to estimate the Maximal Sustainable Yield biomass, and then divide this number by 0.4 to arrive at the prehuman biomass of fish in fisheries. We add to this estimate the estimate of the total biomass of mesopelagic fish, assuming their biomass wasn't affected by humans."
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total prehuman biomass of fish is ≈0.8 Gt C\n" - ] - } - ], - "source": [ - "costello_ratio = 1.17\n", - "thorson_ratio = 0.4\n", - "prehuman_biomass_fisheries = costello_cc*1e15/costello_ratio/thorson_ratio\n", - "prehuman_fish_biomass = prehuman_biomass_fisheries+best_mesopelagic_biomass\n", - "print('Our estimate for the total prehuman biomass of fish is ≈%.1f Gt C' %(prehuman_fish_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Comparing the prehuman fish biomass to the present day fish biomass, we can estimate the human associated reduction in fish biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the decrease in the total biomass of fish is ≈0.12 Gt C\n" - ] - } - ], - "source": [ - "fish_biomass_decrease = prehuman_fish_biomass-best_estimate\n", - "print('Our estimate for the decrease in the total biomass of fish is ≈%.2f Gt C' %(fish_biomass_decrease/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of fish\n", - "To estimate the total number of fish, we divide our estimate of the total biomass of mesopelagic fish by an estimate for the characteristic carbon content of a single mesopelagic fish. \n", - "To estimate the mean weight of mesopelagic fish, we rely on data reported in [Fock & Ehrich](https://doi.org/10.1111/j.1439-0426.2010.01450.x) for the family Myctophidae (Lanternfish), which dominate the mesopelagic fish species. Fock & Ehrich report the length range of each fish species, as well as allometric relations between fish length and weight for each species. 
Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FamilyGenusSpeciesa(SL)b(SL)Calculation methodMinimum length (mm)Maximum length (mm)1982 deviation statistic dj1982 wj-bjregression slope1982 sample size for regression (n=)1983 deviation statistic dj1983 wj-bjregression slope1983 sample size for regression (n=)Remarks
95MyctophidaeBenthosemaglaciale1.0e-023.0e+003.0e+0021811.8e+024.2e-0153.4e+018.9e-0116
96MyctophidaeBenthosemasuborbitale1.0e-023.0e+006.0e+003737
97MyctophidaeBolinichthysindicus1.3e-022.9e+003.0e+0031461.0e+038.0e-023
98MyctophidaeBolinichthyssupralateralis7.5e-033.2e+003.0e+00421073.3e+011.9e+007
99MyctophidaeCeratoscopelusmaderensis6.8e-033.2e+003.0e+0027856.1e+021.3e-0181.5e+017.7e-012
\n", - "
" - ], - "text/plain": [ - " Family Genus Species a(SL) b(SL) \\\n", - "95 Myctophidae Benthosema glaciale 1.0e-02 3.0e+00 \n", - "96 Myctophidae Benthosema suborbitale 1.0e-02 3.0e+00 \n", - "97 Myctophidae Bolinichthys indicus 1.3e-02 2.9e+00 \n", - "98 Myctophidae Bolinichthys supralateralis 7.5e-03 3.2e+00 \n", - "99 Myctophidae Ceratoscopelus maderensis 6.8e-03 3.2e+00 \n", - "\n", - " Calculation method Minimum length (mm) Maximum length (mm) \\\n", - "95 3.0e+00 21 81 \n", - "96 6.0e+00 37 37 \n", - "97 3.0e+00 31 46 \n", - "98 3.0e+00 42 107 \n", - "99 3.0e+00 27 85 \n", - "\n", - " 1982 deviation statistic dj 1982 wj-bjregression slope \\\n", - "95 1.8e+02 4.2e-01 \n", - "96     \n", - "97 1.0e+03 8.0e-02 \n", - "98 3.3e+01 1.9e+00 \n", - "99 6.1e+02 1.3e-01 \n", - "\n", - " 1982 sample size for regression (n=) 1983 deviation statistic dj \\\n", - "95 5 3.4e+01 \n", - "96     \n", - "97 3   \n", - "98 7   \n", - "99 8 1.5e+01 \n", - "\n", - " 1983 wj-bjregression slope 1983 sample size for regression (n=) Remarks \n", - "95 8.9e-01 16   \n", - "96       \n", - "97       \n", - "98       \n", - "99 7.7e-01 2   " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data from Fock & Ehrich\n", - "fe_data = pd.read_excel('fish_biomass_data.xlsx','Fock & Ehrich', skiprows=1)\n", - "\n", - "# Use only data for the Myctophidae family\n", - "fe_mycto = fe_data[fe_data['Family'] == 'Myctophidae']\n", - "\n", - "fe_mycto.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The allometric parameters a and b are plugged into the following equation to produce the weight of each fish species based on the length of each fish: $$ W = a*L^b$$\n", - "Where W is the fish weight and L is the fish length. 
For each fish species, we calculate the characteristic fish length by using the mean of the minimum and maximum reported fish lengths:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "fe_mean_length = np.mean([fe_mycto['Maximum length (mm)'].astype(float),fe_mycto['Minimum length (mm)'].astype(float)])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We plug the mean length of each fish species into the allometric equation along with its specific parameters a and b to generate the mean wet weight of each fish. We use the geometric mean of the weights of all species as our best estimate of the weight of a single mesopelagic fish. We convert wet weight to carbon mass assuming 70% water content and 50% carbon out of the dry weight." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of a single mesopelagic fish is ≈0.46 g C\n" - ] - } - ], - "source": [ - "# The allometric equation to convert fish length into fish weight.
The equation takes values\n", - "# in cm and the data is given in mm so we divide the length by a factor of 10\n", - "calculate_weight = lambda x,a,b: a*(x/10)**b\n", - "\n", - "# Transform the mean lengths of each fish species into a characteristic weight of each fish species\n", - "fe_mean_weight = calculate_weight(fe_mean_length,fe_mycto['a(SL)'],fe_mycto['b(SL)'])\n", - "\n", - "# Conversion factor from wet weight to carbon mass\n", - "wet_to_c = 0.15\n", - "\n", - "# Calculate the mean carbon content of a single mesopelagic fish\n", - "fish_cc = gmean(fe_mean_weight.astype(float))*wet_to_c\n", - "\n", - "print('Our best estimate for the carbon content of a single mesopelagic fish is ≈%.2f g C' %fish_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total number of mesopelagic fish by dividing our best estimate for the total biomass of mesopelagic fish by our estimate for the carbon content of a single mesopelagic fish:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of individual fish is ≈1e+15.\n" - ] - } - ], - "source": [ - "# Estimate the total number of fish\n", - "tot_fish_num = best_mesopelagic_biomass/fish_cc\n", - "\n", - "print('Our best estimate for the total number of individual fish is ≈%.0e.' %tot_fish_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Fish',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,mul_CI)\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Fish'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Fish'), \n", - " col=['Number of individuals'],\n", - " values=tot_fish_num,\n", - " path='../../../results.xlsx')\n", - "\n", - "# Update the data mentioned in the MS\n", - "update_MS_data(row ='Decrease in biomass of fish',\n", - " values=fish_biomass_decrease/1e15,\n", - " path='../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/humans/.ipynb_checkpoints/humans-checkpoint.ipynb b/animals/chordates/humans/.ipynb_checkpoints/humans-checkpoint.ipynb deleted file mode 100644 index bd4e576..0000000 --- a/animals/chordates/humans/.ipynb_checkpoints/humans-checkpoint.ipynb +++ /dev/null @@ -1,146 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "import sys\n", - "sys.path.insert(0,'../../../statistics_helper/')\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of humans\n", - "To estimate the total biomass of humans, we rely on estimates of the total human population from the [UN World Population Prospects of 
2017](https://esa.un.org/unpd/wpp/Download/Standard/Population/) (File - 'Total Population - Both Sexes'). We use the estimate for the total human population in 2015" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The UN estimate for the total human population is ≈7.4e+09\n" - ] - } - ], - "source": [ - "#Load data from the UN\n", - "data = pd.read_excel('humans_data.xlsx',index_col=0,skiprows=16)\n", - "\n", - "# Use data from 2015, multiply by 1000 because data is given in thousands\n", - "tot_human_pop = data.loc[1,'2015']*1e3\n", - "\n", - "print('The UN estimate for the total human population is ≈%.1e' %tot_human_pop)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use an estimate for the average body mass of humans of ≈50 kg from [Hern](http://link.springer.com/article/10.1023/A:1022153110536). We convert the average body weight to carbon mass assuming 70% water content and 50% carbon out of the dry weight:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "wet_to_c = 0.15\n", - "human_cc = 5e4*wet_to_c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total biomass of humans by multiplying the total number of humans by the average carbon content of a single human:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of humans is ≈0.06 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = tot_human_pop*human_cc\n", - "\n", - "print('Our best estimate for the total biomass of humans is ≈%.2f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ 
- "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Humans',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,None)\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Humans'), \n", - " col='Biomass [Gt C]',\n", - " values=best_estimate/1e15,\n", - " path='../../../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Humans'), \n", - " col='Number of individuals',\n", - " values=tot_human_pop,\n", - " path='../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/livestock/.ipynb_checkpoints/livestock_biomass-checkpoint.ipynb b/animals/chordates/livestock/.ipynb_checkpoints/livestock_biomass-checkpoint.ipynb deleted file mode 100644 index 9529908..0000000 --- a/animals/chordates/livestock/.ipynb_checkpoints/livestock_biomass-checkpoint.ipynb +++ /dev/null @@ -1,1430 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "sys.path.insert(0,'../../../statistics_helper/')\n", - "from excel_utils 
import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of livestock\n", - "To estimate the biomass of livestock, we rely on data on global stocks of cattle, sheep, goats, and pigs from the Food and Agriculture Organization database FAOStat. We downloaded data from the domain Production/Live animals.\n", - "We combined data on the total stocks of each animal with estimates of the mean mass of each type of animal species (in kg) from [Dong et al.](http://www.ipcc-nggip.iges.or.jp/public/2006gl/pdf/4_Volume4/V4_10_Ch10_Livestock.pdf), Annex 10A.2, Tables 10A-4 to 10A-9.\n", - "\n", - "Here are samples of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Domain CodeDomainArea CodeAreaElement CodeElementItem CodeItemYear CodeYearUnitValueFlagFlag Description
0QALive Animals5100Africa5111Stocks1107Asses20142014Head18946358AAggregate, may include official, semi-official...
1QALive Animals5100Africa5111Stocks946Buffaloes20142014Head3949287AAggregate, may include official, semi-official...
2QALive Animals5100Africa5111Stocks1126Camels20142014Head23533724AAggregate, may include official, semi-official...
3QALive Animals5100Africa5111Stocks866Cattle20142014Head312327289AAggregate, may include official, semi-official...
4QALive Animals5100Africa5111Stocks1016Goats20142014Head374380445AAggregate, may include official, semi-official...
\n", - "
" - ], - "text/plain": [ - " Domain Code Domain Area Code Area Element Code Element \\\n", - "0 QA Live Animals 5100 Africa 5111 Stocks \n", - "1 QA Live Animals 5100 Africa 5111 Stocks \n", - "2 QA Live Animals 5100 Africa 5111 Stocks \n", - "3 QA Live Animals 5100 Africa 5111 Stocks \n", - "4 QA Live Animals 5100 Africa 5111 Stocks \n", - "\n", - " Item Code Item Year Code Year Unit Value Flag \\\n", - "0 1107 Asses 2014 2014 Head 18946358 A \n", - "1 946 Buffaloes 2014 2014 Head 3949287 A \n", - "2 1126 Camels 2014 2014 Head 23533724 A \n", - "3 866 Cattle 2014 2014 Head 312327289 A \n", - "4 1016 Goats 2014 2014 Head 374380445 A \n", - "\n", - " Flag Description \n", - "0 Aggregate, may include official, semi-official... \n", - "1 Aggregate, may include official, semi-official... \n", - "2 Aggregate, may include official, semi-official... \n", - "3 Aggregate, may include official, semi-official... \n", - "4 Aggregate, may include official, semi-official... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load global stocks data\n", - "stocks = pd.read_csv('FAOSTAT_stock_data_mammals.csv')\n", - "stocks.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Cattle - dairyCattle - non-dairyBuffaloesSwine - marketSwine - breedingSheepGoatsHorsesAssesMulesCamelsCamelids, other
IPCC Area
Indian Subcontinent27511029528282.8e+013.0e+01238130130217217
Eastern Europe550391380501804.8e+013.8e+01377130130217217
Africa27517338028282.8e+013.0e+01238130130217217
Oceania500330380451804.8e+013.8e+01377130130217217
Western Europe600420380501984.8e+013.8e+01377130130217217
\n", - "
" - ], - "text/plain": [ - " Cattle - dairy Cattle - non-dairy Buffaloes \\\n", - "IPCC Area \n", - "Indian Subcontinent 275 110 295 \n", - "Eastern Europe 550 391 380 \n", - "Africa 275 173 380 \n", - "Oceania 500 330 380 \n", - "Western Europe 600 420 380 \n", - "\n", - " Swine - market Swine - breeding Sheep Goats Horses \\\n", - "IPCC Area \n", - "Indian Subcontinent 28 28 2.8e+01 3.0e+01 238 \n", - "Eastern Europe 50 180 4.8e+01 3.8e+01 377 \n", - "Africa 28 28 2.8e+01 3.0e+01 238 \n", - "Oceania 45 180 4.8e+01 3.8e+01 377 \n", - "Western Europe 50 198 4.8e+01 3.8e+01 377 \n", - "\n", - " Asses Mules Camels Camelids, other \n", - "IPCC Area \n", - "Indian Subcontinent 130 130 217 217 \n", - "Eastern Europe 130 130 217 217 \n", - "Africa 130 130 217 217 \n", - "Oceania 130 130 217 217 \n", - "Western Europe 130 130 217 217 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load species body mass data\n", - "body_mass = pd.read_excel('livestock_body_mass.xlsx',skiprows=1,index_col=0) \n", - "body_mass.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We pivot the stocks DataFrame to have a view of each kind of animal at each region:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemAssesBuffaloesCamelids, otherCamelsCattleGoatsHorsesMulesPigsSheep
Area
Africa1.9e+073.9e+060.0e+002.4e+073.1e+083.7e+086.1e+061.0e+063.4e+073.4e+08
Americas6.8e+061.3e+068.9e+060.0e+005.1e+083.6e+073.3e+075.9e+061.7e+088.6e+07
Asia1.6e+071.9e+080.0e+004.2e+064.9e+085.8e+081.4e+073.0e+065.9e+085.4e+08
Eastern Europe1.0e+051.7e+040.0e+007.4e+034.0e+074.6e+062.8e+063.5e+035.3e+073.6e+07
Northern America5.2e+040.0e+000.0e+000.0e+001.0e+082.6e+061.1e+074.0e+038.1e+076.1e+06
Oceania9.0e+032.4e+020.0e+000.0e+004.0e+074.0e+064.0e+050.0e+005.3e+061.0e+08
Southern Asia8.3e+061.5e+080.0e+001.7e+062.7e+082.9e+081.3e+065.8e+051.1e+071.5e+08
Western Europe3.4e+046.5e+030.0e+000.0e+004.2e+072.1e+061.1e+063.1e+046.5e+071.1e+07
\n", - "
" - ], - "text/plain": [ - "Item Asses Buffaloes Camelids, other Camels Cattle Goats \\\n", - "Area \n", - "Africa 1.9e+07 3.9e+06 0.0e+00 2.4e+07 3.1e+08 3.7e+08 \n", - "Americas 6.8e+06 1.3e+06 8.9e+06 0.0e+00 5.1e+08 3.6e+07 \n", - "Asia 1.6e+07 1.9e+08 0.0e+00 4.2e+06 4.9e+08 5.8e+08 \n", - "Eastern Europe 1.0e+05 1.7e+04 0.0e+00 7.4e+03 4.0e+07 4.6e+06 \n", - "Northern America 5.2e+04 0.0e+00 0.0e+00 0.0e+00 1.0e+08 2.6e+06 \n", - "Oceania 9.0e+03 2.4e+02 0.0e+00 0.0e+00 4.0e+07 4.0e+06 \n", - "Southern Asia 8.3e+06 1.5e+08 0.0e+00 1.7e+06 2.7e+08 2.9e+08 \n", - "Western Europe 3.4e+04 6.5e+03 0.0e+00 0.0e+00 4.2e+07 2.1e+06 \n", - "\n", - "Item Horses Mules Pigs Sheep \n", - "Area \n", - "Africa 6.1e+06 1.0e+06 3.4e+07 3.4e+08 \n", - "Americas 3.3e+07 5.9e+06 1.7e+08 8.6e+07 \n", - "Asia 1.4e+07 3.0e+06 5.9e+08 5.4e+08 \n", - "Eastern Europe 2.8e+06 3.5e+03 5.3e+07 3.6e+07 \n", - "Northern America 1.1e+07 4.0e+03 8.1e+07 6.1e+06 \n", - "Oceania 4.0e+05 0.0e+00 5.3e+06 1.0e+08 \n", - "Southern Asia 1.3e+06 5.8e+05 1.1e+07 1.5e+08 \n", - "Western Europe 1.1e+06 3.1e+04 6.5e+07 1.1e+07 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Replace NaN with zeros\n", - "stocks.fillna(value=0,inplace=True)\n", - "stock_pivot = pd.pivot(stocks.Area,stocks.Item, stocks.Value).astype(float)\n", - "\n", - "# Replace NaN with zeros\n", - "stock_pivot.fillna(value=0,inplace=True)\n", - "\n", - "stock_pivot" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There is a difference between the body mass of a dairy producing cow to a non-dairy producing cow. We thus count seperately the dairy producing cattle from the non-dairy producing cattle. Data about the amount of dairy cattle comes from the FAOStat domain Production - Livestock Primary.\n", - "There is also a difference in body mass between breeding and non-breeding pigs. 
We assume 90% of the population is breeding based on IPCC, 2006, Vol.4, Ch.10,Table.10.19." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemAssesBuffaloesCamelids, otherCamelsCattle - non-dairyGoatsHorsesMulesSwine - breedingSheepCattle - dairySwine - market
Area
Africa1.9e+073.9e+060.0e+002.4e+072.4e+083.7e+086.1e+061.0e+063.1e+073.4e+08674365683.4e+06
Americas6.8e+061.3e+068.9e+060.0e+004.5e+083.6e+073.3e+075.9e+061.5e+088.6e+07549305191.7e+07
Asia1.6e+071.9e+080.0e+004.2e+063.8e+085.8e+081.4e+073.0e+065.3e+085.4e+081075711935.9e+07
Eastern Europe1.0e+051.7e+040.0e+007.4e+032.3e+074.6e+062.8e+063.5e+034.8e+073.6e+07161887765.3e+06
Northern America5.2e+040.0e+000.0e+000.0e+009.1e+072.6e+061.1e+074.0e+037.3e+076.1e+06101613108.1e+06
Oceania9.0e+032.4e+020.0e+000.0e+003.3e+074.0e+064.0e+050.0e+004.8e+061.0e+0868747515.3e+05
Southern Asia8.3e+061.5e+080.0e+001.7e+062.0e+082.9e+081.3e+065.8e+051.0e+071.5e+08693250631.1e+06
Western Europe3.4e+046.5e+030.0e+000.0e+003.1e+072.1e+061.1e+063.1e+045.8e+071.1e+07112894096.5e+06
\n", - "
" - ], - "text/plain": [ - "Item Asses Buffaloes Camelids, other Camels \\\n", - "Area \n", - "Africa 1.9e+07 3.9e+06 0.0e+00 2.4e+07 \n", - "Americas 6.8e+06 1.3e+06 8.9e+06 0.0e+00 \n", - "Asia 1.6e+07 1.9e+08 0.0e+00 4.2e+06 \n", - "Eastern Europe 1.0e+05 1.7e+04 0.0e+00 7.4e+03 \n", - "Northern America 5.2e+04 0.0e+00 0.0e+00 0.0e+00 \n", - "Oceania 9.0e+03 2.4e+02 0.0e+00 0.0e+00 \n", - "Southern Asia 8.3e+06 1.5e+08 0.0e+00 1.7e+06 \n", - "Western Europe 3.4e+04 6.5e+03 0.0e+00 0.0e+00 \n", - "\n", - "Item Cattle - non-dairy Goats Horses Mules \\\n", - "Area \n", - "Africa 2.4e+08 3.7e+08 6.1e+06 1.0e+06 \n", - "Americas 4.5e+08 3.6e+07 3.3e+07 5.9e+06 \n", - "Asia 3.8e+08 5.8e+08 1.4e+07 3.0e+06 \n", - "Eastern Europe 2.3e+07 4.6e+06 2.8e+06 3.5e+03 \n", - "Northern America 9.1e+07 2.6e+06 1.1e+07 4.0e+03 \n", - "Oceania 3.3e+07 4.0e+06 4.0e+05 0.0e+00 \n", - "Southern Asia 2.0e+08 2.9e+08 1.3e+06 5.8e+05 \n", - "Western Europe 3.1e+07 2.1e+06 1.1e+06 3.1e+04 \n", - "\n", - "Item Swine - breeding Sheep Cattle - dairy Swine - market \n", - "Area \n", - "Africa 3.1e+07 3.4e+08 67436568 3.4e+06 \n", - "Americas 1.5e+08 8.6e+07 54930519 1.7e+07 \n", - "Asia 5.3e+08 5.4e+08 107571193 5.9e+07 \n", - "Eastern Europe 4.8e+07 3.6e+07 16188776 5.3e+06 \n", - "Northern America 7.3e+07 6.1e+06 10161310 8.1e+06 \n", - "Oceania 4.8e+06 1.0e+08 6874751 5.3e+05 \n", - "Southern Asia 1.0e+07 1.5e+08 69325063 1.1e+06 \n", - "Western Europe 5.8e+07 1.1e+07 11289409 6.5e+06 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data on the number of dairy producing cattle\n", - "dairy = pd.read_csv('FAOSTAT_cattle_dairy_data.csv')\n", - "\n", - "# Set the index of the DataFrame to be the region so we can compare with the stocks data\n", - "dairy.set_index('Area',inplace=True)\n", - "\n", - "# Add a category of dairy producing cattle\n", - "stock_pivot['Cattle - dairy'] = dairy.Value\n", - "\n", - "# Set the amount 
of non-dairy producing cattle to be the total number minus the dairy producing cattle\n", - "stock_pivot['Cattle'] = stock_pivot['Cattle']-stock_pivot['Cattle - dairy']\n", - "\n", - "# Rename the Cattle column name to Cattle - non-dairy\n", - "stock_pivot.rename(columns={'Cattle': 'Cattle - non-dairy'}, inplace=True)\n", - "\n", - "# Set the amount of non-breeding (market) pigs (swine) to 10% of the total amount of pigs\n", - "stock_pivot['Swine - market'] = 0.1*stock_pivot['Pigs']\n", - "\n", - "# Set the amount of breeding pigs (swine) to 90% of the total amount of pigs\n", - "stock_pivot['Pigs'] *= 0.9\n", - "\n", - "# Rename the Pigs column name to Swine - breeding\n", - "stock_pivot.rename(columns={'Pigs': 'Swine - breeding'}, inplace=True)\n", - "\n", - "stock_pivot" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data on the mass of animals is divided into different regions than the FAOStat data so we need to preprocess the stocks DataFrame and merge it with the body mass data:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemAssesBuffaloesCamelids, otherCamelsCattle - non-dairyGoatsHorsesMulesSwine - breedingSheepCattle - dairySwine - market
Area
Africa1.9e+073.9e+060.0e+002.4e+072.4e+083.7e+086.1e+061.0e+063.1e+073.4e+086.7e+073.4e+06
Latin America6.7e+061.3e+068.9e+060.0e+003.6e+083.3e+072.2e+075.9e+068.0e+078.0e+074.5e+078.9e+06
Asia8.2e+063.7e+070.0e+002.5e+061.8e+082.9e+081.3e+072.4e+065.2e+083.8e+083.8e+075.8e+07
Eastern Europe1.0e+051.7e+040.0e+007.4e+032.3e+074.6e+062.8e+063.5e+034.8e+073.6e+071.6e+075.3e+06
Northern America5.2e+040.0e+000.0e+000.0e+009.1e+072.6e+061.1e+074.0e+037.3e+076.1e+061.0e+078.1e+06
Oceania9.0e+032.4e+020.0e+000.0e+003.3e+074.0e+064.0e+050.0e+004.8e+061.0e+086.9e+065.3e+05
Indian Subcontinent8.3e+061.5e+080.0e+001.7e+062.0e+082.9e+081.3e+065.8e+051.0e+071.5e+086.9e+071.1e+06
Western Europe3.4e+046.5e+030.0e+000.0e+003.1e+072.1e+061.1e+063.1e+045.8e+071.1e+071.1e+076.5e+06
\n", - "
" - ], - "text/plain": [ - "Item Asses Buffaloes Camelids, other Camels \\\n", - "Area \n", - "Africa 1.9e+07 3.9e+06 0.0e+00 2.4e+07 \n", - "Latin America 6.7e+06 1.3e+06 8.9e+06 0.0e+00 \n", - "Asia 8.2e+06 3.7e+07 0.0e+00 2.5e+06 \n", - "Eastern Europe 1.0e+05 1.7e+04 0.0e+00 7.4e+03 \n", - "Northern America 5.2e+04 0.0e+00 0.0e+00 0.0e+00 \n", - "Oceania 9.0e+03 2.4e+02 0.0e+00 0.0e+00 \n", - "Indian Subcontinent 8.3e+06 1.5e+08 0.0e+00 1.7e+06 \n", - "Western Europe 3.4e+04 6.5e+03 0.0e+00 0.0e+00 \n", - "\n", - "Item Cattle - non-dairy Goats Horses Mules \\\n", - "Area \n", - "Africa 2.4e+08 3.7e+08 6.1e+06 1.0e+06 \n", - "Latin America 3.6e+08 3.3e+07 2.2e+07 5.9e+06 \n", - "Asia 1.8e+08 2.9e+08 1.3e+07 2.4e+06 \n", - "Eastern Europe 2.3e+07 4.6e+06 2.8e+06 3.5e+03 \n", - "Northern America 9.1e+07 2.6e+06 1.1e+07 4.0e+03 \n", - "Oceania 3.3e+07 4.0e+06 4.0e+05 0.0e+00 \n", - "Indian Subcontinent 2.0e+08 2.9e+08 1.3e+06 5.8e+05 \n", - "Western Europe 3.1e+07 2.1e+06 1.1e+06 3.1e+04 \n", - "\n", - "Item Swine - breeding Sheep Cattle - dairy Swine - market \n", - "Area \n", - "Africa 3.1e+07 3.4e+08 6.7e+07 3.4e+06 \n", - "Latin America 8.0e+07 8.0e+07 4.5e+07 8.9e+06 \n", - "Asia 5.2e+08 3.8e+08 3.8e+07 5.8e+07 \n", - "Eastern Europe 4.8e+07 3.6e+07 1.6e+07 5.3e+06 \n", - "Northern America 7.3e+07 6.1e+06 1.0e+07 8.1e+06 \n", - "Oceania 4.8e+06 1.0e+08 6.9e+06 5.3e+05 \n", - "Indian Subcontinent 1.0e+07 1.5e+08 6.9e+07 1.1e+06 \n", - "Western Europe 5.8e+07 1.1e+07 1.1e+07 6.5e+06 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Preprocessing the stocks DataFrame\n", - "\n", - "# Calculate the total number of animals in Latin America by subtracting values for Northern America from the total\n", - "# values for the Americas\n", - "stock_pivot.loc['Americas'] -= stock_pivot.loc['Northern America']\n", - "\n", - "# Change name of Americas to Latin America\n", - 
"stock_pivot.rename(index={'Americas': 'Latin America'},inplace=True)\n", - "\n", - "# Calculate the total number of animals in Asia without the Indian Subcontinent by subtracting values for the Southern Asia \n", - "# from the total values for the Asia\n", - "stock_pivot.loc['Asia'] -= stock_pivot.loc['Southern Asia']\n", - "\n", - "# Change name of Southern Asia to Indian Subcontinent\n", - "stock_pivot.rename(index={'Southern Asia': 'Indian Subcontinent'},inplace=True)\n", - "\n", - "\n", - "stock_pivot\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We now multiply the stocks of each animal type and for each region by the characteristic body weight of each animal:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AssesBuffaloesCamelids, otherCamelsCattle - dairyCattle - non-dairyGoatsHorsesMulesSheepSwine - breedingSwine - market
Africa2.5e+091.5e+090.0e+005.1e+091.9e+104.2e+101.1e+101.4e+091.3e+089.5e+098.7e+089.6e+07
Asia1.1e+091.4e+100.0e+005.5e+081.3e+107.2e+101.1e+104.9e+093.1e+081.9e+109.4e+102.9e+09
Eastern Europe1.3e+076.3e+060.0e+001.6e+068.9e+099.2e+091.8e+081.0e+094.6e+051.8e+098.6e+092.6e+08
Indian Subcontinent1.1e+094.5e+100.0e+003.7e+081.9e+102.2e+108.8e+093.2e+087.6e+074.3e+092.9e+083.2e+07
Latin America8.8e+085.0e+081.9e+090.0e+001.8e+101.1e+119.9e+085.2e+097.7e+082.2e+092.2e+092.5e+08
Middle eastnannannannannannannannannannannannan
Northern America6.8e+060.0e+000.0e+000.0e+006.1e+093.5e+101.0e+084.0e+095.2e+053.0e+081.4e+103.7e+08
Oceania1.2e+069.3e+040.0e+000.0e+003.4e+091.1e+101.5e+081.5e+080.0e+005.0e+098.7e+082.4e+07
Western Europe4.4e+062.5e+060.0e+000.0e+006.8e+091.3e+107.9e+074.1e+084.1e+065.2e+081.2e+103.2e+08
\n", - "
" - ], - "text/plain": [ - " Asses Buffaloes Camelids, other Camels \\\n", - "Africa 2.5e+09 1.5e+09 0.0e+00 5.1e+09 \n", - "Asia 1.1e+09 1.4e+10 0.0e+00 5.5e+08 \n", - "Eastern Europe 1.3e+07 6.3e+06 0.0e+00 1.6e+06 \n", - "Indian Subcontinent 1.1e+09 4.5e+10 0.0e+00 3.7e+08 \n", - "Latin America 8.8e+08 5.0e+08 1.9e+09 0.0e+00 \n", - "Middle east nan nan nan nan \n", - "Northern America 6.8e+06 0.0e+00 0.0e+00 0.0e+00 \n", - "Oceania 1.2e+06 9.3e+04 0.0e+00 0.0e+00 \n", - "Western Europe 4.4e+06 2.5e+06 0.0e+00 0.0e+00 \n", - "\n", - " Cattle - dairy Cattle - non-dairy Goats Horses \\\n", - "Africa 1.9e+10 4.2e+10 1.1e+10 1.4e+09 \n", - "Asia 1.3e+10 7.2e+10 1.1e+10 4.9e+09 \n", - "Eastern Europe 8.9e+09 9.2e+09 1.8e+08 1.0e+09 \n", - "Indian Subcontinent 1.9e+10 2.2e+10 8.8e+09 3.2e+08 \n", - "Latin America 1.8e+10 1.1e+11 9.9e+08 5.2e+09 \n", - "Middle east nan nan nan nan \n", - "Northern America 6.1e+09 3.5e+10 1.0e+08 4.0e+09 \n", - "Oceania 3.4e+09 1.1e+10 1.5e+08 1.5e+08 \n", - "Western Europe 6.8e+09 1.3e+10 7.9e+07 4.1e+08 \n", - "\n", - " Mules Sheep Swine - breeding Swine - market \n", - "Africa 1.3e+08 9.5e+09 8.7e+08 9.6e+07 \n", - "Asia 3.1e+08 1.9e+10 9.4e+10 2.9e+09 \n", - "Eastern Europe 4.6e+05 1.8e+09 8.6e+09 2.6e+08 \n", - "Indian Subcontinent 7.6e+07 4.3e+09 2.9e+08 3.2e+07 \n", - "Latin America 7.7e+08 2.2e+09 2.2e+09 2.5e+08 \n", - "Middle east nan nan nan nan \n", - "Northern America 5.2e+05 3.0e+08 1.4e+10 3.7e+08 \n", - "Oceania 0.0e+00 5.0e+09 8.7e+08 2.4e+07 \n", - "Western Europe 4.1e+06 5.2e+08 1.2e+10 3.2e+08 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wet_biomass =(body_mass*stock_pivot)\n", - "wet_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum over all regions and convert units from kg wet weight to Gt C carbon by assuming carbon is ≈15% of the wet weight (30% dry weight of wet weight and carbon is 50% of dry weight)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Asses 0.001\n", - "Buffaloes 0.009\n", - "Camelids, other 0.000\n", - "Camels 0.001\n", - "Cattle - dairy 0.014\n", - "Cattle - non-dairy 0.047\n", - "Goats 0.005\n", - "Horses 0.003\n", - "Mules 0.000\n", - "Sheep 0.006\n", - "Swine - breeding 0.020\n", - "Swine - market 0.001\n", - "dtype: float64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.3f}'.format\n", - "\n", - "# conversion factor from kg wet weight to Gt C\n", - "kg_to_gt_c = 1000*0.15/1e15\n", - "total_biomass = wet_biomass.sum()*kg_to_gt_c\n", - "total_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum over all animal categories to generate our best estimate for the total biomass of livestock" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of mammal livestock is 0.1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = total_biomass.sum()\n", - "print('Our best estimate for the biomass of mammal livestock is %.1f Gt C' % best_estimate)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Livestock',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate,None)\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Livestock'), \n", - " col='Biomass [Gt C]',\n", - " values=best_estimate,\n", - " path='../../../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Livestock'), \n", - " col='Number of individuals',\n", - " values=stock_pivot.sum().sum(),\n", - " path='../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/livestock/poultry/.ipynb_checkpoints/birds-checkpoint.ipynb b/animals/chordates/livestock/poultry/.ipynb_checkpoints/birds-checkpoint.ipynb deleted file mode 100644 index 741f83d..0000000 --- a/animals/chordates/livestock/poultry/.ipynb_checkpoints/birds-checkpoint.ipynb +++ /dev/null @@ -1,138 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:194: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " self._setitem_with_indexer(indexer, value)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:27: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - "/usr/local/lib/python3.5/dist-packages/pandas/core/frame.py:3027: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " return super(DataFrame, self).rename(**kwargs)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:34: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" - ] - }, - { - "data": { - "text/plain": [ - "Chicken - Broilers 0.001883\n", - "Chicken - Layers 0.001837\n", - "Ducks 0.000457\n", - "Turkeys 0.000431\n", - "dtype: float64" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0,'../../../../statistics_helper/')\n", - "from excel_utils import *\n", - "\n", - "bird = pd.read_csv('FAOSTAT_data_bird.csv')\n", - "egg = pd.read_csv('FAOSTAT_data_eggs.csv')\n", - "body_mass = pd.read_csv('ipcc_animal_weight.csv')\n", - "body_mass.set_index('IPCC Area',inplace=True)\n", - "egg.set_index('Area',inplace=True)\n", - "bird_pivot = pd.pivot(bird.Area,bird.Item, bird.Value).astype(float)\n", - "\n", - "bird_pivot['Chicken - Layers'] = egg.Value\n", - "bird_pivot['Chickens'] -= egg.Value\n", - "bird_pivot.rename(columns={'Chickens': 'Chicken - Broilers'},inplace=True)\n", - "birds = ['Chicken - Broilers','Chicken - Layers','Ducks','Turkeys']\n", - "bird_pivot_filt = bird_pivot[birds]\n", - "body_mass_filt = body_mass[birds]\n", - "\n", - "# Convert units\n", - "bird_pivot_filt *= 1e3\n", - "\n", - "# Calculate the 
total number of animals in Latin America by subtracting values for Northern America from the total\n", - "# values for the Americas\n", - "bird_pivot_filt.loc['Americas'] -= bird_pivot_filt.loc['Northern America']\n", - "\n", - "# Change name of Americas to Latin America\n", - "bird_pivot_filt.rename(index={'Americas': 'Latin America'},inplace=True)\n", - "\n", - "# Calculate the total number of animals in Asia without the Indian Subcontinent by subtracting values for the Southern Asia \n", - "# from the total values for the Asia\n", - "bird_pivot_filt.loc['Asia'] -= bird_pivot_filt.loc['Southern Asia']\n", - "\n", - "# Change name of Southern Asia to Indian Subcontinent\n", - "bird_pivot_filt.rename(index={'Southern Asia': 'Indian Subcontinent'},inplace=True)\n", - "\n", - "bird_biomass = ((body_mass_filt*bird_pivot_filt)*1e3*0.15).sum()/1e15\n", - "bird_biomass" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of poultry is ≈0.005 Gt C\n" - ] - } - ], - "source": [ - "print('Our estimate for the total biomass of poultry is ≈%.3f Gt C' % bird_biomass.sum())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "update_MS_data(row='Biomass of poultry',values= bird_biomass.sum(),path='../../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/livestock/poultry/.ipynb_checkpoints/poultry-checkpoint.ipynb 
b/animals/chordates/livestock/poultry/.ipynb_checkpoints/poultry-checkpoint.ipynb deleted file mode 100644 index 728f447..0000000 --- a/animals/chordates/livestock/poultry/.ipynb_checkpoints/poultry-checkpoint.ipynb +++ /dev/null @@ -1,1102 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "sys.path.insert(0,'../../../../statistics_helper/')\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of poultry\n", - "To estimate the biomass of poultry, we rely on data on global stocks of chickens, ducks, and turkeys from the Food and Agriculture Organization database FAOStat. We downloaded data from the domain Production/Live animals.\n", - "We combined data on the total stocks of each animal with estimates of the mean mass of each type of animal species (in kg) from [Dong et al.](http://www.ipcc-nggip.iges.or.jp/public/2006gl/pdf/4_Volume4/V4_10_Ch10_Livestock.pdf), Annex 10A.2, Tables 10A-4 to 10A-9.\n", - "\n", - "Here are samples of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Domain CodeDomainArea CodeAreaElement CodeElementItem CodeItemYear CodeYearUnitValueFlagFlag Description
0QALive Animals5100Africa5112Stocks1057Chickens201420141000 Head1809059AAggregate, may include official, semi-official...
1QALive Animals5100Africa5112Stocks1068Ducks201420141000 Head28539AAggregate, may include official, semi-official...
2QALive Animals5100Africa5112Stocks1072Geese and guinea fowls201420141000 Head25296AAggregate, may include official, semi-official...
3QALive Animals5100Africa5112Stocks1083Pigeons, other birds201420141000 Head13384AAggregate, may include official, semi-official...
4QALive Animals5100Africa5112Stocks1079Turkeys201420141000 Head23658AAggregate, may include official, semi-official...
\n", - "
" - ], - "text/plain": [ - " Domain Code Domain Area Code Area Element Code Element \\\n", - "0 QA Live Animals 5100 Africa 5112 Stocks \n", - "1 QA Live Animals 5100 Africa 5112 Stocks \n", - "2 QA Live Animals 5100 Africa 5112 Stocks \n", - "3 QA Live Animals 5100 Africa 5112 Stocks \n", - "4 QA Live Animals 5100 Africa 5112 Stocks \n", - "\n", - " Item Code Item Year Code Year Unit Value \\\n", - "0 1057 Chickens 2014 2014 1000 Head 1809059 \n", - "1 1068 Ducks 2014 2014 1000 Head 28539 \n", - "2 1072 Geese and guinea fowls 2014 2014 1000 Head 25296 \n", - "3 1083 Pigeons, other birds 2014 2014 1000 Head 13384 \n", - "4 1079 Turkeys 2014 2014 1000 Head 23658 \n", - "\n", - " Flag Flag Description \n", - "0 A Aggregate, may include official, semi-official... \n", - "1 A Aggregate, may include official, semi-official... \n", - "2 A Aggregate, may include official, semi-official... \n", - "3 A Aggregate, may include official, semi-official... \n", - "4 A Aggregate, may include official, semi-official... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load bird data\n", - "bird = pd.read_csv('FAOSTAT_data_bird.csv')\n", - "bird.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Cattle - dairyCattle - non-dairyBuffaloesSwine - marketSwine - breedingChicken - BroilersChicken - LayersDucksTurkeysSheepGoatsHorsesAssesMulesCamelsLlamas
IPCC Area
Indian Subcontinent27511029528289.0e-011.8e+002.7e+006.8e+002.8e+013.0e+01238130130217217
Eastern Europe550391380501809.0e-011.8e+002.7e+006.8e+004.8e+013.8e+01377130130217217
Africa27517338028289.0e-011.8e+002.7e+006.8e+002.8e+013.0e+01238130130217217
Oceania500330380451809.0e-011.8e+002.7e+006.8e+004.8e+013.8e+01377130130217217
Western Europe600420380501989.0e-011.8e+002.7e+006.8e+004.8e+013.8e+01377130130217217
\n", - "
" - ], - "text/plain": [ - " Cattle - dairy Cattle - non-dairy Buffaloes \\\n", - "IPCC Area \n", - "Indian Subcontinent 275 110 295 \n", - "Eastern Europe 550 391 380 \n", - "Africa 275 173 380 \n", - "Oceania 500 330 380 \n", - "Western Europe 600 420 380 \n", - "\n", - " Swine - market Swine - breeding Chicken - Broilers \\\n", - "IPCC Area \n", - "Indian Subcontinent 28 28 9.0e-01 \n", - "Eastern Europe 50 180 9.0e-01 \n", - "Africa 28 28 9.0e-01 \n", - "Oceania 45 180 9.0e-01 \n", - "Western Europe 50 198 9.0e-01 \n", - "\n", - " Chicken - Layers Ducks Turkeys Sheep Goats \\\n", - "IPCC Area \n", - "Indian Subcontinent 1.8e+00 2.7e+00 6.8e+00 2.8e+01 3.0e+01 \n", - "Eastern Europe 1.8e+00 2.7e+00 6.8e+00 4.8e+01 3.8e+01 \n", - "Africa 1.8e+00 2.7e+00 6.8e+00 2.8e+01 3.0e+01 \n", - "Oceania 1.8e+00 2.7e+00 6.8e+00 4.8e+01 3.8e+01 \n", - "Western Europe 1.8e+00 2.7e+00 6.8e+00 4.8e+01 3.8e+01 \n", - "\n", - " Horses Asses Mules Camels Llamas \n", - "IPCC Area \n", - "Indian Subcontinent 238 130 130 217 217 \n", - "Eastern Europe 377 130 130 217 217 \n", - "Africa 238 130 130 217 217 \n", - "Oceania 377 130 130 217 217 \n", - "Western Europe 377 130 130 217 217 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load body mass data\n", - "body_mass = pd.read_csv('ipcc_animal_weight.csv')\n", - "body_mass.set_index('IPCC Area',inplace=True)\n", - "body_mass.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We pivot the stocks DataFrame to have a view of each kind of animal at each region:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemChickensDucksGeese and guinea fowlsPigeons, other birdsTurkeys
Area
Africa1.8e+062.9e+042.5e+041.3e+042.4e+04
Americas5.4e+062.8e+047.8e+020.0e+003.1e+05
Asia1.2e+079.9e+052.8e+051.6e+041.5e+04
Eastern Europe1.0e+065.4e+041.7e+040.0e+003.7e+04
Northern America2.1e+069.3e+033.5e+020.0e+002.4e+05
Oceania1.3e+051.4e+038.2e+010.0e+001.4e+03
Southern Asia2.8e+067.6e+041.0e+030.0e+002.0e+03
Western Europe4.5e+053.0e+048.6e+021.8e+033.3e+04
\n", - "
" - ], - "text/plain": [ - "Item Chickens Ducks Geese and guinea fowls \\\n", - "Area \n", - "Africa 1.8e+06 2.9e+04 2.5e+04 \n", - "Americas 5.4e+06 2.8e+04 7.8e+02 \n", - "Asia 1.2e+07 9.9e+05 2.8e+05 \n", - "Eastern Europe 1.0e+06 5.4e+04 1.7e+04 \n", - "Northern America 2.1e+06 9.3e+03 3.5e+02 \n", - "Oceania 1.3e+05 1.4e+03 8.2e+01 \n", - "Southern Asia 2.8e+06 7.6e+04 1.0e+03 \n", - "Western Europe 4.5e+05 3.0e+04 8.6e+02 \n", - "\n", - "Item Pigeons, other birds Turkeys \n", - "Area \n", - "Africa 1.3e+04 2.4e+04 \n", - "Americas 0.0e+00 3.1e+05 \n", - "Asia 1.6e+04 1.5e+04 \n", - "Eastern Europe 0.0e+00 3.7e+04 \n", - "Northern America 0.0e+00 2.4e+05 \n", - "Oceania 0.0e+00 1.4e+03 \n", - "Southern Asia 0.0e+00 2.0e+03 \n", - "Western Europe 1.8e+03 3.3e+04 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "# Replace NaN with zeros\n", - "bird.fillna(value=0,inplace=True)\n", - "\n", - "bird_pivot = pd.pivot(bird.Area,bird.Item, bird.Value).astype(float)\n", - "\n", - "# Replace NaN with zeros\n", - "bird_pivot.fillna(value=0,inplace=True)\n", - "\n", - "\n", - "bird_pivot" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There is a difference between the body mass of a egg laying chicken to a non-egg laying chicken. We thus count seperately the egg laying chicken from the non-egg laying chicken. Data about the amount of egg laying chicken comes from the FAOStat domain Production - Livestock Primary." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemChicken - BroilersChicken - LayersDucksTurkeys
Area
Africa1.3e+065052032.9e+042.4e+04
Americas4.3e+0611493002.8e+043.1e+05
Asia7.4e+0645351749.9e+051.5e+04
Eastern Europe5.8e+054243705.4e+043.7e+04
Northern America1.8e+063980259.3e+032.4e+05
Oceania1.0e+05223341.4e+031.4e+03
Southern Asia2.2e+066601177.6e+042.0e+03
Western Europe2.8e+051683593.0e+043.3e+04
\n", - "
" - ], - "text/plain": [ - "Item Chicken - Broilers Chicken - Layers Ducks Turkeys\n", - "Area \n", - "Africa 1.3e+06 505203 2.9e+04 2.4e+04\n", - "Americas 4.3e+06 1149300 2.8e+04 3.1e+05\n", - "Asia 7.4e+06 4535174 9.9e+05 1.5e+04\n", - "Eastern Europe 5.8e+05 424370 5.4e+04 3.7e+04\n", - "Northern America 1.8e+06 398025 9.3e+03 2.4e+05\n", - "Oceania 1.0e+05 22334 1.4e+03 1.4e+03\n", - "Southern Asia 2.2e+06 660117 7.6e+04 2.0e+03\n", - "Western Europe 2.8e+05 168359 3.0e+04 3.3e+04" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data about egg laying chicken\n", - "egg = pd.read_csv('FAOSTAT_data_eggs.csv')\n", - "\n", - "# Set the index of the DataFrame to be the region so we can compare with the stocks data\n", - "egg.set_index('Area',inplace=True)\n", - "\n", - "# Add a category of egg laying chicken\n", - "bird_pivot['Chicken - Layers'] = egg.Value\n", - "\n", - "# Set the amount of non-egg laying chicken to be the total number minus the egg laying chicken\n", - "bird_pivot['Chickens'] -= egg.Value\n", - "\n", - "# Rename the Chicken column name to Chicken - Broileers\n", - "bird_pivot.rename(columns={'Chickens': 'Chicken - Broilers'},inplace=True)\n", - "\n", - "# Use only data for chicken, ducks and turkeys\n", - "birds = ['Chicken - Broilers','Chicken - Layers','Ducks','Turkeys']\n", - "bird_pivot_filt = bird_pivot[birds]\n", - "body_mass_filt = body_mass[birds]\n", - "\n", - "bird_pivot_filt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Data on the mass of animals is divided into different regions than the FAOStat data so we need preprocess the stocks DataFrame and merge it with the body mass data:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:194: SettingWithCopyWarning: \n", - 
"A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " self._setitem_with_indexer(indexer, value)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " \n", - "/usr/local/lib/python3.5/dist-packages/pandas/core/frame.py:3027: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " return super(DataFrame, self).rename(**kwargs)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:13: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " del sys.path[0]\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ItemChicken - BroilersChicken - LayersDucksTurkeys
Area
Africa1.3e+095.1e+082.9e+072.4e+07
Latin America2.5e+097.5e+081.8e+076.9e+07
Asia5.2e+093.9e+099.1e+081.3e+07
Eastern Europe5.8e+084.2e+085.4e+073.7e+07
Northern America1.8e+094.0e+089.3e+062.4e+08
Oceania1.0e+082.2e+071.4e+061.4e+06
Indian Subcontinent2.2e+096.6e+087.6e+072.0e+06
Western Europe2.8e+081.7e+083.0e+073.3e+07
\n", - "
" - ], - "text/plain": [ - "Item Chicken - Broilers Chicken - Layers Ducks Turkeys\n", - "Area \n", - "Africa 1.3e+09 5.1e+08 2.9e+07 2.4e+07\n", - "Latin America 2.5e+09 7.5e+08 1.8e+07 6.9e+07\n", - "Asia 5.2e+09 3.9e+09 9.1e+08 1.3e+07\n", - "Eastern Europe 5.8e+08 4.2e+08 5.4e+07 3.7e+07\n", - "Northern America 1.8e+09 4.0e+08 9.3e+06 2.4e+08\n", - "Oceania 1.0e+08 2.2e+07 1.4e+06 1.4e+06\n", - "Indian Subcontinent 2.2e+09 6.6e+08 7.6e+07 2.0e+06\n", - "Western Europe 2.8e+08 1.7e+08 3.0e+07 3.3e+07" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Convert units\n", - "bird_pivot_filt *= 1e3\n", - "\n", - "# Calculate the total number of animals in Latin America by subtracting values for Northern America from the total\n", - "# values for the Americas\n", - "bird_pivot_filt.loc['Americas'] -= bird_pivot_filt.loc['Northern America']\n", - "\n", - "# Change name of Americas to Latin America\n", - "bird_pivot_filt.rename(index={'Americas': 'Latin America'},inplace=True)\n", - "\n", - "# Calculate the total number of animals in Asia without the Indian Subcontinent by subtracting values for the Southern Asia \n", - "# from the total values for the Asia\n", - "bird_pivot_filt.loc['Asia'] -= bird_pivot_filt.loc['Southern Asia']\n", - "\n", - "# Change name of Southern Asia to Indian Subcontinent\n", - "bird_pivot_filt.rename(index={'Southern Asia': 'Indian Subcontinent'},inplace=True)\n", - "\n", - "bird_pivot_filt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We now multiply the stocks of each animal type and for each region by the characteristic body weight of each animal:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Chicken - BroilersChicken - LayersDucksTurkeys
Africa1.2e+099.1e+087.7e+071.6e+08
Asia4.7e+097.0e+092.5e+098.6e+07
Eastern Europe5.2e+087.6e+081.5e+082.5e+08
Indian Subcontinent2.0e+091.2e+092.1e+081.4e+07
Latin America2.3e+091.4e+095.0e+074.7e+08
Middle eastnannannannan
Northern America1.6e+097.2e+082.5e+071.7e+09
Oceania9.3e+074.0e+073.9e+069.4e+06
Western Europe2.6e+083.0e+088.0e+072.3e+08
\n", - "
" - ], - "text/plain": [ - " Chicken - Broilers Chicken - Layers Ducks Turkeys\n", - "Africa 1.2e+09 9.1e+08 7.7e+07 1.6e+08\n", - "Asia 4.7e+09 7.0e+09 2.5e+09 8.6e+07\n", - "Eastern Europe 5.2e+08 7.6e+08 1.5e+08 2.5e+08\n", - "Indian Subcontinent 2.0e+09 1.2e+09 2.1e+08 1.4e+07\n", - "Latin America 2.3e+09 1.4e+09 5.0e+07 4.7e+08\n", - "Middle east nan nan nan nan\n", - "Northern America 1.6e+09 7.2e+08 2.5e+07 1.7e+09\n", - "Oceania 9.3e+07 4.0e+07 3.9e+06 9.4e+06\n", - "Western Europe 2.6e+08 3.0e+08 8.0e+07 2.3e+08" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wet_bird_biomass = (body_mass_filt*bird_pivot_filt)\n", - "wet_bird_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum over all regions and convert units from kg wet weight to Gt C carbon by assuming carbon is ≈15% of the wet weight (30% dry weight of wet weight and carbon is 50% of dry weight)." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Chicken - Broilers 0.002\n", - "Chicken - Layers 0.002\n", - "Ducks 0.000\n", - "Turkeys 0.000\n", - "dtype: float64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.3f}'.format\n", - "\n", - "# conversion factor from kg wet weight to Gt C\n", - "kg_to_gt_c = 1000*0.15/1e15\n", - "total_biomass = wet_bird_biomass.sum()*kg_to_gt_c\n", - "total_biomass" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of poultry is ≈0.005 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = total_biomass.sum()\n", - "print('Our estimate for the total biomass of poultry is ≈%.3f Gt C' % best_estimate)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "update_MS_data(row='Biomass of poultry',values= best_estimate,path='../../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/wild_birds/.ipynb_checkpoints/wild_birds-checkpoint.ipynb b/animals/chordates/wild_birds/.ipynb_checkpoints/wild_birds-checkpoint.ipynb deleted file mode 100644 index 133007f..0000000 --- a/animals/chordates/wild_birds/.ipynb_checkpoints/wild_birds-checkpoint.ipynb +++ /dev/null @@ -1,239 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../../statistics_helper/')\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of wild birds\n", - "To estimate the total biomass of wild birds, we rely on two estimates. \n", - "\n", - "## Estimate 1\n", - "The first is based on estimates of the total number of bird species reported in [Gaston & Blackburn](https://doi-org/10.1023/A:1018341530497). Gaston & Blackburn report an estimate of 2-4×10$^{11}$ for the total number of individual birds. We use the average values of this range which is ≈3×10$^{11}$ individual birds." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "tot_num_birds = 3e11" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert this total number of birds into an estimate of the total biomass of wild birds, we use data on the relation between population density and body weight reported in [Nee et al.](https://doi.org/10.1038/351312a0). Here is a sample of the data in Nee et al:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0,0.5,'Population size')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEQCAYAAACqduMIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XvUXFWZ5/Hv7w0GBCWkidNq4OWW\nNBpwtE3k4q2xW53QQ8RRlxKwHbtpMiA42qhLXd0jtpclvRxsQeIlHWK0VVCRmQHEgdW9QBCIhuCF\nmzQxEgk6IhDiBdpc3mf+qFOkeKm36lTVudfvs1ZW3jpVdWrXrqrz7L2fffZRRGBmZjbdRNkFMDOz\nanKAMDOzrhwgzMysKwcIMzPrygHCzMy6coAwM7OuHCDMzKwrBwgzM+tqj7IL0CbpZcAptMq0KCJe\nXHKRzMzGWq49CElrJD0g6fZp25dKulvSRknvA4iIGyLidOBK4At5lsvMzPrLe4hpLbC0c4OkWcBK\n4HhgEbBc0qKOh5wMfCXncpmZWR+5BoiIuB54eNrmo4CNEbEpIrYDlwAnAkiaBLZFxG/yLJeZmfVX\nRg5iPnBfx+0twNHJ36cCn+/1ZEkrgBUA++yzz+LnPOc5eZTRzKyxNmzY8GBEPKPf4yqTpAaIiHNS\nPGaVpF8Ay571rGctvuWWWwoomZlZc0janOZxZUxzvR84sOP2Acm21CLiiohYMWfOnEwLZmZmu5UR\nINYDCyUdImk2cBJw+SA7kLRM0qpt27blUkAzM8t/muvFwM3A4ZK2SDo1InYCZwFXA3cBX4uIOwbZ\nr3sQZmb5yzUHERHLZ9h+FXDVsPuVtAxYtmDBgmF3YWZmfdRyqQ33IMzM8lfLAOEcRDobNm9l5bUb\n2bB5a9lFMbMaqtQ017Qi4grgiiVLlpxWdlmqasPmrZyyeh3bd04xe48JvvzXx7D4oLllFys3GzZv\nZd2mhzjm0P0b/T7NilTLAGH9rdv0ENt3TjEVsGPnFOs2PdTYA+e4BUOzoniIqaGOOXR/Zu8xwSzB\nU/aY4JhD9+/6uLoMQ/UqZ7dgaGajq2UPwkNM/S0+aC5f/utjeg671KXl3a+c7WC4Y+dUz2BoZoOp\nZYCwdBYfNLfnAb8uw1D9ypkmGJrZ4GoZIOp2HkReCdRR91uXl
neacvYLhmY2OEVE2WUY2pIlS6Lo\nxfoGPSjnNYyT1X7rMvunLuU0qwNJGyJiSb/H1bIHUZZhDsp5DeNktd+6tLzrUk6zJqnlLKayDDNb\nJu1sokHltV8zs7Za9iDKykF0GwvvN/SRVwLViVkzy5tzEAPqDAhALaaJ2nhwnsbScg4iJ51j4Suv\n3ViLaaLWfHU5p8XqxTmIETQ9D1CXs6zNZ5NbPtyDGEHZeYA8hxTcIq2XupzTYvXiADGisqZf5n0A\nr8tZ1tZSdmPFmqmWAaJuZ1LnIe8DuFuk9eNzRSxrtQwQXqwvmwN4ryEqt0jNzNNca2yUHETVcwye\nsmmWH09zHQOjDClUOcdQ9eBlNi48zXVMVXmKrqdsmlWDexA1NsowTJVzDE6Qm1VDZXIQkiaADwP7\nArdExBf6PWeccxBNH4ZxDsIsP2lzELkOMUlaI+kBSbdP275U0t2SNkp6X7L5ROAAYAewJc9yNUHT\nh2EWHzSXM1+xwMHBrER55yDWAks7N0iaBawEjgcWAcslLQIOB26KiLOBM3IuV+1VOYdgZs2Qaw4i\nIq6XdPC0zUcBGyNiE4CkS2j1Hu4DtieP2ZVnuZqgyjkEqw4P1dkoykhSz6cVDNq2AEcD5wOfkvQy\n4PqZnixpBbACYHJyMsdiVp/PnB1v/Q7+Tc9TWf4qM4spIh4FTk3xuFWSfgEsmz179uL8S1YdebUG\n3cqsnzQH/yqf62L1UEaAuB84sOP2Acm21MZxqY28WoNuZdZTmoO/pwvbqMo4UW49sFDSIZJmAycB\nlw+yA0nLJK3atm1bLgWsorxmLTV9NlRTpZmk0M5Tnf3qwx34bSi59iAkXQwcB8yTtAU4JyIuknQW\ncDUwC1gTEXcMst9x7EEMcz3sYfdr1Zd2koLzVDaKypwoN4iO5b5Pu+eee8ouDlDMOH5e18N2DsJs\nvDR6sb6q9SCKGsfP63rYbmWaWTe1XKxv1BxE1tdaLmMc3yfKmVnexq4HkUdrP+tx/DRDPosPmssH\nTjiCb93+C44/8lnuAZhZ5moZIEaRx9zwLM9qThvANmzeyoeuvIPtO6dYf+/DHP7MpztImFmmxm6I\nKa+hmawWl0s7XOXpqdWW9TCmWRlq2YMYZYhp2NZ+3jN92vufu/fsVMNV7UC3fccUkpi79+zMy2TD\n8cmH1hS1DBCjGnTWTt4/+On7/8AJR7D10e2pchAf+D+3MxXBh65snUrS73mWPy9xYU1RywDRcR5E\nIa+X9w9++v63PrqdM1/R/71tfXQ7UxFMBWzfMfV4sHCrtTwbNm/l/kceY49ZE+za5ZMPrd5qmYOI\niCsiYsWcOXMKeb28p5QOu//O501M6PFg0S0n4THx/LV7gpd872cQwUlHTTpQW63VsgdRtLyvvTDs\n/jufN3fv2Xzoyju65i48Jl6Mzp7grqng2fs91fVsteYAkVLeZxv32n+vBPn053U7L8Jj4k+Wx6SD\nPNe18nIoVoZaBoiicxC9FDG7aZjzImB3wtoL8j1RXj2qvHqa7gFaWWoZIKqyFlMRP9y0rf/Ox3VL\nWPvypLvl2aPKo6fpHqCVpZZJ6qoo4mS1tAnsfgnrrE7ka4K6rWNVt/Jac9SyB1EVRQzdDLLuf5qE\nteU/6SBrdSuvNUctrwfRtmTJkrjlllsKf93p12Uo+zrR3R7rpGZ50ta9PyMrS6OvB1GmbnmHNCe1\njfoavWY4dXusr/FQjkEmFTjxbFVXyxxEmdekHjbvMMiJaoO8hhftqxYvtmhNUsseRJmzmIbJOwza\nWhzkNTyFtVp6fR6dQ0r+3KwOnIMYwqBjxyuv3ch519zNVLS6bC9ZOI93vvKPUo1Pz917dt8F+DyW\nXS0z5YSmNxIgn/yVWT/OQeRo0PH9zqW5p4AbNz7I+nsf7tmTaG9P0/NwvqFaun0e3YaUPO3Yqq6W\nOYg6abcmP3DCEbxk4TwmR
OpxZ49TN4fPZbA6cg8iR92u87D+3odTjzt7nLo5fC6D1VFlAoSk44AP\nA3cAl0TEdaUWKAPdrvMwyEHCB5Vm8VCg1U2uAULSGuAE4IGIOLJj+1LgfGAWsDoizgUC+C2wF7Al\nz3IVpVsPYNCDhA8qzedJBlZVuc5ikvRyWgf9L7YDhKRZwL8Br6IVCNYDy4EfR8SUpD8EPhERp/Tb\nf1mzmAZRtR9/1coz7nzCnJWhErOYIuJ6SQdP23wUsDEiNgFIugQ4MSLuTO7fCuyZZ7mKVKUegA9G\nT1Z2wPRKrVZlZeQg5gP3ddzeAhwt6XXAfwL2Ay6c6cmSVgArACYnJ3MsZvP4YPRERQXMXkHIExGs\nyiqTpI6Iy4DLUjxulaRfAMtmz569OP+SVceorV0fjJ4oTcActc77BaGsJyKU3SOyZikjQNwPHNhx\n+4BkW2pVuWBQL1n/ULNo7XpW1BP1C5hZ1Pm6TQ/x+x1TBK0LOXULQlkNQ3oI0bJWRoBYDyyUdAit\nwHAScPIgO6jSJUe7yeOHmtXw0CgHo6a1TvsFzCzqfO7es2lPA5lKbufFQ4iWtbynuV4MHAfMk7QF\nOCciLpJ0FnA1rWmuayLijkH2W/UeRL8f6jAH2rKHh5raOu0VMLOo862Pbn/87PkJtW7npezviDVP\n3rOYls+w/SrgqmH3W/UeRL8VPYc50JY9PDSOrdMs6rzIg7bzGZa1vudBSNobeBcwGRGnSVoIHB4R\nVxZRwF6qfB7ETD+uzpVdZwnOfvXhmV9waJDyDPL8U1ave/xA15QeRBHqeKBtao/RWrI8D+LzwAbg\n2OT2/cDXgdICRNV7EDDz0MUxh+7PHhNix65g1oQKGQZwgrtcVToXJq1x7DHak6UJEIdFxJskLQeI\niEclKedy9VT1HERfEhDJ//lpt1x//shjpSe4rV6czzBIFyC2S3oqrbWSkHQY8PtcS9VHHXoQM1m3\n6SF27mpNe9y1K7+WWWevYY8JscesCXbt8o/d0nGP0SBdgPgg8H+BAyV9GXgJ8NYcy9RXnXsQRbXM\nOocIdk0FbzrqQObv91T/2MfIqLkP9xitb4CIiGskbQCOAQS8IyIezL1kDdVumX3j1i3kNcC0YfNW\n7n/ksSf0Gl7/wgP8Yx8jTjJbFvoGCEn/CpwXEd/s2LYqIlbkWrKGu+zWLWzfOcU3bt2S6Y93+tDS\nSUdN8rqaBoc6zv6pCieZLQtpLjl6CPBeSed0bOs7PSpPkpZJWrVt27YyizG0PC8lOn1o6dn7PbWW\nB4Z2oDvvmrs5ZfU6NmzeWnaRaiXvS5xu2LyVlddu9OfScGlyEI8AfwZcIOkK4M35Fqm/OucgYPg8\nRJoWdVNmn2TVAm7X2dy9Z7P10e1j0xvJM8ns4avxkSZAKCJ2Am+T9FbgO4C/DSMY5seb9kdZldkn\nVVh5trPOpqKVQNvzKeNzQMsryezhq/GRJkB8tv1HRKyVdBtwZn5FGg+D/ngH+VGWPfukKifmddYZ\ntOZp+4A2uqb0Uq2/GQOEpH0j4tfA1yX9QcddPwXenXvJeqjzeRDDOubQ/dljVutHOTEh7n/kMTZs\n3lrJA10VVp6F3QeydlkmyGc8Pq2mJN2r0ku1/PXqQXwFOIHWMhsBT5iVGcChOZarp7rnIIYW0WoF\n7wou+d7PuCzjGVBZqUoLs/NANmoOIu8LB9VN2b1UK8aMASIiTkj+P6S44thM1m16iJ1TuxdWrPL4\n77AtzDxa2FkcyLK6cJDH7a1u0pwH8RLgBxHxO0lvBl4IfDIifpZ76cZMmmsXb98xxRStawtUefx3\n0ANz1i3sLINNFgf3fkvAe7hmcK63/KVJUn8GeL6k59Na9ns18M/An+RZsHEzyLWLmzhlM8sWdtbB\nJoshs5l6VU0beiqK660YaQLEzogISScCFyZXhDs174KNmzQHyLqN+w7Swssyb5H1cE5WSdl
un5+H\nnobjeitGmgDxG0nvp3WC3MslTQBPybdYvVVtFlMWXd2qJHazMmgLL8uZMXnUZV7BuWmfe1Fcb8VI\nc0W5ZwInA+sj4gZJk8BxEfHFIgrYSxWuKDdqV7czuABPOEDWeYy1rCvntdWp7upU1ipxvQ0vsyvK\nRcT/Az7RcftnQOnBoSpG6ep2Cy7tg2jdx1jLbuHVaTiuTmWtEtdb/tIMMVkPoxwIewWXuo+x+mSq\n5nLLfXw4QIxolANhr+BSdgs8C01v4Y3jgXJ6z/YDJxzRuBl1tlvfHESVVSEHMapeB5lxPADVRd2H\nAIfVmVuaACYmxFTEWNVBE2SWg0hOlPsgcFDyeAEREZkvtSFpH+DbwAcj4sqs919FvVrZTW+B5ynv\n4Fr3IcBhdfZspVZwGLc6GCdphpguAv6G1ppMuwbZuaQ1tNZzeiAijuzYvhQ4H5gFrI6Ic5O73gt8\nbZDXMJsur9Z9Z9BpwhDgMKafsPmhK+8YuzoYJ2kCxLaI+NaQ+18LXEjHrCdJs4CVwKuALcB6SZcD\n84E7gb2GfC0zIF3rftAeRregM65J+M6e7eHPfPpQdeDh03pIEyCulfRx4DLg9+2NEXFrvydGxPWS\nDp62+ShgY0RsApB0CXAi8DRgH2AR8JikqyJiavo+Ja0AVgBMTk6mKL4VpSo/+n6t+2F6GN2Czpmv\nWDD2B7dhhkHHNX9TR2kCxNHJ/50JjQD+dMjXnA/c13F7C3B0RJwFkFy17sFuwQEgIlYBq6CVpB6y\nDJaxXj/6NIEjy+DSb2bZMPmDcR1SysO45m/qKM2Jcq8ooiAdr7e232OqttSGzfyjT9NazKNF2atl\nO8zB3ud1ZMfBtj7SzGKaA5wDvDzZ9G3gQxGxbcjXvB84sOP2Acm21Mb2gkEVNtOPPk1rsegW5bAH\n+ybPKityeNDBtj7SDDGtAW4H3pjc/gvg88DrhnzN9cBCSYfQCgwn0VrrKTX3IKpnph99mtZiGS3K\nJh/sB1VGTsD1Xw9pAsRhEfH6jtt/L+kHaXYu6WLgOGCepC3AOcly4WcBV9Oa5romIu4YsNxWQd1+\n9Glai25Rlss5AZtJmgDxmKSXRsR34PET5x5Ls/OIWD7D9quAq1KX8snP9xBTjaRpLbpFWR7nBGwm\naQLEGcAXklyEgIeBt+ZZKLNxU+YUYffgbCZpZjH9gNYlR/dNbv8691L14RxEtVTl/Ie6qsJ5Ae7B\nWTczBghJb46IL0k6e9p2ACLiE12fWAAPMT1RmQfoKhzc6i7r63E7WOdvXOq5Vw9in+T/p3e5r9QT\n1NyD2K3sA7QTnKNLmwPod1Aq+7swLsapnmcMEBHxueTPf4mIGzvvSxLVpXEPYrdeB+giWjlOcA6n\n/dnM3Xs2Wx/d3ve6CmkOSnkE63FpKQ9inBpFaZLUnwJemGKblWCmA3RRrRwnOAfX/mx+v2OKACZE\n388ozUEp62A97Heo6UFlnBpFvXIQxwIvBp4xLQ+xL63zF0rjIabdZjpAF9nKcYJzMO3Ppj1Om+Yz\nSnNQyjpYD/MdGofhl3FqFPXqQcymtcLqHjwxD/Fr4A15FqofDzE9UbcD9Di1cuqm/dls3zHFFK0e\nRL/PKO1BKctgPcx3aFyGX8puFBXVS+t7yVFJB0XE5txKMIImXHI0T0V9iZo+pJCH6TmIqtbdsNfN\naAeVJvYgypZFLy2zS44CjybXgziCjov5RMSwy31bQYpo5YzDkEIeym6BpjVoOcdp+KUsRfbS0gSI\nLwNfpXXp0NOB/wr8KpfSWO2My5CCpVeX4FdXRQ4fpwkQ+ycL7L0jIr4NfFvS+txKlIKT1NXhXIdZ\nsYrspaXJQayLiGMkXQ1cAPwcuDQiDsutVCk5B1ENzkGY1UuWOYiPJAv1vYvW+Q/7An8zYvmsQao2\npOCAZZaNNIv1XZn8uQ0o9PKjZtN5uQmz4vQ6Ue5T9Fh
zKSL+ey4lMptBWctNmI2rXj2Iyg7uO0k9\nnspYbsLS89Be8/RarO8LRRZkED6TejyVsdyEpeOhvWbqm4OQdC1dhpp8opwVrYzlJiwdD+01U5pZ\nTO/u+Hsv4PXAznyKY9abD/7V5KG9Zkozi2nDtE03SvpeTuUxsxry0F4zpRli+oOOmxPAYmBObiUy\ns1py76550gwxbaCVgxCtoaWfAqdmXRBJzwXeAcwD/jUiPpP1a5iZZxtZemmGmA4ZdueS1tBa5O+B\niDiyY/tS4HxaFx5aHRHnRsRdwOmSJoAvAg4QZhnzbCMbxES/B0jaS9LZki6T9A1J75S0V7/nJdYC\nS6ftbxawEjgeWAQsl7Qoue81wDeBqwZ4D2aWUrfZRmYz6RsgaLXmj6C1DtOFyd//nGbnEXE98PC0\nzUcBGyNiU0RsBy4BTkwef3lEHA+ckq74ZjaI9myjWSmuYmeWJgdxZEQs6rh9raQ7R3jN+cB9Hbe3\nAEdLOg54HbAnPXoQklYAKwAmJydHKEaxPO5rVZDXbCN/v5spTYC4VdIxEbEOQNLR5LAMR0RcB1yX\n4nGrgFXQWu4763LkweO+ViVZzzby97u50gwxLQZuknSvpHuBm4EXSbpN0o+GeM37gQM7bh+QbEtN\n0jJJq7Zt2zbEyxfP477l2rB5Kyuv3ciGzVvLLkoj+fvdXGl6EEv7P2Qg64GFkg6hFRhOAk7O+DUq\nxWeZlset2/z5+91caaa5bpb0fOBlyaYbIuKHaXYu6WLgOGCepC3AOcnlS88CrqY1zXVNRNwxSKHr\ntlifzzItj9cIyp+/382V5kzqdwCnAZclm74kaVVEfKrfcyNi+Qzbr2KEqax1XO67SWeZ1ikh6dbt\ncAb9jJv0/bbd0lyT+kfAsRHxu+T2PsDNEfEfCyhfT1W8JnXWB8+qHYzrOGRTRB1W7XMaRR0/YxtM\nltekFrCr4/auZJtNk/UPq4o/1DoO2fRr3Y56cK/i5zSKOn7Glo80s5g+D3xX0gclfRBYB1yUa6n6\nqOospqxnc1RxdkjTTrRqH9zPu+ZuTlm9bqiZTjN9TnWdPdW0z9iGlyZJ/QlJ1wEvTTb9ZUR8P9dS\n9VHVJHXW491VHD9vWkIyi9Zyt8+pzr2Kpn3GNrwZA0Sy3tLpwALgNuDTEVGJCwVVNUmd9Q+rqj/U\nJiUkswjC3T6nldduzHyYpsg8R5M+4yw1KdeUxoxJaklfBXYAN9BaWO/eiHhngWXrq4pJaqufPH70\n7R5EO/B09iCGeb0690iaokmfQRZJ6kUR8bxkZxcBvoqcNVIereWZen/DHmTyShyPW4t4FOOYvO8V\nIHa0/4iInVJ1Ji5VdYjJrFO3wDPsQSaPfFSTWsRFqGJOMG+9AsTzJf06+VvAU5PbAiIi9s29dDOo\napLarJ9hDzJ55KPGsUU8iqrmBPM0Y4CIiFlFFsRsHIxykMl6KGwcW8SjGrfkfd8zqavMSWrrpgnj\n6kW9hybUlQ0uyzOpK8c5CJtJE8bVi3wP49YitsGkOZO6ciLiiohYMWfOnLKLYhVTxbPPB1W191DX\nM8JtdLXsQZjNpAnj6lV6D03okdnwHCCsUZow06RK78EzncabA4R1VefkZRPG1avyHqrUm7HiOUDY\nk3hYwdqq1Jux4tUyQHgWU748rGCdqtKbseJ5FpM9ia8HYGZQ0x6E5cvDCvVT55xRllwP2XKAsK48\nrFAfzhm1uB6yV8shJjPbrWon1pXF9ZA9BwizmnPOqMX1kL1KLdYn6bXAfwb2BS6KiGt6Pd6L9Zm1\neOy9xfWQTtrF+nIPEJLWACcAD0TEkR3blwLnA7OA1RFxbsd9c4H/GRGn9tq3A4RZtfgAXQ9VWs11\nLXAh8MX2BkmzgJXAq4AtwHpJl0fEnclD/i6538xqwkni5sk9BxER1wMPT9t8FLAxIjZFxHbgEuBE\ntfwD8K2IuDXvstW
JV9S0qnOSuHnKmuY6H7iv4/YW4Gjg7cArgTmSFkTEZ6c/UdIKYAXA5ORkAUUt\nn1tmVgdet6l5KnUeRERcAFzQ5zGrgFXQykEUUa6yeekLqwOfYNk8ZQWI+4EDO24fkGxLZdzWYnLL\nzOrCJ1g2S1kBYj2wUNIhtALDScDJJZWl8twyG0+eEWRlK2Ka68XAccA84JfAORFxkaQ/Bz5Ja5rr\nmoj46KD79jRXayrnnSxPlZnmGhHLZ9h+FXDVMPsctyEmGz9F5p3cU7GZ1HKpDS/3nU7dpsbWrbx5\nKmrZiHZP5bxr7uaU1etc9/YElZrFlJZ7EP3VbYiibuXNW1F5pzx7Ku6Z1F8tA0REXAFcsWTJktPK\nLktV1W1qbN3KW4QiZgTlNUPOAb8ZahkgrL+6TY2tW3mbIq+eigN+M9QyQHiIqb+6TY2tW3mbJI+e\nigN+M1Rque9BeZqrWXU5B1FdlZnmambjyWdV118tp7lKWiZp1bZt28ouiplZY9UyQPg8CDOz/NUy\nQJjVjU8CtDpyDsIsZz4nwOqqlj0I5yCsTnylNaurWgYI5yCsTopaV8ksax5iMsuZTwK0unKAMCtA\n088J8ElxzeQAYWYjcRK+uWqZgzCz6nASvrlqGSA8i8msOpyEby4v1mdmI3MOol68WF8FNeFH1IT3\nYNlrehJ+XDlAFKQJibwmvAczS6+WOYg6akIirwnvwczSc4AoSBMSeU14D2aWXmWS1JIOBf4WmBMR\nb0jznLolqZswft+E92A27tImqXMNEJLWACcAD0TEkR3blwLnA7OA1RFxbsd9lzY1QJiZVUHaAJH3\nENNaYGnnBkmzgJXA8cAiYLmkRTmXw8zMBpRrgIiI64GHp20+CtgYEZsiYjtwCXBinuWw0fhiN2bj\nqYxprvOB+zpubwGOlrQ/8FHgjyW9PyI+1u3JklYAKwAmJyfzLuvY89TW8jnvY2WpzHkQEfEQcHqK\nx62S9Atg2ezZsxfnX7Lx1m1qqw9SxXGAtjKVMc31fuDAjtsHJNtS8wWDiuOpreXyuSdWpjJ6EOuB\nhZIOoRUYTgJOHmQHkpYByxYsWJBD8ayTL3ZTrnaA3rFzygHaCpf3NNeLgeOAecAvgXMi4iJJfw58\nktY01zUR8dFh9u9prqPx2HY9+HOyrFXiPIi8dPQgTrvnnnvKLk4teWzbbHxV5TyIXDgHMTqPbZtZ\nP7UMEL5g0OicfDazfmo5xNTmHMRo0o5tewzcrFl8wSDrK81FXpyrMBtfHmKynpyrMBtftQwQTlIX\nx7kKs/HlISbrySfKmY2vWgYIn0ldLF+Q3mw8eYjJzMy6qmWAMDOz/DlAmJlZV7UMEJ7mamaWv1oG\nCOcgzMzyV8sAYWZm+av1WkySfgVsTm7OAfqNOfV6zEz3DbJ9+rZ5wIN9ypSlNHWQ5T5GrfNe96fd\n3u1xdav3QZ9fdL1X8bverQx5Pz+vY8xM9+VZ7wdFxDP6PioiGvEPWDXKY2a6b5Dt07cBt1StDrLc\nx6h1nkW9z/A51KreB31+0fVexe96Het90PuqUO9NGmK6YsTHzHTfINvTlCFPWbz+IPsYtc573Z92\ne9l1DqOXYdDnF13vVfyuQ/3qfdD7Sq/3Wg8xVZ2kWyLFkrqWLdd78Vzn5ci73pvUg6iiVWUXYEy5\n3ovnOi9HrvXuHoSZmXXlHoSZmXXlAGFmZl05QJiZWVcOEAWSdKikiyRdWnZZxoWk10r6J0lflfTq\nssszLiQ9V9JnJV0q6YyyyzNOJO0j6RZJJ4y6LweIEUlaI+kBSbdP275U0t2SNkp6H0BEbIqIU8sp\naXMMWOf/OyJOA04H3lRGeZtiwHq/KyJOB94IvKSM8jbFIPWeeC/wtSxe2wFidGuBpZ0bJM0CVgLH\nA4uA5ZIWFV+0xlrL4HX+d8n9Nry1DFDvkl4DfBO4qthiNs5aUta7pFcBdwIPZPHCD
hAjiojrgYen\nbT4K2Jj0GLYDlwAnFl64hhqkztXyD8C3IuLWosvaJIN+1yPi8og4Hjil2JI2y4D1fhxwDHAycJqk\nkY7xtbwmdQ3MB+7ruL0FOFrS/sBHgT+W9P6I+FgppWumrnUOvB14JTBH0oKI+GwZhWuwmb7rxwGv\nA/bEPYg8dK33iDgLQNJbgQchGeQ/AAAFS0lEQVQjYmqUF3GAKFBEPERrLNwKEhEXABeUXY5xExHX\nAdeVXIyxFRFrs9iPh5jycT9wYMftA5Jtlh/XeTlc7+UopN4dIPKxHlgo6RBJs4GTgMtLLlPTuc7L\n4XovRyH17gAxIkkXAzcDh0vaIunUiNgJnAVcDdwFfC0i7iiznE3iOi+H670cZda7F+szM7Ou3IMw\nM7OuHCDMzKwrBwgzM+vKAcLMzLpygDAzs64cIMzMrCsHCKsUSf8o6Z0dt6+WtLrj9nmSzu7x/P0k\nvW2G+w6evmTygGU7TtKVwz5/gNd5dpprhkj67QzbXzvT6sGS1kr6qaSeS75IOkzSD2Z6DRsPDhBW\nNTcCLwZIVqKcBxzRcf+LgZt6PH8/oGuAqIuI+HlEvGGEXbyW1hLQM3lPv0ULI+InEfGCEcpgDeAA\nYVVzE3Bs8vcRwO3AbyTNlbQn8FzgVgBJ75G0XtKPJP198pxzgXbr9+Nd9r+HpC9Luiu52tneyb7+\nTNL3Jd2WXKBlz2T7Ukk/lnQrrdVJkTQh6R5Jz+i4vbF9uy3Z137JkuMPSXpLsv2Lkl4laZakj3e8\nh/+W3P94T0fS3pK+JulOSf9L0nclLel4jY9K+qGkdZL+UNKLgdcAH0/q4LBelZ30FNYlZf2IewzW\nyQHCKiUifg7slDRJq7dwM/BdWkFjCXBbRGxX6/KhC2mti/8CYLGklwPvA34SES+IiPd0eYnDgU9H\nxHOBXwNvk7QXrYuyvCkinkdrleMzku3/BCwDFgPPTMo4BXyJ3dc5eCXww4j41bTXupHW1dSOADYB\nL0u2H0srEJ4KbIuIFwEvorV+/yHT9vE2YGtELAL+R1KOtn2AdRHxfOB64LSIuInWmjzvSergJ10r\nerfzgfOT972lz2NtzDhAWBXdRCs4tAPEzR23b0we8+rk3/dp9SieQytg9HNfRLT38SXgpbSCxk8j\n4t+S7V8AXp7s86cRcU+01qT5Usd+1gBvSf7+K+DzXV7rhmQ/Lwc+AzxP0nxaB/zfJeV/i6Qf0AqC\n+3d5Dy+ldTEYIuJ24Ecd920H2jmRDcDBKd7/dMcCX0/+/soQz7cG8/UgrIraeYjn0Rpiug94F60W\nf/tALOBjEfG5zidKOrjPvqcvPjbUYmQRcZ+kX0r6U1q9mG5XTbseOBOYBP4W+C/AG2gFDmi9h7dH\nxNWdT0rxHtp2xO7F1Hbh37NlzD0Iq6KbgBOAhyNiV0Q8TCv53B6agdYqln8l6WkAkuZL+g/Ab4Cn\n99j3pKR2juNk4DvA3cDBkhYk2/8C+Dbw42R7exx/+bR9rabVq/h6ROya/kIRcR+tJPvCiNiUvNa7\naQWO9ns4Q9JTkvfwR5L2mbabG4E3JvcvohU0++lXB53WAa9P/j4p5XNsTDhAWBXdRuvAum7atm0R\n8SBARFxDa0jkZkm3AZcCT0+u2nejpNtnSFLfDZwp6S5gLvCZiPh34C+Bryf7mgI+m2xfAXwzSVJP\nvxD85cDT6D681PZdoD10dQOtS0V+J7m9mtYF5m9NktKf48m9gE8Dz5B0J/AR4A5gW4/Xg9aQ1HuS\npHvPJDXwTuBsST8CFqTYt40RL/dtNqRkNtE/RsTL+j54+NeYBTwlIv49Odj/C3B4cqH6Yfa3Frgy\nIi5Nbu8NPBYRIekkYHlEnNjx+N9GxNNGfiNWSx6zNBuCpPcBZ9A995ClvYFrk2EoAW8bNjgktgEf\nljQvORdiMXChJAGP0Eq4kwSjbwC/HKn0VmvuQ
ZiZWVfOQZiZWVcOEGZm1pUDhJmZdeUAYWZmXTlA\nmJlZVw4QZmbW1f8HDEp25VvyB2QAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Load Nee et al. data\n", - "nee = pd.read_excel('wild_bird_data.xlsx',skiprows=1)\n", - "\n", - "# Plot data\n", - "plt.loglog(nee['Wet body weight [g]'],nee['Population size'],'.')\n", - "plt.xlabel('Wet body weight [g]')\n", - "plt.ylabel('Population size')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the weighted average weight of the bird species reported in Nee et al. as our best estimate of the average weight of individual birds. We convert the average wet weight to carbon mass assuming 70% water content and 50% carbon out of the dry weight." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "wet_to_c = 0.15\n", - "avg_bird_cc = np.average(nee['Wet body weight [g]'],weights=nee['Population size'])*wet_to_c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply our estimate of the total number of birds by our estimate of the average carbon content of a single bird to generate our estimate for the total biomass of wild birds:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of wild birds using method 1 is ≈0.004 Gt C\n" - ] - } - ], - "source": [ - "estimate_1 = tot_num_birds*avg_bird_cc\n", - "\n", - "print('Our best estimate for the total biomass of wild birds using method 1 is ≈%.3f Gt C' %(estimate_1/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimate 2\n", - "As an alternative estimate, we use data on the mass of individual birds for each bird species, the population density of each species, and the study area in which the population density was measured ([Novosolov et al.](http://dx.doi.org/10.1111/geb.12617)). 
This dataset included data on ≈350 bird species. Using multiple regression, a power-law (log-log) functional relation was established between body mass, study area and the total number of individuals measured in the study area. The functional relation established is: $$log_{10}(number\\ of\\ individuals) = 3.26-0.3×log_{10}(body\\ mass) + 0.63×log_{10}(study\\ area), (R^2 ≈0.3)$$ \n", - "\n", - "This relation was used to extrapolate the total number of individuals for ≈75% of the total number of bird species, by using breeding range size data from Novosolov et al. as a surrogate for study area, and data on body mass ([Meiri et al.](http://onlinelibrary.wiley.com/doi/10.1111/j.1365-2699.2010.02390.x/full)). From the total number of individuals the total biomass is calculated through multiplying the total number of individuals by the mean body mass. This approach yielded an estimate of ≈0.001 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of wild birds using method 2 is ≈0.001 Gt C\n" - ] - } - ], - "source": [ - "estimate_2 = 5012745870861*wet_to_c\n", - "print('Our best estimate for the total biomass of wild birds using method 2 is ≈%.3f Gt C' %(estimate_2/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate of the total biomass of wild birds, we use the geometric mean of the estimates based on the two methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of wild birds is ≈0.002 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = gmean([estimate_1,estimate_2])\n", - "\n", - "print('Our best estimate for the total biomass of wild birds is ≈%.3f Gt C' %(best_estimate/1e15))" - ] - }, - { - 
"cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Wild birds',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,None)\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Wild birds'), \n", - " col='Biomass [Gt C]',\n", - " values=best_estimate/1e15,\n", - " path='../../../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Wild birds'), \n", - " col='Number of individuals',\n", - " values=tot_num_birds,\n", - " path='../../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/chordates/wild_mammals/.ipynb_checkpoints/wild_mammal-checkpoint.ipynb b/animals/chordates/wild_mammals/.ipynb_checkpoints/wild_mammal-checkpoint.ipynb deleted file mode 100644 index e979109..0000000 --- a/animals/chordates/wild_mammals/.ipynb_checkpoints/wild_mammal-checkpoint.ipynb +++ /dev/null @@ -1,553 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "from scipy.stats import gmean\n", - 
"import sys\n", - "sys.path.insert(0,'../../../statistics_helper/')\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of wild mammals\n", - "To estimate the total biomass of wild mammals, we first estimate the total biomass of wild land mammals and wild marine mammals.\n", - "\n", - "## Estimating the biomass of wild land mammals\n", - "To estimate the total biomass of land mammals living today we rely on three sources. The first is [Smil](http://vaclavsmil.com/wp-content/uploads/PDR37-4.Smil_.pgs613-636.pdf), which estimates ≈0.025 Gt wet weight based on biomass densities per biome taken from the History Database of the Global Environment (HYDE)." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "smil_estimate = 0.025e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The second source is an estimate made in [Barnosky](http://dx.doi.org/10.1073/pnas.0801918105), which reports an estimate of ≈0.05 Gt wet weight." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "barnosky_estimate = 10**10.72*1000 #From figure 3" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The third source is based on data on the mass of individual mammals for each mammal species, the population density of each species, and the study area in which the population density was measured ([Novosolov et al.](http://dx.doi.org/10.1111/geb.12617)). This dataset included data on ≈350 mammal species. Using multiple regression, a power-law (log-log) functional relation was established between body mass, study area and the total number of individuals measured in the study area. 
The functional relation established is: $$log_{10}(number\\ of\\ individuals) = 1.65-0.53×log_{10}(body\\ mass) + 0.73×log_{10}(study\\ area), (R^2≈0.5)$$ This relation was used to extrapolate the total number of individuals for ≈3700 mammal species for which range and mass data is available, by using range sizes from IUCN data (http://www.iucnredlist.org/technical-documents/spatial-data#mammals) as a surrogate for study area, and data on body mass ([Meiri et al.](http://onlinelibrary.wiley.com/doi/10.1111/j.1365-2699.2010.02390.x/full); there are additional ≈1800 mammal species without mass data, but these usually have small body mass, small ranges and low population densities). From the total number of individuals the total biomass is calculated by multiplying the total number of individuals by the mean body mass. This approach yielded an estimate of ≈0.006 Gt wet weight." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Estimate produced by the third approach\n", - "shai_meiri_estimate = 5454700007879" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our estimate of the total biomass of wild land mammals is based on the geometric mean of these three estimates. We convert wet weight values to carbon mass assuming 70% water content and 50% carbon content out of dry weight." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of wild land mammals is ≈0.003 Gt C\n" - ] - } - ], - "source": [ - "# Conversion factor between wet weight and carbon mass\n", - "wet_to_c = 0.15\n", - "\n", - "# Estimate the total biomass of land mammals\n", - "best_land_mammal_biomass = gmean([smil_estimate,shai_meiri_estimate,barnosky_estimate])*wet_to_c\n", - "\n", - "print('Our best estimate of the total biomass of wild land mammals is ≈%.3f Gt C'%(best_land_mammal_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the biomass of wild marine mammals\n", - "For estimating the biomass of marine mammals, we rely on estimates made by [Christensen](https://open.library.ubc.ca/cIRcle/collections/ubctheses/831/items/1.0074892), based on using a dataset compiled by [Kaschner et al.](https://open.library.ubc.ca/collections/ubctheses/831/items/1.0074881), gathered from various resources. The biomass of marine mammals is dominated by whales and seals. Here is the data from Christensen:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MinMeanMax
Year
1800102061900000000126185600000000155876300000000
2000222680400000002969072000000042680410000000
\n", - "
" - ], - "text/plain": [ - " Min Mean Max\n", - "Year \n", - "1800 102061900000000 126185600000000 155876300000000\n", - "2000 22268040000000 29690720000000 42680410000000" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "christensen = pd.read_excel('marine_mammal_data.xlsx','Christensen',skiprows=1,index_col=0)\n", - "christensen" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the mean values for the year 2000 as our best estimate for the biomass of wild marine mammals. We convert the estimates of Christensen, which are of the total wet weight of marine mammals, to carbon mass assuming 70% water content and 50% carbon content of dry weight: " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of wild marine mammals is ≈0.004 Gt C\n" - ] - } - ], - "source": [ - "best_christensen = christensen.loc[2000,'Mean']*wet_to_c\n", - "\n", - "print('Our best estimate of the total biomass of wild marine mammals is ≈%.3f Gt C'%(best_christensen/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum our estimates for the total biomass of wild land and marine mammals to generate our best estimate for the total biomass of wild mammals:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of wild marine mammals is ≈0.007 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = best_christensen+best_land_mammal_biomass\n", - "\n", - "print('Our best estimate of the total biomass of wild marine mammals is ≈%.3f Gt C'%(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To estimate the uncertainty 
associated with our estimate of the total biomass of wild mammals, we first project the uncertainty associated with our estimate of the total biomass of wild land mammals and wild marine mammals, and then combine these uncertainties.\n", - "\n", - "## Wild land mammals\n", - "Our projection of the uncertainty associated with our estimate of the total biomass of wild land mammals is based on the multiplicative 95% confidence interval around the geometric mean of the three independent estimates we use as sources for our best estimate." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total biomass of wild land mammals is ≈4-fold\n" - ] - } - ], - "source": [ - "land_mammal_CI = geo_CI_calc(np.array([smil_estimate,shai_meiri_estimate,barnosky_estimate]))\n", - "\n", - "print('Our best projection for the uncertainty associated with our estimate of the total biomass of wild land mammals is ≈%0.0f-fold' %land_mammal_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Wild marine mammals\n", - "To project the uncertainty associated with our estimate of the total biomass of wild marine mammals, we rely both on the intra-study uncertainty reported by Christensen, as well as on a consistency check against data from the IUCN. Christensen reports a 95% confidence interval of ≈1.4-fold. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty reported by Christensen is ≈1.4-fold\n" - ] - } - ], - "source": [ - "marine_intra_CI = christensen.loc[2000,'Max']/christensen.loc[2000,'Mean']\n", - "\n", - "print('The intra-study uncertainty reported by Christensen is ≈%.1f-fold' %marine_intra_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a consistency check, we compared the data for ≈30 whale species which are the main contributors to the global marine mammal biomass with data from the IUCN. The correlation between the data is high (Spearman R$^2$=0.98), and the total biomass from both methods varies about ≈1.3-fold. " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The correlation coefficient between Christensen and the IUCN data is ≈0.98\n", - "The inter-study uncertainty between Christensen and the IUCN data is ≈1.3-fold\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAY4AAAEXCAYAAAC6baP3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmYXFWd//H3JyFxYQ0hOBrIxqa4\nzGB6MAg64IIwElCMSsQFRRAHRAUXFJVxRsfd3wyKQkCMaAAxgwgaRAYDQSWYbkQSNomBDEHHxBDD\nIpKEfH9/3FOkqFRV36quW0v35/U89fStU1X3fk911T117tkUEZiZmeU1qtMBmJlZb3HBYWZmDXHB\nYWZmDXHBYWZmDXHBYWZmDXHBYWZmDXHBYWZmDXHBYWZmDdmm1gOSrszx+gcj4rjWhWNmZt2uZsEB\nPA94d53HBZzT2nDMzKzb1Ss4zoyIG+q9WNKnWxyPmZl1uZptHBFxGYCkN1Y+VkorPcfMzEYODTbJ\noaRbIuLFg6WZmdnIUK9x/HDgn4GJks4ue2gHYFPRgZmZWXeq18bxB6AfOBIYKEt/GPhgkUGZmVn3\nynOpakxEbGxTPGZm1uXq1ThK9pf0r8Dk9HwBERHTigzMzMy6U54ax11kl6YGgCdK6RGxttjQzMys\nG+WpcayPiKsLj8TMzHpCzRqHpFJ32zcBo4HLgcdLj0fELYVH16BddtklpkyZ0ukwzMx6ysDAwJ8j\nYkLe59ercXyl4n5f2XYAr2gksHaYMmUK/f39nQ7DzKynSFrZyPNrFhwRccjQwzEzs+Fm0DYOSadV\nSV4PDETEra0PyczMulme9Tj6gJOAien2HuAw4HxJHykwttwkzZQ0Z/369Z0Oxcxs2MtTcOwGvDgi\nTo+I04HpwK7Ay4HjCowtt4i4KiJO3HHHHTsdipnZsJen4NiVst5UwEbgWRHxWEW6mZmNAHnGccwD\nbpb0o3R/JnCxpG2BOwqLzMzMchlYuY7FK9YyY9p4pk8eV/jxBi04IuLfJV0NHJiSToqIUp/XYwuL\nzMzMBjWwch3HXrCYDZs2M3abUcx794zCC49606rvEBEPSdoZWJFupcd2jogHC43MzMwGtXjFWjZs\n2szmgI2bNrN4xdrOFRzAxcARZHNUBWlyw7K/nuTQzKzDZkwbz9htRrFx02bGbDOKGdPGF37MegMA\nj0h/pxYeBSBpGnAmsGNEzEppzwPeD+wCXBcR32xHLGZmvWL65HHMe/eMtrZxDNqrSpm3Svpkuj9J\n0v55di7pQkmrJS2rSD9M0t2Slks6AyAiVkTE8eXPi4g7I+IksvmyDsTMzLYyffI4Tj5kz7YUGpCv\nO+43gAOAt6T7DwPn5Nz/XLLBgk+SNDq9/nBgX2C2pH1r7UDSkcBPgAU5j2lmZgXKU3C8JCJOBv4G\nEBHrgLF5dh4Ri4DKRvT9geWphrEBuBQ4qs4+royIw6nRg0vSiZL6JfWvWbMmT1hmZjYEeQqOjamW\nEACSJgCbh3DMicD9ZfdXARMljZd0LrCfpI+lYx0s6WxJ51GjxhERcyKiLyL6JkzIPSuwmZk1Kc8A\nwLOBHwK7SvosMAv4RKsDSSsKnlSRdj1w/WCvlTQTmLnnnnu2OiwzM6uQZwDgPEkDwCvJuuK+LiLu\nHMIxHwB2L7u/W0ozM7MekKdX1b+TnejnRsTXh1hoACwB9pI0VdJY4BjgyqHs0JMcmpm1T542jhXA\nbKBf0q8lfUVSzcbscpIuAW4C9pG0StLxEbEJOAW4BrgTuCwibm8y/tJxPK26mVmb1FxzfKsnSn9H\nNp7iQ8C4iNi+yMCa0dfXF1461sysMZIGIqJv8Gdm8qwAeAHZeIs/ATeSNY7f0nSEZmY2ZO2eEbdc\nnl5V44HRwF/IxmT8OV1u6hruVWVmI0knZsQtN2gbR0S8PiJeA
nwR2AlYKGlV4ZE1wI3jZjaSVJsR\nt53yXKo6AngZ2VKxOwE/J7tk1TVc4zCzkaQTM+KWG7RxXNLXyQqKGyPiD22JqkluHDezbtCO9odW\nHqPljeMRccqQIjIzG0Ha1f4wffK4tjeKl+QZx2FmZjl1uv2hHYZFweEBgGbWLUrtD6NFR9of2iFP\nG8f7I+K/BkvrBm7jMLNu0MkxFs1otI0jT43jHVXSjssdkZlZhw2sXMc5C5czsHJdW47X7hX52q1m\n47ik2WSr/k2VVD4J4fZsvTiTmVlX6vRgueGoXq+qXwF/BHYBvlKW/jBwW5FBNcrjOMyslmqN1S44\nhqbmpaqIWBkR10fEAcB9wJiIuIFsRttntCm+XDxy3MxqGQmN1e2WZ+T4CcCJwM7AHmQLL51LtrCT\nmVlXmz55HPPePaOnGqu7XZ5JDk8G9gduBoiIeyTtWmhUZmYt1IrBco32lOq1nlWNyFNwPB4RGyQB\nIGkbIN8iHmZmw0CjDezDvUE+T3fcGyR9HHiGpFcDPwCuKjYsM7Pu0eho8OE+ejxPwXEGsAZYCrwH\nWAB8osigGuWR42ZWpEYb2Id7g3yekeOvBH4VEY+1J6TmeeS4mRXVtjCc2zgaHTmep+D4DnAA2aC/\nG4FFwC8ioj1DMBvggsNsZBvubQtFafmUIxHxjojYGzgauB84h+zSlZlZVxnubQvdIs84jreSrQD4\nQuDPQGlhJzOzwjVyyafTK+ONFHm64/4n8HuyQX8LI+K+QiMyM0savfTUzGC/XmqL6BZ5VgDcRdLz\nydYc/6ykvYC7I+JthUdnZiNaM/NMNTLYz20izRm0jUPSDsAkYDIwBdgR2FxEMJKmSfqWpPllaa+T\ndL6k70s6tIjjmll3Krpbq9tEmpPnUtUvym5fj4hVjRxA0oXAEcDqiHhBWfphwH8Bo4ELIuLzEbEC\nOL684IiIK4ArJI0Dvgz8rJHjm1nvKnqeKbeJNGfQ7rhDPoD0cuAR4KJSwSFpNPA74NXAKmAJMDsi\n7kiPz4+IWRX7+QowLyJuqXUsd8c1s0a5jaPx7rh5ahxDEhGLJE2pSN4fWJ5qGEi6FDgKuKPy9com\nyfo8cHW1QkPSiWSz9zJp0qSWxm5mw18rJkAcafJMOVKEiWRjQkpWARMljZd0LrCfpI+lx94HvAqY\nJemkyh1FxJyI6IuIvgkTJhQeuJnZSJdnHMeBEfHLwdJaISLWAidVpJ0NnD1IjF4B0MysTfLUOL6W\nM60RDwC7l93fLaU1xSsAmg0fAyvXcc7C5Qys7LpZjSypWeOQdADwUmCCpNPKHtqBrCfUUCwB9pI0\nlazAOAZ4S7M7c43DbHjwuIreUK/GMRbYjqxw2b7s9hAwq87rnkLSJcBNwD6SVkk6PiI2AacA15Ct\nYX5ZRNzeXBZc4zAbLjyuojfUrHFExA1kizjNjYiVzR4gImbXSF9AtrbHkLnGYTY8eFxFb6g5jkPS\nQcC0iLgo3Z8P7Jwe/kxE/Lw9IebncRxmvc/jKtqvleM4Pk3WFbZkH+A4YFvg40DXFByucZgNHx5X\n0f3qtXHsUBrJndwTEQMRsYisraNruI3DzKx96tU4diq/ExFHl919VjHhmFmvG1i5jstvWUUAb3jx\nbq49DEP1Co67JL02In5SnijpCODuYsNqjC9VmXWHgZXrmH1+1p0WYH7//Vxy4gEuPIaZegXHB4Gf\nSJoFlOaImk42tuOIogNrRERcBVzV19d3QqdjMRvJFq9Yy8ZNW1Zd2PhE5FpDw3pLzTaOiFgOvIhs\nmdgp6bYIeFFE/K4dwZlZb5kxbTxjttlyWhkzWu5SOwwVPq16O5Rdqjrhnnvu6XQ4ZiOa2zh6T6Pd\nceuN4/hxRNS9JJXnOe3kcRxmZo1r5TiOgyRdWe9YwL65IzOzEckD+oafegXHUTlev6FVgZjZ8ONJ\nC4enweaq6gnujmvWnapNW
uiCo/d1agXAlvLIcbPuVJq0cLTwpIXDSOFrjpvZyDV98jjmvXuG2ziG\nGRccZlYoT1o4/NRbAXAhUGuQR0TEK4sJyczMulm9GseHqqTNAD4CrC4mHDOz6tytt3vU61U1UNqW\n9E/AJ4GnAydFxNVtiM3MekiRJ3Z36+0udds4JL0G+ATwOPDZiFjYlqga5O64Zp1V9Ind3Xq7S83u\nuJKWAOcBl5Jdnlov6cWlW7sCzMPdcc06q9qJvZXcrbe71KtxPAo8AswC3kA2xUhJAK8oMC4za1In\n2gJKJ/aNmzYXcmJ3t97uMixmxy3xJIc20nWyLcCN172rZZMcSnorWcHy3Yr0twFPRMTFzYdpZkXo\nZFuAx2uMHPWmHHkf8MMq6ZcDpxcTjpkNRVFtAQMr13HOwuUMrFzXkv1Zb6vXxjEmIh6pTIyIRyWN\naXUgkqYBZwI7RsSsWmlmVlsRbQHuCmuV6tU4niFp28pESdsDY/PsXNKFklZLWlaRfpikuyUtl3QG\nQESsiIjjy59XLc3M6ps+eRwnH7Jny07uRfeYst5Tr+D4FjBf0uRSgqQpZN1zv5Vz/3OBw8oTJI0G\nzgEOJ1sIarYkLwhl1qXcFdYq1Rs5/mVJjwCLJG1H1h33YeDzEfHNPDuPiEWpsCm3P7A8IlYASLqU\nbNGoOxoP38yK5q6wVqnuehwRcW5ETAamAJMjYnLeQqOOicD9ZfdXARMljZd0LrCfpI8BVEurJOlE\nSf2S+tesWTPE0MysmlZf/rLeVq877mkVSSHpz8AvIuLeVgcSEWuBkwZLq/K6OZL+CMwcO3bs9FbH\nZWZmT1WvxrF9xW0HoA+4WtIxQzjmA8DuZfd3S2lN85QjZmbtU6+N49PV0iXtDPwPWSN5M5YAe0ma\nSlZgHAO8pcl9lWLyJIdmbeDR4QZNrDkeEQ/y1HmrapJ0CXATsI+kVZKOj4hNwCnANcCdwGURcXuj\ncVTE5BqHWZkiBuyVxnN85Wd3c+wFiz0YcARreOlYSYcAuT4xETG7RvoCYEGjx64Tk2scZklRA/Y8\ntbmV1GscX8rWS8fuDPwBeHuRQTUqIq4Crurr6zuh07GYtUqzl4WKOsEXPQOu9Y56NY4jKu4HsDYi\nHi0wHjNjaLWGok7wHs9hJfUKjoerpD1N0tPgybaOruBLVTbc1Ko15KmFFHmC9wy4BvULjgGyWka1\nhvAAphUSURN8qcqGm2q1hkZqIT7BW5Hqdced2s5AhsI1DhtuqtUazlm4fNC2C3eXtXZouFdVN3KN\nw4ajylrDYG0Xnv7c2mVYFBxmI8FgbRfuLmvt4oLDrIfUa7twd1lrl1wFh6SDgL0i4tuSJgDbFTHR\nYbPcxmHm7rLWPoqoHONX8QTpLLLJDfeJiL0lPQf4QUQc2I4AG9HX1xf9/f2dDsPMrKdIGoiIvrzP\nzzNX1euBI4FHASLiD2Sz5ZpZlylijiqzSnkuVW2IiJAUANXWITezznOvKmuXPDWOyySdB+wk6QSy\nKdXPLzasxkiaKWnO+vXrOx2KWcdU61VlVoRBC46I+DIwH/hvYB/gUxHxtaIDa4SnVTfb0qtqtHCv\nKitUrl5VEXEtcG3BsZjZELhXlbXLoAWHpKOBLwC7ks1bJSAiYoeCYzOzBnmOKmuHPDWOLwIzI+LO\nooMxG248d5QNR3kKjj+50DBrnHs52XBVbwXAo9Nmv6TvA1cAj5cej4jLC44tN48ct1ZpZQ2h0bmj\nXDuxXlGvxjGzbPuvwKFl9wPomoLDs+NaK7S6htDI3FGunVgvqbcexzsBJB0YEb8sf0xS1003YjZU\nrZ5dtpFeTp7Z1npJnjaOrwEvzpFm1tPyrHfR6KWkvL2cPLOt9ZJ6bRwHAC8FJkg6reyhHYDRRQdm\n1m71aghFX0ryGAzrJfVqHGOB7dJzyic1fAiYVWRQZp1Sq4bQjktJHoNhvaJeG8cNwA2S5kb
EyjbG\nZNZ1fCnJbItB2zjaVWhImgacCewYEbNS2rbAN4ANwPURMa8dsZhV8qUksy3yzI7bNEkXSlotaVlF\n+mGS7pa0XNIZABGxIiKOr9jF0cD8iDiBbE0Qs46ZPnkcJx+SjRXymhc2khW95vhc4OvARaUESaOB\nc4BXA6uAJZKujIg7qrx+N2Bp2n6i2FDNBufxFmY5ahySpkr6qqTLJV1ZuuXZeUQsAh6sSN4fWJ5q\nGBuAS4GjauxiFVnhUTNWSSdK6pfUv2bNmjxhmTXNa16Y5atxXAF8C7gK2NyCY04E7i+7vwp4iaTx\nwGeB/SR9LCI+RzY6/euSXpuOv5WImAPMgWzN8RbEZ1aTG8nN8hUcf4uIs4sOJCLWAidVpD0KvHOw\n13quKmsXN5Kb5Ss4/kvSWcDPeOokh7c0ecwHgN3L7u+W0prmuaqsnTzewka6PAXHC4G3Aa9gy6Wq\nSPebsQTYS9JUsgLjGOAtTe4LcI3DzKyd8nTHfSMwLSL+KSIOSbdchYakS4CbgH0krZJ0fERsAk4B\nrgHuBC6LiNubzQB4zXEzs3bKU+NYBuwErG505xExu0b6AmBBo/urxTUOM7P2yVPj2Am4S9I1jXbH\nbRfXOMzM2idPjeOswqMYItc4zMzaZ9AaR5rs8C6yGXK3B+5MaV3DNQ4zs/bJM3L8TcCvyRrJ3wTc\nLMnTqpuZjVB5LlWdCfxjRKwGkDQB+B9gfpGBNcKXqszM2idP4/ioUqGRrM35urbxpSozs/bJU+P4\nqaRrgEvS/TfTwq60ZmbWW/Is5PRhSUcDB6WkORHxw2LDaowvVZmZtY8iak8om9bO+J+IOKR9ITWv\nr68v+vv7Ox2GmVlPkTQQEX15n1+3rSIingA2S3LjgZmZAfnaOB4Blkq6Fni0lBgRpxYWlZmZda08\nBcfl6da13MZhZtY+NQsOSddFxCuBfSPio22MqWFej8PMrH3q1TieLemlwJGSLgVU/uAQFnIyM7Me\nVq/g+BTwSbIV+r5a8dhQFnIyM7MeVrPgiIj5wHxJn4yIf29jTGZm1sXyzI7rQsPMzJ7UVXNONUvS\nTElz1q9f3+lQzMyGvWFRcHiSQzOz9slVcEg6SNI70/YESVOLDcvMzLpVnoWczgI+CnwsJY0Bvldk\nUGZm1r3y1DheDxxJmm4kIv5AtoSsmZmNQHkKjg2RTaEbAJK2LTYkMzPrZnkKjssknQfsJOkEsmVj\nLyg2LDMz61Z5FnL6sqRXAw8B+wCfiohrC48MkLQv8K9ky9VelwYlmplZB+VpHP9CRFwbER+OiA9F\nxLWSvtDsASVdKGm1pGUV6YdJulvScklnpOTDga9FxHuBtzd7TDMza508l6peXSXt8CEccy5wWHlC\nWmnwnLTffYHZqbbxXeAYSV8Cxg/hmGZm1iI1Cw5J75W0FNhH0m1lt3uB25o9YEQsAh6sSN4fWB4R\nKyJiA3ApcFRErI6Ik4EzgD83e0wzM2udem0cFwNXA58jO3GXPBwRlSf+oZoI3F92fxXwEklTgI8D\n2wJfqvZCSScCJwJMmjSpxWGZmVmlerPjrgfWA7MBJO0KPB3YTtJ2EfG/RQcXEfeRCoU6z5kj6Y/A\nzLFjx04vOiYzs5EuT+P4TEn3APcCNwD3kdVEWukBYPey+7ultFw8V5WZWfvkaRz/DDAD+F1ETAVe\nCSxucRxLgL0kTZU0FjgGuDLviz07rplZ++QpODZGxFpglKRREbEQ6Gv2gJIuAW4ia3RfJen4iNgE\nnAJcA9wJXBYRt+fdp2scZmbtM+gAQOAvkrYDFgHzJK0mzVvVjIiYXSN9AbCgmX1KmgnM3HPPPZsN\ny8zMcspT4zgKeAz4IPBT4PfAzCKDapRrHGZm7ZNnypFHASTtAFxVeERmZtbVBi04JL0H+DTwN2Az\nILKZcqcVG1p+vlRlZtY+eS5VfQh4QURMiYhpETE1Irq
m0ABfqjIza6c8Bcfvgb8WHchQuDuumVn7\nKFujqc4TpP2AbwM3A4+X0iPi1GJDa1xfX1/09/d3Ogwzs54iaSAicg+zyNMd9zzg58BSsjYOMzMb\nwfIUHGMi4rTCIzEzs56Qp43jakknSnq2pJ1Lt8Ija4DbOMzM2idPG8e9VZKj23pWgds4zMya0fI2\njjSxoZmZGVCn4JD0ioj4uaSjqz0eEZcXF5aZmXWrejWOfyLrTVVtXqoAuqbg8MhxM7P2ydPGMTUi\n7h0srRu4jcPMrHGNtnHk6VX131XS5ucPyczMhpN6bRzPBZ4P7FjRzrED2drjZmY2AtVr49gHOALY\niae2czwMnFBkUGZm1r1qFhwR8SPgR5IOiIib2hiTmZl1sTxtHK+XtIOkMZKuk7RG0lsLj6wBHjlu\nZtY+eQqOQyPiIbLLVvcBewIfLjKoRvXCehwDK9dxzsLlDKxc1+lQzMyGJNckh+nva4EfRMR6SQWG\nNPwMrFzHsRcsZsOmzYzdZhTz3j2D6ZPHdTosM7Om5KlxXCXpLmA6cJ2kCWTLyFpOi1esZcOmzWwO\n2LhpM4tXrO10SGZmTRu04IiIM4CXAn0RsZFsNcCjig5sOJkxbTxjtxnFaMGYbUYxY9r4TodkZta0\nQS9VSXom8C/AJOBE4DlkXXV/XGxow8f0yeOY9+4ZLF6xlhnTxvsylZn1tDxtHN8GBshqHQAPAD/A\nBUdDpk8e5wLDzIaFPG0ce0TEF4GNABHxV6AtreOSJkm6QtKFks5oxzHNzKy+PAXHBknPIJsRF0l7\nAI83e8BUCKyWtKwi/TBJd0taXlZIvBCYHxHvAvZr9phmZtY6eQqOs4CfArtLmgdcB3xkCMecCxxW\nniBpNHAOcDiwLzBb0r7AYuB4ST9PMVibeNyJmdWSZwXAayXdAswgu0T1/oj4c7MHjIhFkqZUJO8P\nLI+IFQCSLiXrubUROCu9Zj5Ze8tTSDqRrNGeSZMmNRuWlfG4EzOrJ0+Ng4hYGxE/iYgfD6XQqGMi\ncH/Z/VUp7afAqZLOJRu1Xi22ORHRFxF9EyZMKCC0kcfjTsysnjy9qjomIpYBswZ7nlcAbK3SuJON\nmzZ73ImZbaVbCo4HgN3L7u+W0nKJiKuAq/r6+jzdewt43ImZ1ZNnAOAewKqIeFzSwcCLgIsi4i8t\njGMJsJekqWQFxjHAW/K+2DWO1vO4EzOrJe/SsU9I2hOYQ1YzuLjZA0q6BLgJ2EfSKknHR8Qm4BTg\nGuBO4LKIuD3vPnthdlwzs+Eiz6WqzRGxSdLrga9FxNck/abZA0bE7BrpC4AFzezTNQ4zs/bJU+PY\nKGk28A62TDMyps7z2841DjOz9slTcLwTOAD4bETcm9ohvltsWI3xCoBmZu2jiMj/ZGkcsHtE3FZc\nSM3r6+uL/v7+TodhZtZTJA1ERF/e5w9a45B0fVpzfGfgFuB8SV8dSpBmZta78lyq2jGtOX40WTfc\nlwCvKjasxvhSlZlZ++QpOLaR9GzgTXTpGhxuHDcza588Bce/kY2vWB4RSyRNA+4pNiwzM+tWeWbH\n/QHZin+l+yuANxQZVKM8jsPMrH3yTDnydOB44PnA00vpaXGlruC5qszM2ifPparvAn8HvAa4gWwC\nwoeLDMrMzLpXnoJjz4j4JPBoRHwHeC3wkmLDMjOzbpVrypH09y+SXgDsCOxaXEiNc3dcM7P2yVNw\nzEkjxj8JXAncAXyx0Kga1IruuF5j28wsnzy9qi5ImzcA04oNpzO8xraZWX55elXtBLwdmFL+/Ig4\ntbiw2qvaGtsuOMzMqsuzHscCYDGwFNhcbDid4TW2zczyy1NwPD0iTis8kg7yGttmZvnlKTi+K+kE\nsnmqHi8lRsSDhUXVoFaMHPca22Zm+eTpVbUB+BLZOuED6dZVi154kkMzs/bJU+M4nWwQ4J+LDsbM\nzLpfnhrHcuCvRQd
iZma9IU+N41HgVkkLeWobx7DpjmtmZvnlKTiuSDczM7NcI8e/I2kssHdKujsi\nNtZ7jZmZDV+KiPpPkA4GvgPcBwjYHXhHRCwqOrhGSVoDrCSbiLE042G17V2AoTT2l++zmedVSx8s\nzXlqnPOUL915cp4mR8SEHPFmIqLujaz77T5l9/cGBgZ7XSdvwJx620B/q/bfzPOqpQ+W5jw5T86T\n89TJPJXf8vSqGhMRd5fuRMTvgDE5XtdJV+XYbtX+m3letfTB0pynxjlP+dKdp8ZjGcxwzNOT8lyq\nupBsjqrvpaRjgdHRRUvHNkpSf0T0dTqOVnKeeoPz1Bucp/ry9Kp6L3AyUOp+eyPwjVYcvIPmdDqA\nAjhPvcF56g3OUx2D1jjMzMzK1axxSLosIt4kaSmwVekSES8qNDIzM+tKNWsckp4dEX+UNLna4xGx\nstDIzMysKzV0qUrSLsDa8PUtM7MRq2Z3XEkzJF0v6XJJ+0laBiwD/iTpsPaFWDxJL5N0rqQLJP2q\n0/G0gqRJkq6QdKGkMzodTytI2lfSZZK+KWlWp+MZCknTJH1L0vyytG0lfUfS+ZKO7WR8zaiRp63S\nekmNPL0u/Y++L+nQTsbXjBp5el46B86X9N5Bd1JnYEo/cCjwRmAdMCOlPxf4TasGkhR1Ay4EVgPL\nKtIPA+4mm/X3jIrHXge8p9OxtyJPwGuBt6bt73c69hbl6XTgZWn7yk7H3qLP3Pyy7bcBM7vpfzbU\nPNVLGwZ5Ggd8q9P5aXGeRgHfG/R4dQK5tWz7zorHeqHgeDnw4vI3EhgN/B6YBowFfgvsW/b4ZcD2\nnY69FXkCxgMLgZ8D7+x07C3K067AOWQLi/2y07G36DNXXnB8DPiHtH1xp/PTijzVSxsGefoK8OJO\n56dVeQKOBK4G3jLY8eqNHN9ctv1YxWNd38YR2Vxalcvb7g8sj4gVEbEBuBQ4CrJLO8D6iHi4vZHm\n12Ce3gmcFRGvIKt9dKVG8hQRqyPiZOAMhjaPUCEa/cxVsQrYLW3nmdWhcC3IU9cZap6U+QJwdUTc\nUmy0+bTi/xQRV0bE4WSDvOuq9+H8e0kPSXoYeFHaLt1/4aA56U4TgfvL7q9KaQDHA99ue0RDVytP\nPwVOlXQu2QSVvaRqniRNkTQHuIis1tELauVlfPrf7CfpY+mxy4E3SPomBUwT0UK581Qjn92okf/T\n+4BXAbMkndTmOBvRyP/pYElnSzoPWDDYjmuO44iI0UMMuqdExFmdjqGVImIZ0NMNyJUi4j7gxE7H\n0QoRsRY4qSLtUbKaYk+qkaet0npJjTydDZzdmYiGrkaergeuz7uPrqgOt9EDZNPCl+yW0nqZ89Td\nhlNeSpyn3lBYnkZawbEE2EuEgga7AAAJh0lEQVTS1LQ41THAlR2Oaaicp+42nPJS4jz1huLy1One\nAAX2MrgE+COwkeza3vEp/Z+B35H1Njiz03E6T8MnT8MpL86T81Tv5kkOzcysISPtUpWZmQ2RCw4z\nM2uICw4zM2uICw4zM2uICw4zM2uICw4zM2uIC442kfSEpFsl/VbSLZJemtKf06trFZRLaxTsW3b/\n3yS9qgX73UnSvzTxujdKulPSwqHGkONYh0vql3SHpN9I+kpKn5t33ZDB1oGR9PFWxNpKab2euyUd\nWZZ2mqS7JC1Nn/WvShqTHmt5HiQdJ+k5Ld7nkYOtYZPmdvpxjcc+IOmZZfcXSnpEUl8r4+yoTg9c\nGSk34JGy7dcAN3Q6phbnby4wq4D9TqFijYGcr/spcFCV9G1aHN8LyAZXPTfdHw28N+97kjee8s9P\nt9zI5jbqK7t/Unrfd0r3x5LNZLxDUXmojKGNeT8Y+HGNx+4DdumGOAvLf6cDGCm3ioLjjcAVafvJ\nEyPwdLIZepcCvwEOSenHAVcA16YP5SnAaek5i4Gd0/NOIJtm4LfAfwPPLDvespS+K
KU9H/g1cCtw\nG7BXlZgPBW4CbgF+AGyX0j8P3JFe92XgpWRTOt+b9rdH+Ukzxfy59Fg/2boB16QT7knpOdsB16Vj\nLSWbRh2yqaAfS6/9Ukr7cMrnbcCnq8T9KeARsgVsvpTevyvJ1ia5AVBKX5aO9eb0uoPT4z8CVqR8\nHpvep6XAHlWOdRHwrhr/87lkk+H9Ku1vVtlxbkwx/a788wE8G1iU8rsMeFmK44mUNi89761l/7/z\ngNGl/QCfTf/rxcCz6nwGRqf3ofRevqcsvuuB+cBdwDzSMtMV+buepxYc9wNTa7wXW+Wh4vvw1bT9\nfmBF2p5GWncFmJ7+NwNkn51nk03iWfo/3wo8o2yfuwIDafvvyZaCmJTu/x54JjCB7HuyJN0OLPu+\nfT1t75Hex6XAZ8r+T1XfI+BUYEN6/sJa71Wv3zoewEi5lX1p7gLWA9NT+hS2FBynAxem7ecC/0tW\nmBxHtoLX9unDvp4tJ9z/B3wgbY8vO95ngPel7aXAxLRd+jX4NeDYtD22/EuX0nYhO4Ftm+5/lOyE\nPD59UVWxv7mU/bpm64LjvWXx3laWlz+l9G3Y8st0l5RfUVHjICvM5qTHRgE/Bl5e5f1+8oua3r9V\nbClg30BWCI8GnpXe52enk8Ff0vbTyCaE+3R6zfuB/6xynFuAv6/xP59LVuCOIluIanlKPxh4lLKT\nLFtOSKeTpoZI8W1f/njafh7ZtOtj0v1vAG9P28GWVQS/CHyizmfgxLLHn0ZWqE9N8a0nmxRvFNmP\nh2q1t/L3eAdg3SDfgao1DuDvgCVpez7ZSXwi8A6yHxxjyArfCek5b2bL9+TJGKrs9/YU1ylpn8cC\nk4Gb0uMXl/IFTCItWMdTC44fA7PT9kk8teCo+h4xAmocNadVt5Z7LCL+AUDSAcBFkl5Q8ZyDyE7o\nRMRdklYCe6fHFka2yNTDktazZb2GpcCL0vYLJH0G2InsF/w1Kf2XwFxJl5Gt+QDZB/1MSbsBl0fE\nPRWxzCA72f1SEmSFy01kX5a/Ad9K13irXuetojS52lKymkspL49L2onsRPofkl5OtojYRLKTeqVD\n0+036f52wF5khVw910ZEaaGbg4BLIuIJ4E+SbgD+EXiI7AT2RwBJvwd+Vhb3ITnzWu6KiNgM3CGp\nPD+/joh7qzx/CXBhahe4IiJurfKcV5L9Al+S/jfPIFs2FLJfu6X/yQDw6rRd7TNwKNlaO6V2mB3J\n3ssNKb5VAJJuJSvAf5E305JeA3yB7LP4loio2YYTEf8naTtJ25PN5nox2Yp2L0ux7kN2SfDalN/R\nZPMyDeZXwIFpX/9BtoyqyGp7kK2psW/aJ8AOkrar2McBZEtKk+L6ctljQ3qPepkLjg6IiJsk7UL2\nizuvx8u2N5fd38yW/+Nc4HUR8VtJx5H9KiIiTpL0ErKVAAckTY+IiyXdnNIWSHpPRPy87BgiO9nO\nrgxE0v5kJ69ZZL/mXtFA/OWxl8d/LNn7MT0iNkq6j6y2tdXhgc9FxHk5jlnu0ZzPy/M+l7ud7CT+\n2xz7U9l21XgiYlEqPF9LdqL/akRcVPE0Ad+JiGoLI22M9BOXrJa7TdrvVp+BtJ/3RcQ15TuQdHBF\n3E/up5aIeCg1AE+NiHvTPq9JPy7G1ntt8iuytUjuJjuxv4vspH06WW3g9og4IMd+yi0iK3wmk11+\n/ChZjewn6fFRwIyI+Fv5i8oKksE09B4NJ+5V1QGSnkv2q2ltxUM3kpZtlLQ32Rfm7gZ2vT3wx/Rr\n9cnlHyXtERE3R8SngDXA7pKmkV1LPpvsS/Wiin0tBg6UtGfax7aS9k6/yHaMiAXAB8muHwM8nI7f\nrB2B1anQOITsy15tv9cA7yr9MpQ0UdKuDR7rRuDNkkZLmkD2i/TXTcb9JeDj6f+FpFEawqpwkiaT\nXb47H7iArD0IYGOpdxJZW9CsUr4l7ZxeV2+/W
30GyN7L95b1etpb0rbNxk52WembqQaJsjNweeFf\nnodKNwIfIjvZ/4asdvd4RKwn+w5MSDV1JI2R9Pz0unqfuxvJ2oLuSbW+B8lmiy3VCn5Gtpofab//\nUGUfi8kubUI2LXkeQ/0udL0RU0J2gWek6ixkv/TeERFPVPy6+QbZF28psAk4LiIeb+AX0CeBm8lO\nDDez5cP7JUl7peNeR/br+KPA2yRtBP6PrCr/pIhYk2otl0h6Wkr+BNmX4keSnp72d1p67FLgfEmn\n0tzKg/OAq1Le+8nagoiItZJ+KWkZ2RrPH5b0POCm9L48QnZyWF1jv9X8kOzX7G/JfoF+JF0ueW6j\nQUfEbZI+QPY+PTPtL+/lu2oOBj6c/i+PAG9P6XOA2yTdEhHHSvoE8DNJo8im0j4ZWFlnv9U+A7eR\nXV65JZ3k17DlskwzvglsC9ws6fEU/y/ZclnxKXmoeO2NZIXZovS9uJ8tn4EN6XLa2ZJ2JDtv/SdZ\nbW8ucK6kx4ADIuKx0g4j4r6Ur9JlzF8Au0XEunT/VOAcSbelfS5i69UKPwB8T9KZZD3G1ud4H+YA\nP5X0h4ho5vJm1/O06mbWFEnXAx+KiP5Ox1KU9GPgsYgISceQNZQf1cR+rmcYvVe+VGVmzXqQrB3m\nyEGf2bumA7emWsm/kLW5NETZINRpZDXDYcE1DjMza4hrHGZm1hAXHGZm1hAXHGZm1hAXHGZm1hAX\nHGZm1hAXHGZm1pD/D1Xtsds8kSq2AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Load IUCN data\n", - "comparison_data = pd.read_excel('marine_mammal_data.xlsx',index_col=0)\n", - "\n", - "# IUCN values reported in tons, convert to grams\n", - "comparison_data['Biomass estimate from IUCN'] *=1e6\n", - "comparison_data['Biomass estimate from Christensen'] *=1e6\n", - "\n", - "# Plot data\n", - "plt.loglog(comparison_data['Biomass estimate from Christensen'],comparison_data['Biomass estimate from IUCN'],'.')\n", - "plt.xlabel('Biomass estimate from Christensen [Gt wet weight]')\n", - "plt.ylabel('Biomass estimate from the IUCN [Gt wet weight]')\n", - "\n", - "# Generate best estimate from the IUCN data by summing across all species and converting to carbon mass\n", - "best_IUCN = comparison_data['Biomass estimate from IUCN'].sum()*wet_to_c\n", - "print('The correlation coefficient between Christensen and the IUCN data is ≈%.2f' %(comparison_data.corr(method='spearman').iloc[0,1]))\n", - "\n", - "# Calculate the inter-study uncertainty\n", - "marine_inter_CI = geo_CI_calc(np.array([best_IUCN,best_christensen]))\n", - "print('The inter-study uncertainty between Christensen and the IUCN data is ≈%.1f-fold' %marine_inter_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection of the uncertainty associated with our estimate of the total biomass of wild marine mammals, we use the higher uncertainty out of the inter-study and intra-study uncertainties:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "marine_mammal_CI = np.max([marine_inter_CI,marine_intra_CI])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our projection of the total uncertainty associated with our estimate of the total biomass of wild mammals, we combine our uncertainties associated with our estimates for the total biomass of 
wild land and marine mammals:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total biomass of wild mammals is ≈2-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(np.array([best_land_mammal_biomass,best_christensen]), np.array([land_mammal_CI,marine_mammal_CI]))\n", - "print('Our best projection for the uncertainty associated with our estimate of the total biomass of wild mammals is ≈%.0f-fold'%mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prehuman wild mammal biomass\n", - "We can compare our estimates for the present day biomass of wild mammals with estimates of the prehuman biomass of wild land and marine mammals. For wild land mammals, we use the estimate of the prehuman biomass of wild land mammals reported in Barnosky of:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Based on the data in Barnosky, we estimate wild land mammal biomass decreased ≈7.6-fold\n" - ] - } - ], - "source": [ - "# Taken from figure 3 in Barnosky\n", - "prehuman_barnosky_biomass = 10**11.165*1000*wet_to_c \n", - "\n", - "print('Based on the data in Barnosky, we estimate wild land mammal biomass decreased ≈%.1f-fold' %(prehuman_barnosky_biomass/best_land_mammal_biomass))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For marine mammals, we rely on estimates by Christensen on values of wild marine mammals in 1800 before commercial whaling:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Based on the data in Christensen, we estimate wild marine mammal biomass decreased ≈4.3-fold, with a range of between 
≈7.0-fold and ≈2.4-fold\n" - ] - } - ], - "source": [ - "prehuman_marine_biomass = christensen.loc[1800,'Mean']*wet_to_c\n", - "\n", - "print('Based on the data in Christensen, we estimate wild marine mammal biomass decreased ≈%.1f-fold, with a range of between ≈%.1f-fold and ≈%.1f-fold' %((prehuman_marine_biomass/best_christensen),(christensen.loc[1800,'Max']/christensen.loc[2000,'Min']) ,(christensen.loc[1800,'Min']/christensen.loc[2000,'Max'])))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Overall, we can combine the estimates for the prehuman wild land and marine mammal biomass and estimate the reduction in total biomass of wild mammals between prehuman and present values:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fold reduction in the biomass of wild mammals between prehuman and present values is ≈5.6-fold\n" - ] - } - ], - "source": [ - "prehuman = prehuman_barnosky_biomass + prehuman_marine_biomass\n", - "\n", - "print('Our best estimate for the fold reduction in the biomass of wild mammals between prehuman and present values is ≈%.1f-fold' %(prehuman/best_estimate))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py:196: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " res = shell.run_cell(code, store_history=store_history, silent=silent)\n", - "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py:2683: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = 
pd.read_excel('../../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Wild mammals',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,mul_CI)\n", - "result.to_excel('../../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Wild mammals'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Fig. 2A\n", - "update_results(sheet='Fig2A', \n", - " row=('Terrestrial','Wild land mammals'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_land_mammal_biomass/1e15,land_mammal_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "update_results(sheet='Fig2A', \n", - " row=('Marine','Wild marine mammals'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_christensen/1e15,marine_mammal_CI],\n", - " path='../../../results.xlsx')\n", - "\n", - "# Update the data mentioned in the MS\n", - "update_MS_data(row ='Prehuman biomass of wild land mammals',\n", - " values=prehuman_barnosky_biomass/1e15,\n", - " path='../../../results.xlsx')\n", - "\n", - "update_MS_data(row ='Prehuman biomass of marine mammals',\n", - " values=prehuman_marine_biomass/1e15,\n", - " path='../../../results.xlsx')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/cnidarians/.ipynb_checkpoints/cnidarians-checkpoint.ipynb b/animals/cnidarians/.ipynb_checkpoints/cnidarians-checkpoint.ipynb deleted file 
mode 100644 index 1cd96cc..0000000 --- a/animals/cnidarians/.ipynb_checkpoints/cnidarians-checkpoint.ipynb +++ /dev/null @@ -1,379 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from openpyxl import load_workbook\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of Cnidarians\n", - "To estimate the total biomass of cnidarians, we combine estimates for two main groups which we assume dominate the biomass of cnidarians: planktonic cnidarians (i.e. jellyfish) and corals. We describe the procedure for estimating the biomass of each group.\n", - "\n", - "## Planktonic cnidarians\n", - "Our estimate of the total biomass of planktonic cnidarians is based on [Lucas et al.](http://dx.doi.org/10.1111/geb.12169), which assembled a large dataset of abundance measurements of different types of gelatinous zooplankton. Globally, they estimate ≈0.04 Gt C of gelatinous zooplankton, of which 92% are contributed by cnidarians. Therefore, we estimate the total biomass of planktonic cnidarians at ≈0.04 Gt C.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "planktonic_cnidarian_biomass = 0.04e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Corals\n", - "The procedure we take to estimate the total biomass of corals in coral reefs is to first calculate the total surface area of coral tissue globally, and then convert this value to biomass by the carbon mass density of coral tissue per unit surface area. We estimate the total surface area of corals worldwide using two approaches. 
\n", - "\n", - "The first approach estimates the total surface area of corals using the total area of coral reefs (in $m^2$) from [Harris et al.](http://dx.doi.org/10.1016/j.margeo.2014.01.011). " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Total surface area of coral reefs\n", - "coral_reef_area = 0.25e12" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate that 20% of the reef area is covered by corals based on [De'ath et al.](http://dx.doi.org/10.1073/pnas.1208909109)." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Coverage of coral reef area by corals\n", - "coverage = 0.2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This gives us the projected area of corals. Corals have a complex 3D structure that increases their surface area. To take this effect into account, we use a recent study that estimated the ratio between coral tissue surface area and projected area at ≈5 ([Holmes & Glen](http://dx.doi.org/10.1016/j.jembe.2008.07.045))." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# The conversion factor from projected surface area to actual surface area\n", - "sa_3d_2a = 5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Multiplying these factors, we get an estimate for the total surface area of corals:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the global surface area of corals based on our first method is ≈2.5×10^11 m^2\n" - ] - } - ], - "source": [ - "# Calculate the total surface area of corals\n", - "method1_sa = coral_reef_area*coverage*sa_3d_2a\n", - "\n", - "print('Our estimate of the global surface area of corals based on our first method is ≈%.1f×10^11 m^2' % (method1_sa/1e11))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The second approach uses an estimate of the global calcification rate in coral reefs based on [Vecsei](http://dx.doi.org/10.1016/j.gloplacha.2003.12.002). " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Global annual calcification rate of corals [g CaCO3 yr^-1]\n", - "annual_cal = 0.75e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We divide this rate by the surface area specific calcification rate of corals based on values from [McNeil](http://dx.doi.org/10.1029/2004GL021541) and [Kuffner et al.](http://dx.doi.org/10.1007/s00338-013-1047-8). Our best estimate for the surface area specific calcification rate is the geometric mean of values from the two sources above." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the global surface area of corals based on our second method is ≈0.6×10^11 m^2\n" - ] - } - ], - "source": [ - "from scipy.stats import gmean\n", - "# Surface area specific calcification rate from McNeil, taken from figure 1 [g CaCO3 m^-2 yr^-1]\n", - "mcneil_cal_rate = 1.5e4\n", - "\n", - "# Surface area specific calcification rate from Kuffner et al., taken from first\n", - "# Sentence of Discussion [g CaCO3 m^-2 yr^-1]\n", - "kuffner_cal_rate = 0.99e4\n", - "\n", - "# Our best estimate for the surface area specific calcification rate is the geometric mean of the two values\n", - "best_cal_rate = gmean([mcneil_cal_rate,kuffner_cal_rate])\n", - "\n", - "# Calculate the surface area of corals\n", - "method2_sa = annual_cal/best_cal_rate\n", - "\n", - "print('Our estimate of the global surface area of corals based on our second method is ≈%.1f×10^11 m^2' % (method2_sa/1e11))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the global surface area of corals we use the geometric mean of the estimates from the two methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the global surface area of corals is ≈1.2×10^11 m^2\n" - ] - } - ], - "source": [ - "best_sa = gmean([method1_sa,method2_sa])\n", - "print('Our best estimate of the global surface area of corals is ≈%.1f×10^11 m^2' % (best_sa/1e11))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the total surface area to biomass, we use estimates for the tissue biomass per unit surface area of corals from [Odum & Odum](http://dx.doi.org/10.2307/1943285):" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of corals is ≈0.05 Gt C\n" - ] - } - ], - "source": [ - "# Tissue biomass based on Odum & Odum [g C m^-2]\n", - "carbon_per_sa = 400\n", - "\n", - "# Multiply our best estimate for the surface area of corals by the tissue biomass\n", - "coral_biomass = best_sa*carbon_per_sa\n", - "\n", - "print('Our best estimate for the biomass of corals is ≈%.2f Gt C' %(coral_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "An important caveat of this analysis is that it doesn’t include contribution of corals outside coral reefs, like those located in seamounts. Nevertheless, we account for this biomass of corals which are out of formal coral reefs when calculating the total benthic biomass.\n", - "\n", - "Our best estimate of the total biomass of cnidarians is the sum of the biomass of planktonic cnidarians and corals:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of cnidarians is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = planktonic_cnidarian_biomass + coral_biomass\n", - "\n", - "print('Our best estimate for the biomass of cnidarians is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of cnidarians\n", - "To estimate the total number of cnidarians, we divide the total biomass of jellyfish by the characteristic carbon content of a single jellyfish. We do not consider corals as they are colonial organisms, and therefore it is hard to robustly define an individual. To estimate the characteristic carbon content of a single jellyfish, we rely on the data from Lucas et al.. 
We calculate the mean and median carbon content of all the species considered in the study, and use the geometric mean of the median and mean carbon contents as our best estimate of the characteristic carbon content of a single jellyfish." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of cnidarians is ≈2.0e+16.\n" - ] - } - ], - "source": [ - "# Load data from Lucas et al.\n", - "data = pd.read_excel('carbon_content_data.xls', 'Biometric equations', skiprows=1)\n", - "\n", - "# Calculate the median and mean carbon contents\n", - "median_cc = (data['mg C ind-1'].median()*1e-3)\n", - "mean_cc = (data['mg C ind-1'].mean()*1e-3)\n", - "\n", - "# Calculate the geometric mean of the median and mean carbon contents\n", - "best_cc = gmean([median_cc,mean_cc])\n", - "\n", - "# Calculate the total number of jellyfish\n", - "tot_cnidaria_num = planktonic_cnidarian_biomass/best_cc\n", - "\n", - "print('Our best estimate for the total number of cnidarians is ≈%.1e.' %tot_cnidaria_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Cnidarians',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,None)\n", - "result.to_excel('../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Cnidarians'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,None],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Cnidarians'), \n", - " col='Number of individuals',\n", - " values= tot_cnidaria_num,\n", - " path='../../results.xlsx')\n", - "\n", - "# We need to use the results on the biomass of gelatinous zooplankton \n", - "# for our estimate of the total biomass of marine arthropods, so we \n", - "# feed these results to the data used in the estimate of the total \n", - "# biomass of marine arthropods\n", - "path = '../arthropods/marine_arthropods/marine_arthropods_data.xlsx'\n", - "marine_arthropods_data = pd.read_excel(path,'Other macrozooplankton')\n", - "\n", - "marine_arthropods_data.loc[0] = pd.Series({\n", - " 'Parameter': 'Biomass of gelatinous zooplankton',\n", - " 'Value': planktonic_cnidarian_biomass,\n", - " 'Units': 'g C',\n", - " 'Uncertainty': None\n", - " })\n", - "writer = pd.ExcelWriter(path, engine = 'openpyxl')\n", - "book = load_workbook(path)\n", - "writer.book = book\n", - "writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n", - "\n", - "\n", - "marine_arthropods_data.to_excel(writer, sheet_name = 'Other macrozooplankton',index=False)\n", - "writer.save()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/molluscs/.ipynb_checkpoints/molluscs-checkpoint.ipynb b/animals/molluscs/.ipynb_checkpoints/molluscs-checkpoint.ipynb deleted file 
mode 100644 index dad1a24..0000000 --- a/animals/molluscs/.ipynb_checkpoints/molluscs-checkpoint.ipynb +++ /dev/null @@ -1,212 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from excel_utils import *\n", - "from openpyxl import load_workbook" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of molluscs\n", - "To estimate the biomass of molluscs, we estimate the biomass of two major components of mollusc biomass - pteropods and cephalopods. In this work we assume the contribution of other types of molluscs is limited compared to pteropods and cephalopods.\n", - "\n", - "## Pteropods\n", - "Our estimate of the global biomass of pteropods is based on data from the MAREDAT database ([Buitenhuis et al.](http://search.proquest.com/openview/0e8e5672fa28111df473268e13f2f757/1?pq-origsite=gscholar&cbl=105729)). \n", - "\n", - "Buitenhuis et al. generated two estimates for the global biomass of pteropods by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth. This approach results in two types of estimates for the global biomass of pteropods: a so called “minimum” estimate which uses the median concentration of biomass from the database, and a so called “maximum” estimate which uses the average biomass concentration. Because the distributions of values in the database are usually highly skewed by asymmetrically high values, the median and mean are loosely associated by the MAREDAT authors with a minimum and maximum estimate. 
The estimate based on the average value is more susceptible to biases in oversampling singular locations such as blooms of plankton species, or of coastal areas in which biomass concentrations are especially high, which might lead to an overestimate. On the other hand, the estimate based on the median biomass concentration might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. Therefore, our best estimate of the biomass of pteropods is the geometric mean of the “minimum” and “maximum” estimates. Buitenhuis et al. reports a \"minimum\" estimate of 0.026 Gt C and a \"maximum\" estimate of 0.67 Gt C. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of pteropods is ≈0.13 Gt C\n" - ] - } - ], - "source": [ - "from scipy.stats import gmean\n", - "\n", - "# The estimate of pteropod biomass based on average biomass density\n", - "average_biomass = 0.67e15\n", - "\n", - "# The estimate of pteropod biomass based on median biomass density\n", - "median_biomass = 0.026e15\n", - "\n", - "# Our best estimate for the biomass of pteropods is the geometric mean of the average-based\n", - "# and median-based estimates\n", - "pteropod_estimate = gmean([average_biomass,median_biomass])\n", - "\n", - "print('Our best estimate for the total biomass of pteropods is ≈%.2f Gt C' %(pteropod_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cephalopods\n", - "Our estimate of the total biomass of cephalopods is based on an estimate by [Rodhouse & Nigmatullin](http://dx.doi.org/10.1098/rstb.1996.0090), which put cephalopod biomass at ≈0.05 Gt C." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Estimate for the total biomass of cephalopods from Rodhouse & Nigmatullin\n", - "cephalopod_biomass = 0.05e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the total biomass of molluscs is the sum of our estimates for the total biomass of pteropods and cephalopods:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of molluscs is 0.2 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = pteropod_estimate +cephalopod_biomass\n", - "print('Our best estimate for the total biomass of molluscs is %.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of molluscs\n", - "To estimate the total number of molluscs, we consider the total biomass of pteropods and divide it by the average carbon content of a single pteropod. To estimate the characteristic carbon content of a single pteropod, we rely on data from [Bednaršek et al.](https://doi.org/10.5194/essd-4-167-2012). Bednaršek et al. measured the biomass and the population density of pteropods per cubic meter. We divide these two measurements by one another to estimate the carbon content of a single pteropod. We use the mean across all samples as our best estimate for the characteristic carbon content of a single pteropod." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of pteropods is ≈5.0e+17\n" - ] - } - ], - "source": [ - "# Load data from Bednaršek et al.\n", - "data = pd.read_excel('carbon_content_data.xlsx',skiprows=4)\n", - "\n", - "# Calculate the characteristic carbon content of a single pteropod\n", - "best_cc = data['Biomass (mg/m3)'].mean()/data['Abundace (ind./m3)'].mean()*1e-3\n", - "\n", - "# Calculate the total number of molluscs\n", - "tot_molluscs_num = pteropod_estimate/best_cc\n", - "\n", - "print('Our best estimate for the total number of pteropods is ≈%.1e' %tot_molluscs_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to the chordate biomass data\n", - "old_results = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Molluscs',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,None)\n", - "result.to_excel('../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Molluscs'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,None],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Molluscs'), \n", - " col='Number of individuals',\n", - " values= tot_molluscs_num,\n", - " path='../../results.xlsx')\n", - "\n", - "# We need to use the results on the biomass of pteropods for our estimate\n", - "# of the total biomass of marine arthropods, so we feed these results to \n", - "# the data used in the estimate of the total biomass of marine arthropods\n", - "path = '../arthropods/marine_arthropods/marine_arthropods_data.xlsx'\n", - "marine_arthropods_data = pd.read_excel(path,'Other macrozooplankton')\n", - "\n", - "marine_arthropods_data.loc[1] = pd.Series({\n", - " 'Parameter': 'Biomass of pteropods',\n", - " 'Value': pteropod_estimate,\n", - " 'Units': 'g C',\n", - " 'Uncertainty': None\n", - " })\n", - "writer = pd.ExcelWriter(path, engine = 'openpyxl')\n", - "book = load_workbook(path)\n", - "writer.book = book\n", - "writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n", - "\n", - "\n", - "marine_arthropods_data.to_excel(writer, sheet_name = 'Other macrozooplankton',index=False)\n", - "writer.save()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/animals/nematodes/.ipynb_checkpoints/nematodes-checkpoint.ipynb b/animals/nematodes/.ipynb_checkpoints/nematodes-checkpoint.ipynb deleted file mode 100644 index 373cba9..0000000 --- 
a/animals/nematodes/.ipynb_checkpoints/nematodes-checkpoint.ipynb +++ /dev/null @@ -1,374 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of nematodes\n", - "To estimate the total biomass of nematodes, we calculate the total biomass of terrestrial and marine nematodes.\n", - "\n", - "## Terrestrial nematodes\n", - "We base our estimate of the biomass of terrestrial nematodes on data collected in a recent study by [Fierer et al.](http://dx.doi.org/10.1111/j.1461-0248.2009.01360.x). Fierer et al. collected data on the biomass density of nematodes in different biomes. Here is a sample from the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Average biomass density [g C m^-2]Median biomass density [g C m^-2]
Biome
Boreal forests0.080.06
Desert0.010.03
Temperate coniferous forest0.100.06
Temeprate deciduous forest0.250.05
Temprate grassland0.360.17
Tropical forest0.010.01
Tundra0.180.11
Native tropical savannaNaNNaN
Tropical pasturesNaNNaN
CropsNaNNaN
\n", - "
" - ], - "text/plain": [ - " Average biomass density [g C m^-2] \\\n", - "Biome \n", - "Boreal forests 0.08 \n", - "Desert 0.01 \n", - "Temperate coniferous forest 0.10 \n", - "Temeprate deciduous forest 0.25 \n", - "Temprate grassland 0.36 \n", - "Tropical forest 0.01 \n", - "Tundra 0.18 \n", - "Native tropical savanna NaN \n", - "Tropical pastures NaN \n", - "Crops NaN \n", - "\n", - " Median biomass density [g C m^-2] \n", - "Biome \n", - "Boreal forests 0.06 \n", - "Desert 0.03 \n", - "Temperate coniferous forest 0.06 \n", - "Temeprate deciduous forest 0.05 \n", - "Temprate grassland 0.17 \n", - "Tropical forest 0.01 \n", - "Tundra 0.11 \n", - "Native tropical savanna NaN \n", - "Tropical pastures NaN \n", - "Crops NaN " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data taken from Fierer et al.\n", - "data = pd.read_excel('nematode_biomass_data.xlsx','Fierer',skiprows=1,index_col='Biome')\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The data in Fierer et al. does not include biomass density of nematodes in savanna, pastures and cropland. 
We use the geometric mean of values from other biomes as our best estimate for the biomass density of nematodes in these biomes:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the biomass density across biomes\n", - "average_biomass_density = gmean(data['Average biomass density [g C m^-2]'].dropna())\n", - "median_biomass_density = gmean(data['Median biomass density [g C m^-2]'].dropna())\n", - "\n", - "# Set the biomass density in the missing biomes as the geometric mean of the biomass density of the\n", - "# available biomes\n", - "data.loc['Native tropical savanna','Average biomass density [g C m^-2]'] = average_biomass_density\n", - "data.loc['Tropical pastures','Average biomass density [g C m^-2]'] = average_biomass_density\n", - "data.loc['Crops','Average biomass density [g C m^-2]'] = average_biomass_density\n", - "data.loc['Native tropical savanna','Median biomass density [g C m^-2]'] = median_biomass_density\n", - "data.loc['Tropical pastures','Median biomass density [g C m^-2]'] = median_biomass_density\n", - "data.loc['Crops','Median biomass density [g C m^-2]'] = median_biomass_density\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For each biome, Fierer et al. provide an estimate of the average biomass density and the median biomass density. We generate two estimates for the total biomass of nematodes, one based on average biomass densities and one based on median biomass densities. The estimate based on the average biomass density is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are in non-natural conditions, or samples which have some technical biases associated with them) might shift the average biomass density significantly.
On the other hand, the estimate based on median biomass densities might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies.\n", - "\n", - "For each biome, we multiply the biomass density of nematodes by the total area of that biome, taken from the book [Biogeochemistry: An analysis of Global Change](https://www.sciencedirect.com/science/book/9780123858740) by Schlesinger & Bernhardt, and sum over all biomes:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total biomass of terrestrial nematodes based on Fierer et al. based on average biomass densities is 0.006 Gt C\n", - "The total biomass of terrestrial nematodes based on Fierer et al. based on median biomass densities is 0.005 Gt C\n", - "Our best estimate of total biomass of terrestrial nematodes based on Fierer et al. is 0.01 Gt C\n" - ] - } - ], - "source": [ - "# Load biome area data\n", - "area = pd.read_excel('nematode_biomass_data.xlsx','Biome area', skiprows=1, index_col='Biome')\n", - "\n", - "# Calculate the total biomass of nematodes based on average or median biomass densities\n", - "total_biomass_mean = (data['Average biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "total_biomass_median = (data['Median biomass density [g C m^-2]']*area['Area [m^2]']).sum()\n", - "\n", - "print('The total biomass of terrestrial nematodes based on Fierer et al. based on average biomass densities is %.3f Gt C' %(total_biomass_mean/1e15))\n", - "print('The total biomass of terrestrial nematodes based on Fierer et al.
based on median biomass densities is %.3f Gt C' %(total_biomass_median/1e15))\n", - "\n", - "# Use the geometric mean of the estimate based on the average biomass density and the\n", - "# estimate based on the median biomass density as our best estimate for the biomass of\n", - "# nematodes\n", - "best_terrestrial_biomass = gmean([total_biomass_mean,total_biomass_median])\n", - "print('Our best estimate of total biomass of terrestrial nematodes based on Fierer et al. is %.3f Gt C' %(best_terrestrial_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Marine nematodes\n", - "Our estimate of the total biomass of marine nematodes is based on data for seafloor biomass from [Wei et al.](http://dx.doi.org/10.1371/journal.pone.0015323). Wei et al. estimate ≈0.1 Gt C of benthic biomass, with ≈13% of that biomass contributed by meiofauna (defined as organisms which are 45µm-1mm in diameter). We assume meiofauna to be dominated by nematodes, (see nematodes section in the Supplementary Information for details regarding this assumption). Thus, we estimate the total biomass of marine nematodes at ≈0.01 Gt C.\n", - "\n", - "This estimate does not include biomass contribution from nematodes in benthic environments which are \"hot spots\" (such as marine canyons and seamounts). 
For more details regarding such contribution, see the other phyla section in the Supplementary Information.\n", - "\n", - "Our best estimate for the total biomass of nematodes is the sum of our estimates for the biomass of terrestrial nematodes and marine nematodes:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of total biomass of nematodes is 0.02 Gt C\n" - ] - } - ], - "source": [ - "# As noted above, our best estimate for the biomass of marine nematodes is ≈0.01 Gt C\n", - "best_marine_biomass = 0.014e15\n", - "\n", - "# Calculate our best estimate for the biomass of nematodes\n", - "best_estimate = best_terrestrial_biomass+best_marine_biomass\n", - "\n", - "print('Our best estimate of total biomass of nematodes is %.2f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of nematodes\n", - "We calculate the total number of nematodes by dividing our estimate of the total biomass of nematodes by the carbon content of nematodes, which is ≈0.05 µg C (Fierer et al.):" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of nematodes is ≈3.9e+20\n" - ] - } - ], - "source": [ - "# Carbon content of a single nematode based on Fierer et al.\n", - "carbon_content = 0.05e-6\n", - "\n", - "# Calculate the total number of nematodes\n", - "tot_nematode_num = best_estimate/carbon_content\n", - "\n", - "print('Our best estimate for the total number of nematodes is ≈%.1e' %tot_nematode_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/lib/python3/dist-packages/ipykernel/ipkernel.py:196: PerformanceWarning: 
indexing past lexsort depth may impact performance.\n", - " res = shell.run_cell(code, store_history=store_history, silent=silent)\n", - "/usr/lib/python3/dist-packages/IPython/core/interactiveshell.py:2717: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "# Feed results to the animal biomass data\n", - "old_results = pd.read_excel('../animal_biomass_estimate.xlsx',index_col=0)\n", - "result = old_results.copy()\n", - "result.loc['Nematodes',(['Biomass [Gt C]','Uncertainty'])] = (best_estimate/1e15,np.nan)\n", - "result.to_excel('../animal_biomass_estimate.xlsx')\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Animals','Nematodes'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_estimate/1e15,None],\n", - " path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Animals','Nematodes'), \n", - " col=['Number of individuals'],\n", - " values=tot_nematode_num,\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed results to Fig. 2A\n", - "update_results(sheet='Fig2A', \n", - " row=('Terrestrial','Nematodes'), \n", - " col=['Biomass [Gt C]'],\n", - " values=best_terrestrial_biomass/1e15,\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed results to Fig. 
2A\n", - "update_results(sheet='Fig2A', \n", - " row=('Marine','Nematodes'), \n", - " col=['Biomass [Gt C]'],\n", - " values=best_marine_biomass/1e15,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/.ipynb_checkpoints/bacteria_archaea_biomass-checkpoint.ipynb b/bacteria_archaea/.ipynb_checkpoints/bacteria_archaea_biomass-checkpoint.ipynb deleted file mode 100644 index fbe48d8..0000000 --- a/bacteria_archaea/.ipynb_checkpoints/bacteria_archaea_biomass-checkpoint.ipynb +++ /dev/null @@ -1,328 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of bacteria & archaea\n", - "To estimate the total biomass of bacteria & archaea, we combine our estimates for the biomass of each environment, which we calculated in each subdirectory. Our estimates for the biomass of bacteria and archaea in each environment are:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biomass [Gt C]Rounded biomass [Gt C]UncertaintyTotal biomass [Gt C]Rounded total biomass [Gt C]Total uncertainty
BacteriaTerrestrial deep subsurface58.06929460.020.38030273.42550270.09.0
Marine1.3556931.41.828451NaNNaNNaN
Soil7.3522987.06.847128NaNNaNNaN
Marine deep subsurface6.6482177.07.643075NaNNaNNaN
ArchaeaTerrestrial deep subsurface3.7065514.055.7141898.3000008.015.0
Marine0.3389230.32.703437NaNNaNNaN
Soil0.5157050.53.756745NaNNaNNaN
Marine deep subsurface2.8492363.07.931545NaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " Biomass [Gt C] Rounded biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 58.069294 60.0 \n", - " Marine 1.355693 1.4 \n", - " Soil 7.352298 7.0 \n", - " Marine deep subsurface 6.648217 7.0 \n", - "Archaea Terrestrial deep subsurface 3.706551 4.0 \n", - " Marine 0.338923 0.3 \n", - " Soil 0.515705 0.5 \n", - " Marine deep subsurface 2.849236 3.0 \n", - "\n", - " Uncertainty Total biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 20.380302 73.425502 \n", - " Marine 1.828451 NaN \n", - " Soil 6.847128 NaN \n", - " Marine deep subsurface 7.643075 NaN \n", - "Archaea Terrestrial deep subsurface 55.714189 8.300000 \n", - " Marine 2.703437 NaN \n", - " Soil 3.756745 NaN \n", - " Marine deep subsurface 7.931545 NaN \n", - "\n", - " Rounded total biomass [Gt C] \\\n", - "Bacteria Terrestrial deep subsurface 70.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Archaea Terrestrial deep subsurface 8.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "\n", - " Total uncertainty \n", - "Bacteria Terrestrial deep subsurface 9.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN \n", - "Archaea Terrestrial deep subsurface 15.0 \n", - " Marine NaN \n", - " Soil NaN \n", - " Marine deep subsurface NaN " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('../results.xlsx','Table1 & Fig1',index_col=[0,1])\n", - "data.loc[['Bacteria','Archaea']]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum all these different contributions to produce our best estimates for the biomass of bacteria and archaea:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of bacteria is ≈73.4 Gt C\n", - "Our best estimate for the
biomass of archaea is ≈7.4 Gt C\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:1: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " \"\"\"Entry point for launching an IPython kernel.\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:2: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " \n" - ] - } - ], - "source": [ - "best_estimate_bac = data.loc['Bacteria','Biomass [Gt C]'].sum()\n", - "best_estimate_arch = data.loc['Archaea','Biomass [Gt C]'].sum()\n", - "\n", - "print('Our best estimate for the biomass of bacteria is ≈%.1f Gt C' %best_estimate_bac)\n", - "print('Our best estimate for the biomass of archaea is ≈%.1f Gt C' %best_estimate_arch)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimates of the total biomass of bacteria and archaea, we combine the uncertainties of our estimates for each environment (terrestrial deep subsurface, marine, soil and marine deep subsurface)."
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty of our estimate of the total biomass of bacteria is ≈10-fold\n", - "Our projection for the uncertainty of our estimate of the total biomass of archaea is ≈12-fold\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:1: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " \"\"\"Entry point for launching an IPython kernel.\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:2: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " \n" - ] - } - ], - "source": [ - "mul_CI_bac = CI_sum_prop(estimates=data.loc['Bacteria','Biomass [Gt C]'].values, mul_CIs = data.loc['Bacteria','Uncertainty'].values)\n", - "mul_CI_arch = CI_sum_prop(estimates=data.loc['Archaea','Biomass [Gt C]'].values, mul_CIs = data.loc['Archaea','Uncertainty'].values)\n", - "\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of bacteria is ≈%.0f-fold' %mul_CI_bac)\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of archaea is ≈%.0f-fold' %mul_CI_arch)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py:196: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " res = shell.run_cell(code, store_history=store_history, silent=silent)\n", - "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py:2683: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "# Feed 
bacteria results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Bacteria','Terrestrial deep subsurface'), \n", - " col=['Total biomass [Gt C]', 'Total uncertainty'],\n", - " values=[best_estimate_bac,mul_CI_bac],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed archaea results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Archaea','Terrestrial deep subsurface'), \n", - " col=['Total biomass [Gt C]', 'Total uncertainty'],\n", - " values=[best_estimate_arch,mul_CI_arch],\n", - " path='../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine/.ipynb_checkpoints/marine_prok_biomass_estimate-checkpoint.ipynb b/bacteria_archaea/marine/.ipynb_checkpoints/marine_prok_biomass_estimate-checkpoint.ipynb deleted file mode 100644 index ac67b63..0000000 --- a/bacteria_archaea/marine/.ipynb_checkpoints/marine_prok_biomass_estimate-checkpoint.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of marine archaea and bacteria\n", - "\n", - "We use our best estimates for the total number of marine prokaryotes, the carbon 
content of marine prokaryotes and the fraction of marine archaea and bacteria out of the total population of marine prokaryotes to estimate the total biomass of marine bacteria and archaea" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_excel('marine_prok_biomass_estimate.xlsx')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These are our best estimates for the different parameters required for the estimate, along with the associated uncertainties" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Total number of marine bacteria and archaea1.2e+29Cells1.5e+00
1Carbon content1.1e+01fg C cell^-11.4e+00
2Fraction of archaea2.0e-01Unitless2.2e+00
3Fraction of bacteria8.0e-01Unitless1.3e+00
4Fraction of the total biomass of marine bacter...2.0e-01Unitless4.4e+00
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units \\\n", - "0 Total number of marine bacteria and archaea 1.2e+29 Cells \n", - "1 Carbon content 1.1e+01 fg C cell^-1 \n", - "2 Fraction of archaea 2.0e-01 Unitless \n", - "3 Fraction of bacteria 8.0e-01 Unitless \n", - "4 Fraction of the total biomass of marine bacter... 2.0e-01 Unitless \n", - "\n", - " Uncertainty \n", - "0 1.5e+00 \n", - "1 1.4e+00 \n", - "2 2.2e+00 \n", - "3 1.3e+00 \n", - "4 4.4e+00 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply all the relevant parameters to arrive at our best estimate for the biomass of marine archaea and bacteria, and propagate the uncertainties associated with each parameter to calculate the uncertainty associated with the estimate for the total biomass" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of marine archaea is 0.3 Gt C\n", - "Our best estimate for the total biomass of marine bacteria is 1.3 Gt C\n", - "The uncertainty associated with the estimate for the biomass of archaea is 2.6-fold\n", - "The uncertainty associated with the estimate for the biomass of bacteria is 1.8-fold\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of marine archaea and bacteria\n", - "total_arch_biomass = results['Value'][0]*results['Value'][1]*(1+results['Value'][4])*1e-15*results['Value'][2]\n", - "total_bac_biomass = results['Value'][0]*results['Value'][1]*(1+results['Value'][4])*1e-15*results['Value'][3]\n", - "\n", - "print('Our best estimate for the total biomass of marine archaea is %.1f Gt C' %(total_arch_biomass/1e15))\n", - "print('Our best estimate for the total biomass of marine bacteria is %.1f Gt C' %(total_bac_biomass/1e15))\n", - "\n", - "# 
Propagate the uncertainty in the total biomass of bacteria and archaea\n", - "prok_biomass_CI = CI_sum_prop(estimates=np.array([results['Value'][0]*results['Value'][1], results['Value'][0]*results['Value'][1]*results['Value'][4]]), mul_CIs=np.array([CI_prod_prop(results['Uncertainty'][:2]),results['Uncertainty'][4]]))\n", - "\n", - "# Propagate the uncertainty associated with each parameter to the final estimate\n", - "arch_biomass_uncertainty = CI_prod_prop(np.array([prok_biomass_CI,results['Uncertainty'][2]]))\n", - "bac_biomass_uncertainty = CI_prod_prop(np.array([prok_biomass_CI,results['Uncertainty'][3]]))\n", - "\n", - "print('The uncertainty associated with the estimate for the biomass of archaea is %.1f-fold' %arch_biomass_uncertainty)\n", - "print('The uncertainty associated with the estimate for the biomass of bacteria is %.1f-fold' %bac_biomass_uncertainty)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Feed bacteria results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Bacteria','Marine'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_bac_biomass/1e15,bac_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Archaea','Marine'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_arch_biomass/1e15,arch_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed bacteria results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Bacteria','Marine'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][3],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Archaea','Marine'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][2],\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine/POC_prokaroytes/.ipynb_checkpoints/POC_prokaryotes-checkpoint.ipynb b/bacteria_archaea/marine/POC_prokaroytes/.ipynb_checkpoints/POC_prokaryotes-checkpoint.ipynb deleted file mode 100644 index 6cbdd96..0000000 --- a/bacteria_archaea/marine/POC_prokaroytes/.ipynb_checkpoints/POC_prokaryotes-checkpoint.ipynb +++ /dev/null @@ -1,1091 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Import dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from openpyxl import load_workbook\n", - "from CI_helper import *\n", - "from fraction_helper import *\n", -
"pd.options.display.float_format = '{:,.2f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass contribution of particle-attached bacteria and archaea in the ocean\n", - "In order to estimate the total biomass of bacteria and archaea attached to particulate organic matter (POM), we assemble studies which report the local contribution of particle-attached bacteria and archaea to the total number of cells. We focus here on the two main categories of POM - macroaggregates (>0.5 mm in diameter) and microaggregates (smaller than 0.5 mm in diameter). We estimate the biomass contribution of bacteria and archaea attached to each size category, and then sum up the contributions to estimate the total biomass of particle-attached bacteria and archaea in the ocean. We first survey data on the fraction of the total number of cells which is attached to either macro- or microaggregates. We then estimate the carbon content of particle-attached cells relative to free-living cells.\n", - "\n", - "## Fraction of number of cells which is particle-attached\n", - "### Macroaggregates\n", - "In order to estimate the total biomass of bacteria and archaea attached to macroaggregates, we rely on a collection of data from several studies which report the fraction of the total number of bacteria and archaea which is attached to macroaggregates. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
LocationFraction of cells in aggregatesSize of cells relative to free-living cellsVolume of cells [µm^3]Carbon content [fg C]Cells on aggregates concentration [cells mL^-1]Depth [m]Concentration of macroaggregates [# L^1-]ReferenceLinkRemarks
0Southern California Bight0.011.900.20nan1,045.0015-200.55Alldredge & Gotschalkhttp://dx.doi.org/10.1016/0278-4343(90)90034-JBased on tables 1,2 and 3,
1Southern California Bight0.001.400.15nan1,997.5015-200.42Alldredge & Gotschalkhttp://dx.doi.org/10.1016/0278-4343(90)90034-JBased on tables 1,2 and 3
2Southern California Bight0.001.100.12nan1,100.0015-200.50Alldredge & Gotschalkhttp://dx.doi.org/10.1016/0278-4343(90)90034-JBased on tables 1,2 and 3
3Southern California Bight0.011.300.16nan6,612.0015-201.14Alldredge & Gotschalkhttp://dx.doi.org/10.1016/0278-4343(90)90034-JBased on tables 1,2 and 3
4Mediterranean0.03nannannan10,600.0040nanTurley & Stutthttp://dx.doi.org/10.4319/lo.2000.45.2.0419NaN
\n", - "
" - ], - "text/plain": [ - " Location Fraction of cells in aggregates \\\n", - "0 Southern California Bight 0.01 \n", - "1 Southern California Bight 0.00 \n", - "2 Southern California Bight 0.00 \n", - "3 Southern California Bight 0.01 \n", - "4 Mediterranean 0.03 \n", - "\n", - " Size of cells relative to free-living cells Volume of cells [µm^3] \\\n", - "0 1.90 0.20 \n", - "1 1.40 0.15 \n", - "2 1.10 0.12 \n", - "3 1.30 0.16 \n", - "4 nan nan \n", - "\n", - " Carbon content [fg C] Cells on aggregates concentration [cells mL^-1] \\\n", - "0 nan 1,045.00 \n", - "1 nan 1,997.50 \n", - "2 nan 1,100.00 \n", - "3 nan 6,612.00 \n", - "4 nan 10,600.00 \n", - "\n", - " Depth [m] Concentration of macroaggregates [# L^1-] Reference \\\n", - "0 15-20 0.55 Alldredge & Gotschalk \n", - "1 15-20 0.42 Alldredge & Gotschalk \n", - "2 15-20 0.50 Alldredge & Gotschalk \n", - "3 15-20 1.14 Alldredge & Gotschalk \n", - "4 40 nan Turley & Stutt \n", - "\n", - " Link Remarks \n", - "0 http://dx.doi.org/10.1016/0278-4343(90)90034-J Based on tables 1,2 and 3, \n", - "1 http://dx.doi.org/10.1016/0278-4343(90)90034-J Based on tables 1,2 and 3 \n", - "2 http://dx.doi.org/10.1016/0278-4343(90)90034-J Based on tables 1,2 and 3 \n", - "3 http://dx.doi.org/10.1016/0278-4343(90)90034-J Based on tables 1,2 and 3 \n", - "4 http://dx.doi.org/10.4319/lo.2000.45.2.0419 NaN " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data\n", - "macro = pd.read_excel('poc_data.xlsx','Macroaggregates')\n", - "macro.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate for the fraction of cells attached to macroaggregates, we first calculate the average fraction of particle-attached cells within each study. We calculate both the arithmetic mean fraction as well as the geometric mean fraction of cells." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/pandas/core/groupby.py:1880: RuntimeWarning: Mean of empty slice\n", - " res = f(group)\n" - ] - } - ], - "source": [ - "# Calculate the mean fraction of particle-attached cells whitin each study\n", - "macro_study_mean = macro.groupby('Reference')['Fraction of cells in aggregates'].apply(np.nanmean)\n", - "macro_study_gmean = macro.groupby('Reference')['Fraction of cells in aggregates'].apply(frac_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the mean fraction of particle-attached cells between different studies. We calculate both the arithmetic mean fraction as well as the geometric mean fraction of cells. We thus generate two estimates for the fraction of particle-attached cells out of the total population of marine bacteria and archaea- one based on arithmetic means and one based on geometric means. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are contaminated with organic carbon sources, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the arithmetic and geometric mean estimates." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of the toal number of marine bacteria and archaea which is attached to macroaggregates is ≈3 percent\n" - ] - } - ], - "source": [ - "# Calculate the mean fraction of particle-attached cells between different studies\n", - "macro_mean = np.nanmean(macro_study_mean)\n", - "macro_gmean = frac_mean(macro_study_gmean)\n", - "\n", - "# Use the geometric mean of the arithmetic and geometric mean based estimates as our best estimate\n", - "best_macro_frac = frac_mean(np.array([macro_mean,macro_gmean]))\n", - "\n", - "print('Our best estimate for the fraction of the toal number of marine bacteria and archaea which is attached to macroaggregates is ≈%.0f percent' %(best_macro_frac*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our samples of the populations of attached cells are mainly representative of the epipelagic and mesopelagic realms, but we did not find measurements of the concentration of attached bacterial and archaeal cells in the bathypelagic realm. We therefore assume the distribution of particle-attached cells is similar in the bathypelagic realm. To lend support to this assumption, we compare the average macroaggregate concentrations measured in the studies on which we rely to measurements of the concentration of macroaggregates in the deep-sea. 
We calculate the average concentration of macroaggregates measured in the deep-sea:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average concentration of macroaggregates in the deep-sea is ≈2 aggregates per liter\n" - ] - } - ], - "source": [ - "# Load data on the concentration of macroaggregates in the deep-sea\n", - "deep_sea_macro = pd.read_excel('poc_data.xlsx','Macroaggregate concentration')\n", - "\n", - "# Calculate the mean concentration of macroaggregates in the deep-sea\n", - "mean_deep_macro = geo_CI_calc(deep_sea_macro.groupby('Reference')['Macroaggregate concentration [# L^-1]'].apply(np.nanmean))\n", - "\n", - "print('The average concentration of macroaggregates in the deep-sea is ≈%.0f aggregates per liter' %mean_deep_macro)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This number is similar to the average number we get from the studies on which we rely our estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average concentration of macroaggregates in reported in the studies on which we rely is ≈1 aggregates per liter\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/pandas/core/groupby.py:1880: RuntimeWarning: Mean of empty slice\n", - " res = f(group)\n" - ] - } - ], - "source": [ - "mean_shallow_macro = gmean(macro.groupby('Reference')['Concentration of macroaggregates [# L^1-]'].apply(np.nanmean).dropna())\n", - "\n", - "print('The average concentration of macroaggregates in reported in the studies on which we rely is ≈%.0f aggregates per liter' %mean_shallow_macro)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Microaggregates\n", - "In order to estimate the total biomass of 
bacteria and archaea attached to microaggregates, we collected data from several studies which report the fraction of the total number of bacteria and archaea which is attached to microaggregates. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SiteDepth [m]StationFraction of attached cellsReferenceLink
0Arctic Fram Strait5S30.02Busch et al.http://dx.doi.org/10.3389/fmars.2017.00166
1Arctic Fram Strait15S30.02Busch et al.http://dx.doi.org/10.3389/fmars.2017.00166
2Arctic Fram Strait30S30.02Busch et al.http://dx.doi.org/10.3389/fmars.2017.00166
3Arctic Fram Strait45S30.01Busch et al.http://dx.doi.org/10.3389/fmars.2017.00166
4Arctic Fram Strait75S30.02Busch et al.http://dx.doi.org/10.3389/fmars.2017.00166
\n", - "
" - ], - "text/plain": [ - " Site Depth [m] Station Fraction of attached cells \\\n", - "0 Arctic Fram Strait 5 S3 0.02 \n", - "1 Arctic Fram Strait 15 S3 0.02 \n", - "2 Arctic Fram Strait 30 S3 0.02 \n", - "3 Arctic Fram Strait 45 S3 0.01 \n", - "4 Arctic Fram Strait 75 S3 0.02 \n", - "\n", - " Reference Link \n", - "0 Busch et al. http://dx.doi.org/10.3389/fmars.2017.00166 \n", - "1 Busch et al. http://dx.doi.org/10.3389/fmars.2017.00166 \n", - "2 Busch et al. http://dx.doi.org/10.3389/fmars.2017.00166 \n", - "3 Busch et al. http://dx.doi.org/10.3389/fmars.2017.00166 \n", - "4 Busch et al. http://dx.doi.org/10.3389/fmars.2017.00166 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data on microaggregates\n", - "micro = pd.read_excel('poc_data.xlsx','Microaggregates')\n", - "micro.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In a similar manner to our procedure regarding macroaggregates, we calculate the arithmetic and geometric means of the fraction of particle-attached cells within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the mean fraction of particle-attached cells whitin each study\n", - "micro_study_mean = micro.groupby('Reference')['Fraction of attached cells'].apply(np.nanmean)\n", - "micro_study_gmean = micro.groupby('Reference')['Fraction of attached cells'].apply(frac_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the mean fraction of particle-attached cells between different studies. We calculate both the arithmetic mean fraction as well as the geometric mean fraction of cells. We thus generate two estimates for the fraction of particle-attached cells out of the total population of marine bacteria and archaea- one based on arithmetic means and one based on geometric means. 
We use as our best estimate the geometric mean of the estimates from the two methodologies." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of the toal number of marine bacteria and archaea which is attached to microaggregates is ≈4 percent\n" - ] - } - ], - "source": [ - "# Calculate the mean fraction of particle-attached cells between different studies\n", - "micro_mean = micro_study_mean.mean()\n", - "micro_gmean = frac_mean(micro_study_gmean)\n", - "\n", - "# Use the geometric mean of the arithmetic and geometric mean based estimates as our best estimate\n", - "best_micro_frac = frac_mean(np.array([micro_mean,micro_gmean]))\n", - "\n", - "print('Our best estimate for the fraction of the toal number of marine bacteria and archaea which is attached to microaggregates is ≈%.0f percent' %(best_micro_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Carbon content of particle-attached cells\n", - "Several studies have indicated that particle-attached cells are bigger in volume, and thus more carbon rich. To estimate the carbon content of particle-attached cells we use two strategies. The first is based on studies which report the carbon content of particle-attached cells relative to free-living cells. The second is based on studies which report the volume of particle-attached cells, which we convert to carbon content using an allometric relation. We assume the carbon content of bacteria and archaea which are attached to microaggregates and macroaggregates is similar. \n", - "\n", - "### Relative carbon content\n", - "We first calculate the geometric mean of the size of particle-attached cells relative to free-living cells within each study. Then we calculate the geometric mean of the values reported by different studies as our best estimate for the size of particle-attached cells relative to free-living cells." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the size of particle-attached cells relative to free-living cells is ≈3-fold\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the relative size of particle attached cells within each study\n", - "rel_size_study = macro.groupby(['Location','Reference'])['Size of cells relative to free-living cells'].apply(gmean)\n", - "\n", - "# Calculate the geometric mean of the values reported in different studies as our best estimate\n", - "best_rel_size = gmean(rel_size_study.dropna())\n", - "\n", - "print('Our best estimate for the size of particle-attached cells relative to free-living cells is ≈%1.f-fold' % best_rel_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Volume of particle-attached cells\n", - "We assembled studies estimating the volume of particle-attached cells. We convert this volume to carbon content using the allometric relation reported by [Simon & Azam](http://dx.doi.org/10.3354/meps051201). The allometric model is:\n", - "$$C = 88.1 \\times V^{0.59}$$\n", - "We first calculate the geometric mean of volumes within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the volume of particle-attached cells reported within each study\n", - "vol_study = macro.groupby('Reference')['Volume of cells [µm^3]'].apply(gmean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the geometric mean of volumes reported in different studies. We convert our best estimate to the volume of particle-attached cells to carbon content based on the formula reported in Simon & Azam. 
We calculate the carbon content of particle-attached cells relative to free-living cells based on our estimate for the carbon content of free-living bacteria and archaea in the ocean of ≈11 fg C (see the relevant section in the Supplementary Information for more details)." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of particle-attached cells relative to free-living cells based on volume is ≈3-fold\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of volumes reported in different studies\n", - "best_vol = gmean(vol_study.dropna())\n", - "\n", - "# The allometric model reported by Simon & Azam\n", - "sa_model = lambda x: 88.1*x**0.59\n", - "\n", - "# Convert our best estimate for the volume of particle-attached cells to carbon content\n", - "best_cc = sa_model(best_vol)\n", - "\n", - "# Our best estimate for the carbon content of free-living marine bacteria and archaea \n", - "free_living_cc = pd.read_excel('../marine_prok_biomass_estimate.xlsx').iloc[1,1]\n", - "\n", - "# Calculate the relative carbon content of particle-attached cells.\n", - "vol_rel_size = best_cc/free_living_cc\n", - "\n", - "print('Our best estimate for the carbon content of particle-attached cells relative to free-living cells based on volume is ≈%1.f-fold' % vol_rel_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geometric mean of the two estimates of the carbon content of particle-attached cells relative to free-living cells as our best estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of particle-attached cells relative to free-living cells is ≈3-fold\n" - ] - } - ], - "source": [ - "best_rel_cc= 
gmean([best_rel_size,vol_rel_size])\n", - "print('Our best estimate for the carbon content of particle-attached cells relative to free-living cells is ≈%1.f-fold' % best_rel_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the fraction of the total biomass of marine bacteria and archaea which is particle-attached, we sum up the fraction of the total number of cells contributed by cells attached to micro- and macroaggregates, and multiply it by the relative carbon content of particle-attached cells:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of the total biomass of marine bacteria and archaea which is particle-attached is ≈20 percent\n" - ] - } - ], - "source": [ - "best_estimate = (best_macro_frac+best_micro_frac)*best_rel_cc\n", - "\n", - "print('Our best estimate for the fraction of the total biomass of marine bacteria and archaea which is particle-attached is ≈%.0f percent' %(best_estimate*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To project the uncertainty associated with our estimate of the fraction of the total biomass of marine bacteria and archaea which is particle-attached, we first project the uncertainty associated with the two factors of our estimate - the fraction of the total number of cells which is particle-attached and the relative carbon content of particle-attached cells\n", - "\n", - "## Fraction of cells\n", - "We first assess the uncertainty associated with the estimate of the fraction of the total number of cells which is attached to microaggregates and macroaggregates. We then propagate the uncertainty from each fraction to our estimate of the total fraction of cells. 
We begin with the uncertainty associated with our estimate of the total number of cells which are attached to microaggregates.\n", - "\n", - "### Microaggregates\n", - "We collect the intra-study, interstudy and inter-method uncertainties associated with our estimate of the fraction of the total number of marine bacteria and archaea which are attached to microaggregates. We use the maximum of this collection of uncertainties as our best projection of the uncertainty associated with our estimate of the fraction of the total number of marine bacteria and archaea which are attached to microaggregates.\n", - "#### Intra-study uncertainty\n", - "We calculate the 95% confidence interval around the mean fraction of microaggregate-attached cells within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the 95% confidence interval around the mean fraction of cells attached to microaggregates \n", - "# within each study\n", - "micro_study_CI = micro.groupby('Reference')['Fraction of attached cells'].apply(frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Interstudy uncertainty\n", - "We calculate the 95% confidence interval around the mean fraction of microaggregate-attached cells between different studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the 95% confidence interval around the mean fraction of cells attached to microaggregates \n", - "# between different studies\n", - "micro_CI = frac_CI(micro_study_gmean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inter-method uncertainty\n", - "We calculate the 95% confidence interval around the geometric mean between the estimate based on arithmetic means and geometric means of the fraction of cells attached to microaggregates:" - ] - 
}, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the 95% confidence interval around the geometric mean of the estimates based on arithmetic means\n", - "# and geometric means\n", - "micro_inter_method_CI = frac_CI(np.array([micro_mean,micro_gmean]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the collection of uncertainties as our best projection for the uncertainty associated with our estimate of the fraction of the total number of bacteria and archaea which is attached to microaggregates:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is attached to microaggregates is ≈2.3-fold\n" - ] - } - ], - "source": [ - "micro_frac_CI = np.max([micro_inter_method_CI,micro_study_CI.max(),micro_CI])\n", - "print('Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is attached to microaggregates is ≈%.1f-fold' %micro_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Macroaggregates\n", - "We collect both the intra-study uncertainty and the interstudy uncertainty associated with our estimate of the fraction of the total number of marine bacteria and archaea which are attached to macroaggregates. 
We use the maximum of this collection of uncertainties as our best projection of the uncertainty associated with our estimate of the fraction of the total number of marine bacteria and archaea which are attached to macroaggregates.\n", - "#### Intra-study uncertainty\n", - "We calculate the 95% confidence interval around the mean fraction of macroaggregate-attached cells within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - } - ], - "source": [ - "# Calculate the 95% confidence interval around the mean fraction of cells attached to macroaggregates \n", - "# within each study\n", - "macro_study_CI = macro.groupby('Reference')['Fraction of cells in aggregates'].apply(frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Interstudy uncertainty\n", - "We calculate the 95% confidence interval around the mean fraction of macroaggregate-attached cells between different studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the 95% confidence interval around the mean fraction of cells attached to macroaggregates \n", - "# between different studies\n", - "macro_CI = frac_CI(macro_study_gmean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inter-method uncertainty\n", - "We calculate the 95% confidence interval around the geometric mean between the estimate based on arithmetic means and geometric means of the fraction of cells attached to macroaggregates:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the 95% 
confidence interval around the geometric mean of the estimates based on arithmetic means\n", - "# and geometric means\n", - "macro_inter_method_CI = frac_CI(np.array([macro_mean,macro_gmean]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the collection of uncertainties as our best projection for the uncertainty associated with our estimate of the fraction of the total number of bacteria and archaea which is attached to macroaggregates:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is attached to macroaggregates is ≈7.6-fold\n" - ] - } - ], - "source": [ - "macro_frac_CI = np.max([macro_inter_method_CI,macro_study_CI.max(),macro_CI])\n", - "print('Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is attached to macroaggregates is ≈%.1f-fold' %macro_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We propagate the uncertainties associated with the estimates of the fraction of the total number of marine bacteria and archaea attached to micro- and macroaggregates to the final estimate of the fraction of marine bacteria and archaea which is particle-attached:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is particle-attached is ≈2.8-fold\n" - ] - } - ], - "source": [ - "# Propagate the uncertainties of the fraction of cells attached to micro- and macroaggregates\n", - "# to the estiamte of the fraction of cells which is particle-attached\n", - "num_frac_CI 
= CI_sum_prop(estimates=np.array([best_micro_frac,best_macro_frac]),mul_CIs=np.array([micro_frac_CI,macro_frac_CI]))\n", - "print('Our best projection for the uncertainty associated with our estimate of the total number of bacteria and archaea which is particle-attached is ≈%.1f-fold' %num_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Relative carbon content\n", - "We assess the uncertainty associated with the estimate of the relative carbon content of particle-attached cells. We used two independent methods to estimate the relative carbon content. We assess the uncertainty associated with each one of them.\n", - "### Relative size-based\n", - "We collect both the intra-study uncertainty and the interstudy uncertainty associated with our estimate of the size of particle-attached cells relative to free-living cells. \n", - "#### Intra-study\n", - "We calculate the 95% confidence interval around the mean size of particle-attached cells relative to free-living cells within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "size_intra_CI = macro.groupby(['Location','Reference'])['Size of cells relative to free-living cells'].apply(geo_CI_calc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inter-study\n", - "We calculate the 95% confidence interval around the mean size of particle-attached cells relative to free-living cells between different studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "size_inter_CI = geo_CI_calc(rel_size_study.dropna())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Volume-based\n", - "We collect both the intra-study uncertainty and the interstudy uncertainty associated with our estimate of the volume of particle-attached cells. 
\n", - "#### Intra-study\n", - "We calculate the 95% confidence interval around the mean volume of particle-attached cells within each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "vol_intra_CI = macro.groupby('Reference')['Volume of cells [µm^3]'].apply(geo_CI_calc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Inter-study\n", - "We calculate the 95% confidence interval around the mean volume of particle-attached cells between different studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "vol_inter_CI = geo_CI_calc(vol_study.dropna())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "We calculate the 95% confidence interval around the geometric mean between the size-based estimate and the volume based estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "cc_inter_method_CI = geo_CI_calc(np.array([vol_rel_size,best_rel_size]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the collection of uncertainties for both the volume-based methoda and the size based method as our best projection of the uncertainty associated with our estimate of the relative carbon content of particle-attached bacteria and archaea:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the relative carbon content of particles-attached bacteria and archaea is ≈2.9-fold\n" - ] - } - ], - "source": [ - "cc_CI = 
np.max([cc_inter_method_CI,vol_inter_CI,vol_intra_CI.max(),size_inter_CI,size_intra_CI.max()])\n", - "print('Our best projection for the uncertainty associated with our estimate of the relative carbon content of particles-attached bacteria and archaea is ≈%.1f-fold' %cc_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine our projections for the uncertainty associated with our estimate of the fraction of the total number of cells which is particle-attached and our estimate of the relative carbon content of particle-attached cells." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the fraction of the total biomass of marine bactetia and archaea which is particle-attached is ≈4.4-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(np.array([cc_CI,num_frac_CI]))\n", - "print('Our best projection for the uncertainty associated with our estimate of the fraction of the total biomass of marine bactetia and archaea which is particle-attached is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of the total biomass of marine bacteria and archaea which is particle-attahced: 2.0e-01\n", - "Uncertainty associated with the fraction of the biomass of marine bacteria and archaea which is particle-attached: 4.4-fold\n" - ] - } - ], - "source": [ - "print('Fraction of the total biomass of marine bacteria and archaea which is particle-attahced: %.1e' % best_estimate)\n", - "print('Uncertainty associated with the fraction of the biomass of marine bacteria and archaea which is particle-attached: %.1f-fold' % mul_CI)\n", - 
"\n", - "old_results = pd.read_excel('../marine_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(4), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[4] = pd.Series({\n", - " 'Parameter': 'Fraction of the total biomass of marine bacteria and archaea which is particle-attached',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../marine_prok_biomass_estimate.xlsx',index=False)\n", - "\n", - "# We need to use the results on the carbon content of particle-attached cells\n", - "# for our estimate of the total biomass of particle-attached protists, \n", - "# so we feed these results to the data used in the estimate of the total\n", - "# biomass of particle-attached protists\n", - "path = '../../../protists/marine_protists/marine_protists_data.xlsx'\n", - "marine_protists_data = pd.read_excel(path,'POC prokaryotes')\n", - "\n", - "marine_protists_data.loc[0] = pd.Series({\n", - " 'Parameter': 'Carbon content of particle-attached prokaryotes',\n", - " 'Value': best_cc,\n", - " 'Units': 'fg C cell^-1',\n", - " 'Uncertainty': cc_CI\n", - " })\n", - "writer = pd.ExcelWriter(path, engine = 'openpyxl')\n", - "book = load_workbook(path)\n", - "writer.book = book\n", - "writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n", - "\n", - "\n", - "marine_protists_data.to_excel(writer, sheet_name = 'POC prokaryotes',index=False)\n", - "writer.save()\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 
4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine/arch_bac_ratio/.ipynb_checkpoints/marine_arch_frac-checkpoint.ipynb b/bacteria_archaea/marine/arch_bac_ratio/.ipynb_checkpoints/marine_arch_frac-checkpoint.ipynb deleted file mode 100644 index 78064a8..0000000 --- a/bacteria_archaea/marine/arch_bac_ratio/.ipynb_checkpoints/marine_arch_frac-checkpoint.ipynb +++ /dev/null @@ -1,1145 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies \n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from fraction_helper import *\n", - "from openpyxl import load_workbook\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "# Genaral parameters used in the estimate\n", - "ocean_area = 3.6e14\n", - "liters_in_m3 = 1e3\n", - "ml_in_m3 = 1e6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of marine archaea out of the total marine prokaryote population" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to estimate the fraction of archaea out of the total population of marine bacteria and archaea, we rely of two independent methods: fluorescent in-situ hybridization (FISH) and 16S rDNA sequencing. For each one of the methods, we calculate the fraction of archaea out of the total population of marine bacteria and archaea in the three depth layers of the ocean - the epieplagic (< 200 meters depth), the mesopelagic (200-1000 meters depth) and bathypelagic (1000-4000 meters depth).\n", - "\n", - "### FISH based estimate\n", - "For our FISH based estimate we rely on data from [Lloyd et al.](http://dx.doi.org/10.1128/AEM.02090-13). 
Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
paperSampleWater Depth (m)Cells per ccCARDFISH Bac per ccCARDFISH Arc per ccCARDFISH Total per ccFISH yieldFraction Arc CARDFISHFish or cardFish...FixativeBac permeabilizationArc permeabilizationBac probeArc probeCounting methodqPCR-Bacteria (copies/mL water)qPCR-Archaea (copies/mL water)qPCR-MCG (copies/mL water)Total qPCR(copies/mL water)
0Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast2.5e+011.0e+067.5e+059.2e+048.4e+058.3e-011.1e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
1Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.0e+027.1e+054.3e+051.1e+055.5e+057.8e-012.1e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
2Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast5.0e+021.0e+055.3e+042.8e+048.1e+047.9e-013.4e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
3Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.0e+036.9e+043.3e+042.1e+045.4e+047.8e-013.9e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
4Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.8e+036.4e+042.6e+042.0e+044.7e+047.3e-014.4e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
\n", - "

5 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - " paper Sample Water Depth (m) \\\n", - "0 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 2.5e+01 \n", - "1 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.0e+02 \n", - "2 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 5.0e+02 \n", - "3 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.0e+03 \n", - "4 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.8e+03 \n", - "\n", - " Cells per cc CARDFISH Bac per cc CARDFISH Arc per cc \\\n", - "0 1.0e+06 7.5e+05 9.2e+04 \n", - "1 7.1e+05 4.3e+05 1.1e+05 \n", - "2 1.0e+05 5.3e+04 2.8e+04 \n", - "3 6.9e+04 3.3e+04 2.1e+04 \n", - "4 6.4e+04 2.6e+04 2.0e+04 \n", - "\n", - " CARDFISH Total per cc FISH yield Fraction Arc CARDFISH Fish or cardFish \\\n", - "0 8.4e+05 8.3e-01 1.1e-01 CARDFISH \n", - "1 5.5e+05 7.8e-01 2.1e-01 CARDFISH \n", - "2 8.1e+04 7.9e-01 3.4e-01 CARDFISH \n", - "3 5.4e+04 7.8e-01 3.9e-01 CARDFISH \n", - "4 4.7e+04 7.3e-01 4.4e-01 CARDFISH \n", - "\n", - " ... Fixative Bac permeabilization \\\n", - "0 ... formaldehyde lysozyme/achromopeptidase \n", - "1 ... formaldehyde lysozyme/achromopeptidase \n", - "2 ... formaldehyde lysozyme/achromopeptidase \n", - "3 ... formaldehyde lysozyme/achromopeptidase \n", - "4 ... 
formaldehyde lysozyme/achromopeptidase \n", - "\n", - " Arc permeabilization Bac probe Arc probe Counting method \\\n", - "0 proteinase K EUB338 ARCH915 Microscope-eye \n", - "1 proteinase K EUB338 ARCH915 Microscope-eye \n", - "2 proteinase K EUB338 ARCH915 Microscope-eye \n", - "3 proteinase K EUB338 ARCH915 Microscope-eye \n", - "4 proteinase K EUB338 ARCH915 Microscope-eye \n", - "\n", - " qPCR-Bacteria (copies/mL water) qPCR-Archaea (copies/mL water) \\\n", - "0 nan nan \n", - "1 nan nan \n", - "2 nan nan \n", - "3 nan nan \n", - "4 nan nan \n", - "\n", - " qPCR-MCG (copies/mL water) Total qPCR(copies/mL water) \n", - "0 nan nan \n", - "1 nan nan \n", - "2 nan nan \n", - "3 nan nan \n", - "4 nan nan \n", - "\n", - "[5 rows x 21 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the dataset\n", - "lloyd = pd.read_excel('marine_arch_frac_data.xlsx','Lloyd',skiprows=1)\n", - "lloyd.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The data in Lloyd et al. contains estimates for the number of bacteria and archaea. Lloyd et al. generated regression equations for the concentration of bacteria and archaea as a function of depth. We use these equations to estimate the total number of archaea and bacteria at each of the three depth layers." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of archaea in the epipelagic layer based on FISH is 5.8 percent\n", - "The fraction of archaea in the mesopelagic layer based on FISH is 24.0 percent\n", - "The fraction of archaea in the bathypelagic layer based on FISH is 34.9 percent\n" - ] - } - ], - "source": [ - "# Define the regression equation for the number of bacteria in the top 64 m:\n", - "def bac_surf(depth):\n", - " result = np.zeros_like(depth)\n", - " for i,x in enumerate(depth):\n", - " if x==0 :\n", - " result[i] = 5.54\n", - " \n", - " else:\n", - " result[i] = np.log10(x)*0.08+5.54\n", - " return 10**result\n", - "\n", - "# Define the regression equation for the number of bacteria in water deeper than 64 m:\n", - "bac_deep = lambda x: 10**(np.log10(x)*-1.09+7.66)\n", - "\n", - "# Define the regression equation for the number of archaea in the top 389 m:\n", - "def arch_surf(depth):\n", - " result = np.zeros_like(depth)\n", - " for i,x in enumerate(depth):\n", - " if x==0 :\n", - " result[i] = 4.1\n", - " \n", - " else:\n", - " result[i] = np.log10(x)*0.1+4.1\n", - " return 10**result\n", - "\n", - "# Define the regression equation for the number of bacteria in water below 389 m:\n", - "arch_deep = lambda x: 10**(np.log10(x)*-0.8+6.43)\n", - "\n", - "# Estimate the total number of bacteria and archaea in the epipelagic layer by first estimating the concentration using the \n", - "# regression equation, multiplying by the volume at each depth, which is 1 m^3 times the surface\n", - "# Area of the ocean, and finally summing across different depths\n", - "total_bac_epi = (bac_surf(np.linspace(0,64,65))*ml_in_m3*ocean_area).sum() + (bac_deep(np.linspace(65,200,200-65+1))*ml_in_m3*ocean_area).sum()\n", - "total_arch_epi = (arch_surf(np.linspace(0,200,201))*ml_in_m3*ocean_area).sum()\n", - "\n", - "# Calculate the fraction of archaea 
in the epipelagic layer\n", - "FISH_arch_frac_epi = total_arch_epi/(total_arch_epi+total_bac_epi)\n", - "\n", - "\n", - "# We repeat the same procedure for the total number of bacteria and archaea in the mesopelagic layer\n", - "# Number of archaea\n", - "total_bac_meso = (bac_deep(np.linspace(201,1000,1000-200+1))*ml_in_m3*ocean_area).sum()\n", - "total_arch_meso = (arch_surf(np.linspace(201,389,390-201+1))*ml_in_m3*ocean_area).sum() + (arch_deep(np.linspace(390,1000,1000-390+1))*ml_in_m3*ocean_area).sum()\n", - "\n", - "# Calculate the fraction of archaea in the mesopelagic layer\n", - "FISH_arch_frac_meso = total_arch_meso/(total_arch_meso+total_bac_meso)\n", - "\n", - "# We repeat the same procedure for the total number of bacteria and archaea in the mesopelagic layer\n", - "# Number of archaea\n", - "total_bac_bathy = (bac_deep(np.linspace(1001,4000,4000-1001+1))*ml_in_m3*ocean_area).sum()\n", - "total_arch_bathy = (arch_deep(np.linspace(1001,4000,4000-1001+1))*ml_in_m3*ocean_area).sum()\n", - "\n", - "# Calculate the fraction of archaea in the bathypelagic layer\n", - "FISH_arch_frac_bathy = total_arch_bathy/(total_arch_bathy+total_bac_bathy)\n", - "\n", - "print('The fraction of archaea in the epipelagic layer based on FISH is %.1f percent' % (FISH_arch_frac_epi*100))\n", - "print('The fraction of archaea in the mesopelagic layer based on FISH is %.1f percent' % (FISH_arch_frac_meso*100))\n", - "print('The fraction of archaea in the bathypelagic layer based on FISH is %.1f percent' % (FISH_arch_frac_bathy*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 16S rDNA sequencing\n", - "\n", - "To estimate the fraction of archaea out of the total population of marine bacteria and archaea, we rely on data from [Sunagawa et al.](http://science.sciencemag.org/content/348/6237/1261359) for the epipelagic and mesopelagic layers, and data from [Salazar et al.](http://dx.doi.org/10.1038/ismej.2015.137) for the bathypelagic layer." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "sunagawa = pd.read_excel('marine_arch_frac_data.xlsx','Sunagawa',skiprows=1,index_col=0)\n", - "salazar = pd.read_excel('marine_arch_frac_data.xlsx','Salazar')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here are samples from the data in Sunagawa et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ThaumarcheotaEuryarchaeota
SRF6.2e-012.7e+00
DCM1.4e+004.1e+00
MESO1.1e+012.9e+00
\n", - "
" - ], - "text/plain": [ - " Thaumarcheota Euryarchaeota\n", - "SRF 6.2e-01 2.7e+00\n", - "DCM 1.4e+00 4.1e+00\n", - "MESO 1.1e+01 2.9e+00" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sunagawa.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here are samples from the data in Salazar et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
samplestationfiltersizeDateOceancodeDepthLongitudeLatitudeUnnamed: 9Unnamed: 10ArchaeaBacteriaNo blast hit
0MP0145102.0e-012010-12-26North AtlanticMP0145-4002-2.6e+011.5e+01nanMP01453.1e-016.9e-019.4e-05
1MP0262172.0e-012011-01-02South AtlanticMP0262-4002-2.7e+01-3.0e+00nanMP02621.9e-018.1e-010.0e+00
2MP0327202.0e-012011-01-05South AtlanticMP0327-4001-3.0e+01-9.1e+00nanMP03271.7e-018.3e-010.0e+00
3MP0372232.0e-012011-01-08South AtlanticMP0372-4003-3.3e+01-1.6e+01nanMP03721.4e-018.6e-019.4e-05
4MP0441262.0e-012011-01-11South AtlanticMP0441-3907-3.7e+01-2.3e+01nanMP04411.2e-018.8e-010.0e+00
\n", - "
" - ], - "text/plain": [ - " sample station filtersize Date Ocean code Depth \\\n", - "0 MP0145 10 2.0e-01 2010-12-26 North Atlantic MP0145 -4002 \n", - "1 MP0262 17 2.0e-01 2011-01-02 South Atlantic MP0262 -4002 \n", - "2 MP0327 20 2.0e-01 2011-01-05 South Atlantic MP0327 -4001 \n", - "3 MP0372 23 2.0e-01 2011-01-08 South Atlantic MP0372 -4003 \n", - "4 MP0441 26 2.0e-01 2011-01-11 South Atlantic MP0441 -3907 \n", - "\n", - " Longitude Latitude Unnamed: 9 Unnamed: 10 Archaea Bacteria \\\n", - "0 -2.6e+01 1.5e+01 nan MP0145 3.1e-01 6.9e-01 \n", - "1 -2.7e+01 -3.0e+00 nan MP0262 1.9e-01 8.1e-01 \n", - "2 -3.0e+01 -9.1e+00 nan MP0327 1.7e-01 8.3e-01 \n", - "3 -3.3e+01 -1.6e+01 nan MP0372 1.4e-01 8.6e-01 \n", - "4 -3.7e+01 -2.3e+01 nan MP0441 1.2e-01 8.8e-01 \n", - "\n", - " No blast hit \n", - "0 9.4e-05 \n", - "1 0.0e+00 \n", - "2 0.0e+00 \n", - "3 9.4e-05 \n", - "4 0.0e+00 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "salazar.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As we are working with fractions here, we shall use a utility that will calculate mean and uncertainty of fractions. For details regarding the procedure look at the documentation of the relevant functions.\n", - "For the epipelagic layer, we will use the sum of the fractions of Thaumarcheota and Euryarchaeota, two main archaeal phyla. We will use the geometric mean of the fractions in surface waters and the deep chlorophyll maximum." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of archaea in the epipelagic layer based on 16S rDNA sequencing is 4.3 percent\n", - "The fraction of archaea in the mesopelagic layer based on 16S rDNA sequencing is 14.4 percent\n" - ] - } - ], - "source": [ - "sunagawa_sum = (sunagawa['Thaumarcheota'] + sunagawa['Euryarchaeota'])/100\n", - "seq_arch_frac_epi = frac_mean(sunagawa_sum.loc[['DCM','SRF']])\n", - "seq_arch_frac_meso = frac_mean(sunagawa_sum.loc['MESO'])\n", - "print('The fraction of archaea in the epipelagic layer based on 16S rDNA sequencing is %.1f percent' % (seq_arch_frac_epi*100))\n", - "print('The fraction of archaea in the mesopelagic layer based on 16S rDNA sequencing is %.1f percent' % (seq_arch_frac_meso*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For the bathypelagic layer, we estimate the fraction of archaea based on 16S rDNA sequencing by using the geometric mean of the data in Salazar et al." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of archaea in the bathypelagic layer based on 16S rDNA sequencing is 15.0 percent\n" - ] - } - ], - "source": [ - "seq_arch_frac_bathy = frac_mean(salazar['Archaea'])\n", - "print('The fraction of archaea in the bathypelagic layer based on 16S rDNA sequencing is %.1f percent' % (seq_arch_frac_bathy*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the fraction of archaea out of the total population of marine bacteria and archaea at each layer is the geometric mean of the estimates of the fraction of archaea based on FISH and on 16S rDNA sequencing, corrected for the lower rDNA operon copy number" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The best estimate for the fraction of archaea in the epipelagic layer is 7.1 percent\n", - "The best estimate for the fraction of archaea in the mesopelagic layer is 26.3 percent\n", - "The best estimate for the fraction of archaea in the bathypelagic layer is 32.4 percent\n" - ] - } - ], - "source": [ - "best_arch_frac_epi = frac_mean(np.array([FISH_arch_frac_epi,seq_arch_frac_epi*2]))\n", - "best_arch_frac_meso = frac_mean(np.array([FISH_arch_frac_meso,seq_arch_frac_meso*2]))\n", - "best_arch_frac_bathy = frac_mean(np.array([FISH_arch_frac_bathy,seq_arch_frac_bathy*2]))\n", - "print('The best estimate for the fraction of archaea in the epipelagic layer is %.1f percent' % (best_arch_frac_epi*100))\n", - "print('The best estimate for the fraction of archaea in the mesopelagic layer is %.1f percent' % (best_arch_frac_meso*100))\n", - "print('The best estimate for the fraction of archaea in the bathypelagic layer is %.1f percent' % (best_arch_frac_bathy*100))" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "### Estimating the fraction of the population in each depth layer\n", - "In order to calculate the fraction of archaea out of the total population of marine bacteria and archaea, we need to estimate the fraction of cells in epipelagic, mesopelagic and bathypelagic layers. To do so we use the same sources used for estimating the total number of marine bacteria and archaea, namely, Aristegui et. al, Buitenhuis et al. and Lloyd et al. " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load the datasets\n", - "buitenhuis = pd.read_excel('../cell_num/marine_prok_cell_num_data.xlsx','Buitenhuis',skiprows=1)\n", - "aristegui = pd.read_excel('../cell_num/marine_prok_cell_num_data.xlsx','Aristegui',skiprows=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Lloyd et al., we already calculated the total number of bacteria and archaea at each layer, so we can estimate what is the relative fraction of cells in each layer" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of cells in the epipelagic layer according to Lloyd et al. is 38.9 percent\n", - "The fraction of cells in the mesopelagic layer according to Lloyd et al. is 32.5 percent\n", - "The fraction of cells in the bathypelagic layer according to Lloyd et al. is 28.6 percent\n" - ] - } - ], - "source": [ - "# For lloyd et al. 
we calculate fraction of the sum of bacteria and archaea in each layer out of the \n", - "# total number of bacteria and archaea\n", - "\n", - "lloyd_total_bac_arch_epi = total_arch_epi + total_bac_epi\n", - "lloyd_total_bac_arch_meso = total_arch_meso + total_bac_meso\n", - "lloyd_total_bac_arch_bathy = total_arch_bathy + total_bac_bathy\n", - "lloyd_total_bac_arch = lloyd_total_bac_arch_epi+lloyd_total_bac_arch_meso+lloyd_total_bac_arch_bathy\n", - "\n", - "lloyd_epi_frac = lloyd_total_bac_arch_epi/lloyd_total_bac_arch\n", - "lloyd_meso_frac = lloyd_total_bac_arch_meso/lloyd_total_bac_arch\n", - "lloyd_bathy_frac = lloyd_total_bac_arch_bathy/lloyd_total_bac_arch\n", - "\n", - "print('The fraction of cells in the epipelagic layer according to Lloyd et al. is %.1f percent' % (lloyd_epi_frac*100))\n", - "print('The fraction of cells in the mesopelagic layer according to Lloyd et al. is %.1f percent' % (lloyd_meso_frac*100))\n", - "print('The fraction of cells in the bathypelagic layer according to Lloyd et al. is %.1f percent' % (lloyd_bathy_frac*100))" - ] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "For Buitenhuis et al., we bin the data along the depth of each sample in 100 m bins. We calculate the average concentration of cells at each bin. For each bin, we calculate the total number of cells in the bin by multiplying the average concentration by the total volume of water in the bin. We calculate the total number of cells in each layer by dividing the bins to each of the layers and summing across all the bins that belong to the same layer." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total fraction of cells in the epipelagic layer based on Buitenhuis et al.: 30.7 percent\n", - "Total fraction of cells in the mesopelagic layer based on Buitenhuis et al.: 27.8 percent\n", - "Total fraction of cells in the bathypelagic layer based on Buitenhuis et al.: 39.5 percent\n" - ] - } - ], - "source": [ - "# Define depth range every 100 m from 0 to 4000 meters\n", - "depth_range = np.linspace(0,4000,41)\n", - "\n", - "# Bin data along depth bins\n", - "buitenhuis['Depth_bin'] = pd.cut(buitenhuis['Depth'], depth_range)\n", - "\n", - "# For each bin, calculate the average number of cells per liter\n", - "buitenhuis_bins = buitenhuis.groupby(['Depth_bin']).mean()['Bact/L']\n", - "\n", - "# Multiply each average concentration by the total volume at each bin: 100 meters depth time the surface area of the oceac\n", - "\n", - "buitenhuis_bins *= 100*ocean_area*liters_in_m3\n", - "\n", - "# For the epipelagic layer, sum across the first three bins\n", - "buitenhuis_total_epi = buitenhuis_bins.iloc[0:3].sum()\n", - "\n", - "# For the mesopelagic layer, sum across the relevant bins\n", - "buitenhuis_total_meso = buitenhuis_bins.iloc[3:11].sum()\n", - "\n", - "# For the bathypelagic layer, sum across the remaining bins\n", - "buitenhuis_total_bathy = buitenhuis_bins.iloc[12:].sum()\n", - "\n", - "#Calculate the total number of cells\n", - "buitenhuis_total = buitenhuis_bins.sum()\n", - "\n", - "# Calculate relative fractions\n", - "buitenhuis_frac_epi = buitenhuis_total_epi/buitenhuis_total\n", - "buitenhuis_frac_meso = buitenhuis_total_meso/buitenhuis_total\n", - "buitenhuis_frac_bathy = buitenhuis_total_bathy/buitenhuis_total\n", - "print('Total fraction of cells in the epipelagic layer based on Buitenhuis et al.: %.1f percent' % (buitenhuis_frac_epi*100))\n", - "print('Total fraction of cells in the 
mesopelagic layer based on Buitenhuis et al.: %.1f percent' % (buitenhuis_frac_meso*100))\n", - "print('Total fraction of cells in the bathypelagic layer based on Buitenhuis et al.: %.1f percent' % (buitenhuis_frac_bathy*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Aristegui et al. the data is already binned along each layer, so we just calculate the relative fraction of each layer" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total fraction of cells in the epipelagic layer based on Aristegui et al.: 23.2 percent\n", - "Total fraction of cells in the mesopelagic layer based on Aristegui et al.: 36.0 percent\n", - "Total fraction of cells in the bathypelagic layer based on Aristegui et al.: 40.8 percent\n" - ] - } - ], - "source": [ - "aristegui_total = aristegui['Cell abundance (cells m-2)'].sum()\n", - "aristegui_frac_epi = aristegui.iloc[0]['Cell abundance (cells m-2)']/aristegui_total\n", - "aristegui_frac_meso = aristegui.iloc[1]['Cell abundance (cells m-2)']/aristegui_total\n", - "aristegui_frac_bathy = aristegui.iloc[2]['Cell abundance (cells m-2)']/aristegui_total\n", - "print('Total fraction of cells in the epipelagic layer based on Aristegui et al.: %.1f percent' % (aristegui_frac_epi*100))\n", - "print('Total fraction of cells in the mesopelagic layer based on Aristegui et al.: %.1f percent' % (aristegui_frac_meso*100))\n", - "print('Total fraction of cells in the bathypelagic layer based on Aristegui et al.: %.1f percent' % (aristegui_frac_bathy*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the fraction of bacterial and archaeal cells located at each layer is the geometric mean of estimates from our three sources - Lloyd et al., Buitenhuis et al., and Aristegui et al." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The best estimate for the fraction of cells in the epipelagic layer is 30.6 percent\n", - "The best estimate for the fraction of cells in the mesopelagic layer is 32.0 percent\n", - "The best estimate for the fraction of cells in the bathypelagic layer is 36.1 percent\n" - ] - } - ], - "source": [ - "\n", - "best_frac_epi = frac_mean(np.array([lloyd_epi_frac,buitenhuis_frac_epi,aristegui_frac_epi]))\n", - "best_frac_meso = frac_mean(np.array([lloyd_meso_frac,buitenhuis_frac_meso,aristegui_frac_meso]))\n", - "best_frac_bathy = frac_mean(np.array([lloyd_bathy_frac,buitenhuis_frac_bathy,aristegui_frac_bathy]))\n", - "\n", - "print('The best estimate for the fraction of cells in the epipelagic layer is %.1f percent' % (best_frac_epi*100))\n", - "print('The best estimate for the fraction of cells in the mesopelagic layer is %.1f percent' % (best_frac_meso*100))\n", - "print('The best estimate for the fraction of cells in the bathypelagic layer is %.1f percent' % (best_frac_bathy*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the fraction of archaea out of the total population of marine bacteria and archaea is the weighted sum of the fraction of archaea in each layer and the fraction of total cells in each layer" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of archaea out of the total population of marine bacteria and archaea is 22.3 percent\n" - ] - } - ], - "source": [ - "best_arch_frac = best_arch_frac_epi*best_frac_epi + best_arch_frac_meso*best_frac_meso+best_arch_frac_bathy*best_frac_bathy\n", - "print('Our best estimate for the fraction of archaea out of the total population of marine bacteria and 
archaea is %.1f percent' %(best_arch_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "In order to assess the uncertainty associated with our estimate for the fraction of marine archaea out of the total population of marine bacteria and archaea, we gather all possible indices of uncertainty. We compare the uncertainty of values within each one of the methods and the uncertainty stemming from the variability of the values provided by the two methods. \n", - "\n", - "## Intra-study uncertainty\n", - "We first look at the uncertainty of values within the FISH method and the 16S sequencing method.\n", - "\n", - "### FISH method\n", - "For the FISH method, we use regression lines to extrapolate the number of archaea and bacteria across the depth profile, and we do not have a good handle on the uncertainty of this procedure. We thus use an alternative measure for the uncertainty of the fraction of archaea. Lloyd et al. report, for each site, the fraction of archaea out of the total population of bacteria and archaea. We use the variation of the values between sites as a measure of the uncertainty of the values for the fraction of archaea and bacteria using FISH." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of measurements using FISH for the fraction of archaea is 1.1-fold\n", - "The intra-study uncertainty of measurements using FISH for the fraction of bacteria is 1.01-fold\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "../../../statistics_helper/fraction_helper.py:54: RuntimeWarning: invalid value encountered in log\n", - " log_alpha = np.log(alpha)\n", - "../../../statistics_helper/fraction_helper.py:25: RuntimeWarning: invalid value encountered in log10\n", - " log_alpha = np.log10(alpha)\n" - ] - } - ], - "source": [ - "# Set zero values to a small number for numerical stability of the fraction\n", - "lloyd_arc_frac = lloyd['Fraction Arc CARDFISH'].dropna()\n", - "lloyd_arc_frac[lloyd_arc_frac == 0] = 0.001\n", - "\n", - "print('The intra-study uncertainty of measurements using FISH for the fraction of archaea is %.1f-fold' % frac_CI(lloyd_arc_frac))\n", - "print('The intra-study uncertainty of measurements using FISH for the fraction of bacteria is %.2f-fold' % frac_CI(1.-lloyd_arc_frac))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 16S rDNA sequencing\n", - "\n", - "For the 16S rDNA sequencing method, we rely on two main sources - Sunagawa et al. for the epipelagic and mesopelagic layers, and Salazar et al. for the bathypelagic layer. No uncertainties are reported by Sunagawa et al., and thus we rely on the variability of values in Salazar et al. 
as a measure of the uncertainty of the values for the fraction of archaea using 16S rDNA sequencing" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of measurements using 16S rDNA sequencing for the fraction of archaea is 1.2-fold\n", - "The intra-study uncertainty of measurements using 16S rDNA sequencing for the fraction of bacteria is 1.03-fold\n" - ] - } - ], - "source": [ - "print('The intra-study uncertainty of measurements using 16S rDNA sequencing for the fraction of archaea is %.1f-fold' % frac_CI(salazar['Archaea']))\n", - "print('The intra-study uncertainty of measurements using 16S rDNA sequencing for the fraction of bacteria is %.2f-fold' % frac_CI(1.-salazar['Archaea']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interstudy uncertainty\n", - "\n", - "We calculate the uncertainty (95% multiplicative confidence interval) between the estimates using the two methods - FISH and 16S rDNA sequencing." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the epipelagic layer is 1.4-fold\n", - "The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the mesopelagic layer is 1.6-fold\n", - "The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the bathypelagic layer is 2.3-fold\n", - "The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the epipelagic layer is 1.0-fold\n", - "The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the mesopelagic layer is 1.1-fold\n", - "The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the bathypelagic layer is 1.3-fold\n" - ] - } - ], - "source": [ - "# For each layer, calculate the uncertainty between methods\n", - "from fractions import *\n", - "epi_mul_CI_arch = frac_CI(np.array([FISH_arch_frac_epi,seq_arch_frac_epi]))\n", - "meso_mul_CI_arch = frac_CI(np.array([FISH_arch_frac_meso,seq_arch_frac_meso]))\n", - "bathy_mul_CI_arch = frac_CI(np.array([FISH_arch_frac_bathy,seq_arch_frac_bathy]))\n", - "print('The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the epipelagic layer is %.1f-fold' %epi_mul_CI_arch)\n", - "print('The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the mesopelagic layer is %.1f-fold' %meso_mul_CI_arch)\n", - "print('The uncertainty of the fraction of archaea out of the total population of bacteria and archaea in the bathypelagic layer is %.1f-fold' %bathy_mul_CI_arch)\n", - "\n", - "epi_mul_CI_bac = frac_CI(np.array([1.-FISH_arch_frac_epi,1.-seq_arch_frac_epi]))\n", - "meso_mul_CI_bac = 
frac_CI(np.array([1.-FISH_arch_frac_meso,1.-seq_arch_frac_meso]))\n", - "bathy_mul_CI_bac = frac_CI(np.array([1.-FISH_arch_frac_bathy,1.-seq_arch_frac_bathy]))\n", - "print('The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the epipelagic layer is %.1f-fold' %epi_mul_CI_bac)\n", - "print('The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the mesopelagic layer is %.1f-fold' %meso_mul_CI_bac)\n", - "print('The uncertainty of the fraction of bacteria out of the total population of bacteria and archaea in the bathypelagic layer is %.1f-fold' %bathy_mul_CI_bac)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimates for the uncertainty associated with the fraction of archaea and bacteria out of the total population of marine bacteria and archaea, we use the highest uncertainty out of the uncertainties of the three depth layers.\n", - "\n", - "The highest interstudy uncertainty for the fraction of archaea is ≈2.3-fold, which is higher than the highest intra-study uncertainty of ≈1.2-fold, so we use ≈2.3-fold as our best projection of the uncertainty associated with the fraction of archaea out of the total population of marine bacteria and archaea. \n", - "Similarly, the highest interstudy uncertainty for the fraction of bacteria is ≈1.3-fold, which is higher than the highest intra-study uncertainty of ≈1.03-fold, so we use ≈1.3-fold as our best projection of the uncertainty associated with the fraction of bacteria out of the total population of marine bacteria and archaea. 
\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of marine archaea out of the total population of marine bacteria and archaea: 22.3 percent\n", - "Fraction of marine bacteria out of the total population of marine bacteria and archaea: 77.7 percent\n", - "Uncertainty associated with the fraction of marine archaea: 2.3-fold\n", - "Uncertainty associated with the fraction of marine bacteria: 1.3-fold\n" - ] - } - ], - "source": [ - "print('Fraction of marine archaea out of the total population of marine bacteria and archaea: %.1f percent' %(best_arch_frac*100))\n", - "print('Fraction of marine bacteria out of the total population of marine bacteria and archaea: %.1f percent' %(100.-best_arch_frac*100))\n", - "print('Uncertainty associated with the fraction of marine archaea: %.1f-fold' % np.max([epi_mul_CI_arch,meso_mul_CI_arch,bathy_mul_CI_arch]))\n", - "print('Uncertainty associated with the fraction of marine bacteria: %.1f-fold' % np.max([epi_mul_CI_bac,meso_mul_CI_bac,bathy_mul_CI_bac]))\n", - "\n", - "old_results = pd.read_excel('../marine_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(2), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Fraction of archaea',\n", - " 'Value': \"{0:.1f}\".format(best_arch_frac),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(np.max([epi_mul_CI_arch,meso_mul_CI_arch,bathy_mul_CI_arch]))\n", - " })\n", - "\n", - "result.loc[3] = pd.Series({\n", - " 'Parameter': 'Fraction of bacteria',\n", - " 'Value': \"{0:.1f}\".format(1.0 - best_arch_frac),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': 
\"{0:.1f}\".format(np.max([epi_mul_CI_bac,meso_mul_CI_bac,bathy_mul_CI_bac]))\n", - " })\n", - "\n", - "\n", - "result.to_excel('../marine_prok_biomass_estimate.xlsx',index=False)\n", - "\n", - "# We need to use the results on the amount of cells in the epipelagic layer for our estimate of\n", - "# the total biomass of marine fungi, so we feed these results to the data used in the estimate\n", - "# of the biomass of marine fungi\n", - "marine_fungi_data = pd.read_excel('../../../fungi/marine_fungi/marine_fungi_data.xlsx','Bacteria biomass')\n", - "\n", - "marine_fungi_data.loc[0] = pd.Series({\n", - " 'Parameter': 'Fraction of prokaryotes in epipelagic realm',\n", - " 'Value': best_frac_epi,\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': frac_CI(np.array([lloyd_epi_frac,buitenhuis_frac_epi,aristegui_frac_epi]))\n", - " })\n", - "writer = pd.ExcelWriter('../../../fungi/marine_fungi/marine_fungi_data.xlsx', engine = 'openpyxl')\n", - "book = load_workbook('../../../fungi/marine_fungi/marine_fungi_data.xlsx')\n", - "writer.book = book\n", - "writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n", - "\n", - "\n", - "marine_fungi_data.to_excel(writer, sheet_name = 'Bacteria biomass',index=False)\n", - "writer.save()\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine/carbon_content/.ipynb_checkpoints/marine_prok_carbon_content-checkpoint.ipynb b/bacteria_archaea/marine/carbon_content/.ipynb_checkpoints/marine_prok_carbon_content-checkpoint.ipynb deleted file mode 100644 index 1087cc5..0000000 --- 
a/bacteria_archaea/marine/carbon_content/.ipynb_checkpoints/marine_prok_carbon_content-checkpoint.ipynb +++ /dev/null @@ -1,303 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the carbon content of marine bacteria and archaea" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to estimate the characteristic carbon content of marine bacteria and archaea, we collected data on the carbon content of marine prokaryotes from 5 studies. Here is a summary of the data collected" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceLink to paperfg C cell-1LocationIntra-study uncertaintyremarks
0Lee and Fuhrman (1987)https://www.ncbi.nlm.nih.gov/pubmed/1634736220.0NW-atlantic1.1standard error of 0.8. We use two standard err...
1Ducklow and Carlson (1992)http://dx.doi.org/10.1007/978-1-4684-7609-5_312.2Oceans1.1mean and standard error calculated in sheet2 b...
2Gundersen et al. (2002)http://onlinelibrary.wiley.com/doi/10.4319/lo....7.7N-Atlantic1.1mean and standard error calculated in sheet3 b...
3Carlson et al. (1999)http://dx.doi.org/10.3354/ame0192297.7Antarctica1.3range of 5.5-9.8
4Fukuda et al. (1998)http://aem.asm.org/cgi/pmidlookup?view=long&pm...12.4Pacific Ocean1.4std of 6.3 and sample size of 6 equals standar...
\n", - "
" - ], - "text/plain": [ - " Reference \\\n", - "0 Lee and Fuhrman (1987) \n", - "1 Ducklow and Carlson (1992) \n", - "2 Gundersen et al. (2002) \n", - "3 Carlson et al. (1999) \n", - "4 Fukuda et al. (1998) \n", - "\n", - " Link to paper fg C cell-1 \\\n", - "0 https://www.ncbi.nlm.nih.gov/pubmed/16347362 20.0 \n", - "1 http://dx.doi.org/10.1007/978-1-4684-7609-5_3 12.2 \n", - "2 http://onlinelibrary.wiley.com/doi/10.4319/lo.... 7.7 \n", - "3 http://dx.doi.org/10.3354/ame019229 7.7 \n", - "4 http://aem.asm.org/cgi/pmidlookup?view=long&pm... 12.4 \n", - "\n", - " Location Intra-study uncertainty \\\n", - "0 NW-atlantic 1.1 \n", - "1 Oceans 1.1 \n", - "2 N-Atlantic 1.1 \n", - "3 Antarctica 1.3 \n", - "4 Pacific Ocean 1.4 \n", - "\n", - " remarks \n", - "0 standard error of 0.8. We use two standard err... \n", - "1 mean and standard error calculated in sheet2 b... \n", - "2 mean and standard error calculated in sheet3 b... \n", - "3 range of 5.5-9.8 \n", - "4 std of 6.3 and sample size of 6 equals standar... " - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "summary = pd.read_excel('marine_prok_carbon_content_data.xlsx','Summary')\n", - "summary.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geometric mean of the carbon content from each study as our best estimate for the carbon content of marine bacteria and archaea." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of marine bacteria and arcaea is 11.2 fg C cell^-1\n" - ] - } - ], - "source": [ - "best_estimate = 10**(np.log10(summary['fg C cell-1']).mean())\n", - "print('Our best estimate for the carbon content of marine bacteria and arcaea is %0.1f fg C cell^-1' % best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "In order to assess the uncertainty associated with our estimate of the carbon content of marine bacteria and archaea, we survey all available measures of uncertainty.\n", - "\n", - "## Intra-study uncertainty\n", - "We collected the uncertainties reported in each of the studies. Below is a list of the uncertainties reported in each of the studies. The highest uncertainty reported is lower than 1.5-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Reference Intra-study uncertainty\n", - "0 Lee and Fuhrman (1987) 1.1\n", - "1 Ducklow and Carlson (1992) 1.1\n", - "2 Gundersen et al. (2002) 1.1\n", - "3 Carlson et al. (1999) 1.3\n", - "4 Fukuda et al. (1998) 1.4\n" - ] - } - ], - "source": [ - "print(summary[['Reference','Intra-study uncertainty']])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interstudy uncertainty\n", - "We estimate the 95% multiplicative confidence interval around the geometric mean of the values from the different studies. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty is ≈1.4-fold\n" - ] - } - ], - "source": [ - "mul_CI = geo_CI_calc(summary['fg C cell-1'])\n", - "print('The interstudy uncertainty is ≈%.1f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We thus take the highest uncertainty from our collection of intra-study and interstudy uncertainties which is ≈1.4-fold.\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Carbon content of marine bacteria and archaea: 11.2 fg C cell^-1\n", - "Uncertainty associated with the carbon content of marine bacteria and archaea: 1.4-fold\n" - ] - } - ], - "source": [ - "print('Carbon content of marine bacteria and archaea: %.1f fg C cell^-1' % best_estimate)\n", - "print('Uncertainty associated with the carbon content of marine bacteria and archaea: %.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../marine_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Carbon content',\n", - " 'Value': \"{0:.1f}\".format(best_estimate),\n", - " 'Units': 'fg C cell^-1',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../marine_prok_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git 
a/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/SI_B_unsupervised_gating-checkpoint.ipynb b/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/SI_B_unsupervised_gating-checkpoint.ipynb deleted file mode 100644 index 22eb6c0..0000000 --- a/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/SI_B_unsupervised_gating-checkpoint.ipynb +++ /dev/null @@ -1,9546 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Unsupervised gating of the data via the front and side scattering." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(c) 2017 the authors. This work is licensed under a [Creative Commons Attribution License CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). All code contained herein is licensed under an [MIT license](https://opensource.org/licenses/MIT). " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "--- " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " Loading BokehJS ...\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "(function(global) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " var force = \"1\";\n", - "\n", - " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n", - " window._bokeh_onload_callbacks = [];\n", - " window._bokeh_is_loading = undefined;\n", - " }\n", - "\n", - "\n", - " \n", - " if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n", - " window._bokeh_timeout = Date.now() + 5000;\n", - " window._bokeh_failed_load = false;\n", - " }\n", - "\n", - " var NB_LOAD_WARNING = {'data': {'text/html':\n", - " \"
\\n\"+\n", - " \"

\\n\"+\n", - " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", - " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", - " \"

\\n\"+\n", - " \"
    \\n\"+\n", - " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", - " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", - " \"
\\n\"+\n", - " \"\\n\"+\n", - " \"from bokeh.resources import INLINE\\n\"+\n", - " \"output_notebook(resources=INLINE)\\n\"+\n", - " \"\\n\"+\n", - " \"
\"}};\n", - "\n", - " function display_loaded() {\n", - " if (window.Bokeh !== undefined) {\n", - " Bokeh.$(\"#f964d53f-fdbb-42ae-8e37-0bf028adc09a\").text(\"BokehJS successfully loaded.\");\n", - " } else if (Date.now() < window._bokeh_timeout) {\n", - " setTimeout(display_loaded, 100)\n", - " }\n", - " }\n", - "\n", - " function run_callbacks() {\n", - " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", - " delete window._bokeh_onload_callbacks\n", - " console.info(\"Bokeh: all callbacks have finished\");\n", - " }\n", - "\n", - " function load_libs(js_urls, callback) {\n", - " window._bokeh_onload_callbacks.push(callback);\n", - " if (window._bokeh_is_loading > 0) {\n", - " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " }\n", - " if (js_urls == null || js_urls.length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " window._bokeh_is_loading = js_urls.length;\n", - " for (var i = 0; i < js_urls.length; i++) {\n", - " var url = js_urls[i];\n", - " var s = document.createElement('script');\n", - " s.src = url;\n", - " s.async = false;\n", - " s.onreadystatechange = s.onload = function() {\n", - " window._bokeh_is_loading--;\n", - " if (window._bokeh_is_loading === 0) {\n", - " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", - " run_callbacks()\n", - " }\n", - " };\n", - " s.onerror = function() {\n", - " console.warn(\"failed to load library \" + url);\n", - " };\n", - " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", - " }\n", - " };var element = document.getElementById(\"f964d53f-fdbb-42ae-8e37-0bf028adc09a\");\n", - " if (element == null) {\n", - " console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'f964d53f-fdbb-42ae-8e37-0bf028adc09a' but 
no matching script tag was found. \")\n", - " return false;\n", - " }\n", - "\n", - " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n", - "\n", - " var inline_js = [\n", - " function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - " \n", - " function(Bokeh) {\n", - " \n", - " Bokeh.$(\"#f964d53f-fdbb-42ae-8e37-0bf028adc09a\").text(\"BokehJS is loading...\");\n", - " },\n", - " function(Bokeh) {\n", - " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", - " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", - " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", - " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", - " }\n", - " ];\n", - "\n", - " function run_inline_js() {\n", - " \n", - " if ((window.Bokeh !== undefined) || (force === \"1\")) {\n", - " for (var i = 0; i < inline_js.length; i++) {\n", - " inline_js[i](window.Bokeh);\n", - " }if (force === \"1\") {\n", - " display_loaded();\n", - " }} else if (Date.now() < window._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!window._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " window._bokeh_failed_load = true;\n", - " } else if (!force) {\n", - " var cell = $(\"#f964d53f-fdbb-42ae-8e37-0bf028adc09a\").parents('.cell').data().cell;\n", - " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", - " }\n", - "\n", - " }\n", - "\n", - " if (window._bokeh_is_loading === 0) {\n", - " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", - " run_inline_js();\n", - " } else {\n", - " load_libs(js_urls, function() {\n", - " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", - " 
run_inline_js();\n", - " });\n", - " }\n", - "}(this));" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " Loading BokehJS ...\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "(function(global) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " var force = \"1\";\n", - "\n", - " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n", - " window._bokeh_onload_callbacks = [];\n", - " window._bokeh_is_loading = undefined;\n", - " }\n", - "\n", - "\n", - " \n", - " if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n", - " window._bokeh_timeout = Date.now() + 5000;\n", - " window._bokeh_failed_load = false;\n", - " }\n", - "\n", - " var NB_LOAD_WARNING = {'data': {'text/html':\n", - " \"
\\n\"+\n", - " \"

\\n\"+\n", - " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", - " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", - " \"

\\n\"+\n", - " \"
    \\n\"+\n", - " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", - " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", - " \"
\\n\"+\n", - " \"\\n\"+\n", - " \"from bokeh.resources import INLINE\\n\"+\n", - " \"output_notebook(resources=INLINE)\\n\"+\n", - " \"\\n\"+\n", - " \"
\"}};\n", - "\n", - " function display_loaded() {\n", - " if (window.Bokeh !== undefined) {\n", - " Bokeh.$(\"#c85fb501-ac4d-4459-a1b8-db2d566a95c2\").text(\"BokehJS successfully loaded.\");\n", - " } else if (Date.now() < window._bokeh_timeout) {\n", - " setTimeout(display_loaded, 100)\n", - " }\n", - " }\n", - "\n", - " function run_callbacks() {\n", - " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", - " delete window._bokeh_onload_callbacks\n", - " console.info(\"Bokeh: all callbacks have finished\");\n", - " }\n", - "\n", - " function load_libs(js_urls, callback) {\n", - " window._bokeh_onload_callbacks.push(callback);\n", - " if (window._bokeh_is_loading > 0) {\n", - " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " }\n", - " if (js_urls == null || js_urls.length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " window._bokeh_is_loading = js_urls.length;\n", - " for (var i = 0; i < js_urls.length; i++) {\n", - " var url = js_urls[i];\n", - " var s = document.createElement('script');\n", - " s.src = url;\n", - " s.async = false;\n", - " s.onreadystatechange = s.onload = function() {\n", - " window._bokeh_is_loading--;\n", - " if (window._bokeh_is_loading === 0) {\n", - " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", - " run_callbacks()\n", - " }\n", - " };\n", - " s.onerror = function() {\n", - " console.warn(\"failed to load library \" + url);\n", - " };\n", - " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", - " }\n", - " };var element = document.getElementById(\"c85fb501-ac4d-4459-a1b8-db2d566a95c2\");\n", - " if (element == null) {\n", - " console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'c85fb501-ac4d-4459-a1b8-db2d566a95c2' but 
no matching script tag was found. \")\n", - " return false;\n", - " }\n", - "\n", - " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n", - "\n", - " var inline_js = [\n", - " function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - " \n", - " function(Bokeh) {\n", - " \n", - " Bokeh.$(\"#c85fb501-ac4d-4459-a1b8-db2d566a95c2\").text(\"BokehJS is loading...\");\n", - " },\n", - " function(Bokeh) {\n", - " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", - " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", - " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", - " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", - " }\n", - " ];\n", - "\n", - " function run_inline_js() {\n", - " \n", - " if ((window.Bokeh !== undefined) || (force === \"1\")) {\n", - " for (var i = 0; i < inline_js.length; i++) {\n", - " inline_js[i](window.Bokeh);\n", - " }if (force === \"1\") {\n", - " display_loaded();\n", - " }} else if (Date.now() < window._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!window._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " window._bokeh_failed_load = true;\n", - " } else if (!force) {\n", - " var cell = $(\"#c85fb501-ac4d-4459-a1b8-db2d566a95c2\").parents('.cell').data().cell;\n", - " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", - " }\n", - "\n", - " }\n", - "\n", - " if (window._bokeh_is_loading === 0) {\n", - " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", - " run_inline_js();\n", - " } else {\n", - " load_libs(js_urls, function() {\n", - " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", - " 
run_inline_js();\n", - " });\n", - " }\n", - "}(this));" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import os\n", - "import glob\n", - "import numpy as np\n", - "import pandas as pd\n", - "import scipy\n", - "\n", - "import mwc_induction_utils as mwc\n", - "\n", - "# Import matplotlib stuff for plotting\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.cm as cm\n", - "\n", - "# Seaborn, useful for graphics\n", - "import seaborn as sns\n", - "\n", - "mwc.set_plotting_style()\n", - "\n", - "# Import Bokeh modules for interactive plotting\n", - "import bokeh.io\n", - "import bokeh.mpl\n", - "import bokeh.plotting\n", - "\n", - "# Magic function to make matplotlib inline; other style specs must come AFTER\n", - "%matplotlib inline\n", - "\n", - "# This enables SVG graphics inline (only use with static plots (non-Bokeh))\n", - "%config InlineBackend.figure_format = 'svg'\n", - "\n", - "# Datashader to plot lots of datapoints\n", - "import datashader as ds\n", - "from datashader.bokeh_ext import InteractiveImage\n", - "from datashader.utils import export_image\n", - "from IPython.core.display import HTML, display\n", - "\n", - "# Set up Bokeh for inline viewing\n", - "bokeh.io.output_notebook()\n", - "bokeh.plotting.output_notebook()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Plotting all of the data with datashader\n", - "\n", - "In this notebook we will develop an automatic procedure to gate the flow cytometry data based on the front and side scattering readings returned by the equipment. \n", - "We will use the [`datashader`](http://datashader.readthedocs.io/en/latest/) library in combination with [`Bokeh`](http://bokeh.pydata.org/en/latest/) to generate interactive plots with an arbitrary number of data points really easily.\n", - "\n", - "Let's first read an example data set. We will be working with the LacI titration data set generated on `20160804`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0HDR-TFSC-AFSC-HFSC-WSSC-ASSC-HSSC-WFITC-AFITC-HFITC-WAPC-Cy7-AAPC-Cy7-HAPC-Cy7-W
005.2347424931.0737307329.62500088179.91406224927.37500021066.142578155096.3125003863.8930665620.25488390111.250000198.227966303.54083385596.835938
119.3767966684.4697277270.852539120501.25000029407.79101622308.511719172783.2812501327.9787606212.77978528016.57812556.292591269.90319827337.144531
2211.02853547620.46093834441.476562181226.515625126428.73437583701.867188197979.6562509810.0273446209.653320207067.906250131.344498370.19116246504.585938
3314.6962436008.4824226114.314453128803.2890627114.6386725086.319336183340.812500-1113.5396733575.527588-40820.23437552.552475162.48689342392.082031
4417.8472549215.0878916789.694336177893.14062543032.67187528439.339844198330.140625-1728.4139402786.592529-81298.812500-0.379246235.683441-210.912140
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 HDR-T FSC-A FSC-H FSC-W \\\n", - "0 0 5.234742 4931.073730 7329.625000 88179.914062 \n", - "1 1 9.376796 6684.469727 7270.852539 120501.250000 \n", - "2 2 11.028535 47620.460938 34441.476562 181226.515625 \n", - "3 3 14.696243 6008.482422 6114.314453 128803.289062 \n", - "4 4 17.847254 9215.087891 6789.694336 177893.140625 \n", - "\n", - " SSC-A SSC-H SSC-W FITC-A FITC-H \\\n", - "0 24927.375000 21066.142578 155096.312500 3863.893066 5620.254883 \n", - "1 29407.791016 22308.511719 172783.281250 1327.978760 6212.779785 \n", - "2 126428.734375 83701.867188 197979.656250 9810.027344 6209.653320 \n", - "3 7114.638672 5086.319336 183340.812500 -1113.539673 3575.527588 \n", - "4 43032.671875 28439.339844 198330.140625 -1728.413940 2786.592529 \n", - "\n", - " FITC-W APC-Cy7-A APC-Cy7-H APC-Cy7-W \n", - "0 90111.250000 198.227966 303.540833 85596.835938 \n", - "1 28016.578125 56.292591 269.903198 27337.144531 \n", - "2 207067.906250 131.344498 370.191162 46504.585938 \n", - "3 -40820.234375 52.552475 162.486893 42392.082031 \n", - "4 -81298.812500 -0.379246 235.683441 -210.912140 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# define the date to find the experiment files\n", - "date = 20160804\n", - "\n", - "# list the directory with the data\n", - "datadir = '../../data/flow/csv/'\n", - "files = np.array(os.listdir(datadir))\n", - "\n", - "# select the files from the chosen date\n", - "csv_bool = np.array([str(date) in f and 'csv' in f for f in files])\n", - "files = files[np.array(csv_bool)]\n", - "\n", - "# Read files into a pandas Data Frame\n", - "df_example = pd.read_csv(datadir + files[1])\n", - "df_example.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now in order to use `datashader` in an interactive `Bokeh` plot we need to define a `base_plot` function to initialize a `Bokeh` plot canvas." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def base_plot(df, x_col, y_col, log=False):\n", - " '''\n", - " Initialize canvas to plot the flow cytometry raw data with Bokeh.\n", - " Parameters\n", - " ----------\n", - " df : Pandas dataframe.\n", - " Data frame containing the data to be plotted.\n", - " x_col, y_col : str.\n", - " Name of the dataframe columns containing the x and y data \n", - " respectively to be plotted.\n", - " log : bool.\n", - " Boolean indicating if the data should be plotted in log_10 scale.\n", - " NOTE: Since Bokeh is known to have issues with plotting in log scale\n", - " rather than changing the axis to log scale the function computes\n", - " explicitly the log base 10 value of each datum.\n", - " '''\n", - "\n", - " x_range = (df[x_col].min(), df[x_col].max())\n", - " y_range = (df[y_col].min(), df[y_col].max())\n", - " \n", - " # Initialize the Bokeh plot\n", - " p = bokeh.plotting.figure(\n", - " x_range=x_range,\n", - " y_range=y_range,\n", - " tools='save,pan,wheel_zoom,box_zoom,reset', \n", - " plot_width=500, \n", - " plot_height=500,\n", - " )\n", - " \n", - " # Add all the features to the plot\n", - " p.xgrid.grid_line_color = '#a6a6a6'\n", - " p.ygrid.grid_line_color = '#a6a6a6'\n", - " p.ygrid.grid_line_dash = [6, 4]\n", - " p.xgrid.grid_line_dash = [6, 4]\n", - " if log:\n", - " p.xaxis.axis_label = 'log ' + x_col\n", - " p.yaxis.axis_label = 'log ' + y_col\n", - " else:\n", - " p.xaxis.axis_label = x_col\n", - " p.yaxis.axis_label = y_col\n", - " p.xaxis.axis_label_text_font_size = '15pt'\n", - " p.yaxis.axis_label_text_font_size = '15pt'\n", - " p.background_fill_color = '#E3DCD0'\n", - " return p" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With this in hand we define a simple function that takes one of our data frames and plot whichever columns we want on a 2-D scatter plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def ds_plot(df, x_col, y_col, log=False):\n", - " if log:\n", - " data = np.log10(df[[x_col, y_col]])\n", - " else:\n", - " data = df[[x_col, y_col]]\n", - " p = base_plot(data, x_col, y_col, log=log)\n", - " pipeline = ds.Pipeline(data, ds.Point(x_col, y_col))\n", - " return p, pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's plot the front and side scattering channels we will use for the gating. \n", - "The amazing feature of `datashader` is that we can plot **all of the data points** (>100,000) basically instantly." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:3: RuntimeWarning: invalid value encountered in log10\n", - " app.launch_new_instance()\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p, pipeline = ds_plot(df_example, 'FSC-A', 'SSC-A', log=True)\n", - "InteractiveImage(p, pipeline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Applying the unsupervised gating procedure \n", - "\n", - "To get some sense of potentials ways to automatically gate the data let's first plot a Kernel Density Estimation to get a sense of where does most of the data-points density is." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will use the convenient `jointplot` from `seaborn` to make a KDE plot of these scattering.\n", - "\n", - "First let's take a look at the linear scattering data..." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/razo/anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n", - " y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.jointplot(x='FSC-A', y='SSC-A', data=df_example, kind=\"kde\");" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "... and the log scattering values." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: RuntimeWarning: invalid value encountered in log10\n", - " from ipykernel import kernelapp as app\n", - "/Users/razo/anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n", - " y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.jointplot(x='FSC-A', y='SSC-A', \n", - " data=np.log10(df_example[['FSC-A', 'SSC-A']]), kind=\"kde\");" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From this last plot, we can see that the marginal distributions look relatively unimodal. The slight bi-modality shown on the side scattering is not a common feature on all data sets and both peaks are relatively close to each other such that the proposed automatic gating procedure will be relatively insensitive to them.\n", - "\n", - "We propose setting an automatic gate by **fitting a bivariate Gaussian distribution** to the $\\log$ front and $\\log$ side scattering and then selecting an interval that contains a fraction $\\alpha$ of the total data (for example, an $\\alpha$ of $0.4$ yields 40% of the data).\n", - "\n", - "For this we need a robut function to fit a 2D Gaussian matrix to the data. We will take advantage of the `astroML` [`fit_bivariate_normal`](http://www.astroml.org/book_figures/chapter3/fig_robust_pca.html) function to estimate the mean $\\mathbf{\\mu}$ and covariance matrix $\\mathbf{\\Sigma}$." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def fit_2D_gaussian(df, x_val='FSC-A', y_val='SSC-A', log=False):\n", - " '''\n", - " This function hacks astroML fit_bivariate_normal to return the mean and\n", - " covariance matrix when fitting a 2D gaussian fuction to the data contained\n", - " in the x_vall and y_val columns of the DataFrame df.\n", - " Parameters\n", - " ----------\n", - " df : pandas DataFrame.\n", - " dataframe containing the data from which to fit the distribution\n", - " x_val, y_val : str.\n", - " name of the dataframe columns to be used in the function\n", - " log : bool.\n", - " indicate if the log of the data should be use for the fit or not\n", - " \n", - " Returns\n", - " -------\n", - " mu : tuple.\n", - " (x, y) location of the best-fit bivariate normal\n", - " cov : 2 x 2 array\n", - " covariance matrix.\n", - " cov[0, 0] = variance of the x_val column\n", - " cov[1, 1] = variance of the y_val column\n", - " cov[0, 1] = cov[1, 0] = covariance of the data\n", - " '''\n", - " if log:\n", - " x = np.log10(df[x_val])\n", - " y = np.log10(df[y_val])\n", - " else:\n", - " x = df[x_val]\n", - " y = df[y_val]\n", - " \n", - " # Fit the 2D Gaussian distribution using atroML function\n", - " mu, sigma_1, sigma_2, alpha = mwc.fit_bivariate_normal(x, y, robust=True)\n", - "\n", - " # compute covariance matrix from the standar deviations and the angle\n", - " # that the fit_bivariate_normal function returns\n", - " sigma_xx = ((sigma_1 * np.cos(alpha)) ** 2\n", - " + (sigma_2 * np.sin(alpha)) ** 2)\n", - " sigma_yy = ((sigma_1 * np.sin(alpha)) ** 2\n", - " + (sigma_2 * np.cos(alpha)) ** 2)\n", - " sigma_xy = (sigma_1 ** 2 - sigma_2 ** 2) * np.sin(alpha) * np.cos(alpha)\n", - " \n", - " # put elements of the covariance matrix into an actual matrix\n", - " cov = np.array([[sigma_xx, sigma_xy], [sigma_xy, sigma_yy]])\n", - " \n", - " return mu, cov" - ] - }, - { 
- "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then to compute the interval that contains a fraction $\\alpha$ of the data we follow [this procedure](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Interval) in which, for a 2D Gaussian distribution, the elliptic region defined by\n", - "\\begin{equation}\n", - "\\left(\\mathbf{x} - \\mathbf{\\mu} \\right)^T \\mathbf{\\Sigma}^{-1} \\left(\\mathbf{x} - \\mathbf{\\mu} \\right) \\leq \\chi^2_2(\\alpha)\n", - "\\end{equation}\n", - "contains $\\alpha\\ \\times 100$ % of the data. $\\chi^2_2(\\alpha)$ is the quantile function of the chi-squared distribution with $2$ degrees of freedom, evaluated at probability $\\alpha$.\n", - "\n", - "Let's define a function to compute the left hand side of the inequality for our data." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def gauss_interval(df, mu, cov, x_val='FSC-A', y_val='SSC-A', log=False):\n", - " '''\n", - " Computes the of the statistic\n", - " (x - µx)'∑(x - µx) \n", - " for each of the elements in df columns x_val and y_val.\n", - " \n", - " Parameters\n", - " ----------\n", - " df : DataFrame.\n", - " dataframe containing the data from which to fit the distribution\n", - " mu : array-like.\n", - " (x, y) location of bivariate normal\n", - " cov : 2 x 2 array\n", - " covariance matrix\n", - " x_val, y_val : str.\n", - " name of the dataframe columns to be used in the function\n", - " log : bool.\n", - " indicate if the log of the data should be use for the fit or not \n", - " \n", - " Returns\n", - " -------\n", - " statistic_gauss : array-like.\n", - " array containing the result of the linear algebra operation:\n", - " (x - µx)'∑(x - µx) \n", - " '''\n", - " # Determine that the covariance matrix is not singular\n", - " det = np.linalg.det(cov)\n", - " if det == 0:\n", - " raise NameError(\"The covariance matrix can't be singular\")\n", - " \n", - " # 
Compute the vector x defined as [[x - mu_x], [y - mu_y]]\n", - " if log: \n", - " x_vect = np.log10(np.array(df[[x_val, y_val]]))\n", - " else:\n", - " x_vect = np.array(df[[x_val, y_val]])\n", - " x_vect[:, 0] = x_vect[:, 0] - mu[0]\n", - " x_vect[:, 1] = x_vect[:, 1] - mu[1]\n", - " \n", - " # compute the inverse of the covariance matrix\n", - " inv_sigma = np.linalg.inv(cov)\n", - " \n", - " # compute the operation\n", - " interval_array = np.zeros(len(df))\n", - " for i, x in enumerate(x_vect):\n", - " interval_array[i] = np.dot(np.dot(x, inv_sigma), x.T)\n", - " \n", - " return interval_array" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we defined the functions let's fit a 2D gaussian to our $\\log$ scattering data and then compute the interval statistic." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:26: RuntimeWarning: invalid value encountered in log10\n", - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:27: RuntimeWarning: invalid value encountered in log10\n", - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:33: RuntimeWarning: invalid value encountered in log10\n" - ] - } - ], - "source": [ - "# Fit the bivariate Gaussian distribution\n", - "mu, cov = fit_2D_gaussian(df_example, log=True)\n", - "\n", - "# Compute the statistic for each of the pair of log scattering data\n", - "interval_array = gauss_interval(df_example, mu, cov, log=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Having the statistic allows us to compare it to the $\\chi^2$ quantile function. 
Just as in the 68-95-99.7% rule for 1D Gaussians, let us choose an arbitrary threshold to keep 40% of the data density.\n", - "\n", - "To compute the $\\chi^2$ quantile function we will use the `scipy.stats.chi2.ppf` function." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Fraction of the data kept after the alpha = 0.40 threshold:\n", - "0.41\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/razo/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:4: RuntimeWarning: invalid value encountered in less_equal\n" - ] - } - ], - "source": [ - "alpha = 0.40\n", - "\n", - "# Find which data points fall inside the interval\n", - "idx = interval_array <= scipy.stats.chi2.ppf(alpha, 2)\n", - "print('''\n", - "Fraction of the data kept after the alpha = {0:0.2f} threshold:\n", - "{1:0.2f}\n", - "'''.format(alpha, np.sum(idx) / len(interval_array)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's apply the threshold to our data and plot it again using `datashader`." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Apply the threshold to the data\n", - "df_thresh_gauss = df_example[idx]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### $\\log$ scattering" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p, pipeline = ds_plot(df_thresh_gauss, 'FSC-A', 'SSC-A', log=True)\n", - "InteractiveImage(p, pipeline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This looks pretty good! Let's now plot this thresholded data on a linear scale." - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p, pipeline = ds_plot(df_thresh_gauss, 'FSC-A', 'SSC-A', log=False)\n", - "InteractiveImage(p, pipeline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It looks much better than the arbitrary thresholds set by eye!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After processing the data using a value of $\\alpha = 0.4$ let's look at the results from this experiment.\n", - "\n", - "We will read a pre-processed `pandas DataFrame` that contains all the fold changes for these strains." - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# define variables to use over the script\n", - "date = 20160725\n", - "username = 'mrazomej'\n", - "\n", - "# read the CSV file with the mean fold change\n", - "df_preprocess = pd.read_csv('../../data/' + str(date) +\\\n", - " 
'_lacI_titration_MACSQuant.csv', comment='#')\n", - "\n", - "rbs = df_preprocess.rbs.unique()\n", - "replica = df_preprocess.replica.unique()\n", - "\n", - "# compute the theoretical repression level\n", - "repressor_array = np.logspace(0, 3, 100)\n", - "epsilon_array = np.array([-15.3, -13.9, -9.7, -17])\n", - "operators = np.array(['O1', 'O2', 'O3', 'Oid'])\n", - "\n", - "colors = sns.hls_palette(len(operators), l=.3, s=.8)\n", - "# plot theoretical curve\n", - "# First for the A channel\n", - "plt.figure(figsize=(6,6))\n", - "for i, o in enumerate(operators):\n", - " fold_change_theor = 1 / (1 + 2 * repressor_array / 5E6 \\\n", - " * np.exp(-epsilon_array[i]))\n", - " plt.plot(repressor_array, fold_change_theor, label=o,\n", - " color=colors[i])\n", - " plt.scatter(df_preprocess[(df_preprocess.operator == o) & \\\n", - " (df_preprocess.rbs != 'auto') & \\\n", - " (df_preprocess.rbs != 'delta')].repressors, \n", - " df_preprocess[(df_preprocess.operator == o) & \\\n", - " (df_preprocess.rbs != 'auto') & \\\n", - " (df_preprocess.rbs != 'delta')].fold_change_A,\n", - " marker='o', linewidth=0, color=colors[i], \n", - " label=o + ' flow cytometer',\n", - " alpha=0.7)\n", - "plt.xscale('log')\n", - "plt.yscale('log')\n", - "plt.xlabel('repressor copy number')\n", - "plt.ylabel('fold-change')\n", - "plt.title('FITC-A')\n", - "plt.xlim([1, 1E3])\n", - "plt.legend(loc=0, ncol=2, fontsize=10)\n", - "plt.tight_layout()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It looks quite decent and it didn't involve manually choosing gates for the scattering data. The only parameter we set was the fraction $\\alpha$ that we wanted to keep of the data.\n", - "\n", - "Let's finally define a function that returns a thresholded `DataFrame`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def auto_gauss_gate(df, alpha, x_val='FSC-A', y_val='SSC-A', log=False):\n", - " '''\n", - " Function that applies an \"unsupervised bivariate Gaussian gate\" to the data\n", - " over the channels x_val and y_val.\n", - " \n", - " Parameters\n", - " ----------\n", - " df : DataFrame.\n", - " dataframe containing the data from which to fit the distribution\n", - " alpha : float. [0, 1]\n", - " fraction of data aimed to keep. Used to compute the chi^2 quantile function\n", - " x_val, y_val : str.\n", - " name of the dataframe columns to be used in the function\n", - " log : bool.\n", - " indicate if the log of the data should be use for the fit or not \n", - " '''\n", - " data = df[[x_val, y_val]]\n", - " # Fit the bivariate Gaussian distribution\n", - " mu, cov = fit_2D_gaussian(data, log=log)\n", - "\n", - " # Compute the statistic for each of the pair of log scattering data\n", - " interval_array = gauss_interval(data, mu, cov, log=log)\n", - " \n", - " # Find which data points fall inside the interval\n", - " idx = interval_array <= scipy.stats.chi2.ppf(alpha, 2)\n", - " \n", - " return df[idx]" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/marine_prokaryote_cell_number-checkpoint.ipynb b/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/marine_prokaryote_cell_number-checkpoint.ipynb deleted file mode 100644 index 9e062fc..0000000 --- 
a/bacteria_archaea/marine/cell_num/.ipynb_checkpoints/marine_prokaryote_cell_number-checkpoint.ipynb +++ /dev/null @@ -1,817 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *\n", - "\n", - "# General parameters used in the estimate\n", - "ocean_area = 3.6e14\n", - "liters_in_m3 = 1e3\n", - "ml_in_m3 = 1e6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of marine bacteria and archaea" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook details the procedure for estimating the total number of marine bacteria and archaea.\n", - "The estimate is based on three data sources:\n", - "[Aristegui et al.](http://dx.doi.org/10.4319/lo.2009.54.5.1501),\n", - "[Buitenhuis et al.](http://dx.doi.org/10.5194/essd-4-101-2012), and\n", - "[Lloyd et al.](http://dx.doi.org/10.1128/AEM.02090-13)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load the datasets\n", - "buitenhuis = pd.read_excel('marine_prok_cell_num_data.xlsx','Buitenhuis',skiprows=1)\n", - "aristegui = pd.read_excel('marine_prok_cell_num_data.xlsx','Aristegui',skiprows=1)\n", - "aristegui[['Cell abundance (cells m-2)','SE']] = aristegui[['Cell abundance (cells m-2)','SE']].astype(float)\n", - "lloyd = pd.read_excel('marine_prok_cell_num_data.xlsx','Lloyd',skiprows=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here are samples from the data in Aristegui et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {},
- "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ZoneCell abundance (cells m-2)SE
0Epipelagic (0-200 m)1.1e+148.0e+12
1Mesopelagic (200-1000 m)1.7e+141.0e+13
2Bathypelagic (1000-4000 m)1.9e+141.4e+13
\n", - "
" - ], - "text/plain": [ - " Zone Cell abundance (cells m-2) SE\n", - "0 Epipelagic (0-200 m) 1.1e+14 8.0e+12\n", - "1 Mesopelagic (200-1000 m) 1.7e+14 1.0e+13\n", - "2 Bathypelagic (1000-4000 m) 1.9e+14 1.4e+13" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "aristegui.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From the data in Buitenhuis et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
InvestigatorsReferenceCruise/sample idLatitudeLongitudeYeardayDepthmonthBact/Lug C/L
0Arabian_Sea/bottleNaN280011.9e+015.8e+0119951.2e+011.1e+0194.9e+094.5e+01
1Arabian_Sea/bottleNaN290011.9e+015.9e+0119951.3e+011.6e+0093.9e+093.5e+01
2Arabian_Sea/bottleNaN30022.2e+016.2e+0119952.0e+018.3e+0083.2e+092.9e+01
3Arabian_Sea/bottleNaN30022.2e+016.2e+0119952.0e+012.3e+0083.2e+092.9e+01
4Arabian_Sea/bottleNaN30022.2e+016.2e+0119952.0e+015.7e+0083.2e+092.9e+01
\n", - "
" - ], - "text/plain": [ - " Investigators Reference Cruise/sample id Latitude Longitude Year \\\n", - "0 Arabian_Sea/bottle NaN 28001 1.9e+01 5.8e+01 1995 \n", - "1 Arabian_Sea/bottle NaN 29001 1.9e+01 5.9e+01 1995 \n", - "2 Arabian_Sea/bottle NaN 3002 2.2e+01 6.2e+01 1995 \n", - "3 Arabian_Sea/bottle NaN 3002 2.2e+01 6.2e+01 1995 \n", - "4 Arabian_Sea/bottle NaN 3002 2.2e+01 6.2e+01 1995 \n", - "\n", - " day Depth month Bact/L ug C/L \n", - "0 1.2e+01 1.1e+01 9 4.9e+09 4.5e+01 \n", - "1 1.3e+01 1.6e+00 9 3.9e+09 3.5e+01 \n", - "2 2.0e+01 8.3e+00 8 3.2e+09 2.9e+01 \n", - "3 2.0e+01 2.3e+00 8 3.2e+09 2.9e+01 \n", - "4 2.0e+01 5.7e+00 8 3.2e+09 2.9e+01 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "buitenhuis.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And from Llyod et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
paperSampleWater Depth (m)Cells per ccCARDFISH Bac per ccCARDFISH Arc per ccCARDFISH Total per ccFISH yieldFraction Arc CARDFISHFish or cardFish...FixativeBac permeabilizationArc permeabilizationBac probeArc probeCounting methodqPCR-Bacteria (copies/mL water)qPCR-Archaea (copies/mL water)qPCR-MCG (copies/mL water)Total qPCR(copies/mL water)
0Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast2.5e+011.0e+067.5e+059.2e+048.4e+058.3e-011.1e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
1Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.0e+027.1e+054.3e+051.1e+055.5e+057.8e-012.1e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
2Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast5.0e+021.0e+055.3e+042.8e+048.1e+047.9e-013.4e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
3Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.0e+036.9e+043.3e+042.1e+045.4e+047.8e-013.9e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
4Al Ali 2009La Seyne-sur-Mer, French Mediterranean coast1.8e+036.4e+042.6e+042.0e+044.7e+047.3e-014.4e-01CARDFISH...formaldehydelysozyme/achromopeptidaseproteinase KEUB338ARCH915Microscope-eyenannannannan
\n", - "

5 rows × 21 columns

\n", - "
" - ], - "text/plain": [ - " paper Sample Water Depth (m) \\\n", - "0 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 2.5e+01 \n", - "1 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.0e+02 \n", - "2 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 5.0e+02 \n", - "3 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.0e+03 \n", - "4 Al Ali 2009 La Seyne-sur-Mer, French Mediterranean coast 1.8e+03 \n", - "\n", - " Cells per cc CARDFISH Bac per cc CARDFISH Arc per cc \\\n", - "0 1.0e+06 7.5e+05 9.2e+04 \n", - "1 7.1e+05 4.3e+05 1.1e+05 \n", - "2 1.0e+05 5.3e+04 2.8e+04 \n", - "3 6.9e+04 3.3e+04 2.1e+04 \n", - "4 6.4e+04 2.6e+04 2.0e+04 \n", - "\n", - " CARDFISH Total per cc FISH yield Fraction Arc CARDFISH Fish or cardFish \\\n", - "0 8.4e+05 8.3e-01 1.1e-01 CARDFISH \n", - "1 5.5e+05 7.8e-01 2.1e-01 CARDFISH \n", - "2 8.1e+04 7.9e-01 3.4e-01 CARDFISH \n", - "3 5.4e+04 7.8e-01 3.9e-01 CARDFISH \n", - "4 4.7e+04 7.3e-01 4.4e-01 CARDFISH \n", - "\n", - " ... Fixative Bac permeabilization \\\n", - "0 ... formaldehyde lysozyme/achromopeptidase \n", - "1 ... formaldehyde lysozyme/achromopeptidase \n", - "2 ... formaldehyde lysozyme/achromopeptidase \n", - "3 ... formaldehyde lysozyme/achromopeptidase \n", - "4 ... 
formaldehyde lysozyme/achromopeptidase \n", - "\n", - " Arc permeabilization Bac probe Arc probe Counting method \\\n", - "0 proteinase K EUB338 ARCH915 Microscope-eye \n", - "1 proteinase K EUB338 ARCH915 Microscope-eye \n", - "2 proteinase K EUB338 ARCH915 Microscope-eye \n", - "3 proteinase K EUB338 ARCH915 Microscope-eye \n", - "4 proteinase K EUB338 ARCH915 Microscope-eye \n", - "\n", - " qPCR-Bacteria (copies/mL water) qPCR-Archaea (copies/mL water) \\\n", - "0 nan nan \n", - "1 nan nan \n", - "2 nan nan \n", - "3 nan nan \n", - "4 nan nan \n", - "\n", - " qPCR-MCG (copies/mL water) Total qPCR(copies/mL water) \n", - "0 nan nan \n", - "1 nan nan \n", - "2 nan nan \n", - "3 nan nan \n", - "4 nan nan \n", - "\n", - "[5 rows x 21 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lloyd.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Aristegui et al. we estimate the total number of cells by multiplying each layer by the surface area of the ocean" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total number of cells based on Aristegui et al.: 1.7e+29\n" - ] - } - ], - "source": [ - "aristegui_total = (aristegui['Cell abundance (cells m-2)']*ocean_area).sum()\n", - "print('Total number of cells based on Aristegui et al.: %.1e' % aristegui_total)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Buitenhuis et al. we bin the data along 100 meter depth bins, and estimate the average cell abundance in each bin. We then multiply the total number of cells per liter by the volume at each depth and sum across layers." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total number of cells based on Buitenhuis et al.: 1.3e+29\n" - ] - } - ], - "source": [ - "# Define depth range every 100 m from 0 to 4000 meters\n", - "depth_range = np.linspace(0,4000,41)\n", - "\n", - "#Bin data along depth bins\n", - "buitenhuis['Depth_bin'] = pd.cut(buitenhuis['Depth'], depth_range)\n", - "\n", - "#For each bin, calculate the average number of cells per liter\n", - "buitenhuis_bins = buitenhuis.groupby(['Depth_bin']).mean()['Bact/L']\n", - "\n", - "#Multiply each average concentration by the total volume at each bin: 100 meters depth times the surface area of the ocean\n", - "\n", - "buitenhuis_bins *= 100*ocean_area*liters_in_m3\n", - "\n", - "#Sum across all bins to get the total estimate for the number of cells of marine prokaryotes\n", - "buitenhuis_total = buitenhuis_bins.sum()\n", - "print('Total number of cells based on Buitenhuis et al.: %.1e' % buitenhuis_total)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For Lloyd et al., we rely on the sum of the total number of bacteria and archaea. The estimate for the number of bacteria and archaea is based on the regression of the concentration of bacteria and archaea with depth. We use the equations reported in Lloyd et al. to extrapolate the number of cells of bacteria and archaea across the average ocean depth of 4000 m."
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total number of cells based on Lloyd et al.: 6.2e+28\n" - ] - } - ], - "source": [ - "# Define the regression equation for the number of bacteria in the top 64 m:\n", - "def bac_surf(depth):\n", - " result = np.zeros_like(depth)\n", - " for i,x in enumerate(depth):\n", - " if x==0 :\n", - " result[i] = 5.54\n", - " \n", - " else:\n", - " result[i] = np.log10(x)*0.08+5.54\n", - " return 10**result\n", - "\n", - "# Define the regression equation for the number of bacteria in water deeper than 64 m:\n", - "bac_deep = lambda x: 10**(np.log10(x)*-1.09+7.66)\n", - "\n", - "# Define the regression equation for the number of archaea in the top 389 m:\n", - "def arch_surf(depth):\n", - " result = np.zeros_like(depth)\n", - " for i,x in enumerate(depth):\n", - " if x==0 :\n", - " result[i] = 4.1\n", - " \n", - " else:\n", - " result[i] = np.log10(x)*0.1+4.1\n", - " return 10**result\n", - "\n", - "# Define the regression equation for the number of archaea in water below 389 m:\n", - "arch_deep = lambda x: 10**(np.log10(x)*-0.8+6.43)\n", - "\n", - "# Estimate the total number of bacteria in the top 64 m by first estimating the concentration using the \n", - "# regression equation, multiplying by the volume at each depth, which is 1 m^3 times the surface\n", - "# Area of the ocean, and finally summing across different depths\n", - "total_bac_surf = (bac_surf(np.linspace(0,64,65))*ml_in_m3*ocean_area).sum()\n", - "\n", - "# We repeat the same procedure for the total number of bacteria in waters deeper than 64 m, and for the total\n", - "# Number of archaea\n", - "total_bac_deep = (bac_deep(np.linspace(65,4000,4000-65+1))*ml_in_m3*ocean_area).sum()\n", - "total_arch_surf = (arch_surf(np.linspace(0,389,390))*ml_in_m3*ocean_area).sum()\n", - "total_arch_deep =
(arch_deep(np.linspace(390,4000,4000-390+1))*ml_in_m3*ocean_area).sum()\n", - "\n", - "# Sum across bacteria and archaea to get the estimate for the total number of bacteria and archaea in the ocean\n", - "lloyd_total = total_bac_surf+total_bac_deep+total_arch_surf+total_arch_deep\n", - "print('Total number of cells based on Lloyd et al.: %.1e' % lloyd_total)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The estimate of the total number of cells in Lloyd et al. is based on FISH measurements, but in general not all cells which are DAPI positive are also stained with FISH. To correct for this effect, we estimate the average FISH yield across samples, and divide our estimate from the FISH measurements by the average FISH yield." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The mean yield of FISH is 0.8\n", - "After correcting for FISH yield, the estimate for the total number of bacteria and archaea based on Lloyd et al is 8.1e+28\n" - ] - } - ], - "source": [ - "fish_yield = lloyd['FISH yield'].dropna()\n", - "\n", - "# Values which are not feasible are turned to the maximal value. 
We do not use 1 because of numerical reasons\n", - "fish_yield[fish_yield >=1] = 0.999\n", - "\n", - "# calculate the statistics on the fish_visible/fish_invisible value and not the \n", - "# fish_visible/(fish_visible+fish_invisible) value because the first is not bound by 0 and 1\n", - "# We transform the values to log space to calculate the geometric mean\n", - "alpha_fish_yield = np.log10(1./(1./fish_yield[fish_yield<1]-1.))\n", - "mean_alpha_yield = np.average(-alpha_fish_yield.dropna())\n", - "mean_yield = 1./(1.+10**mean_alpha_yield)\n", - "\n", - "print('The mean yield of FISH is %.1f' % mean_yield)\n", - "lloyd_total /= mean_yield\n", - "print('After correcting for FISH yield, the estimate for the total number of bacteria and archaea based on Lloyd et al is %.1e' % lloyd_total)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the total number of marine bacteria and archaea is the geometric mean of the estimates from Aristegui et al., Buitenhuis et al. and Lloyd et al." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of marine bacteria and archaea is 1.2e+29\n" - ] - } - ], - "source": [ - "estimates = [aristegui_total,buitenhuis_total,lloyd_total]\n", - "best_estimate = 10**(np.log10(estimates).mean())\n", - "\n", - "print('Our best estimate for the total number of marine bacteria and archaea is %.1e' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "To calculate the uncertainty associated with the estimate for the total number of bacteria and archaea, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty.
\n", - "\n", - "## Intra-study uncertainties \n", - "We first survey the uncertainties reported in each of the studies. Aristegui et al. report a standard error of ≈10% for the average cell concentration per unit area. Buitenhuis et al. and Lloyd et al. do not report uncertainties.\n", - "\n", - "## Interstudy uncertainties\n", - "\n", - "We estimate the 95% multiplicative error of the geometric mean of the values from the three studies." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty is about 1.5\n" - ] - } - ], - "source": [ - "mul_CI = geo_CI_calc(estimates)\n", - "\n", - "print('The interstudy uncertainty is about %.1f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We thus take the highest uncertainty from our collection which is ≈1.5-fold.\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total number of marine bacteria and archaea: 1.2e+29\n", - "Uncertainty associated with the total number of marine bacteria and archaea: 1.5-fold\n" - ] - } - ], - "source": [ - "print('Total number of marine bacteria and archaea: %.1e' % best_estimate)\n", - "print('Uncertainty associated with the total number of marine bacteria and archaea: %.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../marine_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total number of marine bacteria and archaea',\n", - " 'Value': int(best_estimate),\n", - " 'Units': 'Cells',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../marine_prok_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name":
"Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine/marine_prok_biomass_estimate_old.xlsx b/bacteria_archaea/marine/marine_prok_biomass_estimate_old.xlsx deleted file mode 100644 index 7e75373..0000000 Binary files a/bacteria_archaea/marine/marine_prok_biomass_estimate_old.xlsx and /dev/null differ diff --git a/bacteria_archaea/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subsurface_prok_biomass_estimate-checkpoint.ipynb b/bacteria_archaea/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subsurface_prok_biomass_estimate-checkpoint.ipynb deleted file mode 100644 index cb2b7fb..0000000 --- a/bacteria_archaea/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subsurface_prok_biomass_estimate-checkpoint.ipynb +++ /dev/null @@ -1,214 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of marine deep subsurface archaea and bacteria\n", - "\n", - "We use our best estimates for the total number of marine deep subsurface prokaryotes, the carbon content of marine deep subsurface prokaryotes and the fraction of archaea and bacteria out of the total population of marine deep subsurface prokaryotes to estimate the total biomass of marine deep subsurface bacteria and archaea." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Total number of bacteria and archaea in the ma...4.0e+29Cells6.4e+00
1Carbon content of bacterial and archaeal cells...2.4e+01fg C cell^-12.2e+00
2Fraction of archaea3.0e-01Unitless1.6e+00
3Fraction of bacteria7.0e-01Unitless1.3e+00
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units \\\n", - "0 Total number of bacteria and archaea in the ma... 4.0e+29 Cells \n", - "1 Carbon content of bacterial and archaeal cells... 2.4e+01 fg C cell^-1 \n", - "2 Fraction of archaea 3.0e-01 Unitless \n", - "3 Fraction of bacteria 7.0e-01 Unitless \n", - "\n", - " Uncertainty \n", - "0 6.4e+00 \n", - "1 2.2e+00 \n", - "2 1.6e+00 \n", - "3 1.3e+00 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_excel('marine_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply all the relevant parameters to arrive at our best estimate for the biomass of marine deep subsurface archaea and bacteria, and propagate the uncertainties associated with each parameter to calculate the uncertainty associated with the estimate for the total biomass." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of marine deep subsurface archaea is 3 Gt C\n", - "Our best estimate for the total biomass of marine deep subsurface bacteria is 7 Gt C\n", - "The uncertainty associated with the estimate for the biomass of archaea is 7.9-fold\n", - "The uncertainty associated with the estimate for the biomass of bacteria is 7.6-fold\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of marine archaea and bacteria\n", - "total_arch_biomass = results['Value'][0]*results['Value'][1]*1e-15*results['Value'][2]\n", - "total_bac_biomass = results['Value'][0]*results['Value'][1]*1e-15*results['Value'][3]\n", - "\n", - "print('Our best estimate for the total biomass of marine deep subsurface archaea is %.0f Gt C' %(total_arch_biomass/1e15))\n", - "print('Our best estimate for the total biomass of marine deep subsurface bacteria is %.0f 
Gt C' %(total_bac_biomass/1e15))\n", - "\n", - "# Propagate the uncertainty associated with each parameter to the final estimate\n", - "\n", - "arch_biomass_uncertainty = CI_prod_prop(results['Uncertainty'][:3])\n", - "bac_biomass_uncertainty = CI_prod_prop(results.iloc[[0,1,3]]['Uncertainty'])\n", - "\n", - "print('The uncertainty associated with the estimate for the biomass of archaea is %.1f-fold' %arch_biomass_uncertainty)\n", - "print('The uncertainty associated with the estimate for the biomass of bacteria is %.1f-fold' %bac_biomass_uncertainty)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Feed bacteria results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Bacteria','Marine deep subsurface'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_bac_biomass/1e15,bac_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Archaea','Marine deep subsurface'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_arch_biomass/1e15,arch_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed bacteria results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Bacteria','Marine deep subsurface'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][3],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Archaea','Marine deep subsurface'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][2],\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/marine_deep_subsurface_arch_frac-checkpoint.ipynb b/bacteria_archaea/marine_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/marine_deep_subsurface_arch_frac-checkpoint.ipynb deleted file mode 100644 index 1bd699a..0000000 --- a/bacteria_archaea/marine_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/marine_deep_subsurface_arch_frac-checkpoint.ipynb +++ /dev/null @@ -1,551 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from 
fraction_helper import *\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "\n", - "# Genaral parameters used in the estimate\n", - "ocean_area = 3.6e14\n", - "liters_in_m3 = 1e3\n", - "ml_in_m3 = 1e6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of archaea out of the total marine deep subsurface prokaryote population\n", - "\n", - "In order to estimate the fraction of archaea out of the total population of marine deep subsurface bacteria and archaea, we rely of two independent methods: catalyzed reporter deposition fluorescent in-situ hybridization (CARD-FISH) and quantitative PCR (qPCR). Both methods have been found reliable for reporting the fraction of archaea out of the population of marine deep subsurface bacteria and archaea\n", - "\n", - "### CARD-FISH based estimate\n", - "For our CARD-FISH based estimate we rely on data from [Lloyd et al.](http://dx.doi.org/10.1128/AEM.02090-13). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
papercoreSediment Depth (m)Cells per ccCARDFISH Bac per ccCARDFISH Arc per ccCARDFISH Total per ccFISH yieldqPCR Bac per ccqPCR Arc per cc...Taqman UniversalArc standardBac standardDNA extraction procedureMud volcano or seep?Water depth (m)Environment TypeDNA extraction efficiency from spiking as a fraction of total cellsTemplate DNA dilution factorUses 516 for Arc
0Amaro 2012NE Atlantic Canyon4.5e-031.0e+086.0e+071.9e+077.9e+077.5e-01nannan...NaNNaNNaNNaNnan3.5e+03Deep-seaNaNNaNnan
1Amaro 2012NE Atlantic Canyon3.5e-027.8e+065.2e+061.3e+066.5e+068.4e-01nannan...NaNNaNNaNNaNnan3.5e+03Deep-seaNaNNaNnan
2Amaro 2012NE Atlantic Canyon6.5e-023.5e+061.5e+061.8e+063.3e+069.5e-01nannan...NaNNaNNaNNaNnan3.5e+03Deep-seaNaNNaNnan
3Amaro 2012NE Atlantic Canyon9.5e-028.1e+063.7e+063.2e+066.9e+068.5e-01nannan...NaNNaNNaNNaNnan3.5e+03Deep-seaNaNNaNnan
4Amaro 2012NE Atlantic Canyon1.3e-016.1e+063.0e+061.9e+064.9e+068.1e-01nannan...NaNNaNNaNNaNnan3.5e+03Deep-seaNaNNaNnan
\n", - "

5 rows × 47 columns

\n", - "
" - ], - "text/plain": [ - " paper core Sediment Depth (m) Cells per cc \\\n", - "0 Amaro 2012 NE Atlantic Canyon 4.5e-03 1.0e+08 \n", - "1 Amaro 2012 NE Atlantic Canyon 3.5e-02 7.8e+06 \n", - "2 Amaro 2012 NE Atlantic Canyon 6.5e-02 3.5e+06 \n", - "3 Amaro 2012 NE Atlantic Canyon 9.5e-02 8.1e+06 \n", - "4 Amaro 2012 NE Atlantic Canyon 1.3e-01 6.1e+06 \n", - "\n", - " CARDFISH Bac per cc CARDFISH Arc per cc CARDFISH Total per cc \\\n", - "0 6.0e+07 1.9e+07 7.9e+07 \n", - "1 5.2e+06 1.3e+06 6.5e+06 \n", - "2 1.5e+06 1.8e+06 3.3e+06 \n", - "3 3.7e+06 3.2e+06 6.9e+06 \n", - "4 3.0e+06 1.9e+06 4.9e+06 \n", - "\n", - " FISH yield qPCR Bac per cc qPCR Arc per cc ... \\\n", - "0 7.5e-01 nan nan ... \n", - "1 8.4e-01 nan nan ... \n", - "2 9.5e-01 nan nan ... \n", - "3 8.5e-01 nan nan ... \n", - "4 8.1e-01 nan nan ... \n", - "\n", - " Taqman Universal Arc standard Bac standard DNA extraction procedure \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "\n", - " Mud volcano or seep? Water depth (m) Environment Type \\\n", - "0 nan 3.5e+03 Deep-sea \n", - "1 nan 3.5e+03 Deep-sea \n", - "2 nan 3.5e+03 Deep-sea \n", - "3 nan 3.5e+03 Deep-sea \n", - "4 nan 3.5e+03 Deep-sea \n", - "\n", - " DNA extraction efficiency from spiking as a fraction of total cells \\\n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "\n", - " Template DNA dilution factor Uses 516 for Arc \n", - "0 NaN nan \n", - "1 NaN nan \n", - "2 NaN nan \n", - "3 NaN nan \n", - "4 NaN nan \n", - "\n", - "[5 rows x 47 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the dataset\n", - "lloyd = pd.read_excel('marine_deep_subsurface_arch_frac_data.xlsx','Lloyd',skiprows=1)\n", - "lloyd.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use values reported in Lloyd et al. 
for sediments deeper than 10 cm and using CARD-FISH with proteinase K permeabilization (this method generates reliable results). We calculate the geometric mean fraction of archaea out of the population of archaea and bacteria in this dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The geometric mean of the fraction of archaea out of the population of bacteria and archaea measured using CARD-FISH is 44%\n" - ] - } - ], - "source": [ - "# Filter the data in Lloyd et al. to contain only samples which have been measured in sediments deeper than\n", - "# 10 cm and with CARD-FISH with proteinase K permeabilization\n", - "# Also remove NaN values and zeros\n", - "lloyd_fish = lloyd[(lloyd['Arc permeabilization'] == 'proteinase K') & \n", - " (lloyd['Fish or cardFish'] == 'CARDFISH') & \n", - " (lloyd['Fraction Arc CARDFISH']>0) &\n", - " (lloyd['Sediment Depth (m)'] >0.01)]\n", - "\n", - "# Calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archaea\n", - "# Remove zeros and NaNs\n", - "fish_frac = frac_mean(lloyd_fish['Fraction Arc CARDFISH'])\n", - "print('The geometric mean of the fraction of archaea out of the population of bacteria and archaea measured using CARD-FISH is ' + '{:,.0f}%'.format(fish_frac*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## qPCR-based estimate\n", - "For the qPCR-based estimate of the fraction of archaea out of the total population of marine deep subsurface bacteria and archaea, we also rely on data from Lloyd et al. We also consider only samples deeper than 10 cm. We exclude measurements using the ARCH516 as an archaeal primer or TaqMan probe, as measurements based on these primers or probes were shown to be unreliable." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The geometric mean of the fraction of archaea out of the population of bacteria and archaea measured using qPCR is 27%\n" - ] - } - ], - "source": [ - "# Filter the data in Lloyd et al. to contain only samples which have been measured in sediments deeper than\n", - "# 10 cm and not with the ARCH516 as an archaeal primer or TaqMan probe.\n", - "# Also remove NaN values and zeros\n", - "lloyd_qpcr = lloyd[(~np.isnan(lloyd['Fraction Arc qPCR'])) & \n", - " (lloyd['Sediment Depth (m)'] >0.01) &\n", - " (lloyd['Fraction Arc qPCR']>0) &\n", - " (lloyd['Arc reverse'].str.contains('516')==False) &\n", - " (lloyd['Arc forward'].str.contains('519')==False)]\n", - "lloyd_qpcr = lloyd_qpcr.drop(lloyd_qpcr['TaqMan Arc'].dropna().index)\n", - "\n", - "\n", - "# Calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archaea\n", - "qpcr_frac = frac_mean(lloyd_qpcr['Fraction Arc qPCR'])\n", - "print('The geometric mean of the fraction of archaea out of the population of bacteria and archaea measured using qPCR is ' + '{:,.0f}%'.format(qpcr_frac*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the fraction of archaea out of the total population of marine deep subsurface bacteria and archaea is the geometric mean of the estimates based on CARD-FISH and qPCR." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of archaea out of the population marine deep subsurface bacteria and archaea is 35%\n" - ] - } - ], - "source": [ - "\n", - "best_estimate = frac_mean(np.array([fish_frac,qpcr_frac]))\n", - "print('Our best estimate for the fraction of archaea out of the population marine deep subsurface bacteria and archaea is ' + '{:,.0f}%'.format(best_estimate*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "In order to assess the uncertainty associated with our estimate for the fraction of marine archaea out of the total population of bacteria and archaea in the marine deep subsurface, we gather all possible indices of uncertainty. We compare the uncertainty of values within each one of the methods and the uncertainty stemming from the variability of the values provided by the two methods. \n", - "\n", - "## Intra-method uncertainty \n", - "### CARD-FISH-based method\n", - "We calculate the 95% confidence interval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using CARD-FISH." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty of the CARD-FISH-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈1.1-fold\n", - "The uncertainty of the CARD-FISH-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.1-fold\n" - ] - } - ], - "source": [ - "fish_arc_CI = frac_CI(lloyd_fish['Fraction Arc CARDFISH'])\n", - "fish_bac_CI = frac_CI(1-lloyd_fish['Fraction Arc CARDFISH'])\n", - "print('The uncertainty of the CARD-FISH-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' %fish_arc_CI)\n", - "print('The uncertainty of the CARD-FISH-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' %fish_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### qPCR-based method\n", - "We calculate the 95% confidence interval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using qPCR." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈1.2-fold\n", - "The uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.1-fold\n" - ] - } - ], - "source": [ - "qpcr_arc_CI = frac_CI(lloyd_qpcr['Fraction Arc qPCR'])\n", - "qpcr_bac_CI = frac_CI(1-lloyd_qpcr['Fraction Arc qPCR'])\n", - "print('The uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' %qpcr_arc_CI)\n", - "print('The uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' %qpcr_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty \n", - "We calculate the 95% confidence interval for the geometric mean of the estimates based on CARD-FISH and qPCR for the fraction of archaea out of the total population of bacteria and archaea. This serves as a measure of the inter-method uncertainty of our estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈1.6-fold\n", - "The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.3-fold\n" - ] - } - ], - "source": [ - "inter_arc_CI = frac_CI(np.array([fish_frac,qpcr_frac]))\n", - "inter_bac_CI = frac_CI(np.array([1-fish_frac,1-qpcr_frac]))\n", - "print('The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' %inter_arc_CI)\n", - "print('The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' %inter_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimates for the uncertainty associated with the fraction of archaea and bacteria out of the total population of marine deep subsurface bacteria and archaea, we use the highest uncertainty out of the available set of uncertainties we collected.\n", - "\n", - "The highest inter-method uncertainty for the fraction of archaea is ≈1.6-fold, which is higher than the highest intra-method uncertainty of ≈1.2-fold, so we use ≈1.6-fold as our best projection of the uncertainty associated with the fraction of archaea out of the total population of marine deep subsurface bacteria and archaea. \n", - "Similarly, the highest inter-method uncertainty for the fraction of bacteria is ≈1.3-fold, which is higher than the highest intra-method uncertainty of ≈1.1-fold, so we use ≈1.3-fold as our best projection of the uncertainty associated with the fraction of bacteria out of the total population of marine deep subsurface bacteria and archaea. 
\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of archaea out of the total population of marine deep subsurface bacteria and archaea: 35 percent\n", - "Fraction of bacteria out of the total population of marine deep subsurface bacteria and archaea: 65 percent\n", - "Uncertainty associated with the fraction of marine archaea: 1.6-fold\n", - "Uncertainty associated with the fraction of marine bacteria: 1.3-fold\n" - ] - } - ], - "source": [ - "# Take the maximum uncertainty as our best projection of uncertainty\n", - "arc_mul_CI = np.max([fish_arc_CI,qpcr_arc_CI,inter_arc_CI])\n", - "bac_mul_CI = np.max([fish_bac_CI,qpcr_bac_CI,inter_bac_CI])\n", - "\n", - "print('Fraction of archaea out of the total population of marine deep subsurface bacteria and archaea: %.0f percent' %(best_estimate*100))\n", - "print('Fraction of bacteria out of the total population of marine deep subsurface bacteria and archaea: %.0f percent' %(100.-best_estimate*100))\n", - "print('Uncertainty associated with the fraction of marine archaea: %.1f-fold' % arc_mul_CI)\n", - "print('Uncertainty associated with the fraction of marine bacteria: %.1f-fold' % bac_mul_CI)\n", - "\n", - "old_results = pd.read_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(2), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Fraction of archaea',\n", - " 'Value': \"{0:.1f}\".format(best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(arc_mul_CI)\n", - " })\n", - "\n", - "result.loc[3] = pd.Series({\n", - " 'Parameter': 'Fraction of bacteria',\n", - " 'Value': \"{0:.1f}\".format(1.0 - best_estimate),\n", - " 'Units': 
'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(bac_mul_CI)\n", - " })\n", - "\n", - "\n", - "result.to_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine_deep_subsurface/carbon_content/.ipynb_checkpoints/marine_deep_subsurface_prok_carbon_content-checkpoint.ipynb b/bacteria_archaea/marine_deep_subsurface/carbon_content/.ipynb_checkpoints/marine_deep_subsurface_prok_carbon_content-checkpoint.ipynb deleted file mode 100644 index e569631..0000000 --- a/bacteria_archaea/marine_deep_subsurface/carbon_content/.ipynb_checkpoints/marine_deep_subsurface_prok_carbon_content-checkpoint.ipynb +++ /dev/null @@ -1,965 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.2f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the carbon content of marine bacteria and archaea\n", - "\n", - "In order to estimate the characteristic carbon content of marine bacteria and archaea, we rely on two main methodologies - volume based estimates and amino acid based estimates.\n", - "\n", - "## Volume-based estimates\n", - "We collected measurements of the characeteristic volume of bacteria and archaea in the 
marine deep subsurface from 4 different studies. For 3 of those studies, we collected reported average cell volumes. Here are the average values we collected from those three studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyMean cell volume (µm^3)Remarks
0Parkes et al.0.21NaN
1Lipp et al. (coccoid)0.07Calculated assuming a spherical cell with diam...
2Lipp et al. (rod)0.20Calculated assuming a cylinderical cell with d...
3Kallmeter et al.0.04NaN
\n", - "
" - ], - "text/plain": [ - " Study Mean cell volume (µm^3) \\\n", - "0 Parkes et al. 0.21 \n", - "1 Lipp et al. (coccoid) 0.07 \n", - "2 Lipp et al. (rod) 0.20 \n", - "3 Kallmeter et al. 0.04 \n", - "\n", - " Remarks \n", - "0 NaN \n", - "1 Calculated assuming a spherical cell with diam... \n", - "2 Calculated assuming a cylinderical cell with d... \n", - "3 NaN " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "volumes = pd.read_excel('marine_deep_subsurface_prok_carbon_content_data.xlsx','Volume based')\n", - "volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In addition we used data from [Braun et al.](http://dx.doi.org/10.3389/fmicb.2016.01375) which measured cell volumes for three cell morphologies (coccoid, elongated and filamentous), along with the relative fraction of each morphology in each site sampled. Here is the data extracted from Braun et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Depth (m)Mean volume (µm^3)Cell typeFraction FM
00.400.05Spherical0.44
12.750.05Spherical0.44
24.320.03Spherical0.52
39.570.03Spherical0.54
414.550.01Spherical0.42
520.530.02Spherical0.33
638.950.01Spherical0.18
70.400.10Elongated0.49
82.750.11Elongated0.48
94.320.08Elongated0.35
109.570.03Elongated0.42
1114.550.04Elongated0.55
1220.530.03Elongated0.63
1338.950.02Elongated0.66
140.400.34Filamentous0.07
152.750.19Filamentous0.07
164.320.20Filamentous0.13
179.570.11Filamentous0.04
1814.550.10Filamentous0.04
1920.530.22Filamentous0.03
2038.950.08Filamentous0.16
\n", - "
" - ], - "text/plain": [ - " Depth (m) Mean volume (µm^3) Cell type Fraction FM\n", - "0 0.40 0.05 Spherical 0.44\n", - "1 2.75 0.05 Spherical 0.44\n", - "2 4.32 0.03 Spherical 0.52\n", - "3 9.57 0.03 Spherical 0.54\n", - "4 14.55 0.01 Spherical 0.42\n", - "5 20.53 0.02 Spherical 0.33\n", - "6 38.95 0.01 Spherical 0.18\n", - "7 0.40 0.10 Elongated 0.49\n", - "8 2.75 0.11 Elongated 0.48\n", - "9 4.32 0.08 Elongated 0.35\n", - "10 9.57 0.03 Elongated 0.42\n", - "11 14.55 0.04 Elongated 0.55\n", - "12 20.53 0.03 Elongated 0.63\n", - "13 38.95 0.02 Elongated 0.66\n", - "14 0.40 0.34 Filamentous 0.07\n", - "15 2.75 0.19 Filamentous 0.07\n", - "16 4.32 0.20 Filamentous 0.13\n", - "17 9.57 0.11 Filamentous 0.04\n", - "18 14.55 0.10 Filamentous 0.04\n", - "19 20.53 0.22 Filamentous 0.03\n", - "20 38.95 0.08 Filamentous 0.16" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "braun_volumes = pd.read_excel('marine_deep_subsurface_prok_carbon_content_data.xlsx','Braun', skiprows=1)\n", - "braun_volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "We first calculate the characteristic volume of a single cell from the data in Braun et al. to be able to compare it with the other resources:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic volume of bacterial and archaeal cells in the marine deep subsurface based on Braun et al. is ≈0.05µm^3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Mean cell volume (µm^3)RemarksStudy
00.21NaNParkes et al.
10.07Calculated assuming a spherical cell with diam...Lipp et al. (coccoid)
20.20Calculated assuming a cylinderical cell with d...Lipp et al. (rod)
30.04NaNKallmeter et al.
00.05NaNBraun et al.
\n", - "
" - ], - "text/plain": [ - " Mean cell volume (µm^3) Remarks \\\n", - "0 0.21 NaN \n", - "1 0.07 Calculated assuming a spherical cell with diam... \n", - "2 0.20 Calculated assuming a cylinderical cell with d... \n", - "3 0.04 NaN \n", - "0 0.05 NaN \n", - "\n", - " Study \n", - "0 Parkes et al. \n", - "1 Lipp et al. (coccoid) \n", - "2 Lipp et al. (rod) \n", - "3 Kallmeter et al. \n", - "0 Braun et al. " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Group by depth\n", - "\n", - "braun_depth_binned = braun_volumes.groupby(['Depth (m)'])\n", - "\n", - "# Define the function which will to the weighted average of volume based on the fraction of the\n", - "# population of each cell type\n", - "\n", - "def groupby_weighted_average(input):\n", - " return np.average(input['Mean volume (µm^3)'],weights=input['Fraction FM'])\n", - "\n", - "# Calculate the weighted average volume for each depth sample\n", - "braun_weighted_average = braun_depth_binned.apply(groupby_weighted_average)\n", - "\n", - "# Calculate the geometric mean of the volumes from different depths\n", - "braun_characteristic_volume = gmean(braun_weighted_average)\n", - "print(r'The characteristic volume of bacterial and archaeal cells in the marine deep subsurface based on Braun et al. 
is ≈%.2fµm^3' %braun_characteristic_volume)\n", - "volumes.append(pd.DataFrame.from_dict([{'Study': 'Braun et al.', 'Mean cell volume (µm^3)':braun_characteristic_volume}]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to convert the five different estimates for the characteristic volume of bacterial and archaeal cell in the marine deep subsurface into estimates of carbon content, we use two independent models that have been used in the literature: [Fry et al.](http://dx.doi.org/10.1016/S0580-9517(08)70239-3) which estimates ≈310 fg C per $µm^3$, and [Simon & Azam](http://dx.doi.org/10.3354/meps051201), which developed an allometric model of the carbon content of cells with different volumes. The allometric model they developed is:\n", - "$$C = 88.1 \\times V^{0.59}$$\n", - "Where C is the carbon content of a single cell [fg C cell$^{-1}$], and V is cell volume [$µm^3$]. We apply these two independent conversion equations to the volumes we gathered from the literature to produce 10 estimates for the characteristic carbon content of bacterial and archaeal cells in the marine deep subsurface." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyMean cell volume (µm^3)RemarksFry et al.Simon and Azam
0Parkes et al.0.21NaN65.1041.61
1Lipp et al. (coccoid)0.07Calculated assuming a spherical cell with diam...20.2820.91
2Lipp et al. (rod)0.20Calculated assuming a cylinderical cell with d...60.8439.98
3Kallmeter et al.0.04NaN13.0216.10
\n", - "
" - ], - "text/plain": [ - " Study Mean cell volume (µm^3) \\\n", - "0 Parkes et al. 0.21 \n", - "1 Lipp et al. (coccoid) 0.07 \n", - "2 Lipp et al. (rod) 0.20 \n", - "3 Kallmeter et al. 0.04 \n", - "\n", - " Remarks Fry et al. \\\n", - "0 NaN 65.10 \n", - "1 Calculated assuming a spherical cell with diam... 20.28 \n", - "2 Calculated assuming a cylinderical cell with d... 60.84 \n", - "3 NaN 13.02 \n", - "\n", - " Simon and Azam \n", - "0 41.61 \n", - "1 20.91 \n", - "2 39.98 \n", - "3 16.10 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Apply the conversion equations to the volumes reported in the literature\n", - "volumes['Fry et al.'] = volumes['Mean cell volume (µm^3)']*310\n", - "volumes['Simon and Azam'] = 88.1*volumes['Mean cell volume (µm^3)']**0.59\n", - "volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the values from different studies using the same conversion equation to generate a characteristic carbon content for each conversion method." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic carbon content of a single bacterial or archaeal cell in the marine deep subsurface based on cell volume converted using the conversion equation from Fry et al. 
is ≈32 fg C cell^-1\n", - "\n", - "The characteristic carbon content of a single bacterial or archaeal cell in the marine deep subsurface based on cell volume converted using the conversion equation from Simon & Azam is ≈27 fg C cell^-1\n" - ] - } - ], - "source": [ - "fry_volume_mean = gmean(volumes['Fry et al.'])\n", - "sa_volume_mean = gmean(volumes['Simon and Azam'])\n", - "\n", - "print('The characteristic carbon content of a single bacterial or archaeal cell in the marine deep subsurface based on cell volume converted using the conversion equation from Fry et al. is ≈%.0f fg C cell^-1\\n' %fry_volume_mean)\n", - "print('The characteristic carbon content of a single bacterial or archaeal cell in the marine deep subsurface based on cell volume converted using the conversion equation from Simon & Azam is ≈%.0f fg C cell^-1' %sa_volume_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We compute the geometric mean of the characteristic values from the two volume to carbon content conversion methods and use it as our best estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface, based on volume measurements." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best volume-based estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is 30 fg C cell^-1\n" - ] - } - ], - "source": [ - "vol_best_carbon_content = gmean([fry_volume_mean,sa_volume_mean])\n", - "print('Our best volume-based estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is %.0f fg C cell^-1' %vol_best_carbon_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Amino acid-based estimate\n", - "We rely on the study by Braun et al., which measured carobon content of bacterial and archaeal cells in the marine deep subsurface based on amino acid carbon mass, and assuming ≈55% of the carbon mass of single cells is stored in amino acids. Here are the values reported by Braun et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Depth (m)Carbon content (fg C cell-1)
00.4019
12.7526
24.3229
39.5731
414.5521
520.5314
638.9517
\n", - "
" - ], - "text/plain": [ - " Depth (m) Carbon content (fg C cell-1)\n", - "0 0.40 19\n", - "1 2.75 26\n", - "2 4.32 29\n", - "3 9.57 31\n", - "4 14.55 21\n", - "5 20.53 14\n", - "6 38.95 17" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "aa_based = pd.read_excel('marine_deep_subsurface_prok_carbon_content_data.xlsx', 'Amino acid based', skiprows=1)\n", - "aa_based" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geometric mean of the values reported by Braun et al. as our best estimate for the amino acid-based estimate of the carbon content of bacterial and archaeal cells in the marine deep subsurface." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best amino acid-based estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is 22 fg C cell^-1\n" - ] - } - ], - "source": [ - "aa_best_carbon_content = gmean(aa_based['Carbon content (fg C cell-1)'])\n", - "\n", - "print('Our best amino acid-based estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is %.0f fg C cell^-1' %aa_best_carbon_content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface, we use the geometric mean of the volume-based and amino acid-based estimates." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is 25 fg C cell^-1\n" - ] - } - ], - "source": [ - "best_estimate = gmean([vol_best_carbon_content,aa_best_carbon_content])\n", - "print('Our best estimate for the carbon content of bacterial and archaeal cells in the marine deep subsurface is %.0f fg C cell^-1' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To calculate the uncertainty associated with the estimate for the total number of of bacteria and archaea in the marine deep subsurface, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty. \n", - "\n", - "## Volume-based\n", - "\n", - "### intra-study uncertainty\n", - "For the volume based approaches, we had data on intra-study uncertainty only for the Braun et al. study. We calculate the intra study uncertainty of the volumes reported in Braun et al. by calculating the 95% confidence interval of the values reported in Braun et al." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty for Braun et al. is ≈1.5-fold\n" - ] - } - ], - "source": [ - "vol_braun_intra_CI = geo_CI_calc(braun_weighted_average)\n", - "print('The intra-study uncertainty for Braun et al. 
is ≈%.1f-fold' %vol_braun_intra_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interstudy uncertainty\n", - "As a measure of the interstudy uncertainty, we compare the 95% confidence interval for the geometric mean of the carbon content from different studies, using the same conversion method.\n", - "We also use the 95% confidence interval for the geometric mean of the carbon content estimates from the two different conversion methods (Fry et al. and Simon & Azam) as a measure of interstudy uncertainty." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the geometric mean of carbon content using the conversion method of Fry et al. is ≈2.2-fold\n", - "The interstudy uncertainty of the geometric mean of carbon content using the conversion method of Simon & Azam is ≈1.6-fold\n", - "The interstudy uncertainty of the geometric mean of carbon content between conversion methods is ≈1.2-fold\n" - ] - } - ], - "source": [ - "carbon_content_fry_CI = geo_CI_calc(volumes['Fry et al.'])\n", - "carbon_content_sa_CI = geo_CI_calc(volumes['Simon and Azam'])\n", - "print('The interstudy uncertainty of the geometric mean of carbon content using the conversion method of Fry et al. 
is ≈%.1f-fold' %carbon_content_fry_CI)\n", - "print('The interstudy uncertainty of the geometric mean of carbon content using the conversion method of Simon & Azam is ≈%.1f-fold' %carbon_content_sa_CI)\n", - "\n", - "carbon_content_vol_CI = geo_CI_calc([fry_volume_mean,sa_volume_mean])\n", - "print('The interstudy uncertainty of the geometric mean of carbon content between conversion methods is ≈%.1f-fold' %carbon_content_vol_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Amino acid-based\n", - "\n", - "### Intra-study uncertainty\n", - "We calculate the 95% confidence interval of the geometric mean of values for the carbon content from Braun et al. as a measure of the intra-study uncertainty." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of amino acid-based carbon content estimates from Braun et al. is ≈1.2-fold\n" - ] - } - ], - "source": [ - "aa_intra_CI = geo_CI_calc(aa_based['Carbon content (fg C cell-1)'])\n", - "print('The intra-study uncertainty of amino acid-based carbon content estimates from Braun et al. is ≈%.1f-fold' %aa_intra_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "As another measure of uncertainty we calculate the 95% confidence interval of the geometric mean of the estimates for carbon content calculated using either the volume-based method or the amino acid-based method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-method uncertainty for the caron content of bacretial and archaeal cells in the marine deep subsurface is ≈1.4-fold\n" - ] - } - ], - "source": [ - "inter_method_CI = geo_CI_calc([vol_best_carbon_content,aa_best_carbon_content])\n", - "print('The intra-method uncertainty for the caron content of bacretial and archaeal cells in the marine deep subsurface is ≈%.1f-fold' %inter_method_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the highest uncertainty among this collection, which is ≈2.2-fold, as our best projection of the uncertainty associated with our estimate of the carbon content of bacterial and archaeal cells in the marine deep subsurface.\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Carbon content of bacterial and archaeal cells in the marine deep subsurface: 25 fg C\n", - "Uncertainty associated with the carbon content of bacterial and archaeal cells in the marine deep subsurface: 2.2-fold\n" - ] - } - ], - "source": [ - "# Take the maximal uncetainty as our best projection of uncertainty\n", - "mul_CI = np.max([inter_method_CI,aa_intra_CI,carbon_content_vol_CI,carbon_content_fry_CI,carbon_content_sa_CI,vol_braun_intra_CI])\n", - "\n", - "print('Carbon content of bacterial and archaeal cells in the marine deep subsurface: %.0f fg C' % best_estimate)\n", - "print('Uncertainty associated with the carbon content of bacterial and archaeal cells in the marine deep subsurface: %.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Carbon content of 
bacterial and archaeal cells in the marine deep subsurface',\n", - " 'Value': int(best_estimate),\n", - " 'Units': 'fg C cell^-1',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine_deep_subsurface/cell_num/.ipynb_checkpoints/marine_deep_subsurface_prok_cell_num-checkpoint.ipynb b/bacteria_archaea/marine_deep_subsurface/cell_num/.ipynb_checkpoints/marine_deep_subsurface_prok_cell_num-checkpoint.ipynb deleted file mode 100644 index a4397de..0000000 --- a/bacteria_archaea/marine_deep_subsurface/cell_num/.ipynb_checkpoints/marine_deep_subsurface_prok_cell_num-checkpoint.ipynb +++ /dev/null @@ -1,214 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "from scipy.stats import gmean\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of bacteria and archaea in the marine deep subsurface\n", - "In order to estimate the total number of cells of bacteria and archaea in the marine deep subsurface, we rely of estimates from two studies - [Parkes et al.](http://www.sciencedirect.com/science/article/pii/S0025322714000425), and [Kallmeyer et 
al.](http://dx.doi.org/10.1073/pnas.1203849109). Our best esimate for the total number of cells of bacteria and archaea in the marine deep subsurface is the geometric mean of the estimates by Parkes et al. and Kallmeyer et al." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of cells of bacteria and archaea the marine deep subsurface is 4.0e+29.\n" - ] - } - ], - "source": [ - "#Kallmeyer et al. estimate ≈2.9×10^29 cells in the marine deep subsurface\n", - "kallmeyer = 2.9e29\n", - "#Parkes et al. estimate ≈5.4×10^29 cells in the marine deep subsurface\n", - "parkes = 5.4e29\n", - "\n", - "best_estimate = gmean([kallmeyer,parkes])\n", - "print('Our best estimate for the total number of cells of bacteria and archaea the marine deep subsurface is %.1e.' % best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "To calculate the uncertainty associated with the estimate for the total number of of bacteria and archaea in the marine deep subsurface, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty. \n", - "\n", - "## Intra-study uncertainty\n", - "We survey the uncertainties reported in Parkes et al. and Kallmeyer et al. for their estimates of the total number of cells in the marine deep subsurface.\n", - "Parkes et al. reports a 95% confidence interval of $1.95×10^{29}-4.35×10^{30}$ with a mean of $8.65×10^{29}$ excluding contribution from Ocean Gyre sites, and a mean of $5.39×10^{29}$. 
As we do not know the 95% confidence interval of the estimate including Ocean Gyre sites, we use the 95% confidence interval from the estimate excluding Ocean Gyre sites as a measure of the intra-study uncertainty of the estimate of the total number of bacteria and archaea in the marine deep subsurface. We report uncertainty as a multiplicative factor, so we calculate the fold change of the minimal and maximal values in the 95% confidence interval relative to the mean estiamte and use this value as the intra-study uncertainty of Parkes et al." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the estimate by Parkes et al. is 4.7-fold\n" - ] - } - ], - "source": [ - "parkes_low = 1.95e29\n", - "parkes_high = 4.34e30\n", - "parkes_mean_ex_ocean_gyre = 8.65e29\n", - "\n", - "# We calculate the fold change in the minimum and maximum of the 95% confidence interval relative to the mean estimate\n", - "parkes_high_mul_CI = parkes_high/parkes_mean_ex_ocean_gyre\n", - "parkes_low_mul_CI = parkes_mean_ex_ocean_gyre/parkes_low\n", - "\n", - "# We use the average of the fold changes as our estimate for the intra-study uncertainty of the estimate by Parkes et al.\n", - "parkes_mul_CI = np.mean([parkes_high_mul_CI,parkes_low_mul_CI])\n", - "\n", - "print('The intra-study uncertainty of the estimate by Parkes et al. is %.1f-fold' % parkes_mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Kallmeyer et al., report an estimate of ≈$2.9×10^{29}$ cells of bacteria and archaea in the marine deep subsurface. Kallmeyer et al. bootstrap the parameters of the model for estimating the total number of cells, which results in a distribution of estimates for the total number of cells. Kallmeyer et al. report a range of one standard deviation from the mean estimate of ≈$1.2×10^{29}-≈8×10^{29}$. 
We use this range as a measure of the intra-study uncertainty of the estimate by Kallmeyer et al. As Kallmeyer et al. only reports a standard deviation range, we convert this range to 95% multiplicative confidence interval by calculating the fold change of the minimum and maximum of the range relative to the average estimate by Kallmeyer et al., and taking this fold change to the power of 1.96 to move from one standard deviation to 95% confidence interval." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the estimate by Kallmeyer et al. is 6.4-fold\n" - ] - } - ], - "source": [ - "kallmeyer_low = 1.2e29\n", - "kallmeyer_high = 8e29\n", - "\n", - "# We calculate the fold change in the minimum and maximum of the standard deviation range relative to the mean estimate\n", - "kallmeyer_high_mul_std = (kallmeyer_high/kallmeyer)\n", - "kallmeyer_low_mul_std = (kallmeyer/kallmeyer_low)\n", - "\n", - "# We use the average of the fold changes as our estimate for the intra-study uncertainty of the estimate by Parkes et al.\n", - "kallmeyer_mul_CI = np.mean([kallmeyer_high_mul_std,kallmeyer_low_mul_std])**1.96\n", - "print('The intra-study uncertainty of the estimate by Kallmeyer et al. is %.1f-fold' % kallmeyer_mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interstudy uncetainty\n", - "We calculate the 95% confidence interval of the geometric mean of the estimates by Parkes et al. and Kallmeyer et al. as a measure of the interstudy uncertainty." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the geometric mean of the estimates by Parkes et al. and Kallmeyer et al. 
is 1.8-fold\n" - ] - } - ], - "source": [ - "inter_mul_CI = geo_CI_calc(np.array([parkes,kallmeyer]))\n", - "\n", - "print('The interstudy uncertainty of the geometric mean of the estimates by Parkes et al. and Kallmeyer et al. is %.1f-fold' % inter_mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the highest uncertainty among the intra-study and interstudy uncertainties as our projection of the uncertainty associated with the estimate of the total number of cells of bacteria and archaea in the marine deep subsurface. Are final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total number of bacteria and archaea in the marine deep subsurface: 4.0e+29\n", - "Uncertainty associated with the total number of bacteria and archaea in the marine deep subsurface: 6.4-fold\n" - ] - } - ], - "source": [ - "# Take the maximal uncetainty as our best projection of uncertainty\n", - "mul_CI = np.max([parkes_mul_CI,kallmeyer_mul_CI,inter_mul_CI])\n", - "\n", - "print('Total number of bacteria and archaea in the marine deep subsurface: %.1e' % best_estimate)\n", - "print('Uncertainty associated with the total number of bacteria and archaea in the marine deep subsurface: %.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total number of bacteria and archaea in the marine deep subsurface',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Cells',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../marine_deep_subsurface_prok_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/marine_deep_subsurface/marine_deep_subsurface_prok_biomass_estimate_OLD.xlsx b/bacteria_archaea/marine_deep_subsurface/marine_deep_subsurface_prok_biomass_estimate_OLD.xlsx deleted file mode 100644 index 8a109ad..0000000 Binary files a/bacteria_archaea/marine_deep_subsurface/marine_deep_subsurface_prok_biomass_estimate_OLD.xlsx and /dev/null differ diff --git a/bacteria_archaea/soil/.ipynb_checkpoints/soil_arch_bac_ratio-checkpoint.ipynb b/bacteria_archaea/soil/.ipynb_checkpoints/soil_arch_bac_ratio-checkpoint.ipynb deleted file mode 100644 index dd40c97..0000000 --- a/bacteria_archaea/soil/.ipynb_checkpoints/soil_arch_bac_ratio-checkpoint.ipynb +++ /dev/null @@ -1,884 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of archaea out of the total soil prokaryote population\n", - "In order to estimate the fraction of archaea out of the total population of soil bacteria and archaea, we rely of four independent methods: fluorescent in-situ hybridization (FISH), CARD-FISH and 16S rDNA sequencing and 16S rDNA qPCR.\n", - "\n", - "## FISH-based estimate\n", - "In order to estimate the fraction of archaea out of the total biomass of soil bacteria and archae, we collected data from several studies. 
Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOISiteHabitatFraction of archaeaRemarks
0Pozdnyakov et al.http://dx.doi.org/10.3103/S0147687411010042RussiaCropland0.069000Taken from figure 4
1Dedysh et al.http://aem.asm.org/content/72/3/2110.shortRussiaTundra0.181208Taken from Table 1 – total FISH counts very lo...
2Dedysh et al.http://aem.asm.org/content/72/3/2110.shortRussiaTundra0.149606Taken from Table 1 – total FISH counts very lo...
3Dedysh et al.http://aem.asm.org/content/72/3/2110.shortRussiaTundra0.274074Taken from Table 1 – total FISH counts very lo...
4Dedysh et al.http://aem.asm.org/content/72/3/2110.shortRussiaTundra0.302632Taken from Table 1 – total FISH counts very lo...
\n", - "
" - ], - "text/plain": [ - " Reference DOI Site \\\n", - "0 Pozdnyakov et al. http://dx.doi.org/10.3103/S0147687411010042 Russia \n", - "1 Dedysh et al. http://aem.asm.org/content/72/3/2110.short Russia \n", - "2 Dedysh et al. http://aem.asm.org/content/72/3/2110.short Russia \n", - "3 Dedysh et al. http://aem.asm.org/content/72/3/2110.short Russia \n", - "4 Dedysh et al. http://aem.asm.org/content/72/3/2110.short Russia \n", - "\n", - " Habitat Fraction of archaea \\\n", - "0 Cropland 0.069000 \n", - "1 Tundra 0.181208 \n", - "2 Tundra 0.149606 \n", - "3 Tundra 0.274074 \n", - "4 Tundra 0.302632 \n", - "\n", - " Remarks \n", - "0 Taken from figure 4 \n", - "1 Taken from Table 1 – total FISH counts very lo... \n", - "2 Taken from Table 1 – total FISH counts very lo... \n", - "3 Taken from Table 1 – total FISH counts very lo... \n", - "4 Taken from Table 1 – total FISH counts very lo... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load FISH data\n", - "FISH_data = pd.read_excel('soil_bac_arch_data.xlsx','FISH')\n", - "FISH_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We Calculate the geometric mean of the fractions for each study in each habitat to generate characteristic values for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "FISH_study_mean = FISH_data.groupby(['Habitat','DOI'])['Fraction of archaea'].apply(frac_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the geometric mean between different studies in the same habitat to generate characteristic values for each habitat:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "FISH_habitat_mean = FISH_study_mean.groupby('Habitat').apply(frac_mean)" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "Finally, we calculate the geometric mean between the characteristic values in each habitat as our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on FISH:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on FISH is ≈22 percent\n" - ] - } - ], - "source": [ - "FISH_mean = frac_mean(FISH_habitat_mean)\n", - "\n", - "print('Our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on FISH is ≈%.0f percent' %(FISH_mean*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CARD-FISH-based estimate\n", - "In order to estimate the fraction of archaea out of the total biomass of soil bacteria and archae, we collected data from several studies. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOISiteHabitatFraction of archaeaRemarks
0Sheibani et al.http://dx.doi.org/10.4141/cjss2012-040CanadaCropland0.096000NaN
1Eickhorst & Tippkotterhttp://dx.doi.org/10.1016/j.soilbio.2008.03.024GermanyCropland0.200000NaN
2Eickhorst & Tippkotterhttp://dx.doi.org/10.1016/j.soilbio.2008.03.024GermanyCropland0.129730NaN
3Eickhorst & Tippkotterhttp://dx.doi.org/10.1016/j.soilbio.2008.03.024GermanyCropland0.266332NaN
4Schmidt & Eickhorsthttp://dx.doi.org/10.1016/j.apsoil.2013.06.002ChinaCropland0.190000NaN
\n", - "
" - ], - "text/plain": [ - " Reference DOI \\\n", - "0 Sheibani et al. http://dx.doi.org/10.4141/cjss2012-040 \n", - "1 Eickhorst & Tippkotter http://dx.doi.org/10.1016/j.soilbio.2008.03.024 \n", - "2 Eickhorst & Tippkotter http://dx.doi.org/10.1016/j.soilbio.2008.03.024 \n", - "3 Eickhorst & Tippkotter http://dx.doi.org/10.1016/j.soilbio.2008.03.024 \n", - "4 Schmidt & Eickhorst http://dx.doi.org/10.1016/j.apsoil.2013.06.002 \n", - "\n", - " Site Habitat Fraction of archaea Remarks \n", - "0 Canada Cropland 0.096000 NaN \n", - "1 Germany Cropland 0.200000 NaN \n", - "2 Germany Cropland 0.129730 NaN \n", - "3 Germany Cropland 0.266332 NaN \n", - "4 China Cropland 0.190000 NaN " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load CARD-FISH data\n", - "CARDFISH_data = pd.read_excel('soil_bac_arch_data.xlsx','CARD-FISH')\n", - "CARDFISH_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We Calculate the geometric mean of the fractions for each study in each habitat to generate characteristic values for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "CARDFISH_study_mean = CARDFISH_data.groupby('DOI')['Fraction of archaea'].apply(frac_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we calculate the geometric mean between the characteristic values in each study as our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on CARD-FISH:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on CARD-FISH is ≈19 percent\n" - ] - } - ], - "source": [ - "CARDFISH_mean = frac_mean(CARDFISH_study_mean)\n", - 
"print('Our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea based on CARD-FISH is ≈%.0f percent' %(CARDFISH_mean*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 16S rDNA sequencing-based estimate\n", - "For our 16S rDNA sequencing-based estimate, we rely on a study which reported values for the fraction of archaea out of the total population of soil bacteria and archaea in 146 soils from across the globe ([Bates et al.](http://dx.doi.org/10.1038/ismej.2010.171)). We calculate the geometric mean of values within each biome, and then calculate the geometric mean of the characteristic values of each biome. We account for the lower rRNA operon copy number in archaea ([Sun et al.](http://dx.doi.org/10.1128/AEM.01282-13)) by multiplying the measured fractions by a factor of 2. " - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# 16S sequencing data from Bates et al. corrected for lower operon copy number\n", - "bates_data = pd.read_excel('soil_bac_arch_data.xlsx','bates',skiprows=1)\n", - "\n", - "# Calculate the average fraction of archaea out of the total biomass of soil bacteria and archaea\n", - "# Correct for the lower rDNA operon content in archaea\n", - "seq = frac_mean(bates_data.groupby('Biome')['Fraction of archaea'].apply(frac_mean))*2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 16S rDNA qPCR-based estimate\n", - "For our 16S qPCR-based estimate, we rely on a recent study which reported the fraction of archaea out of the total population of soil bacteria and archaea in grasslands, forests and croplands in Korea ([Hong & Cho](http://dx.doi.org/10.1371/journal.pone.0133763)). The mean fraction of archaea reported by Hong & Cho is ≈3%." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# qPCR data from Hong & Cho\n", - "qpcr = 0.027" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geomtric mean of our estimates from the four different methods as our best estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of archaea out of the total biomass of soil bacteria and archaea is ≈7 percent\n" - ] - } - ], - "source": [ - "best_frac = frac_mean(np.array([seq,qpcr,FISH_mean,CARDFISH_mean]))\n", - "\n", - "print('Our best estimate for the fraction of archaea out of the total biomass of soil bacteria and archaea is ≈%.0f percent' %(best_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply the fraction of archaea out of the total biomass of soil bacteria and archaea by our estimate for the total biomass of soil bacteria and archaea to estimate the total biomass of soil archaea:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of soil archaea is ≈0.5 Gt C\n", - "Our best estimate for the total biomass of soil bacteria is ≈7.4 Gt C\n" - ] - } - ], - "source": [ - "# Load fungi biomass estimate\n", - "fungi_biomass_estimate = pd.read_excel('../../fungi/fungi_biomass_estimate.xlsx')\n", - "\n", - "# In case soil fungi biomass estimate is empty, run the scripts \n", - "if(fungi_biomass_estimate.shape[0]==0):\n", - " import os\n", - " os.system('jupyter nbconvert --execute ../../fungi/soil_microbial_biomass/soil_microbial_biomass.ipynb' )\n", - " os.system('jupyter 
nbconvert --execute ../../fungi/fungi_fraction/fungi_fraction.ipynb' )\n", - " fungi_biomass_estimate = pd.read_excel('../../fungi/fungi_biomass_estimate.xlsx')\n", - "\n", - "# Calculate the total biomass of soil bacteria and archaea\n", - "soil_prok_biomass = fungi_biomass_estimate['Value'][0]*(1-fungi_biomass_estimate['Value'][1])\n", - "\n", - "# Calculate the total biomass of soil archaea\n", - "best_soil_arch_biomass = soil_prok_biomass*best_frac\n", - "\n", - "# Calculate the total biomass of soil bacteria\n", - "best_soil_bac_biomass = soil_prok_biomass*(1-best_frac)\n", - "\n", - "print('Our best estimate for the total biomass of soil archaea is ≈%.1f Gt C' %(best_soil_arch_biomass/1e15))\n", - "print('Our best estimate for the total biomass of soil bacteria is ≈%.1f Gt C' %(best_soil_bac_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "We collect uncertainties associated with our estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea at different levels - intra-study uncertainty, inter-study uncertainty, inter-habitat uncertainty, and inter-method uncertainty. We use the highest uncertainty out of this collection as our best projection for the uncertainty associated with our estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Intra-study uncertainty\n", - "We calculate the 95% confidence interval of the geometric mean of values within each study. The cases in which we have multiple measurements within the same study are FISH, CARD-FISH and 16S rDNA sequencing. For 16S rDNA qPCR we rely on the standard deviation reported in Hong & Cho.\n", - "### FISH\n", - "We calculate the 95% confidence interval of the geometric mean of values within each study.
We use the maximal uncertainty as our best projection of the intra-study uncertainty of studies using FISH:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈2.3-fold.\n", - "Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈1.4-fold.\n" - ] - } - ], - "source": [ - "FISH_intra_arch_CI = FISH_data.groupby(['Habitat','DOI'])['Fraction of archaea'].apply(frac_CI)\n", - "FISH_data['Fraction of bacteria'] = 1 - FISH_data['Fraction of archaea']\n", - "FISH_intra_bac_CI = FISH_data.groupby(['Habitat','DOI'])['Fraction of bacteria'].apply(frac_CI)\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' %FISH_intra_arch_CI.max())\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' %FISH_intra_bac_CI.max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### CARD-FISH\n", - "We calculate the 95% confidence interval of the geometric mean of values within each study. 
We use the maximal uncertainty as our best projection of the intra-study uncertainty of studies using CARD-FISH:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on CARD-FISH is ≈1.5-fold.\n", - "Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on CARD-FISH is ≈1.1-fold.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - } - ], - "source": [ - "CARDFISH_data['Fraction of bacteria'] = 1 - CARDFISH_data['Fraction of archaea']\n", - "CARDFISH_intra_arch_CI = CARDFISH_data.groupby('DOI')['Fraction of archaea'].apply(frac_CI)\n", - "CARDFISH_intra_bac_CI = CARDFISH_data.groupby('DOI')['Fraction of bacteria'].apply(frac_CI)\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on CARD-FISH is ≈%.1f-fold.' %CARDFISH_intra_arch_CI.max())\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on CARD-FISH is ≈%.1f-fold.' 
%CARDFISH_intra_bac_CI.max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 16S rDNA sequencing\n", - "We calculate the 95% confidence interval of the geometric mean of values reported in Bates et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on 16S rDNA sequencing is ≈1.3-fold.\n", - "Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on 16S rDNA sequencing is ≈1.002-fold.\n" - ] - } - ], - "source": [ - "bates_data['Fraction of bacteria'] = 1 - bates_data['Fraction of archaea']\n", - "seq_intra_arch_CI = frac_CI(bates_data['Fraction of archaea'])\n", - "seq_intra_bac_CI = frac_CI(bates_data['Fraction of bacteria'])\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on 16S rDNA sequencing is ≈%.1f-fold.' %seq_intra_arch_CI)\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on 16S rDNA sequencing is ≈%.3f-fold.' %seq_intra_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 16S rDNA qPCR\n", - "We rely on the standard deviation reported in Hong & Cho of 1.5%. 
We use 1.96 standard deviations as our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of soil bacteria and archaea:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on 16S rDNA qPCR is ≈2.1-fold.\n", - "Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on 16S rDNA qPCR is ≈1.03-fold.\n" - ] - } - ], - "source": [ - "# Calculate the multiplicative error using 1.96 standard deviations to approximate 95%\n", - "# confidence interval\n", - "qpcr_intra_arch_CI = (qpcr+1.96*0.015)/qpcr\n", - "qpcr_intra_bac_CI = ((1-qpcr)+1.96*0.015)/(1-qpcr)\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on 16S rDNA qPCR is ≈%.1f-fold.' %qpcr_intra_arch_CI)\n", - "print('Our best projection of the intra-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on 16S rDNA qPCR is ≈%.2f-fold.' %qpcr_intra_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-study uncertainty\n", - "For our FISH and CARD-FISH-based estimates, we rely on several studies. We calculate the 95% confidence interval around the geometric mean of the values from different studies within the same habitat as our best projection of the inter-study uncertainty associated with the fraction of archaea out of the total biomass of soil bacteria and archaea."
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the inter-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈3.7-fold.\n", - "Our best projection of the inter-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈1.3-fold.\n", - "Our best projection of the inter-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on CARD-FISH is ≈1.7-fold.\n", - "Our best projection of the inter-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on CARD-FISH is ≈1.1-fold.\n" - ] - } - ], - "source": [ - "FISH_interstudy_arch_CI = FISH_study_mean.groupby('Habitat').apply(frac_CI)\n", - "FISH_interstudy_bac_CI = (1-FISH_study_mean).groupby('Habitat').apply(frac_CI)\n", - "\n", - "print('Our best projection of the inter-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' %FISH_interstudy_arch_CI.max())\n", - "print('Our best projection of the inter-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' 
%FISH_interstudy_bac_CI.max())\n", - "\n", - "CARDFISH_interstudy_arch_CI = frac_CI(CARDFISH_study_mean)\n", - "CARDFISH_interstudy_bac_CI = frac_CI(1-CARDFISH_study_mean)\n", - "\n", - "print('Our best projection of the inter-study uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on CARD-FISH is ≈%.1f-fold.' %CARDFISH_interstudy_arch_CI.max())\n", - "print('Our best projection of the inter-study uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on CARD-FISH is ≈%.1f-fold.' %CARDFISH_interstudy_bac_CI.max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-habitat uncertainty\n", - "For the FISH-based estimate, we also rely on characteristic values in different habitats to estimate the fraction of archaea of of the total biomass of soil bacteria and archaea. We calculate the 95% confidence interval around the geometric mean of the characteristic values from different habitats as a measure of the inter-habitat uncertainty associated with our estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the inter-habitat uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈1.7-fold.\n", - "Our best projection of the inter-habitat uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈1.2-fold.\n" - ] - } - ], - "source": [ - "FISH_inter_habitat_arch_CI = frac_CI(FISH_habitat_mean)\n", - "FISH_inter_habitat_bac_CI = frac_CI(1-FISH_habitat_mean)\n", - "\n", - "print('Our best projection of the inter-habitat uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' %FISH_inter_habitat_arch_CI)\n", - "print('Our best projection of the inter-habitat uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea based on FISH is ≈%.1f-fold.' 
%FISH_inter_habitat_bac_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the 95% confidence interval around the geometric mean of the estimates from the four different methods as our best projection of the inter-method uncertainty associated with the fraction of archaea out of the total biomass of soil bacteria and archaea." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the inter-method uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea is ≈4.0-fold.\n", - "Our best projection of the inter-method uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea is ≈1.1-fold.\n" - ] - } - ], - "source": [ - "inter_arch_CI = frac_CI(np.array([seq,qpcr,FISH_mean,CARDFISH_mean]))\n", - "inter_bac_CI = frac_CI(1-np.array([seq,qpcr,FISH_mean,CARDFISH_mean]))\n", - "print('Our best projection of the inter-method uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea is ≈%.1f-fold.' %inter_arch_CI)\n", - "print('Our best projection of the inter-method uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea is ≈%.1f-fold.' %inter_bac_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximal uncertainty out of the collection of uncertainties calculated above as our best projection of the uncertainty associated with the estimate of the fraction of archaea out of the total biomass of soil bacteria and archaea."
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea is ≈4.0-fold.\n", - "Our best projection of the uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea is ≈1.4-fold.\n" - ] - } - ], - "source": [ - "\n", - "arch_frac_mul_CI = np.max([FISH_intra_arch_CI.max(),CARDFISH_intra_arch_CI.max(),seq_intra_arch_CI,qpcr_intra_arch_CI,FISH_interstudy_arch_CI.max(),CARDFISH_interstudy_arch_CI.max(),FISH_inter_habitat_arch_CI,inter_arch_CI])\n", - "bac_frac_mul_CI = np.max([FISH_intra_bac_CI.max(),CARDFISH_intra_bac_CI.max(),seq_intra_bac_CI,qpcr_intra_bac_CI,FISH_interstudy_bac_CI.max(),CARDFISH_interstudy_bac_CI.max(),FISH_inter_habitat_bac_CI,inter_bac_CI])\n", - "\n", - "print('Our best projection of the uncertainty associated with the fraction of archaea out of the total biomass of bacteria and archaea is ≈%.1f-fold.' %arch_frac_mul_CI)\n", - "print('Our best projection of the uncertainty associated with the fraction of bacteria out of the total biomass of bacteria and archaea is ≈%.1f-fold.' %bac_frac_mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the uncertainty associated with the fraction of archaea out of the total biomass of soil bacteria and archaea with the uncertainty of the total biomass of soil bacteria and archaea as our best projection of the uncertainty associated with the our estimate of the total biomass of soil archaea." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection of the uncertainty associated with our estimate of the biomass of soil archaea is ≈6.4-fold.\n", - "Our best projection of the uncertainty associated with our estimate of the biomass of soil bacteria is ≈3.6-fold.\n" - ] - } - ], - "source": [ - "# Our best projection for the uncertainty associated with the total biomass of soil bacteria and archaea\n", - "soil_prok_CI = CI_prod_prop(np.array([fungi_biomass_estimate['Uncertainty'][0],fungi_biomass_estimate['Uncertainty'][1]]))\n", - "\n", - "# Combine the uncertainty of the total biomass of soil prokaryotes with the uncertainty\n", - "# of the matching fraction: the archaeal fraction for the archaeal biomass and the\n", - "# bacterial fraction for the bacterial biomass\n", - "arch_mul_CI = CI_prod_prop(np.array([arch_frac_mul_CI,soil_prok_CI]))\n", - "bac_mul_CI = CI_prod_prop(np.array([bac_frac_mul_CI,soil_prok_CI]))\n", - "\n", - "print('Our best projection of the uncertainty associated with our estimate of the biomass of soil archaea is ≈%.1f-fold.' %arch_mul_CI)\n", - "print('Our best projection of the uncertainty associated with our estimate of the biomass of soil bacteria is ≈%.1f-fold.' %bac_mul_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of soil bacterial and archaeal cells\n", - "To estimate the total number of bacterial and archaeal cells in soils, we divide our estimates for the total biomass of soil bacteria and archaea by the characteristic carbon content of a single cell. We use a characteristic carbon content of a single cell of ≈30 fg C cell$^{-1}$, based on [Bakken](http://aem.asm.org/content/49/6/1482.abstract)."
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Characteristic carbon content of bacterial and archaeal cells in soil from Bakken\n", - "carbon_content = 30e-15\n", - "\n", - "# Calculate the total number of bacterial and archaeal cells in the soil\n", - "tot_bac_num = best_soil_bac_biomass/carbon_content\n", - "tot_arch_num = best_soil_arch_biomass/carbon_content" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed bacteria results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Bacteria','Soil'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_soil_bac_biomass/1e15,bac_mul_CI],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Archaea','Soil'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[best_soil_arch_biomass/1e15,arch_mul_CI],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed bacteria results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Bacteria','Soil'), \n", - " col=['Number of individuals'],\n", - " values= tot_bac_num,\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Archaea','Soil'), \n", - " col=['Number of individuals'],\n", - " values= tot_arch_num,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - 
"nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_biomass_estimate-checkpoint.ipynb b/bacteria_archaea/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_biomass_estimate-checkpoint.ipynb deleted file mode 100644 index 1996b6f..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_biomass_estimate-checkpoint.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of terrestrial deep subsurface archaea and bacteria\n", - "\n", - "We use our best estimates for the total biomass of terrestrial deep subsurface prokaryotes and the fraction of archaea and bacteria out of the total population of terrestrial deep subsurface prokaryotes to estimate the total biomass of terrestrial deep subsurface bacteria and archaea." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Total biomass of bacteria and archaea in the t...6.2e+16g C2.0e+01
1Fraction of archaea6.0e-02Unitless1.7e+01
2Fraction of bacteria9.4e-01Unitless1.6e+00
3Carbon content of a single cell2.6e-14g Cnan
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units \\\n", - "0 Total biomass of bacteria and archaea in the t... 6.2e+16 g C \n", - "1 Fraction of archaea 6.0e-02 Unitless \n", - "2 Fraction of bacteria 9.4e-01 Unitless \n", - "3 Carbon content of a single cell 2.6e-14 g C \n", - "\n", - " Uncertainty \n", - "0 2.0e+01 \n", - "1 1.7e+01 \n", - "2 1.6e+00 \n", - "3 nan " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_excel('terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply all the relevant parameters to arrive at our best estimate for the biomass of terrestrial deep subsurface archaea and bacteria, and propagate the uncertainties associated with each parameter to calculate the uncertainty associated with the estimate for the total biomass." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of terrestrial deep subsurface archaea is 4 Gt C\n", - "Our best estimate for the total biomass of terrestrial deep subsurface bacteria is 58 Gt C\n", - "The uncertainty associated with the estimate for the biomass of archaea is 62.0-fold\n", - "The uncertainty associated with the estimate for the biomass of bacteria is 20.7-fold\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of terrestrial deep subsurface archaea and bacteria\n", - "total_arch_biomass = results['Value'][0]*results['Value'][1]\n", - "total_bac_biomass = results['Value'][0]*results['Value'][2]\n", - "\n", - "print('Our best estimate for the total biomass of terrestrial deep subsurface archaea is %.0f Gt C' %(total_arch_biomass/1e15))\n", - "print('Our best estimate for the total biomass of terrestrial deep subsurface bacteria is %.0f Gt C' %(total_bac_biomass/1e15))\n", - "\n", - "#
Propagate the uncertainty associated with each parameter to the final estimate\n", - "\n", - "arch_biomass_uncertainty = CI_prod_prop(results['Uncertainty'][:2])\n", - "bac_biomass_uncertainty = CI_prod_prop(results.iloc[[0,2]]['Uncertainty'])\n", - "\n", - "print('The uncertainty associated with the estimate for the biomass of archaea is %.1f-fold' %arch_biomass_uncertainty)\n", - "print('The uncertainty associated with the estimate for the biomass of bacteria is %.1f-fold' %bac_biomass_uncertainty)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed bacteria results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Bacteria','Terrestrial deep subsurface'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_bac_biomass/1e15,bac_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Archaea','Terrestrial deep subsurface'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[total_arch_biomass/1e15,arch_biomass_uncertainty],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed bacteria results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Bacteria','Terrestrial deep subsurface'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][2]/results['Value'][3],\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed archaea results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Archaea','Terrestrial deep subsurface'), \n", - " col=['Number of individuals'],\n", - " values= results['Value'][0]*results['Value'][1]/results['Value'][3],\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-checkpoint.ipynb b/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-checkpoint.ipynb deleted file mode 100644 index fab9789..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-checkpoint.ipynb +++ /dev/null @@ -1,751 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from fraction_helper import *\n", - "\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of archaea out of the total terrestrial deep subsurface prokaryote population\n", - "\n", - "In order to estimate the fraction of archaea out of the total population of terrestrial deep subsurface bacteria and archaea, we rely on three sources of data. Two of those sources are measurements made in the terrestrial deep subsurface of the fraction of archaea using two independent methods: 16S rDNA sequencing and quantitative PCR (qPCR). For each method we collect several studies which used the method to measure the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface. We calculate the geometric means of samples within each study.
We then calculate the geometric mean of the average estimates from each study using the same method to generate a characteristic estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface for each method. \n", - "\n", - "## 16S rDNA sequencing-based estimate\n", - "For our 16S rDNA sequencing-based estimate we rely on data from [Rempfert et al.](http://dx.doi.org/10.3389/fmicb.2017.00056), [Lau et al.](http://dx.doi.org/10.1073/pnas.1612244113), [Osburn et al.](http://dx.doi.org/10.3389/fmicb.2014.00610), and [Simkus et al.](http://dx.doi.org/10.1016/j.gca.2015.10.003). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyLinkWellYearArchaea fractionDepthRock typeLocationRemarks
0Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420144.3e-031.8e+01PeridotiteOmanCalculated from Table 3
1Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420157.0e-042.0e+01PeridotiteOmanCalculated from Table 3
2Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420162.8e-017.0e+01PeridotiteOmanCalculated from Table 3
3Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056WAB5620153.8e-021.2e+01PeridotiteOmanCalculated from Table 3
4Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056WAB5620163.7e-035.0e+01PeridotiteOmanCalculated from Table 3
\n", - "
" - ], - "text/plain": [ - " Study Link Well Year \\\n", - "0 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2014 \n", - "1 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2015 \n", - "2 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2016 \n", - "3 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 WAB56 2015 \n", - "4 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 WAB56 2016 \n", - "\n", - " Archaea fraction Depth Rock type Location Remarks \n", - "0 4.3e-03 1.8e+01 Peridotite Oman Calculated from Table 3 \n", - "1 7.0e-04 2.0e+01 Peridotite Oman Calculated from Table 3 \n", - "2 2.8e-01 7.0e+01 Peridotite Oman Calculated from Table 3 \n", - "3 3.8e-02 1.2e+01 Peridotite Oman Calculated from Table 3 \n", - "4 3.7e-03 5.0e+01 Peridotite Oman Calculated from Table 3 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Define a function that will calculate the geometric mean of fractions for each bin of a groupby\n", - "def frac_geo_mean_groupby(input):\n", - " return frac_mean(input['Archaea fraction'])\n", - "\n", - "# Define a function that will calculate the CI of geometric mean of fractions for each bin of a groupby\n", - "def frac_CI_groupby(input):\n", - " return frac_CI(input['Archaea fraction'])\n", - "\n", - "\n", - "seq_data = pd.read_excel('terrestrial_deep_subsurface_arch_frac_data.xlsx','16S rDNA sequencing')\n", - "seq_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archea for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Lau et al. 1.9e-02\n", - "Osburn et al. 6.3e-02\n", - "Rempfert et al. 2.6e-02\n", - "Simkus et al. 
1.5e-02\n", - "dtype: float64" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq_bin = seq_data.groupby('Study')\n", - "\n", - "seq_study_mean = seq_bin.apply(frac_geo_mean_groupby)\n", - "seq_study_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the average fractions from each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 2.6%\n" - ] - } - ], - "source": [ - "seq_mean = frac_mean(seq_study_mean)\n", - "print('The characteristic 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(seq_mean*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## qPCR-based estimate\n", - "For our qPCR-based estimate we rely on data from [Purkamo et al.](https://helda.helsinki.fi/handle/10138/165462), [Takai et al.](http://dx.doi.org/10.1128/AEM.67.21.5750-5760.2001), and [Bomberg et al.](http://dx.doi.org/10.5194/bg-13-6031-2016). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyLinkWellYearArchaea fractionDepthRock typeLocationRemarks
0Purkamo et al.https://helda.helsinki.fi/handle/10138/1654621802009-20111.2e-03180Mica schist, biotite gneissOutokumpu, FinlandCalculated from Table 2
1Purkamo et al.https://helda.helsinki.fi/handle/10138/1654625002009-20114.6e-05500Chlorite–sericite schistOutokumpu, FinlandCalculated from Table 2
2Purkamo et al.https://helda.helsinki.fi/handle/10138/1654629672009-20113.9e-03967Mica schist, chlorite–sericite schistOutokumpu, FinlandCalculated from Table 2
3Purkamo et al.https://helda.helsinki.fi/handle/10138/16546222602009-20112.5e-022260Biotite gneissOutokumpu, FinlandCalculated from Table 2
4Takai et al.http://dx.doi.org/10.1128/AEM.67.21.5750-5760....F1NaN1.8e-012700DriefonteinWitwatersrand Basin, South Africa.From first paragraph of Archaeal rDNA abundanc...
\n", - "
" - ], - "text/plain": [ - " Study Link Well \\\n", - "0 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 180 \n", - "1 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 500 \n", - "2 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 967 \n", - "3 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 2260 \n", - "4 Takai et al. http://dx.doi.org/10.1128/AEM.67.21.5750-5760.... F1 \n", - "\n", - " Year Archaea fraction Depth Rock type \\\n", - "0 2009-2011 1.2e-03 180 Mica schist, biotite gneiss \n", - "1 2009-2011 4.6e-05 500 Chlorite–sericite schist \n", - "2 2009-2011 3.9e-03 967 Mica schist, chlorite–sericite schist \n", - "3 2009-2011 2.5e-02 2260 Biotite gneiss \n", - "4 NaN 1.8e-01 2700 Driefontein \n", - "\n", - " Location \\\n", - "0 Outokumpu, Finland \n", - "1 Outokumpu, Finland \n", - "2 Outokumpu, Finland \n", - "3 Outokumpu, Finland \n", - "4 Witwatersrand Basin, South Africa. \n", - "\n", - " Remarks \n", - "0 Calculated from Table 2 \n", - "1 Calculated from Table 2 \n", - "2 Calculated from Table 2 \n", - "3 Calculated from Table 2 \n", - "4 From first paragraph of Archaeal rDNA abundanc... " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qpcr_data = pd.read_excel('terrestrial_deep_subsurface_arch_frac_data.xlsx','qPCR')\n", - "qpcr_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archea for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Bomberg et al. 4.3e-02\n", - "Purkamo et al. 1.5e-03\n", - "Takai et al. 
4.5e-02\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qpcr_bin = qpcr_data.groupby('Study')\n", - "\n", - "qpcr_study_mean = qpcr_bin.apply(frac_geo_mean_groupby)\n", - "qpcr_study_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the average fractions from each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 1.5%\n" - ] - } - ], - "source": [ - "qpcr_mean = frac_mean(qpcr_study_mean)\n", - "print('The characteristic qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(qpcr_mean*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Due to the scarcity of data in the terrestrial deep subsurface, we use as a third source of data our estimate for the fraction of archaea out of the total population of bacteria and archea in subseafloor sediments.\n", - "\n", - "Our best estimate for the fraction of archaea out of the total population of bacteria and archaea is the geometric mean of these three sources of data:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is 6%\n" - ] - } - ], - "source": [ - "# As a third data source we use our estimate for the fraction of archaea out of the total population of bacteria\n", - "# and archaea in subseafloor sediments.\n", - "subseafloor_sed_arch_frac = 0.35\n", - "\n", - "# 
Calculate the geometric mean of the three data sources\n", - "best_estimate = frac_mean(np.array([qpcr_mean, seq_mean, subseafloor_sed_arch_frac]))\n", - "\n", - "print('Our best estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is ' + '{:,.0f}%'.format(best_estimate*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "In order to assess the uncertainty associated with our estimate for the fraction of terrestrial deep subsurface archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface, we gather all possible indices of uncertainty. We compare the uncertainty of values within each one of the methods and the uncertainty stemming from the variability of the values provided by the two methods. \n", - "\n", - "## Intra-study uncertainty \n", - "### 16S rDNA sequencing-based method\n", - "We calculate the intra-study 95% confidence interval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using 16S rDNA sequencing." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:\n", - "Study\n", - "Lau et al. nan\n", - "Osburn et al. 2.0e+00\n", - "Rempfert et al. 2.4e+00\n", - "Simkus et al. 3.0e+00\n", - "dtype: float64\n", - "The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:\n", - "Study\n", - "Lau et al. nan\n", - "Osburn et al. 1.1e+00\n", - "Rempfert et al. 1.0e+00\n", - "Simkus et al. 
1.0e+00\n", - "dtype: float64\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - } - ], - "source": [ - "seq_arc_CI = seq_bin.apply(frac_CI_groupby)\n", - "\n", - "seq_data_bac = seq_data.copy()\n", - "seq_data_bac['Archaea fraction'] = 1.- seq_data_bac['Archaea fraction']\n", - "seq_bin_bac = seq_data_bac.groupby('Study')\n", - "seq_bac_CI = seq_bin_bac.apply(frac_CI_groupby)\n", - "\n", - "\n", - "print('The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:')\n", - "print(seq_arc_CI)\n", - "print('The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:')\n", - "print(seq_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### qPCR-based method\n", - "We calculate the intra-study 95% confidence interval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using qPCR." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:\n", - "Study\n", - "Bomberg et al. 3.7e+00\n", - "Purkamo et al. 1.3e+01\n", - "Takai et al. 1.7e+01\n", - "dtype: float64\n", - "The intra-study uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:\n", - "Study\n", - "Bomberg et al. 1.1e+00\n", - "Purkamo et al. 1.0e+00\n", - "Takai et al. 
1.5e+00\n", - "dtype: float64\n" - ] - } - ], - "source": [ - "qpcr_arc_CI = qpcr_bin.apply(frac_CI_groupby)\n", - "\n", - "qpcr_data_bac = qpcr_data.copy()\n", - "qpcr_data_bac['Archaea fraction'] = 1.- qpcr_data_bac['Archaea fraction']\n", - "qpcr_bin_bac = qpcr_data_bac.groupby('Study')\n", - "qpcr_bac_CI = qpcr_bin_bac.apply(frac_CI_groupby)\n", - "\n", - "\n", - "print('The intra-study uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:')\n", - "print(qpcr_arc_CI)\n", - "print('The intra-study uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:')\n", - "print(qpcr_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interstudy uncertainty \n", - "### 16S rDNA sequencing-based method\n", - "We calculate the interstudy 95% confidence interval for the geometric mean of the average values from each study for the fraction of archaea out of the total population of bacteria and archaea measured using 16S rDNA sequencing." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈1.9-fold\n", - "The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.0-fold\n" - ] - } - ], - "source": [ - "inter_seq_arc_CI = frac_CI(seq_study_mean)\n", - "inter_seq_bac_CI = frac_CI(1-seq_study_mean)\n", - "print('The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_seq_arc_CI)\n", - "print('The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_seq_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### qPCR-based method\n", - "We calculate the interstudy 95% confidence interval for the geometric mean of the average values from each study for the fraction of archaea out of the total population of bacteria and archaea measured using qPCR." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈8.7-fold\n", - "The interstudy uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.1-fold\n" - ] - } - ], - "source": [ - "inter_qpcr_arc_CI = frac_CI(qpcr_study_mean)\n", - "inter_qpcr_bac_CI = frac_CI(1-qpcr_study_mean)\n", - "print('The interstudy uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_qpcr_arc_CI)\n", - "print('The interstudy uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_qpcr_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the inter-method 95% confidence interval for the geometric mean of the estimates from the three different sources - the 16S rDNA sequencing-based estimate, the qPCR-based estimate and the estimate for the fraction of archaea out of the total population of bacteria and archaea in subseafloor sediments." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈7.3-fold\n", - "The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.2-fold\n" - ] - } - ], - "source": [ - "inter_method_arc_CI = frac_CI(np.array([seq_mean,qpcr_mean,subseafloor_sed_arch_frac]))\n", - "inter_method_bac_CI = frac_CI(1-np.array([seq_mean,qpcr_mean,subseafloor_sed_arch_frac]))\n", - "print('The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_method_arc_CI)\n", - "print('The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_method_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimates for the uncertainty associated with the fraction of archaea and bacteria out of the total population of terrestrial deep subsurface bacteria and archaea, we use the highest uncertainty out of the available set of uncertainties we collected.\n", - "\n", - "The highest uncertainty for the fraction of archaea is the intra-study uncertainty of the Takai et al. study, which is ≈20-fold. Similarly, the highest uncertainty for the fraction of bacteria is the intra-study uncertainty of the Takai et al. 
study, which is ≈1.5-fold.\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of archaea out of the total population of terrestrial deep subsurface bacteria and archaea: 6 percent\n", - "Fraction of bacteria out of the total population of terrestrial deep subsurface bacteria and archaea: 94 percent\n", - "Uncertainty associated with the fraction of terrestrial deep subsurface archaea: 17.4-fold\n", - "Uncertainty associated with the fraction of terrestrial deep subsurface bacteria: 1.5-fold\n" - ] - } - ], - "source": [ - "# Take the maximum uncertainty as our best projection of uncertainty\n", - "arc_mul_CI = np.max([seq_arc_CI.max(),qpcr_arc_CI.max(),inter_seq_arc_CI,inter_method_arc_CI])\n", - "bac_mul_CI = np.max([seq_bac_CI.max(),qpcr_bac_CI.max(),inter_seq_bac_CI,inter_qpcr_bac_CI,inter_method_bac_CI])\n", - "\n", - "print('Fraction of archaea out of the total population of terrestrial deep subsurface bacteria and archaea: %.0f percent' %(best_estimate*100))\n", - "print('Fraction of bacteria out of the total population of terrestrial deep subsurface bacteria and archaea: %.0f percent' %(100.-best_estimate*100))\n", - "print('Uncertainty associated with the fraction of terrestrial deep subsurface archaea: %.1f-fold' % arc_mul_CI)\n", - "print('Uncertainty associated with the fraction of terrestrial deep subsurface bacteria: %.1f-fold' % bac_mul_CI)\n", - "\n", - "old_results = pd.read_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(1), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Fraction of archaea',\n", - " 'Value': \"{0:.2f}\".format(best_estimate),\n", - " 'Units': 'Unitless',\n", - " 
'Uncertainty': \"{0:.1f}\".format(arc_mul_CI)\n", - " })\n", - "\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Fraction of bacteria',\n", - " 'Value': \"{0:.2f}\".format(1.0 - best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(bac_mul_CI)\n", - " })\n", - "\n", - "\n", - "result.to_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-median-checkpoint.ipynb b/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-median-checkpoint.ipynb deleted file mode 100644 index 63469a1..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/arch_bac_ratio/.ipynb_checkpoints/terrestrial_deep_subsurface_arch_frac-median-checkpoint.ipynb +++ /dev/null @@ -1,755 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from fraction_helper import *\n", - "\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of archaea out of the total marine deep subsurface prokaryote population\n", - "\n", - "In order to estimate the fraction of archaea out of the 
total population of terrestrial deep subsurface bacteria and archaea, we rely on three sources of data. Two of those sources are measurements made in the terrestrial deep subsurface of the fraction of archaea using two independent methods: 16S rDNA sequencing and quantitative PCR (qPCR). For each method we collect several studies which used the method to measure the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface. We calculate the geometric means of samples within each study. We then calculate the geometric mean of the average estimates from each study using the same method to generate a characteristic estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface for each method. \n", - "\n", - "## 16S rDNA sequencing-based estimate\n", - "For our 16S rDNA sequencing-based estimate we rely on data from [Rempfert et al.](http://dx.doi.org/10.3389/fmicb.2017.00056), [Lau et al.](http://dx.doi.org/10.1073/pnas.1612244113), [Osburn et al.](http://dx.doi.org/10.3389/fmicb.2014.00610), and [Simkus et al.](http://dx.doi.org/10.1016/j.gca.2015.10.003). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyLinkWellYearArchaea fractionDepthRock typeLocationRemarks
0Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420144.3e-031.8e+01PeridotiteOmanCalculated from Table 3
1Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420157.0e-042.0e+01PeridotiteOmanCalculated from Table 3
2Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056NSHQ1420162.8e-017.0e+01PeridotiteOmanCalculated from Table 3
3Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056WAB5620153.8e-021.2e+01PeridotiteOmanCalculated from Table 3
4Rempfert et al.http://dx.doi.org/10.3389/fmicb.2017.00056WAB5620163.7e-035.0e+01PeridotiteOmanCalculated from Table 3
\n", - "
" - ], - "text/plain": [ - " Study Link Well Year \\\n", - "0 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2014 \n", - "1 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2015 \n", - "2 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 NSHQ14 2016 \n", - "3 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 WAB56 2015 \n", - "4 Rempfert et al. http://dx.doi.org/10.3389/fmicb.2017.00056 WAB56 2016 \n", - "\n", - " Archaea fraction Depth Rock type Location Remarks \n", - "0 4.3e-03 1.8e+01 Peridotite Oman Calculated from Table 3 \n", - "1 7.0e-04 2.0e+01 Peridotite Oman Calculated from Table 3 \n", - "2 2.8e-01 7.0e+01 Peridotite Oman Calculated from Table 3 \n", - "3 3.8e-02 1.2e+01 Peridotite Oman Calculated from Table 3 \n", - "4 3.7e-03 5.0e+01 Peridotite Oman Calculated from Table 3 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Define a function that will calculate the geometric mean of fractions for each bin of a groupby\n", - "def frac_geo_mean_groupby(input):\n", - " return frac_mean(input['Archaea fraction'])\n", - "\n", - "# Define a function that will calculate the CI of geometric mean of fractions for each bin of a groupby\n", - "def frac_CI_groupby(input):\n", - " return frac_CI(input['Archaea fraction'])\n", - "\n", - "\n", - "seq_data = pd.read_excel('terrestrial_deep_subsurface_arch_frac_data.xlsx','16S rDNA sequencing')\n", - "seq_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archea for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "seq_bin = seq_data.groupby('Study')\n", - "\n", - "seq_study_mean = seq_bin.apply(frac_geo_mean_groupby)\n", - "seq_study_mean\n", - "\n", - "# Median\n", - "seq_study_median = 
seq_bin.median()['Archaea fraction']\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the average fractions from each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 2.6%\n", - "The median 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 2.3%\n" - ] - } - ], - "source": [ - "seq_mean = frac_mean(seq_study_mean)\n", - "#Median\n", - "seq_median = np.median(seq_study_mean)\n", - "print('The characteristic 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(seq_mean*100))\n", - "print('The median 16S rDNA sequencing-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(seq_median*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## qPCR-based estimate\n", - "For our qPCR-based estimate we rely on data from [Purkamo et al.](https://helda.helsinki.fi/handle/10138/165462), [Takai et al.](http://dx.doi.org/10.1128/AEM.67.21.5750-5760.2001), and [Bomberg et al.](http://dx.doi.org/10.5194/bg-13-6031-2016). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StudyLinkWellYearArchaea fractionDepthRock typeLocationRemarks
0Purkamo et al.https://helda.helsinki.fi/handle/10138/1654621802009-20111.2e-03180Mica schist, biotite gneissOutokumpu, FinlandCalculated from Table 2
1Purkamo et al.https://helda.helsinki.fi/handle/10138/1654625002009-20114.6e-05500Chlorite–sericite schistOutokumpu, FinlandCalculated from Table 2
2Purkamo et al.https://helda.helsinki.fi/handle/10138/1654629672009-20113.9e-03967Mica schist, chlorite–sericite schistOutokumpu, FinlandCalculated from Table 2
3Purkamo et al.https://helda.helsinki.fi/handle/10138/16546222602009-20112.5e-022260Biotite gneissOutokumpu, FinlandCalculated from Table 2
4Takai et al.http://dx.doi.org/10.1128/AEM.67.21.5750-5760....F1NaN1.8e-012700DriefonteinWitwatersrand Basin, South Africa.From first paragraph of Archaeal rDNA abundanc...
\n", - "
" - ], - "text/plain": [ - " Study Link Well \\\n", - "0 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 180 \n", - "1 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 500 \n", - "2 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 967 \n", - "3 Purkamo et al. https://helda.helsinki.fi/handle/10138/165462 2260 \n", - "4 Takai et al. http://dx.doi.org/10.1128/AEM.67.21.5750-5760.... F1 \n", - "\n", - " Year Archaea fraction Depth Rock type \\\n", - "0 2009-2011 1.2e-03 180 Mica schist, biotite gneiss \n", - "1 2009-2011 4.6e-05 500 Chlorite–sericite schist \n", - "2 2009-2011 3.9e-03 967 Mica schist, chlorite–sericite schist \n", - "3 2009-2011 2.5e-02 2260 Biotite gneiss \n", - "4 NaN 1.8e-01 2700 Driefontein \n", - "\n", - " Location \\\n", - "0 Outokumpu, Finland \n", - "1 Outokumpu, Finland \n", - "2 Outokumpu, Finland \n", - "3 Outokumpu, Finland \n", - "4 Witwatersrand Basin, South Africa. \n", - "\n", - " Remarks \n", - "0 Calculated from Table 2 \n", - "1 Calculated from Table 2 \n", - "2 Calculated from Table 2 \n", - "3 Calculated from Table 2 \n", - "4 From first paragraph of Archaeal rDNA abundanc... " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qpcr_data = pd.read_excel('terrestrial_deep_subsurface_arch_frac_data.xlsx','qPCR')\n", - "qpcr_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of archaea out of the total population of bacteria and archea for each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Study\n", - "Bomberg et al. 6.5e-02\n", - "Purkamo et al. 2.5e-03\n", - "Takai et al. 
1.8e-01\n", - "Name: Archaea fraction, dtype: float64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qpcr_bin = qpcr_data.groupby('Study')\n", - "\n", - "qpcr_study_mean = qpcr_bin.apply(frac_geo_mean_groupby)\n", - "qpcr_study_mean\n", - "\n", - "# Median\n", - "qpcr_study_median = qpcr_bin.median()['Archaea fraction']\n", - "qpcr_study_median" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the average fractions from each study:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The characteristic qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 1.5%\n", - "The median qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is 6.5%\n" - ] - } - ], - "source": [ - "qpcr_mean = frac_mean(qpcr_study_mean)\n", - "#Median\n", - "qpcr_median = qpcr_study_median.median()\n", - "print('The characteristic qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(qpcr_mean*100))\n", - "print('The median qPCR-based fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep susurface is ' + '{:,.1f}%'.format(qpcr_median*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Due to the scarcity of data in the terrestrial deep subsurface, we use as a third source of data our estimate for the fraction of archaea out of the total population of bacteria and archea in subseafloor sediments.\n", - "\n", - "Our best estimate for the fraction of archaea out of the total population of bacteria and archaea is the geometric mean of these three sources of data:" - ] - }, - { - 
"cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is 6%\n", - "Our median estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is 7%\n" - ] - } - ], - "source": [ - "# As a third data source we use our estimate for the fraction of archaea out of the total population of bacteria\n", - "# and archaea in subseafloor sediments.\n", - "subseafloor_sed_arch_frac = 0.35\n", - "\n", - "# Calculate the geometric mean of the three data sources\n", - "best_estimate = frac_mean(np.array([qpcr_mean, seq_mean, subseafloor_sed_arch_frac]))\n", - "\n", - "#Median\n", - "best_estimate_median = np.median([qpcr_median,seq_median,subseafloor_sed_arch_frac])\n", - "\n", - "print('Our best estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is ' + '{:,.0f}%'.format(best_estimate*100))\n", - "print('Our median estimate for the fraction of archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface is ' + '{:,.0f}%'.format(best_estimate_median*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "In order to assess the uncertainty associated with our estimate for the fraction of marine archaea out of the total population of bacteria and archaea in the terrestrial deep subsurface, we gather all possible indices of uncertainty. We compare the uncertainty of values within each one of the methods and the uncertainty stemming from the variability of the values provided by the two methods. 
\n", - "\n", - "## Intra-study uncertainty \n", - "### 16S rDNA sequencing-based method\n", - "We calculate the intra-study 95% confidence interval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using 16S rDNA sequencing." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:\n", - "Study\n", - "Lau et al. nan\n", - "Osburn et al. 2.0e+00\n", - "Rempfert et al. 2.4e+00\n", - "Simkus et al. 3.0e+00\n", - "dtype: float64\n", - "The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:\n", - "Study\n", - "Lau et al. nan\n", - "Osburn et al. 1.1e+00\n", - "Rempfert et al. 1.0e+00\n", - "Simkus et al.
1.0e+00\n", - "dtype: float64\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4291: RuntimeWarning: Invalid value encountered in percentile\n", - " interpolation=interpolation)\n" - ] - } - ], - "source": [ - "seq_arc_CI = seq_bin.apply(frac_CI_groupby)\n", - "\n", - "seq_data_bac = seq_data.copy()\n", - "seq_data_bac['Archaea fraction'] = 1.- seq_data_bac['Archaea fraction']\n", - "seq_bin_bac = seq_data_bac.groupby('Study')\n", - "seq_bac_CI = seq_bin_bac.apply(frac_CI_groupby)\n", - "\n", - "\n", - "print('The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:')\n", - "print(seq_arc_CI)\n", - "print('The intra-study uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:')\n", - "print(seq_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### qPCR-based method\n", - "We calculate the intra-study 95% confidence inteval for the geometric mean of the values for the fraction of archaea out of the total population of bacteria and archaea measured using qPCR." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:\n", - "Study\n", - "Bomberg et al. 3.7e+00\n", - "Purkamo et al. 1.3e+01\n", - "Takai et al. 1.7e+01\n", - "dtype: float64\n", - "The intra-study uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:\n", - "Study\n", - "Bomberg et al. 1.1e+00\n", - "Purkamo et al. 1.0e+00\n", - "Takai et al. 
1.6e+00\n", - "dtype: float64\n" - ] - } - ], - "source": [ - "qpcr_arc_CI = qpcr_bin.apply(frac_CI_groupby)\n", - "\n", - "qpcr_data_bac = qpcr_data.copy()\n", - "qpcr_data_bac['Archaea fraction'] = 1.- qpcr_data_bac['Archaea fraction']\n", - "qpcr_bin_bac = qpcr_data_bac.groupby('Study')\n", - "qpcr_bac_CI = qpcr_bin_bac.apply(frac_CI_groupby)\n", - "\n", - "\n", - "print('The intra-study uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea are:')\n", - "print(qpcr_arc_CI)\n", - "print('The intra-study uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea are:')\n", - "print(qpcr_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interstudy uncertainty \n", - "### 16S rDNA sequencing-based method\n", - "We calculate the interstudy 95% confidence inteval for the geometric mean of the average values from each study for the fraction of archaea out of the total population of bacteria and archaea measured using 16S rDNA sequencing." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈1.8-fold\n", - "The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.0-fold\n" - ] - } - ], - "source": [ - "inter_seq_arc_CI = frac_CI(seq_study_mean)\n", - "inter_seq_bac_CI = frac_CI(1-seq_study_mean)\n", - "print('The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_seq_arc_CI)\n", - "print('The interstudy uncertainty of the 16S rDNA sequencing-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_seq_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### qPCR-based method\n", - "We calculate the interstudy 95% confidence inteval for the geometric mean of the average values from each study for the fraction of archaea out of the total population of bacteria and archaea measured using qPCR." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈8.6-fold\n", - "The interstudy uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.1-fold\n" - ] - } - ], - "source": [ - "inter_qpcr_arc_CI = frac_CI(qpcr_study_mean)\n", - "inter_qpcr_bac_CI = frac_CI(1-qpcr_study_mean)\n", - "print('The interstudy uncertainty of the qPCR-based estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_qpcr_arc_CI)\n", - "print('The interstudy uncertainty of the qPCR-based estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_qpcr_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "We calculate the inter-method 95% confidence interval for the geometric mean of the estimates from the three different sources - the 16S rDNA sequencing-based estimate, the qPCR-based estimate and the estimate for the fraction of archaea out of the total population of bacteria and archaea in subseafloor sediments."
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈7.3-fold\n", - "The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈1.3-fold\n" - ] - } - ], - "source": [ - "inter_method_arc_CI = frac_CI(np.array([seq_mean,qpcr_mean,subseafloor_sed_arch_frac]))\n", - "inter_method_bac_CI = frac_CI(1-np.array([seq_mean,qpcr_mean,subseafloor_sed_arch_frac]))\n", - "print('The inter-method uncertainty of the estimate of the fraction of archaea out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_method_arc_CI)\n", - "print('The inter-method uncertainty of the estimate of the fraction of bacteria out of the population of bacteria nad archaea is ≈%.1f-fold' % inter_method_bac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimates for the uncertainty associated with the fraction of archaea and bacteria out of the total population of terrestrial deep subsurface bacteria and archaea, we use the highest uncertainty out of the available set of uncertainties we collected.\n", - "\n", - "The highest uncertainty for the fraction of archaea is the intra-study uncertainty of the Takai et al. study, which is ≈20-fold. Similarly, the highest uncertainty for the fraction of bacteria is the intra-study uncertainty of the Takai et al.
study, which is ≈1.5-fold.\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of archaea out of the total population of terrestrial deep subsurface bacteria and archaea: 6 percent\n", - "Fraction of bacteria out of the total population of terrestrial deep subsurface bacteria and archaea: 94 percent\n", - "Uncertainty associated with the fraction of marine archaea: 17.5-fold\n", - "Uncertainty associated with the fraction of marine bacteria: 1.6-fold\n" - ] - } - ], - "source": [ - "# Take the maximum uncertainty as our best projection of uncertainty\n", - "arc_mul_CI = np.max([seq_arc_CI.max(),qpcr_arc_CI.max(),inter_seq_arc_CI,inter_method_arc_CI])\n", - "bac_mul_CI = np.max([seq_bac_CI.max(),qpcr_bac_CI.max(),inter_seq_bac_CI,inter_qpcr_bac_CI,inter_method_bac_CI])\n", - "\n", - "print('Fraction of archaea out of the total population of terrestrial deep subsurface bacteria and archaea: %.0f percent' %(best_estimate*100))\n", - "print('Fraction of bacteria out of the total population of terrestrial deep subsurface bacteria and archaea: %.0f percent' %(100.-best_estimate*100))\n", - "print('Uncertainty associated with the fraction of marine archaea: %.1f-fold' % arc_mul_CI)\n", - "print('Uncertainty associated with the fraction of marine bacteria: %.1f-fold' % bac_mul_CI)\n", - "\n", - "old_results = pd.read_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(1), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Fraction of archaea',\n", - " 'Value': \"{0:.2f}\".format(best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(arc_mul_CI)\n", - " })\n", - "\n", - "result.loc[2] = 
pd.Series({\n", - " 'Parameter': 'Fraction of bacteria',\n", - " 'Value': \"{0:.2f}\".format(1.0 - best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(bac_mul_CI)\n", - " })\n", - "\n", - "\n", - "result.to_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-checkpoint.ipynb b/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-checkpoint.ipynb deleted file mode 100644 index 8606531..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-checkpoint.ipynb +++ /dev/null @@ -1,532 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "from scipy.optimize import curve_fit\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of bacteria and archaea in the terrestrial deep subsurface\n", - "This notebook details the procedure for estimating the total biomass of of prokaryotes (bacteria and archaea) in the terrestrial deep subsurface. 
Our estimate is based on the data on cell concentration in the terrestrial deep subsurface collected by [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196), as well as data on the global volume of groundwater from [Gleeson et al.](http://dx.doi.org/10.1038/ngeo2590).\n", - "\n", - "## Number of cells\n", - "To estimate the total number of cells of bacteria and archaea in the terrestrial deep subsurface, we follow a similar methodology to that detailed in McMahon & Parnell. We use ≈100 measurements of cell concentration in groundwater samples from depths of 0-2000 m. We bin the samples based on their depths to 250 meter bins. For each bin we calculate both the arithmetic and geometric means. Depth bins missing from the data were extrapolated by using a regression equation that predicts the concentration of cells from the depth of the sample. This yields two estimates for the characteristic cell concentration at each depth bin." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " if sys.path[0] == '':\n", - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:45: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a
DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" - ] - } - ], - "source": [ - "\n", - "# Load original data from Figure 1 of McMahon & Parnell\n", - "mp_data = pd.read_excel('terrestrial_deep_subsurface_prok_biomass_data.xlsx', 'McMahon & Parnell', skiprows=1)\n", - "\n", - "# Define depth bins every 250 meter \n", - "bins = np.linspace(0,2000,9)\n", - "\n", - "# Filter deeper then 2km\n", - "mp_data_filt = mp_data[mp_data['Depth [m]'] < 2000]\n", - "\n", - "# Bin data based on depth bins\n", - "mp_data_filt['Depth bin'] = pd.cut(mp_data_filt['Depth [m]'],bins)\n", - "depth_binned = mp_data_filt.groupby('Depth bin')\n", - "\n", - "# Calculate the mean concentration at each depth bin\n", - "bin_mean = depth_binned['Cell concentration [cells mL-1]'].mean().dropna()\n", - "bin_geo_mean = depth_binned['Cell concentration [cells mL-1]'].apply(gmean)\n", - "\n", - "# To estimate the last bin (1750-2000), which doesn't have data, we either use the fit produced by McMahon & Parnell\n", - "# or fit a function to the geometric means\n", - "\n", - "# The fit of McMahon & Parnell from Figure 1\n", - "mp_fit = lambda x: np.exp(-(x-5771.2)/390.6)\n", - "\n", - "# Extrapolate the average cell concentration based on the fit by McMahon & Parnell\n", - "extrapolated_mean = pd.DataFrame({'Depth bin': '(1750.0, 2000.0]', 'Cell concentration [cells mL-1]': mp_fit(1875)},index=[0])\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "bin_mean = bin_mean.reset_index().append(extrapolated_mean,ignore_index=True).set_index('Depth bin')\n", - "\n", - "\n", - "# Define an exponential function to fit the data\n", - "def depth_func_log(x, a, b):\n", - " return np.log(a) - b*x\n", - "\n", - "# Fit the geometric means\n", - "xdata = bins[1:-1]-125\n", - "popt2, pcov2 = curve_fit(depth_func_log, xdata, np.log(bin_geo_mean[:-1]))\n", - "\n", - "# Extrapolate the geometric mean 
cell concentration based on the fit we calculated\n", - "extrapolated_geo_mean = np.exp(depth_func_log(1875, *popt2))\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "tmp = bin_geo_mean.reset_index()\n", - "tmp['Cell concentration [cells mL-1]'][7] = extrapolated_geo_mean\n", - "bin_geo_mean = tmp.set_index('Depth bin')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of cells in groundwater from the characteristic concentrations at each depth bin, we use estimates of the total volume of groundwater, and the fraction of the total groundwater at each depth bin.\n", - "\n", - "We rely on data from Gleeson et al., who estimate $≈2.2×10^{22}$ mL of groundwater in the top 2 km of the terrestrial crust. Gleeson et al. also estimate the fraction of the total volume of groundwater found at each depth. To estimate the fraction of groundwater found at each bin, we fit a function to the data provided in Figure 1 of Gleeson et al., and integrate it over the depth range of each depth bin. We then normalize the numbers by the integral over the entire 2000 meter range to calculate the fraction of water found at each depth bin.\n", - "\n", - "We multiply the average cell concentration at each bin by the total volume of groundwater at each bin, and sum over all bins to calculate the total number of cells in groundwater. We have two estimates for the total number of cells in groundwater, one based on arithmetic means of cell concentrations at each bin and the second based on geometric means."
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total of number of cells cells in groundwater based on arithmetic means of cell concentrations is 1e+28 cells.\n", - "Our estimate for the total of number of cells cells in groundwater based on geometric means of cell concentrations is 5e+27 cells.\n" - ] - } - ], - "source": [ - "\n", - "# Total volume of groundwater [mL], based on Gleeson et al.\n", - "tot_gw_vol = 2.26e22\n", - "\n", - "# Load data from Gleeson et al. on the distribution of groundwater with depth\n", - "gw_depth_dist = pd.read_excel('terrestrial_deep_subsurface_prok_biomass_data.xlsx', 'Gleeson', skiprows=1)\n", - "\n", - "# Generate functions to fit the data an calculate partial integrals\n", - "def func(x,a,b,c):\n", - " return a*np.exp(-b*x)+c\n", - "\n", - "def frac_func(x0,x1,a,b,c):\n", - " integral = lambda x: -a/b*np.exp(-b*x) + c*x\n", - " int_x = integral(x1) - integral(x0)\n", - " int_total = integral(2000) - integral(0)\n", - " fraction = int_x/int_total\n", - " return fraction\n", - "\n", - "# Fit the data with the fiting function\n", - "popt, pcov = curve_fit(func, gw_depth_dist['depth [m]'], gw_depth_dist['fraction'],bounds=(0, [0.2, 2., 0.5]))\n", - "\n", - "# Calculate the fraction of groundwater in each bin\n", - "depth_gw_fraction = []\n", - "lower_depth_range = np.linspace(0,1750,8)\n", - "upper_depth_range = np.linspace(250,2000,8)\n", - "for ix, y in enumerate(lower_depth_range):\n", - " depth_gw_fraction.append(frac_func(lower_depth_range[ix],upper_depth_range[ix], *popt))\n", - "depth_gw_fraction = np.array(depth_gw_fraction)\n", - "\n", - "\n", - "\n", - "cell_mean = (bin_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "cell_geo_mean = (bin_geo_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "\n", - "tot_cell_gw_mean = cell_mean*tot_gw_vol\n", - 
"tot_cell_gw_geo_mean = cell_geo_mean*tot_gw_vol\n", - "\n", - "print('Our estimate for the total of number of cells cells in groundwater based on arithmetic means of cell concentrations is %.0e cells.' %tot_cell_gw_mean)\n", - "print('Our estimate for the total of number of cells cells in groundwater based on geometric means of cell concentrations is %.0e cells.' %tot_cell_gw_geo_mean)\n", - "\n", - "# We need this data for also for estimating the total biomass of phages in the terrestrial deep subsurface,\n", - "# so we export these results as data for the section estimating the total number of phages in the \n", - "# terrestrial deep subsurface\n", - "writer = pd.ExcelWriter('../../../viruses/phage_num/terrestrial_deep_subsurface/terrestrial_deep_subsurface_prok_num.xlsx', engine='xlsxwriter') \n", - "\n", - "export_mean = bin_mean['Cell concentration [cells mL-1]'].reset_index()\n", - "export_mean['Depth bin'] = export_mean['Depth bin'].astype(str)\n", - "export_geo_mean = bin_geo_mean['Cell concentration [cells mL-1]'].reset_index()\n", - "export_geo_mean['Depth bin'] = export_geo_mean['Depth bin'].astype(str)\n", - "export_data_frame = export_mean.merge(export_geo_mean,on='Depth bin')\n", - "export_data_frame.columns = ['Depth bin [m]','Mean cell concentration [cells mL-1]','Geometric mean cell concentration [cells mL-1]']\n", - "export_data_frame.to_excel(writer,sheet_name='Cell concentration')\n", - "export_water_vol = pd.concat([export_data_frame['Depth bin [m]'].astype(str),pd.Series(depth_gw_fraction*tot_gw_vol)],axis=1)\n", - "export_water_vol.columns = ['Depth bin [m]','Water volume [mL]']\n", - "export_water_vol.iloc[7,0] = '(1750.0, 2000.0]'\n", - "export_water_vol.to_excel(writer,sheet_name='Water volume')\n", - "writer.save()\n", - "writer.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Most of the cells in the terrestrial subsurface are actually found attached to surfaces and not free-living in groundwater. 
McMahon & Parnell rely on data from the literature on the attached to unattached cell number ratio, and report a range of $10^2-10^3$ for this ratio. We use as our best estimate for this ratio the geometric mean of this range, which is roughly 300. Multiplying the total number of cells in groundwater by this ratio gives us an estimate for the total number of bacteria and archaea in the terrestrial deep subsurface. " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on arithmetic means of cell concentrations is 4e+30 cells.\n", - "Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on geometric means of cell concentrations is 2e+30 cells.\n" - ] - } - ], - "source": [ - "# Fraction of attached/unattached cells (geometric mean of 10^2 and 10^3)\n", - "attached_unattached_ratio = gmean([1e2,1e3])\n", - "\n", - "# Calculate the total number of cells in the terrestrial deep subsurface\n", - "tot_cell_num_mean = tot_cell_gw_mean*attached_unattached_ratio\n", - "tot_cell_num_geo_mean = tot_cell_gw_geo_mean*attached_unattached_ratio\n", - "\n", - "print('Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on arithmetic means of cell concentrations is %.0e cells.' %tot_cell_num_mean)\n", - "print('Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on geometric means of cell concentrations is %.0e cells.'
%tot_cell_num_geo_mean)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We generated two types of estimates for the total number of cells in the terrestrial deep subsurface: an estimate which uses the arithmetic mean of cell concentrations at each depth bin, and an estimate which uses the geometric mean of cell concentrations at each depth bin. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are contaminated with organic carbon sources, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total of number of cells cells in the terrestrial deep subsurface 2.4e+30 cells.\n" - ] - } - ], - "source": [ - "best_tot_cell_num = gmean([tot_cell_num_mean,tot_cell_num_geo_mean])\n", - "print('Our best estimate for the total of number of cells cells in the terrestrial deep subsurface %.1e cells.' %best_tot_cell_num)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Carbon content of a single prokaryote\n", - "McMahon & Parnell estimate the characteristic carbon content of single bacterial and archaeal cells in the terrestrial deep subsurface at 26 fg C cell$^{-1}$, based on carbon content of cultivated cells under starvation.
This value is consistent with our estimates for the carbon content of cells in the marine and marine deep subsurface environments.\n", - "\n", - "To estimate the total biomass of bacteria and archaea in the terrestrial deep subsurface, we multiply our best estimate for the total number of cells in the terrestrial deep subsurface by the characteristic carbon content of cells in the terrestrial deep subsurface. Our best estimate is ≈60 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of 62 Gt C\n" - ] - } - ], - "source": [ - "# The characteristic carbon content of a single prokaryote in the terrestrial deep subsurface\n", - "carb_content = 26e-15\n", - "\n", - "# Calculate the biomass of bacteria and archaea in the terrestrial deep subsurface\n", - "best_estimate = best_tot_cell_num*carb_content\n", - "print('We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of %.0f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty of our estimate of the total biomass of bacteria and archaea in the terrestrial deep subsurface, we calculate the uncertainty associated with each of the components of the estimate: the average cell concentration in groundwater, the total amount of groundwater, the ratio of attached to unattached cells, and the carbon content of single cells.\n", - "\n", - "## Average cell concentration\n", - "McMahon & Parnell do not supply an uncertainty estimate for the average concentration of cells in the terrestrial deep subsurface.
The only effect estimated by McMahon & Parnell related to the average concentration of cells was the effect of different compaction coefficients, which determine the relative fraction of water located at different depths, on the average cell concentration. By varying the compaction coefficient, McMahon & Parnell reported an effect of ≈30% on the average concentration of cells.\n", - "To calculate the uncertainty associated with the process of estimating average cell concentrations at each depth bin, we collect uncertainties stemming from different sources.\n", - "\n", - "### Intra-depth bin uncertainty\n", - "Based on the data of cell concentrations, we estimate the 95% confidence interval for the average cell concentration at each depth bin, and propagate this uncertainty to the total number of cells. We estimate the 95% confidence interval for both the arithmetic mean and geometric mean of the cell concentration at each depth bin.\n", - "We estimate the uncertainty around the estimate of cell concentration at each depth bin, and then propagate the uncertainty at each depth bin to the final estimate of the average cell concentration.
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the arithmetic mean of cell concentrations at each depth bin is ≈1.3-fold\n", - "The uncertainty associated with the geometric mean of cell concentrations at each depth bin is ≈1.4-fold\n" - ] - } - ], - "source": [ - "# Define a function that will estimate the 95% confidence interval for the arithmetic mean of each bin\n", - "def bin_se(input):\n", - " se = input['Cell concentration [cells mL-1]'].std(ddof=1)/np.sqrt(input.shape[0])\n", - " mean = input['Cell concentration [cells mL-1]'].mean()\n", - " return (1.96*se+mean)/mean\n", - "\n", - "# Define a function that will estimate the 95% confidence interval for the geometric mean of each bin\n", - "def bin_geo_CI_calc(input):\n", - " return geo_CI_calc(input['Cell concentration [cells mL-1]'])\n", - "\n", - "# Calculate the 95% confidence interval for the arithmetic mean at each bin\n", - "bin_mean_CI = depth_binned.apply(bin_se).dropna()\n", - "\n", - "# Calculate the 95% confidence interval for the geometric mean at each bin\n", - "bin_geo_mean_CI = depth_binned.apply(bin_geo_CI_calc).dropna()\n", - "\n", - "# Propoagate the uncertainty at each bin to the average cell concentration\n", - "av_conc_mean_CI = CI_sum_prop(bin_mean['Cell concentration [cells mL-1]'][:-1]*depth_gw_fraction[:-1],bin_mean_CI)\n", - "print('The uncertainty associated with the arithmetic mean of cell concentrations at each depth bin is ≈%.1f-fold' %av_conc_mean_CI)\n", - "\n", - "# Propoagate the uncertainty at each bin to the average cell concentration\n", - "av_conc_geo_mean_CI = CI_sum_prop(bin_geo_mean['Cell concentration [cells mL-1]'][:-1]*depth_gw_fraction[:-1],bin_geo_mean_CI)\n", - "print('The uncertainty associated with the geometric mean of cell concentrations at each depth bin is ≈%.1f-fold' %av_conc_geo_mean_CI)\n" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "For our best estimate of the total number of cells in the terrestrial deep subsurface, we used the geometric mean of the two estimates - the one based on arithmetic means of cells concentrations at each depth bin and the one based on the geometric mean of cell concentrations at each depth bin. We estimate the 95% confidence interval fo the geometric mean of these two estimates, which is ≈\n", - "We calculate an uncertainty of ≈1.3-fold from this source. Combining these two sources together, we estimate ≈1.4-fold uncertainty associated with the average concentration of cells of bacteria and archaea in the terrestrial deep subsurface." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total uncertainty of the geometric mean of our estimates based on the two different methodologies for calculating the average cell concentration at each depth bin is ≈2.3-fold\n" - ] - } - ], - "source": [ - "inter_method_CI = geo_CI_calc(np.array([tot_cell_num_mean,tot_cell_num_geo_mean]))\n", - "print('The total uncertainty of the geometric mean of our estimates based on the two different methodologies for calculating the average cell concentration at each depth bin is ≈%.1f-fold' %inter_method_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the average concentration of cells in groundwater, we take the maximum uncertainty from the intra-depth bin and inter-method uncertainties, which is ≈2.3-fold." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the average concentration of cell in groundwater is ≈2.3-fold\n" - ] - } - ], - "source": [ - "av_cell_CI = np.max([av_conc_mean_CI,av_conc_geo_mean_CI,inter_method_CI])\n", - "print('Our best projection for the uncertainty associated with the average concentration of cell in groundwater is ≈%.1f-fold' %av_cell_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total volume of groundwater\n", - "As a measure of the uncertainty associated with the total volume of groundwater, we use the range reported in Gleeson et al. of ≈2.2-fold. This range does not represent 95% confidence interval, but rather a 25% and 75% range. As no 95% confidence interval is available, we assume the distribution of estimates of the global volume of groundwater is nearly gaussian, and take about two standard deviations as our estimate for the 95% confidence interval. We calculate the fold change of the 95% confidence interval relative to the mean estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the uncertainty associated with the total volume of groundwater is ≈2-fold\n" - ] - } - ], - "source": [ - "# We take the lower and upper range reported by Gleeson et al.\n", - "lower_gleeson = 1.6e22\n", - "upper_gleeson = 3.0e22\n", - "\n", - "# Calculate the relative fold change of the 95% confidence interval\n", - "gw_upper_CI = upper_gleeson*1.96/tot_gw_vol\n", - "gw_lower_CI = lower_gleeson*1.96/tot_gw_vol\n", - "\n", - "# Our estimate for the 95% confidence interval is the mean of the upper and lower fold changes\n", - "gw_CI = np.mean([gw_upper_CI,gw_lower_CI])\n", - "\n", - "print('Our estimate for the uncertainty associated with the total volume of groundwater is ≈%.0f-fold' % gw_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ratio of attached to unattached cells\n", - "McMahon & Parnell report a range of $10^2-10^3$ for the attached to unattached cell ratio. As we chose to use the goemetric mean of this range for our estimate, we use the 95% confidence interval around the geometric mean of the two extremes of the range, as a measure of the uncertainty associated with the ratio of attached to unattached cells.\n", - "\n", - "## Carbon content of single cells\n", - "McMahon & Parnell do not suply an uncertainty analysis for the carbon content of single cells in the terrestrial deep subsurface. Our estimate for carbon content of subseafloor sediments is similar to the value used by McMahon & Parnell. Therefore, we use the values for the uncertainty associated with the carbon content of cells in subseafloor sediments as a measure of the uncertainty associated with the carbon content of cells in the terrestrial deep subsurface. The uncertainty we calculated for the carbon content of cell in subseafloor sediments is ≈2.2-fold." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈14-fold\n" - ] - } - ], - "source": [ - "attached_unattached_CI = geo_CI_calc(np.array([100,1000]))\n", - "\n", - "carbon_content_CI = 2.2\n", - "\n", - "mul_CI = CI_prod_prop(np.array([av_cell_CI,gw_CI,attached_unattached_CI,carbon_content_CI]))\n", - "print('The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈%.0f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Additional sources of uncertainty\n", - "Combining all the uncertainty of the factors above, we calculate an uncertainty of ≈14-fold in the biomass of bacteria and archaea in the terrestrial deep subsurface.\n", - "As we state in the Supplementary Information, there are other sources of uncertainty that for which we are not able to provide a quantitative estimate. The procedure of binning cell concentrations with depth and fitting an equation which extrapolates cell concentrations across all depths has uncertainty associated with it, and while we did calculate some uncertainty associated with this process, it probably does not represent the entire uncertainty associated with this process. The uncertainty stemming from possible contribution from groundwater deeper than 2 km is also hard to quantify, as the cell concentration at those depths and the volume of groundwater are poorly explored. We thus chose to project an uncertainty of ≈20-fold as our best projection of the uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total biomass of terrestrial deep subsurface bacteria and archaea: 62 Gt C\n", - "Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: 20-fold\n" - ] - } - ], - "source": [ - "# Modify the uncertainty of the estimate\n", - "mul_CI = 20\n", - "\n", - "print('Total biomass of terrestrial deep subsurface bacteria and archaea: %.0f Gt C' % (best_estimate/1e15))\n", - "print('Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total biomass of bacteria and archaea in the terrestrial deep subsurface',\n", - " 'Value': int(best_estimate),\n", - " 'Units': 'g C',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.loc[3] = pd.Series({\n", - " 'Parameter': 'Carbon content of a single cell',\n", - " 'Value': carb_content,\n", - " 'Units': 'g C',\n", - " 'Uncertainty': np.nan\n", - " })\n", - "\n", - "result.to_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-median-checkpoint.ipynb 
b/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-median-checkpoint.ipynb deleted file mode 100644 index b3eed2d..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass-median-checkpoint.ipynb +++ /dev/null @@ -1,591 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "from scipy.optimize import curve_fit\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of bacteria and archaea in the terrestrial deep subsurface\n", - "This notebook details the procedure for estimating the total biomass of of prokaryotes (bacteria and archaea) in the terrestrial deep subsurface. Our estimate is based on the data on cellconcentration in the terrestrial deep subsurface collected by [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196), as well as data on the global volume of groundwater from [Gleeson et al.](http://dx.doi.org/10.1038/ngeo2590).\n", - "\n", - "## Number of cells\n", - "To estimate the total number of cells of bacteria and archaea in the terrestrial deep subsurface, we follow a similar methodology to that detailed in McMahon & Parnell. We use ≈100 measurements of cell concentration in groundwater samples from depths of 0-2000 m. We bin the samples based on their depths to 250 meter bins. For each bin we calculate both the arithmetic and geometric means. Depth bins missing from the data were extrapolated by using a regression equation that predicts the concentration of cells from the depth of the sample. 
This yields two estimates for the characteristic cell concentration at each depth bin." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " if sys.path[0] == '':\n", - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:51: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:57: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" - ] - } - ], - "source": [ - "\n", - "# Load original data from Figure 1 of McMahon & Parnell\n", - "mp_data = pd.read_excel('terrestrial_deep_subsurface_prok_biomass_data.xlsx', 'McMahon & Parnell', skiprows=1)\n", - "\n", - "# Define depth bins every 250 meter \n", - "bins = np.linspace(0,2000,9)\n", - "\n", - "# Filter deeper then 2km\n", - "mp_data_filt = mp_data[mp_data['Depth [m]'] < 2000]\n", - "\n", 
- "# Bin data based on depth bins\n", - "mp_data_filt['Depth bin'] = pd.cut(mp_data_filt['Depth [m]'],bins)\n", - "depth_binned = mp_data_filt.groupby('Depth bin')\n", - "\n", - "# Calculate the mean concentration at each depth bin\n", - "bin_mean = depth_binned['Cell concentration [cells mL-1]'].mean().dropna()\n", - "bin_geo_mean = depth_binned['Cell concentration [cells mL-1]'].apply(gmean)\n", - "# Median\n", - "bin_median = depth_binned['Cell concentration [cells mL-1]'].median()\n", - "\n", - "# To estimate the last bin (1750-2000), which doesn't have data, we either use the fit produced by McMahon & Parnell\n", - "# or fit a function to the geometric means\n", - "\n", - "# The fit of McMahon & Parnell from Figure 1\n", - "mp_fit = lambda x: np.exp(-(x-5771.2)/390.6)\n", - "\n", - "# Extrapolate the average cell concentration based on the fit by McMahon & Parnell\n", - "extrapolated_mean = pd.DataFrame({'Depth bin': '(1750.0, 2000.0]', 'Cell concentration [cells mL-1]': mp_fit(1875)},index=[0])\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "bin_mean = bin_mean.reset_index().append(extrapolated_mean,ignore_index=True).set_index('Depth bin')\n", - "\n", - "\n", - "# Define an exponential function to fit the data\n", - "def depth_func_log(x, a, b):\n", - " return np.log(a) - b*x\n", - "\n", - "# Fit the geometric means\n", - "xdata = bins[1:-1]-125\n", - "popt2, pcov2 = curve_fit(depth_func_log, xdata, np.log(bin_geo_mean[:-1]))\n", - "#Median\n", - "popt3, pcov3 = curve_fit(depth_func_log, xdata, np.log(bin_median[:-1]))\n", - "\n", - "# Extrapolate the geometric mean cell concentration based on the fit we calculated\n", - "extrapolated_geo_mean = np.exp(depth_func_log(1875, *popt2))\n", - "#Median\n", - "extrapolated_median = np.exp(depth_func_log(1875, *popt3))\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "tmp = bin_geo_mean.reset_index()\n", - "tmp['Cell concentration [cells mL-1]'][7] = 
extrapolated_geo_mean\n", - "bin_geo_mean = tmp.set_index('Depth bin')\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "# Median\n", - "tmp = bin_median.reset_index()\n", - "tmp['Cell concentration [cells mL-1]'][7] = extrapolated_median\n", - "bin_median = tmp.set_index('Depth bin')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of cells in groundwater from the characteristic concentrations at each depth bin, we use estimates of the total volume of ground water, and the fraction of the total ground water at each depth bin.\n", - "\n", - "We rely of data from Gleeson et al. which estimate $≈2.2×10^{22}$ mL of groundwater in the top 2 km of the terrestrial crust. Glesson et al. also estimate the fraction of the total volume of groundwater found at each depth. To estimate the fraction of groundwater found at each bin, we fit a function to the data provided in Figure 1 of Gleeson et al., and integrate it over the depth range of each depth bin. We then normalize the numbers by the integral over the entire 2000 meter range to calculate the fraction of water found at each depth bin.\n", - "\n", - "We multiply the average cell concentration at each bin by the total volume of groundwater at each bin, and sum over all bins to calculate the total number of cells in groundwater. We have two estimates for the total number of cells in groundwater, one based on arithmetic means of cell concentrations at each bin and the second based on geometric means." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total of number of cells cells in groundwater based on arithmetic means of cell concentrations is 1e+28 cells.\n", - "Our estimate for the total of number of cells cells in groundwater based on geometric means of cell concentrations is 4.90e+27 cells.\n", - "Our estimate for the total of number of cells cells in groundwater based on median of cell concentrations is 4.62e+27 cells.\n" - ] - } - ], - "source": [ - "\n", - "# Total volume of groundwater [mL], based on Gleeson et al.\n", - "tot_gw_vol = 2.26e22\n", - "\n", - "# Load data from Gleeson et al. on the distribution of groundwater with depth\n", - "gw_depth_dist = pd.read_excel('terrestrial_deep_subsurface_prok_biomass_data.xlsx', 'Gleeson', skiprows=1)\n", - "\n", - "# Generate functions to fit the data an calculate partial integrals\n", - "def func(x,a,b,c):\n", - " return a*np.exp(-b*x)+c\n", - "\n", - "def frac_func(x0,x1,a,b,c):\n", - " integral = lambda x: -a/b*np.exp(-b*x) + c*x\n", - " int_x = integral(x1) - integral(x0)\n", - " int_total = integral(2000) - integral(0)\n", - " fraction = int_x/int_total\n", - " return fraction\n", - "\n", - "# Fit the data with the fiting function\n", - "popt, pcov = curve_fit(func, gw_depth_dist['depth [m]'], gw_depth_dist['fraction'],bounds=(0, [0.2, 2., 0.5]))\n", - "\n", - "# Calculate the fraction of groundwater in each bin\n", - "depth_gw_fraction = []\n", - "lower_depth_range = np.linspace(0,1750,8)\n", - "upper_depth_range = np.linspace(250,2000,8)\n", - "for ix, y in enumerate(lower_depth_range):\n", - " depth_gw_fraction.append(frac_func(lower_depth_range[ix],upper_depth_range[ix], *popt))\n", - "depth_gw_fraction = np.array(depth_gw_fraction)\n", - "\n", - "\n", - "\n", - "cell_mean = (bin_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "cell_geo_mean = 
(bin_geo_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "\n", - "#Median\n", - "cell_median = (bin_median['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "\n", - "tot_cell_gw_mean = cell_mean*tot_gw_vol\n", - "tot_cell_gw_geo_mean = cell_geo_mean*tot_gw_vol\n", - "\n", - "#Median\n", - "tot_cell_gw_median = cell_median*tot_gw_vol\n", - "\n", - "print('Our estimate for the total of number of cells cells in groundwater based on arithmetic means of cell concentrations is %.0e cells.' %tot_cell_gw_mean)\n", - "print('Our estimate for the total of number of cells cells in groundwater based on geometric means of cell concentrations is %.2e cells.' %tot_cell_gw_geo_mean)\n", - "#Median\n", - "print('Our estimate for the total of number of cells cells in groundwater based on median of cell concentrations is %.2e cells.' %tot_cell_gw_median)\n", - "\n", - "# We need this data for also for estimating the total biomass of phages in the terrestrial deep subsurface,\n", - "# so we export these results as data for the section estimating the total number of phages in the \n", - "# terrestrial deep subsurface\n", - "writer = pd.ExcelWriter('../../../viruses/phage_num/terrestrial_deep_subsurface/terrestrial_deep_subsurface_prok_num.xlsx', engine='xlsxwriter') \n", - "\n", - "export_mean = bin_mean['Cell concentration [cells mL-1]'].reset_index()\n", - "export_mean['Depth bin'] = export_mean['Depth bin'].astype(str)\n", - "export_geo_mean = bin_geo_mean['Cell concentration [cells mL-1]'].reset_index()\n", - "export_geo_mean['Depth bin'] = export_geo_mean['Depth bin'].astype(str)\n", - "export_data_frame = export_mean.merge(export_geo_mean,on='Depth bin')\n", - "export_data_frame.columns = ['Depth bin [m]','Mean cell concentration [cells mL-1]','Geometric mean cell concentration [cells mL-1]']\n", - "export_data_frame.to_excel(writer,sheet_name='Cell concentration')\n", - "export_water_vol = pd.concat([export_data_frame['Depth bin 
[m]'].astype(str),pd.Series(depth_gw_fraction*tot_gw_vol)],axis=1)\n", - "export_water_vol.columns = ['Depth bin [m]','Water volume [mL]']\n", - "export_water_vol.iloc[7,0] = '(1750.0, 2000.0]'\n", - "export_water_vol.to_excel(writer,sheet_name='Water volume')\n", - "writer.save()\n", - "writer.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Most of the cells in the terrestrial subsurface are actually found attached to surfaces and not free-living in groundwater. McMahon & Parnell rely on data from the literature of the attached to unattached cell number ratio, and report a range of $10^2-10^3$ for this range. We use as our best estimate for this ratio the geometric mean of this range, which is roughly 300. Multiplying the total number of cells in groundwater by this ratio gives us an estimate for the total number of bacteria and archaea in the terrestrial deep subsurface. " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on arithmetic means of cell concentrations is 4e+30 cells.\n", - "Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on geometric means of cell concentrations is 2e+30 cells.\n", - "Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on median of cell concentrations is 3e+30 cells.\n" - ] - }, - { - "data": { - "text/plain": [ - "550.0" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fraction of attached/unattached cells (geometric mean of 10^2 and 10^3)\n", - "attached_unattached_ratio = gmean([1e2,1e3])\n", - "#Median\n", - "attached_unattached_ratio_median = np.median([1e2,1e3])\n", - "\n", - "# Calculate the total number of cells in the terrestrial deep 
subsurface\n", - "tot_cell_num_mean = tot_cell_gw_mean*attached_unattached_ratio\n", - "tot_cell_num_geo_mean = tot_cell_gw_geo_mean*attached_unattached_ratio\n", - "#Median\n", - "tot_cell_num_median = tot_cell_gw_median*attached_unattached_ratio_median\n", - "print('Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on arithmetic means of cell concentrations is %.0e cells.' %tot_cell_num_mean)\n", - "print('Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on geometric means of cell concentrations is %.0e cells.' %tot_cell_num_geo_mean)\n", - "#Median\n", - "print('Our estimate for the total of number of cells cells in the terrestrial deep subsurface based on median of cell concentrations is %.0e cells.' %tot_cell_num_median)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We generated two types of estimates for the total number of cells in the terrestrial deep subsurface: an estimate which uses the arithmetic mean of cell concentrations at each depth bin, and an estimate which uses the geometric mean of cell concentrations at each depth bin. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are contaminated with organic carbon sources, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total of number of cells cells in the terrestrial deep subsurface 2.4e+30 cells.\n" - ] - } - ], - "source": [ - "best_tot_cell_num = gmean([tot_cell_num_mean,tot_cell_num_geo_mean])\n", - "print('Our best estimate for the total of number of cells cells in the terrestrial deep subsurface %.1e cells.' %best_tot_cell_num)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Carbon content of a single prokaryote\n", - "McMahon & Parnell estimate the characteristic carbon content of single bacterial and arhcaeal cells in the terrestrial deep subsurface at 26 fg C cell$^{-1}$, based on carbon content of cultivated cells under starvation. This value is consistent with our estimates for the carbon content of cells in the marine and marine deep subsurface environments.\n", - "\n", - "To estimate the total biomass of bacteria and archaea in the terrestrial deep subsurface, we muliply our best estimate for the total number of cells in the terrestrial deep subsurface by the characteristic carbon content of cells in the terrestrial deep subsurface. Our best estimate is ≈60 Gt C." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of 62 Gt C\n", - "We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of 66 Gt C\n" - ] - }, - { - "data": { - "text/plain": [ - "37.944253460542114" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# The characteristic carbon content of a single prokaryote in the terrestrial deep subsurface\n", - "carb_content = 26e-15\n", - "\n", - "# Calculate the biomass of bacteria and archaea in the terrestrial deep subsurface\n", - "best_estimate = best_tot_cell_num*carb_content\n", - "\n", - "# Median\n", - "best_estimate_median = tot_cell_num_median*carb_content\n", - "print('We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of %.0f Gt C' %(best_estimate/1e15))\n", - "#Median\n", - "print('We estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of %.0f Gt C' %(best_estimate_median/1e15))\n", - "# Using the same attached-unattached cell ratio\n", - "tot_cell_gw_median*attached_unattached_ratio*carb_content/1e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty of our estimate of the total biomass of bacteria and archaea in the terrestrial deep subsurface, we calculate the uncertainty associated with each of the components of the estimate: the average cell concentration in groundwater, the total amount of groundwater, the ratio of attached to unattached cells, and the carbon content of single cells.\n", - "\n", - "## Average cell concentration\n", - "McMahon & Parnell do not supply an uncertainty estimate for the average concentration of cells in the terretrial deep subsurface. 
The only effect estimated by McMahon & Parnell related to the average concentration of cells, was the effect of different compaction coefficients, determining the relative fraction of water located at different depths on the average cells concentration. By varying the compaction coeffieinct, McMahon & Parnell reported an effect of ≈30% on the average concentration of cells.\n", - "To calculate the uncertainty associated with the process of estimating average cell concentrations at each depth bin, we collect uncertainties stemming from different sources.\n", - "\n", - "### Intra-depth bin uncertainty\n", - "Based on the data of cell concentrations, we estimate the 95% confidence interval for the average cell concentration at each depth bin, and propagate this uncertainty to the total number of cells. We estimate the 95% confidence interval for both the arithmetic mean and geometric mean of the cell concentration at each depth bin.\n", - "We estimate the uncertainty around the estimate of cell concentration at each depth bin, and then propagate the uncertainty at each depth bin to the final estimate of the average cell concentration. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the arithmetic mean of cell concentrations at each depth bin is ≈1.3-fold\n", - "The uncertainty associated with the geometric mean of cell concentrations at each depth bin is ≈1.4-fold\n" - ] - } - ], - "source": [ - "# Define a function that will estimate the 95% confidence interval for the arithmetic mean of each bin\n", - "def bin_se(input):\n", - " se = input['Cell concentration [cells mL-1]'].std(ddof=1)/np.sqrt(input.shape[0])\n", - " mean = input['Cell concentration [cells mL-1]'].mean()\n", - " return (1.96*se+mean)/mean\n", - "\n", - "# Define a function that will estimate the 95% confidence interval for the geometric mean of each bin\n", - "def bin_geo_CI_calc(input):\n", - " return geo_CI_calc(input['Cell concentration [cells mL-1]'])\n", - "\n", - "# Calculate the 95% confidence interval for the arithmetic mean at each bin\n", - "bin_mean_CI = depth_binned.apply(bin_se).dropna()\n", - "\n", - "# Calculate the 95% confidence interval for the geometric mean at each bin\n", - "bin_geo_mean_CI = depth_binned.apply(bin_geo_CI_calc).dropna()\n", - "\n", - "# Propoagate the uncertainty at each bin to the average cell concentration\n", - "av_conc_mean_CI = CI_sum_prop(bin_mean['Cell concentration [cells mL-1]'][:-1]*depth_gw_fraction[:-1],bin_mean_CI)\n", - "print('The uncertainty associated with the arithmetic mean of cell concentrations at each depth bin is ≈%.1f-fold' %av_conc_mean_CI)\n", - "\n", - "# Propoagate the uncertainty at each bin to the average cell concentration\n", - "av_conc_geo_mean_CI = CI_sum_prop(bin_geo_mean['Cell concentration [cells mL-1]'][:-1]*depth_gw_fraction[:-1],bin_geo_mean_CI)\n", - "print('The uncertainty associated with the geometric mean of cell concentrations at each depth bin is ≈%.1f-fold' %av_conc_geo_mean_CI)\n" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "For our best estimate of the total number of cells in the terrestrial deep subsurface, we used the geometric mean of the two estimates - the one based on arithmetic means of cells concentrations at each depth bin and the one based on the geometric mean of cell concentrations at each depth bin. We estimate the 95% confidence interval fo the geometric mean of these two estimates, which is ≈\n", - "We calculate an uncertainty of ≈1.3-fold from this source. Combining these two sources together, we estimate ≈1.4-fold uncertainty associated with the average concentration of cells of bacteria and archaea in the terrestrial deep subsurface." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total uncertainty of the geometric mean of our estimates based on the two different methodologies for calculating the average cell concentration at each depth bin is ≈2.3-fold\n" - ] - } - ], - "source": [ - "inter_method_CI = geo_CI_calc(np.array([tot_cell_num_mean,tot_cell_num_geo_mean]))\n", - "print('The total uncertainty of the geometric mean of our estimates based on the two different methodologies for calculating the average cell concentration at each depth bin is ≈%.1f-fold' %inter_method_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the average concentration of cells in groundwater, we take the maximum uncertainty from the intra-depth bin and inter-method uncertainties, which is ≈2.3-fold." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the average concentration of cell in groundwater is ≈2.3-fold\n" - ] - } - ], - "source": [ - "av_cell_CI = np.max([av_conc_mean_CI,av_conc_geo_mean_CI,inter_method_CI])\n", - "print('Our best projection for the uncertainty associated with the average concentration of cell in groundwater is ≈%.1f-fold' %av_cell_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total volume of groundwater\n", - "As a measure of the uncertainty associated with the total volume of groundwater, we use the range reported in Gleeson et al. of ≈2.2-fold. This range does not represent 95% confidence interval, but rather a 25% and 75% range. As no 95% confidence interval is available, we assume the distribution of estimates of the global volume of groundwater is nearly gaussian, and take about two standard deviations as our estimate for the 95% confidence interval. We calculate the fold change of the 95% confidence interval relative to the mean estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the uncertainty associated with the total volume of groundwater is ≈2-fold\n" - ] - } - ], - "source": [ - "# We take the lower and upper range reported by Gleeson et al.\n", - "lower_gleeson = 1.6e22\n", - "upper_gleeson = 3.0e22\n", - "\n", - "# Calculate the relative fold change of the 95% confidence interval\n", - "gw_upper_CI = upper_gleeson*1.96/tot_gw_vol\n", - "gw_lower_CI = lower_gleeson*1.96/tot_gw_vol\n", - "\n", - "# Our estimate for the 95% confidence interval is the mean of the upper and lower fold changes\n", - "gw_CI = np.mean([gw_upper_CI,gw_lower_CI])\n", - "\n", - "print('Our estimate for the uncertainty associated with the total volume of groundwater is ≈%.0f-fold' % gw_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ratio of attached to unattached cells\n", - "McMahon & Parnell report a range of $10^2-10^3$ for the attached to unattached cell ratio. As we chose to use the goemetric mean of this range for our estimate, we use the 95% confidence interval around the geometric mean of the two extremes of the range, as a measure of the uncertainty associated with the ratio of attached to unattached cells.\n", - "\n", - "## Carbon content of single cells\n", - "McMahon & Parnell do not suply an uncertainty analysis for the carbon content of single cells in the terrestrial deep subsurface. Our estimate for carbon content of subseafloor sediments is similar to the value used by McMahon & Parnell. Therefore, we use the values for the uncertainty associated with the carbon content of cells in subseafloor sediments as a measure of the uncertainty associated with the carbon content of cells in the terrestrial deep subsurface. The uncertainty we calculated for the carbon content of cell in subseafloor sediments is ≈2.2-fold." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈14-fold\n" - ] - } - ], - "source": [ - "attached_unattached_CI = geo_CI_calc(np.array([100,1000]))\n", - "\n", - "carbon_content_CI = 2.2\n", - "\n", - "mul_CI = CI_prod_prop(np.array([av_cell_CI,gw_CI,attached_unattached_CI,carbon_content_CI]))\n", - "print('The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈%.0f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Additional sources of uncertainty\n", - "Combining all the uncertainty of the factors above, we calculate an uncertainty of ≈14-fold in the biomass of bacteria and archaea in the terrestrial deep subsurface.\n", - "As we state in the Supplementary Information, there are other sources of uncertainty for which we are not able to provide a quantitative estimate. The procedure of binning cell concentrations with depth and fitting an equation which extrapolates cell concentrations across all depths has uncertainty associated with it, and while we did calculate some uncertainty associated with this process, it probably does not represent the entire uncertainty associated with this process. The uncertainty stemming from possible contribution from groundwater deeper than 2 km is also hard to quantify, as the cell concentration at those depths and the volume of groundwater are poorly explored. We thus chose to project an uncertainty of ≈20-fold as our best projection of the uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total biomass of terrestrial deep subsurface bacteria and archaea: 62 Gt C\n", - "Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: 20-fold\n" - ] - } - ], - "source": [ - "# Modify the uncertainty of the estimate\n", - "mul_CI = 20\n", - "\n", - "print('Total biomass of terrestrial deep subsurface bacteria and archaea: %.0f Gt C' % (best_estimate/1e15))\n", - "print('Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total biomass of bacteria and archaea in the terrestrial deep subsurface',\n", - " 'Value': int(best_estimate),\n", - " 'Units': 'g C',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.loc[3] = pd.Series({\n", - " 'Parameter': 'Carbon content of a single cell',\n", - " 'Value': carb_content,\n", - " 'Units': 'g C',\n", - " 'Uncertainty': np.nan\n", - " })\n", - "\n", - "result.to_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass_old-checkpoint.ipynb 
b/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass_old-checkpoint.ipynb deleted file mode 100644 index 83c4c8e..0000000 --- a/bacteria_archaea/terrestrial_deep_subsurface/prok_biomass/.ipynb_checkpoints/terrestrial_deep_subsurface_prok_biomass_old-checkpoint.ipynb +++ /dev/null @@ -1,362 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of bacteria and archaea in the terrestrial deep subsurface\n", - "This notebook details the procedure for estimating the total biomass of prokaryotes (bacteria and archaea) in the terrestrial deep subsurface. Our estimate is based on the estimate made by [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196).\n", - "\n", - "## Number of cells\n", - "McMahon & Parnell rely on measurements of groundwater cell concentrations along different depths. McMahon & Parnell bin measurements to 250 meter bins, and calculate the mean concentration of cells per mL of ground water at each bin. McMahon & Parnell then calculate a global weighted average cell concentration by using data on the fraction of groundwater found at each depth. Depth bins missing from the data were extrapolated by using a regression equation that predicts the concentration of cells from the depth of the sample. In total, McMahon & Parnell estimate an average concentration of $≈5×10^5$ cells mL$^{-1}$.\n", - "\n", - "To get from the average cell concentration to an estimate of the total number of cells in the terrestrial deep subsurface, McMahon & Parnell multiply the average cell concentration by the total volume of groundwater of ~$10^{22}$ mL. McMahon & Parnell thus estimate $≈5×10^{27}$ cells found in groundwater.\n", - "\n", - "Most of the cells in the terrestrial subsurface are actually found attached to surfaces and not free-living in groundwater. 
McMahon & Parnell rely on data from the literature of the attached to unattached cell number ratio, and report a range of $10^2-10^3$ for this ratio. We use as our best estimate for this ratio the geometric mean of this range, which is roughly 300. Multiplying the total number of cells in groundwater by this ratio gives us an estimate for the total number of bacteria and archaea in the terrestrial deep subsurface. Our best estimate for the total number of bacteria and archaea in the terrestrial deep subsurface is $≈2×10^{30}$ cells.\n", - "\n", - "## Carbon content of a single prokaryote\n", - "McMahon & Parnell estimate the characteristic carbon content of single bacterial and archaeal cells in the terrestrial deep subsurface at 26 fg C cell$^{-1}$, based on carbon content of cultivated cells under starvation. This value is consistent with our estimates for the carbon content of cells in the marine and marine deep subsurface environments.\n", - "\n", - "To estimate the total biomass of bacteria and archaea in the terrestrial deep subsurface, we multiply our best estimate for the total number of cells in the terrestrial deep subsurface by the characteristic carbon content of cells in the terrestrial deep subsurface. 
We estimate ≈40 Gt C of bacteria and archaea in the terrestrial deep subsurface.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "McMahon & Parnell estimate an average concentration of 5e+05 cells per mL in groundwater\n", - "McMahon & Parnell estimate a total of 1e+28 cells in groundwater\n", - "McMahon & Parnell estimate a total of 4e+30 cells in the terrestrial deep subsurface\n", - "McMahon & Parnell estimate a total of 4e+30 cells in the terrestrial deep subsurface\n", - "McMahon & Parnell estimate a total of 2e+30 cells in the terrestrial deep subsurface\n", - "McMahon & Parnell estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of 97 Gt C\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:22: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:86: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", 
- "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *\n", - "\n", - "# The global average cell concentration in groundwater estimated by McMahon & Parnell [cells mL^-1]\n", - "tot_cell_conc = 5.2e5\n", - "print('McMahon & Parnell estimate an average concentration of %.0e cells per mL in groundwater' %tot_cell_conc)\n", - "\n", - "# Load original data from figure 1 of McMahon & Parnell\n", - "mp_data = pd.read_csv('terrestrial_deep_subsurface_prok_cell_num.csv',skiprows=1)\n", - "\n", - "# Define depth bins every 250 meter \n", - "bins = np.linspace(0,2000,9)\n", - "\n", - "# Filter deeper then 2km\n", - "mp_data_filt = mp_data[mp_data['Depth [m]'] < 2000]\n", - "\n", - "# Bin data based on depth bins\n", - "mp_data_filt['Depth bin'] = pd.cut(mp_data_filt['Depth [m]'],bins)\n", - "depth_binned = mp_data_filt.groupby('Depth bin')\n", - "\n", - "# Calculate the mean concentration at each depth bin\n", - "bin_mean = depth_binned['Cell concentration [cells mL-1]'].mean().dropna()\n", - "bin_geo_mean = depth_binned['Cell concentration [cells mL-1]'].apply(gmean)\n", - "\n", - "\n", - "# Total volume of groundwater [mL], based on Gleeson et al.\n", - "tot_gw_vol = 2.26e22\n", - "print('McMahon & Parnell estimate a total of %.0e cells in groundwater' %(tot_cell_conc*tot_gw_vol))\n", - "\n", - "# Load data from Gleeson et al. 
on the distribution of groundwater with depth\n", - "gw_depth_dist = pd.read_csv('gleeson_fraction_gw_data.csv', skiprows=1)\n", - "\n", - "\n", - "# Generate functions to fit the data an calculate partial integrals\n", - "def func(x,a,b,c):\n", - " return a*np.exp(-b*x)+c\n", - "\n", - "def frac_func(x0,x1,a,b,c):\n", - " integral = lambda x: -a/b*np.exp(-b*x) + c*x\n", - " int_x = integral(x1) - integral(x0)\n", - " int_total = integral(2000) - integral(0)\n", - " fraction = int_x/int_total\n", - " return fraction\n", - "\n", - "# Fit the data with the fiting function\n", - "popt, pcov = curve_fit(func, gw_depth_dist['depth [m]'], gw_depth_dist['fraction'],bounds=(0, [0.2, 2., 0.5]))\n", - "\n", - "# Calculate the fraction of groundwater in each bin\n", - "depth_gw_fraction = []\n", - "lower_depth_range = np.linspace(0,1750,8)\n", - "upper_depth_range = np.linspace(250,2000,8)\n", - "for ix, y in enumerate(lower_depth_range):\n", - " depth_gw_fraction.append(frac_func(lower_depth_range[ix],upper_depth_range[ix], *popt))\n", - "depth_gw_fraction = np.array(depth_gw_fraction)\n", - "\n", - "# To estimate the last bin (1750-2000), which doesn't have data, we either use the fit produced by McMahon & Parnell\n", - "# or fit a function to the geometric means\n", - "\n", - "# The fit of McMahon & Parnell from Figure 1\n", - "mp_fit = lambda x: np.exp(-(x-5771.2)/390.6)\n", - "\n", - "# Extrapolate the average cell concentration based on the fit by McMahon & Parnell\n", - "extrapolated_mean = pd.DataFrame({'Depth bin': '(1750.0, 2000.0]', 'Cell concentration [cells mL-1]': mp_fit(1875)},index=[0])\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "bin_mean = bin_mean.reset_index().append(extrapolated_mean,ignore_index=True).set_index('Depth bin')\n", - "\n", - "\n", - "# Define an exponential function to fit the data\n", - "def depth_func_log(x, a, b):\n", - " return np.log(a) - b*x\n", - "\n", - "# Fit the geometric means\n", - "xdata = 
bins[1:-1]-125\n", - "popt2, pcov2 = curve_fit(depth_func_log, xdata, np.log(bin_geo_mean[:-1]))\n", - "\n", - "# Extrapolate the geometric mean cell concentration based on the fit we calculated\n", - "extrapolated_geo_mean = np.exp(depth_func_log(1875, *popt2))\n", - "\n", - "# Add the extrapolated value to the depth averages\n", - "tmp = bin_geo_mean.reset_index()\n", - "tmp['Cell concentration [cells mL-1]'][7] = extrapolated_geo_mean\n", - "bin_geo_mean = tmp.set_index('Depth bin')\n", - "\n", - "cell_mean = (bin_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "cell_geo_mean = (bin_geo_mean['Cell concentration [cells mL-1]']*depth_gw_fraction).sum()\n", - "# Fraction of attached/unattached cells (geometric mean of 10^2 and 10^3)\n", - "attached_unattached_ratio = gmean([1e2,1e3])\n", - "\n", - "# Calculate the total number of cells in the terrestrial deep subsurface\n", - "tot_cell_num = tot_cell_conc*tot_gw_vol*attached_unattached_ratio\n", - "tot_cell_num_mean = cell_mean*tot_gw_vol*attached_unattached_ratio\n", - "tot_cell_num_geo_mean = cell_geo_mean*tot_gw_vol*attached_unattached_ratio\n", - "print('McMahon & Parnell estimate a total of %.0e cells in the terrestrial deep subsurface' %tot_cell_num)\n", - "print('McMahon & Parnell estimate a total of %.0e cells in the terrestrial deep subsurface' %tot_cell_num_mean)\n", - "print('McMahon & Parnell estimate a total of %.0e cells in the terrestrial deep subsurface' %tot_cell_num_geo_mean)\n", - "\n", - "# The characteristic carbon content of a single prokaryote in the terrestrial deep subsurface\n", - "carb_content = 26e-15\n", - "\n", - "biomass_mean = tot_cell_num_mean*carb_content\n", - "biomass_geo_mean = tot_cell_num_geo_mean*carb_content\n", - "print('McMahon & Parnell estimate a total biomass of bacteria and archaea in the terrestrial deep subsurface of %.0f Gt C' %(biomass_mean/1e15))\n", - "print('McMahon & Parnell estimate a total biomass of bacteria and archaea in the 
terrestrial deep subsurface of %.0f Gt C' %(biomass_geo_mean/1e15))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty of our estimate of the total biomass of bacteria and archaea in the terrestrial deep subsurface, we calculate the uncertainty associated with each of the components of the estimate: the average cell concentration in groundwater, the total amount of groundwater, the ratio of attached to unattached cells, and the carbon content of single cells.\n", - "\n", - "## Average cell concentration\n", - "McMahon & Parnell do not supply an uncertainty estimate for the average concentration of cells in the terrestrial deep subsurface. The only effect estimated by McMahon & Parnell related to the average concentration of cells, was the effect of different compaction coefficients, determining the relative fraction of water located at different depths on the average cell concentration. By varying the compaction coefficient, McMahon & Parnell reported an effect of ≈30% on the average concentration of cells. \n", - "We estimate the uncertainty around the estimate of cell concentration at each depth bin, and then propagate the uncertainty at each depth bin to the final estimate of the average cell concentration. We calculate an uncertainty of ≈1.3-fold from this source. Combining these two sources together, we estimate ≈1.4-fold uncertainty associated with the average concentration of cells of bacteria and archaea in the terrestrial deep subsurface." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Depth bin\n", - "(0.0, 250.0] 0.412499\n", - "(250.0, 500.0] 0.334719\n", - "(500.0, 750.0] 0.559026\n", - "(750.0, 1000.0] 0.407967\n", - "(1000.0, 1250.0] 0.767936\n", - "(1250.0, 1500.0] 0.087212\n", - "(1500.0, 1750.0] 0.116358\n", - "(1750.0, 2000.0] NaN\n", - "Name: Cell concentration [cells mL-1], dtype: float64\n", - "5.65012587554e+16\n", - "Our best estimate for the uncertainty of the average concentration of cells in the terrestrial deep subsurface is ≈1.4-fold\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:11: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " # This is added back by InteractiveShellApp.init_path()\n", - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - } - ], - "source": [ - "# Load original data from figure 1 of McMahon & Parnell\n", - "mp_data = pd.read_csv('terrestrial_deep_subsurface_prok_cell_num.csv',skiprows=1)\n", - "\n", - "# Define depth bins every 250 meter \n", - "bins = np.linspace(0,2000,9)\n", - "\n", - "# Filter deeper then 2km\n", - "mp_data_filt = mp_data[mp_data['Depth [m]'] < 2000]\n", - "\n", - "# Bin data based on depth bins\n", - "mp_data_filt['Depth bin'] = pd.cut(mp_data_filt['Depth [m]'],bins)\n", - "depth_binned = 
mp_data_filt.groupby('Depth bin')\n", - "\n", - "# Define a function that will estimate the 95% confidence interval for each bin\n", - "def bin_se(input):\n", - " se = input['Cell concentration [cells mL-1]'].std(ddof=1)/np.sqrt(input.shape[0])\n", - " mean = input['Cell concentration [cells mL-1]'].mean()\n", - " return (1.96*se+mean)/mean\n", - "\n", - "# Calculate the 95% confidence interval for each bin\n", - "bin_se = depth_binned.apply(bin_se).dropna()\n", - "\n", - "# Calculate the mean concentration at each depth bin\n", - "bin_mean = depth_binned['Cell concentration [cells mL-1]'].mean().dropna()\n", - "bin_geo_mean = depth_binned['Cell concentration [cells mL-1]'].apply(gmean)\n", - "print(bin_geo_mean/bin_mean)\n", - "#print(bin_geo_mean.mean()*tot_gw_vol*attached_unattached_ratio*carb_content)\n", - "print(gmean([bin_geo_mean.mean(),tot_cell_conc])*tot_gw_vol*attached_unattached_ratio*carb_content)\n", - "# Propoagate the uncertainty at each bin to the average cell concentration\n", - "av_conc_CI = CI_sum_prop(bin_mean,bin_se)\n", - "\n", - "# The uncertainty of the average concentration stemming from the compaction coefficient\n", - "comp_coeff_CI = 1.3\n", - "\n", - "total_av_conc_CI = CI_prod_prop(np.array([av_conc_CI,comp_coeff_CI]))\n", - "print('Our best estimate for the uncertainty of the average concentration of cells in the terrestrial deep subsurface is ≈%.1f-fold' % total_av_conc_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total volume of groundwater\n", - "As a measure of the uncertainty associated with the total volume of groundwater, we use the range reported in Gleeson et al. of ≈2.2-fold. This range does not represent 95% confidence interval, but rather a 25% and 75% range. As no 95% confidence interval is available, we \n", - "\n", - "## Ratio of attached to unattached cells\n", - "McMahon & Parnell report a range of $10^2-10^3$ for the attached to unattached cell ratio. 
As we chose to use the geometric mean of this range for our estimate, we use the multiplicative factor of this range relative to the geometric mean, which is ≈3-fold, as a measure of the uncertainty associated with the ratio of attached to unattached cells.\n", - "\n", - "## Carbon content of single cells\n", - "McMahon & Parnell do not supply an uncertainty analysis for the carbon content of single cells in the terrestrial deep subsurface. Our estimate for carbon content of subseafloor sediments is similar to the value used by McMahon & Parnell. Therefore, we use the values for the uncertainty associated with the carbon content of cells in subseafloor sediments as a measure of the uncertainty associated with the carbon content of cells in the terrestrial deep subsurface. The uncertainty we calculated for the carbon content of cells in subseafloor sediments is ≈2.2-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈5-fold\n" - ] - } - ], - "source": [ - "gw_CI = 2\n", - "attached_unattached_CI = 3\n", - "carbon_content_CI = 2.2\n", - "\n", - "mul_CI = CI_prod_prop(np.array([total_av_conc_CI,gw_CI,attached_unattached_CI,carbon_content_CI]))\n", - "print('The uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface is ≈%.0f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Additional sources of uncertainty\n", - "Combining all the uncertainty of the factors above, we calculate an uncertainty of ≈5-fold in the biomass of bacteria and archaea in the terrestrial deep subsurface.\n", - "As we state in the Supplementary Information, there are other sources of uncertainty for which we are not able to provide a quantitative estimate. 
The procedure of binning cell concentrations with depth and fitting an equation which extrapolates cell concentrations across all depths has uncertainty associated with it, and while we did calculate some uncertainty associated with this process, it probably does not represent the entire uncertainty associated with this process. The uncertainty stemming from possible contribution from groundwater deeper than 2 km is also hard to quantify, as the cell concentration at those depths and the volume of groundwater are poorly explored. We thus chose to project an uncertainty of one order of magnitude as our best projection of the uncertainty associated with the biomass of bacteria and archaea in the terrestrial deep subsurface." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total biomass of terrestrial deep subsurface bacteria and archaea: 97 Gt C\n", - "Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: 10-fold\n" - ] - } - ], - "source": [ - "# Modify the uncertainty of the estimate\n", - "mul_CI = 10\n", - "\n", - "print('Total biomass of terrestrial deep subsurface bacteria and archaea: %.0f Gt C' % (best_estimate/1e15))\n", - "print('Uncertainty associated with the total biomasss of terrestrial deep subsurface bacteria and archaea: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total number of marine bacteria and archaea',\n", - " 'Value': int(best_estimate),\n", - " 'Units': 'Cells',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../terrestrial_deep_subsurface_prok_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": 
"python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/bacteria_archaea/terrestrial_deep_subsurface/terrestrial_deep_subsurface_prok_biomass_estimate_OLD.xlsx b/bacteria_archaea/terrestrial_deep_subsurface/terrestrial_deep_subsurface_prok_biomass_estimate_OLD.xlsx deleted file mode 100644 index fd87d56..0000000 Binary files a/bacteria_archaea/terrestrial_deep_subsurface/terrestrial_deep_subsurface_prok_biomass_estimate_OLD.xlsx and /dev/null differ diff --git a/figures/.ipynb_checkpoints/marine_inverted_pyramid_prob-checkpoint.ipynb b/figures/.ipynb_checkpoints/marine_inverted_pyramid_prob-checkpoint.ipynb deleted file mode 100644 index c67d2ec..0000000 --- a/figures/.ipynb_checkpoints/marine_inverted_pyramid_prob-checkpoint.ipynb +++ /dev/null @@ -1,98 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "import sys\n", - "sys.path.insert(0, '../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Quantifying the probability of the marine trophic pyramids being inverted\n", - "In order to quantify the probability of consumer biomass in the marine environment being lrager than producer biomass, we randomly sample from the distribution of our estimates for the biomass of each taxon of producer of consumer biomass. 
For each taxon with no uncertainty estimate, we assume its uncertainty is an order of magnitude.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The probability marine consumers have larger biomass than marine producers is ≈95%\n" - ] - } - ], - "source": [ - "# Load results\n", - "results = pd.read_excel('../results.xlsx','Fig2B')\n", - "\n", - "# Extract marine producers biomass data\n", - "marine_producers = results.iloc[20:25,0:3]\n", - "marine_producers.columns = results.iloc[19,0:3]\n", - "\n", - "# Extract marine consumers biomass data\n", - "marine_consumers = results.iloc[20:30,3:]\n", - "marine_consumers.columns = results.iloc[19,0:3]\n", - "\n", - "\n", - "sample_size = 100000\n", - "\n", - "# Define the funcion that samples from the distribution of biomass of each taxon for producers and consumers\n", - "def sample_biomass(estimates,sample_size):\n", - " ans = np.empty([sample_size,estimates.shape[0]])\n", - " for x,ind in enumerate(estimates.index): \n", - " ans[:,x:x+1] = np.random.lognormal(mean = np.log(estimates.loc[ind,'Biomass']), sigma = np.log(estimates.loc[ind,'Uncertainty'])/1.96,size=sample_size).reshape([-1,1])\n", - " return ans.sum(axis=1)\n", - "\n", - "# Sample from the distribution of estimates for each of the taxa of the producers and consumers\n", - "marine_consumer_sample = sample_biomass(marine_consumers,sample_size)\n", - "marine_producer_sample = sample_biomass(marine_producers,sample_size)\n", - "\n", - "# Calculate the probability that marine consumers have larger biomass than marine producers\n", - "inverted_prob = (marine_consumer_sample>marine_producer_sample).sum()/sample_size\n", - "\n", - "print('The probability marine consumers have larger biomass than marine producers is ≈%.0f' %(inverted_prob*100) + \"%\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", 
- "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/figures/.ipynb_checkpoints/plant_bacteria_comparison-checkpoint.ipynb b/figures/.ipynb_checkpoints/plant_bacteria_comparison-checkpoint.ipynb deleted file mode 100644 index 973d826..0000000 --- a/figures/.ipynb_checkpoints/plant_bacteria_comparison-checkpoint.ipynb +++ /dev/null @@ -1,144 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "import sys\n", - "sys.path.insert(0, '../statistics_helper/')\n", - "from CI_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Quantifying the probability of plant biomass being higher than bacterial biomass\n", - "In order to quantify the probability of plants having more biomass than bacteria, we randomly sample from the distribution of our estimates for the biomass of plants and bacteria." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The probability of plants having more biomass than bacteria is ≈90%\n" - ] - } - ], - "source": [ - "# Load results\n", - "results = pd.read_excel('../results.xlsx','Table1 & Fig1',index_col=[0,1])\n", - "\n", - "# Sample 100000 times from the distribution of our estimates for the biomass of plants and bacteria\n", - "sample_size = 100000\n", - "\n", - "# Load the results for the biomass of bacteria\n", - "means = results.loc['Bacteria','Biomass [Gt C]']\n", - "mul_CIs = results.loc['Bacteria','Uncertainty']\n", - "\n", - "# Define the function sampling from the distribution of estimates on the biomass of each taxon\n", - "sample = lambda x,y: np.random.lognormal(mean = np.log(x), sigma = np.log(y)/1.96,size=sample_size).reshape([-1,1]) \n", - "\n", - "# Sample from the distribution of each group of bacteria\n", - "bacteria_dist = np.empty([sample_size,4])\n", - "for x,ind in enumerate(means.index):\n", - " bacteria_dist[:,x:x+1] = sample(means.loc[ind],mul_CIs.loc[ind])\n", - "\n", - "# Sum over all groups of bacteria\n", - "bac_sum = bacteria_dist.sum(axis=1)\n", - "\n", - "# Sample from the distribution of estimates of the biomass of plants\n", - "plants = sample(results.loc[('Plants','Plants'),'Biomass [Gt C]'],results.loc[('Plants','Plants'),'Total uncertainty'])\n", - "\n", - "\n", - "# Calculate the probability of plant biomass being higher than that of bacteria\n", - "p_val = (plants[:,0] > bac_sum).sum()/sample_size\n", - "\n", - "print('The probability of plants having more biomass than bacteria is ≈%.0f' %(p_val*100) + '%')\n", - "update_MS_data(row='Probability of plant biomass being larger than bacterial biomass',values=p_val,path='../results.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - 
"execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEOCAYAAACpVv3VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFNZJREFUeJzt3Xtw1eWdx/HPl0gNVS4StK3SbWJr\nK5eQAHGxVapVV61i3GlJcaV16VJmWcVtl1qv2LGzdNS62pvbsq3sBlrYolgQuzD2Imh3g5eAqIB0\nhA5OoywNYYtQggp894/zSwzhJOd3kvM75zzJ+zWTyTm/y3O+55nMJ0+e88vzM3cXACAcAwpdAAAg\nOwQ3AASG4AaAwBDcABAYghsAAkNwA0BgCG4ACAzBDQCBIbgBIDAENwAE5oQkGh0xYoSXl5cn0TQA\n9FkbNmzY4+6nZjoukeAuLy9XY2NjEk0DQJ9lZq/FOY6pEgAIDMENAIEhuAEgMInMcQPoP9555x01\nNTXp0KFDhS4lGKWlpRo5cqQGDhzYo/MJbgC90tTUpMGDB6u8vFxmVuhyip67q6WlRU1NTaqoqOhR\nG0yVAOiVQ4cOqaysjNCOycxUVlbWq79QCG4AvUZoZ6e3/UVwAwheSUmJqqurVVVVpQkTJqihoaFH\n7axcuVJbt27N+rxVq1bpnnvu6dFr9gRz3ECIlk6Trl1W6CrSmln/fE7bWzjjnIzHDBo0SJs2bZIk\nPfHEE7rtttv01FNPZf1aK1eu1JQpUzR69OjY5xw+fFi1tbWqra3N+vV6ihE3gD7lzTff1CmnnCJJ\nOnDggC6++GJNmDBBlZWVeuyxx9qPW7x4scaNG6eqqip94QtfUENDg1atWqWvfe1rqq6u1o4dO7Rj\nxw5dfvnlmjhxoiZPnqxt27ZJkmbMmKHZs2dr0qRJuvnmm1VfX685c+ZIkh5//HFNmjRJ48eP1yWX\nXKLdu3fn/D0y4gYQvNbWVlVXV+vQoUPatWuXnnzySUmpy+5WrFihIUOGaM+ePTr33HNVW1urrVu3\nav78+WpoaNCIESO0d+9eDR8+XLW1tZoyZYqmTp0qSbr44ou1YMECnXXWWXr22Wd1/fXXt7fd1NSk\nhoYGlZSUqL6+vr2W888/X88884zMTA899JC+9a1v6f7778/p+yW4AQSv41TJ+vXrdd1112nz5s1y\nd91+++16+umnNWDAAL3++uvavXu3nnzySdXV1WnEiBGSpOHDhx/X5oEDB9TQ0KC6urr2bW+99Vb7\n47q6OpWUlBx3XlNTk6ZNm6Zdu3bp7bff7vElf90huAH0KR//+Me1Z88eNTc3a/Xq1WpubtaGDRs0\ncOBAlZeXx74M7+jRoxo2bFj7L4TOTjrppLTbb7zxRs2dO1e1tbVat26d7rrrrp6+lS4xxw2EZum0\nQldQ1LZt26YjR46orKxM+/bt02mnnaaBAwdq7dq1eu211OJ7F110kR555BG1tLRIkvbu3StJGjx4\nsPbv3y9JGjJkiCoqKvTII49ISv3jzIsvvpjx9fft26czzjhDkrRo0aKcvz+J4AbQB7TNcVdXV2va\ntGlatGiRSkpKNH36dDU2NqqyslKLFy/W2WefLUkaM2aM7rjjDl1wwQWqqqrS3LlzJUnXXHON7rvv\nPo0fP147duzQkiVLtHDhQlVVVWnMmDHHfLjZlbvuukt1dXWaOHFi+1RMrpm757zRmpoaZz1uICFt\nI+4iuRzwlVde0ahRowpdRnDS9ZuZbXD3mkznMuIGgMAQ3AAQGIIbAAJDcANAYAhuAAgMwQ0AgSG4\ngVDxjzjt2pZ1HTt2r
Orq6nTw4EFJ0sknn9zjNuvr6/XGG2/kqsSc4l/eAeRWrn+hxLheveNaJdOn\nT9eCBQva/6mmp+rr6zV27FidfvrpvWonCYy4AfQpkydP1vbt24/Z1tXyrjt37tSoUaM0a9YsjRkz\nRpdeeqlaW1u1fPlyNTY2avr06aqurlZra6tuvfVWjR49WuPGjdNNN91UiLfWjuAG0GccPnxYa9as\nUWVl5THb25Z33bhxo9auXauvfvWravuv8VdffVU33HCDtmzZomHDhunRRx/V1KlTVVNToyVLlmjT\npk06ePCgVqxYoS1btuill17SvHnzCvH22jFVAiB4bWuVSKkR98yZM4/Z39XyrpJUUVHRfu7EiRO1\nc+fO49ofOnSoSktLNXPmTE2ZMkVTpkxJ9g1lQHADCF7HOe50lixZ0uXyrieeeGL7cSUlJWptbT3u\n/BNOOEHPPfecfvOb32j58uV68MEH22+oUAgEN4A+r6vlXbvTcYnXAwcO6ODBg7riiit03nnn6cwz\nz0y65G4R3AD6vOnTp+uqq65SZWWlampq2pd37U7bfSUHDRqkNWvW6Oqrr9ahQ4fk7nrggQfyUHXX\nWNYVCE3Hy+2KYGlXlnXtGZZ1BYB+hOAGgMAQ3AAQGIIbQK8l8VlZX9bb/iK4AfRKaWmpWlpaCO+Y\n3F0tLS0qLS3tcRtcDgigV0aOHKmmpiY1NzcXupRglJaWauTIkT0+n+AG0CsDBw5URUVFocvoV5gq\nAYDAENwAEBiCGwACQ3ADQGAIbgAIDMENAIEhuAEgMAQ3EJJc30EdQSK4ASAwBDcABIbgBoDAENwA\nEBiCGwACQ3ADQGAIbgAIDMENAIEhuAEgMAQ3AASG4AaAwBDcABAYghsIGYtO9UsENwAEhuAGgMAQ\n3AAQGIIbAAJDcANAYAhuAAgMwQ0AgSG4ASAwBDcABIbgBoDAENwAEBiCGwACQ3ADQGAIbgAIDMEN\nAIEhuAEgMAQ3AASG4AaAwBDcABAYghsAAkNwA0BgCG4ACAzBDQCBiRXcZvZzM7vSzAh6oFCWTit0\nBSgScYP4B5KulfSqmd1jZh9LsCYAQDdiBbe7/9rdp0uaIGmnpF+bWYOZfdHMBiZZIADgWLGnPsys\nTNIMSV+S9IKk7yoV5L9KpDIAQFonxDnIzFZI+pikn0i6yt13RbuWmVljUsUBAI4XK7gl/djdV3fc\nYGYnuvtb7l6TQF0AgC7EnSqZn2bb+lwWAgCIp9sRt5m9X9IZkgaZ2XhJFu0aIum9CdcGAEgj01TJ\nZUp9IDlS0gMdtu+XdHtCNQEAutFtcLv7IkmLzOyz7v5onmoCAHQj01TJ5939p5LKzWxu5/3u/kCa\n0wAACco0VXJS9P3kpAsBAMSTaark36Lv38hPOQCATDJNlXyvu/3u/o+5LQcAkEmmqZINeakCABBb\nnKtKAABFJNNUyXfc/Stm9rgk77zf3WsTqwwAkFamqZKfRN//JelCAADxZJoq2RB9f8rM3iPpbKVG\n3r9z97fzUB8AoJO4y7peKWmBpB1KrVdSYWZ/7+5rkiwOAHC8uKsD3i/pU+5+obtfIOlTkr6dXFkA\nYuNelP1O3ODe7+7bOzz/vVILTQEA8izTVSWfiR42mtlqSQ8rNcddJ+n5hGsDAKSRaY77qg6Pd0u6\nIHrcLGlQIhUBALqV6aqSL+arEABAPHGvKimVNFPSGEmlbdvd/e8SqgsA0IW4H07+RNL7lbojzlNK\n3RGHDycBoADiBvdH3P1OSX+O1i+5UtKk5MoCAHQlbnC/E33/k5mNlTRU0mnJlAQA6E6sOW5JPzKz\nUyTdKWmVUnfEuTOxqgAAXYoV3O7+UPTwKUlnJlcOACCTWFMlZlZmZt83s41mtsHMvmNmZUkXBwA4\nXtw57p9J+qOkz0qaKmmPpGVJFQUA6FrcOe4PuPs/d3g+38xY2QYACiDuiPuXZnaNmQ2
Ivj4n6Ykk\nCwMApJdpkan9Si0qZZK+Iumn0a4Bkg5IuinR6gAAx8m0VsngfBUCAIgn7hy3zKxW0iejp+vc/RfJ\nlAQA6E7cywHvkfRlSVujry+b2d1JFgYASC/uiPsKSdXuflSSzGyRpBck3ZZUYQCA9OJeVSJJwzo8\nHprrQgB0g/tKooO4I+67Jb1gZmuVusLkk5JuTawqAECXMga3mZmk/5Z0rqRzos23uPv/JlkYACC9\njMHt7m5mq929UqmVAQEABRR3jnujmZ2T+TAAQNLiznFPkvR5M9sp6c9KzXO7u49LqjAAQHpxg/uy\nRKsAAMSWaa2SUkmzJX1E0suSFrr74XwUBgBIL9Mc9yJJNUqF9qcl3Z94RQCAbmWaKhkdXU0iM1so\n6bnkSwIAdCfTiLvt7u5iigQAikOmEXeVmb0ZPTZJg6LnbVeVDEm0OgDAcTKtx12Sr0IAAPFks8gU\nAKAIENwAEBiCG+gLWPa1XyG4ASAwBDcABIbgBoDAENwAEBiCGwACQ3ADQGAIbgAIDMENAIEhuAEg\nMAQ3AASG4AaAwBDcABAYghsAAkNwA0BgCG4ACAzBDQCBIbgBIDAENwAEhuAGgMAQ3AAQGIIbAAJD\ncANAYAhuAAgMwQ0Uu6XTCl0BigzBDQCBIbgBIDAENwAEhuAGgMAQ3EBfwYeY/QbBDQCBIbgBIDAE\nNwAEhuAGgMAQ3AAQGIIbAAJDcANAYAhuAAgMwQ0AgSG4ASAwBDcABIbgBoDAENwAEBiCGwACQ3AD\nQGAIbgAIDMENAIEhuIFixl1tkAbBDQCBIbgBIDAEN9CXMLXSLxDcABAYghsAAkNwA0BgCG4ACAzB\nDQCBIbgBIDAENwAEhuAGgMAQ3AAQmBMKXQCQzsz657M6fuGMcxKqBCg+BDcKJttwzmVbQQQ9/76O\nLhDcyKtchnVvdKwjiBAHOiC4kbhiCeuuEOIIDcGNRBR7WHelq7oJdBQTghs5E2pYx9H5vRHkKCSC\nG73Sl8O6O4zMUUgEN5BDRTFfvnSadO2ywrw28oLgBhJSFCGOPongRtb66/RIbxDiyCWCG7EQ1rkT\npy8Jd3SH4EaXCOvC2XTvZe2Pqz84rICVoBgR3DgGYV18Nv3hT2m3E+j9F8ENwjpQXQW6JOney/T9\n982XxLRLX0Rw91OEdf/BB6N9D8HdjxDW6JOrKPZD5u45b7SmpsYbGxtz3i4yI5zDd+PueTlrq226\nJF8I+t4xsw3uXpPxOII7TAR035TL0G6T7/BOWl/+5VDQ4DazZkmvddg0VNK+mM9HSNqT86LSv24u\nz8t0TFf7023Ppr+k5PqM/speT/qM/krmnO6OK9b++pC7n5rxKHdP/EvSj+I+l9SYrzpyeV6mY7ra\nn257Nv2VZJ/RX/npM/ormXO6Oy7U/mr7ytfNgh/P8nm+6sjleZmO6Wp/uu30V5j91dPXor+SOae7\n40LtL0kJTZX0hpk1eow5HryLPssO/ZUd+is7+eivfI24s/GjQhcQIPosO/RXduiv7CTeX0U34gYA\ndK8YR9wAgG4Q3AAQGIIbAAJT9MFtZieZ2SIz+7GZTS90PcXOzM40s4VmtrzQtYTAzP46+tlaZmaX\nFrqeYmdmo8xsgZktN7N/KHQ9IYgyrNHMpuSqzYIEt5n9u5n90cw2d9p+uZn9zsy2m9mt0ebPSFru\n7rMk1ea92CKQTX+5++/dfWZhKi0OWfbXyuhna7akaYWot9Cy7K9X3H22pM9JOq8Q9RZalvklSbdI\nejiXNRRqxF0v6fKOG8ysRNK/Svq0pNGS/sbMRksaKekP0WFH8lhjMalX/P5Cz/prXrS/P6pXFv1l\nZrWS/kvS6vyWWTTqFbO/zOyvJG2V9MdcFlCQ4Hb3pyXt7bT5LyVtj0aMb0v6maSrJTUpFd5SAFM7\nSciyv/q9bPrLUu6VtMbdN+a71mKQ7c+Xu69y909
L6pdTl1n214WSzpV0raRZZpaTDCum9bjP0Lsj\naykV2JMkfU/Sg2Z2pQrwr6VFLG1/mVmZpG9KGm9mt7n73QWprvh09fN1o6RLJA01s4+4+4JCFFeE\nuvr5ulCp6csT1X9H3Omk7S93nyNJZjZD0h53P5qLFyum4E7L3f8s6YuFriMU7t6i1HwtYnD37yk1\nOEAM7r5O0roClxEcd6/PZXvFNPXwuqQPdng+MtqG9Oiv7NBf2aG/spPX/iqm4H5e0llmVmFm75F0\njaRVBa6pmNFf2aG/skN/ZSev/VWoywH/U9J6SR8zsyYzm+nuhyXNkfSEpFckPezuWwpRX7Ghv7JD\nf2WH/spOMfQXi0wBQGCKaaoEABADwQ0AgSG4ASAwBDcABIbgBoDAENwAEBiCGwACQ3AjUWZ2xMw2\nmdmLZrbRzD4RbT+9WG/20KHm06PnJ5vZD81sR/QeNpjZrGhfuZld201bHzWz1Wb2anTuw2b2PjOb\nbGZbO6/pDMRBcCNpre5e7e5Vkm6TdLckufsb7j61sKV1qa3mN6LnD0n6P0lnufsEpdZiHh7tK1dq\nyc7jmFmpUutW/9Dd2879gaRT3f23kq5I8D2gDyO4kU9DlArAtpHq5uhxqZn9h5m9bGYvmNmnou0z\nzGylmf3KzHaa2Rwzmxsd84yZDY+Om2Vmz0ej+kfN7L3R9joz2xxtfzraNsbMnotG1C+Z2VndFWxm\nH1ZqreV5bUtyunuzu98bHXKPpMlRe//U6fRrJa139/bliN19nbszykavFP2yrgjeIDPbJKlU0gck\nXZTmmBskubtXmtnZkn5pZh+N9o2VND46f7ukW9x9vJl9W9J1kr4j6efu/mNJMrP5kmZK+r6kr0u6\nzN1fN7NhUXuzJX3X3ZdEiwGVZKh/jKQXu1lH+VZJN7l7uvsJjpW0IUP7QNYYcSNpbdMOZys1xbDY\nzKzTMedL+qkkufs2Sa9Jagvute6+392bJe3TuzfTeFmpaQpJGmtmvzWzl5W6K8uYaPv/SKqP5qPb\nAnq9pNvN7BZJH3L31mzejJndEY2u38h8NJAMght54+7rJY2QdGoWp73V4fHRDs+P6t2/GOslzXH3\nSknfUGp0ruimtvOUWid5g5mVuftSpW463SpptZml+wugo62SqtpuOeXu33T3aqWmfTLZImlijOOA\nrBDcyJtoGqREUkunXb9VdP/CaIrkLyT9LoumB0vaZWYD1eE+iGb2YXd/1t2/LqlZ0gfN7ExJv4/u\nfPOYpHHdNezu2yU1SppvqRvCtn3o2PZXw/7o9dNZKukT0W332mr6pJmNzeK9AcchuJG0QdHUwiZJ\nyyT9rbsf6XTMDyQNiKY6lkma4e5vdW6oG3dKelapqZFtHbbfF33guVlSg6QXJX1O0uaonrGSFsdo\n/0uSyiRtN7NGSb+SdHO07yVJR6IPQI/5cDKahpki6cbocsCtkq5X6pcI0GOsxw10YmYH3P3kPLxO\nuaRfuDsjcGSFETdwvDc7/gNOEsxsslIftO5J6jXQdzHiBoDAMOIGgMAQ3AAQGIIbAAJDcANAYAhu\nAAjM/wN0JUiauhJEjwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Plot the distributions \n", - "bins = 10**(np.linspace(0,4,1000))\n", - "weights = np.ones(sample_size)/sample_size\n", - "\n", - "bins2 = 10**(np.linspace(0,4,100))\n", - "weights2 = np.ones(sample_size)/sample_size/10\n", - "\n", - "axes = plt.hist(bac_sum,bins=bins2,weights=weights2,alpha=0.7)\n", - "\n", - "r = plt.hist(plants[:,0],bins=bins,weights=weights,alpha=0.7)\n", - "plt.legend(['Bacteria', 'Plants'])\n", - "plt.xscale('log')\n", - "plt.xlabel('Biomass [Gt C]')\n", - "plt.ylabel('Probability')\n", - "plt.gca().set_yticks([])\n", - "#plt.savefig('Figure.png',dpi=300)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/.ipynb_checkpoints/fungi_biomass-checkpoint.ipynb b/fungi/.ipynb_checkpoints/fungi_biomass-checkpoint.ipynb deleted file mode 100644 index 699c033..0000000 --- a/fungi/.ipynb_checkpoints/fungi_biomass-checkpoint.ipynb +++ /dev/null @@ -1,296 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import numpy as np\n", - "import pandas as pd\n", - "import sys\n", - "sys.path.insert(0, '../statistics_helper')\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of fungi\n", - "We use our best estimates for the total biomass of soil microbes and the fraction of fungi out of the total biomass of soil 
microbes to estimate the total biomass of fungi." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "results = pd.read_excel('fungi_biomass_estimate.xlsx')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These are our best estimates for the different parameters required for the estimate, along with the associated uncertainties" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Total biomass of soil microbes2.0e+16g C2.0e+00
1Fraction of fungi ou out the total biomass of ...6.0e-01Unitless2.8e+00
2Biomass of marine fungi3.2e+14Gt C1.0e+01
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units \\\n", - "0 Total biomass of soil microbes 2.0e+16 g C \n", - "1 Fraction of fungi ou out the total biomass of ... 6.0e-01 Unitless \n", - "2 Biomass of marine fungi 3.2e+14 Gt C \n", - "\n", - " Uncertainty \n", - "0 2.0e+00 \n", - "1 2.8e+00 \n", - "2 1.0e+01 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of soil fungi is 12 Gt C\n", - "The uncertainty associated with the estimate for the biomass of soil fungi is 3.5-fold\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of fungi\n", - "soil_fungi_biomass = results.loc[[0,1],'Value'].prod()\n", - "print('Our best estimate for the total biomass of soil fungi is %.f Gt C' %(soil_fungi_biomass/1e15))\n", - "\n", - "# Propagate the uncertainty associated with each parameter to the final estimate\n", - "\n", - "soil_fungi_biomass_CI = CI_prod_prop(results.loc[[0,1],'Uncertainty'])\n", - "\n", - "print('The uncertainty associated with the estimate for the biomass of soil fungi is %.1f-fold' %soil_fungi_biomass_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply all the relevant parameters to arrive at our best estimate for the biomass of fungi, and propagate the uncertainties associated with each parameter to calculate the uncertainty associated with the estimate for the total biomass. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We add to the our estimate of the biomass of soil fungi our estimates for the contribution of marine and deep subsurface fungi. For marine fungi, we project an uncertainty of 10-fold (similar to our uncertainties for other marine taxa." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "marine_fungi = results.loc[2,'Value']\n", - "marine_fungi_CI = results.loc[2,'Uncertainty']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine all the biomass contributions of fungi from the different environments, and combine their uncertainties:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of fungi is 12 Gt C\n", - "The uncertainty associated with the estimate for the biomass of fungi is 3.3-fold\n" - ] - } - ], - "source": [ - "total_fungi_biomass = soil_fungi_biomass + marine_fungi\n", - "\n", - "print('Our best estimate for the total biomass of fungi is %.f Gt C' %(total_fungi_biomass/1e15))\n", - "\n", - "# Propagate the uncertainty associated with each parameter to the final estimate\n", - "\n", - "mul_CI = CI_sum_prop(np.array([soil_fungi_biomass, marine_fungi]), np.array([ soil_fungi_biomass_CI, marine_fungi_CI]))\n", - "\n", - "print('The uncertainty associated with the estimate for the biomass of fungi is %.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of fungal cells\n", - "To estimate the total number of fungal cells we divide our biomass estimate by an average carbon\n", - "content per fungal cell. 
We very roughly estimate the volume of fungal cells to be ≈100 μm$^3$\n", - "based on [Veses et al.](https://doi.org/10.1111/j.1365-2958.2008.06545.x), and thus we estimate a carbon content of a cell to be ≈15 pg C cell$^{-1}$.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of fungal cells is ≈8e+26.\n" - ] - } - ], - "source": [ - "# Carbon content of a single fungal cell based on Veses et al.\n", - "carbon_content = 15e-12\n", - "\n", - "# Calculate the total number of fungal cells\n", - "soil_fungi_num = soil_fungi_biomass/carbon_content\n", - "marine_fungi_num = marine_fungi/carbon_content\n", - "print('Our best estimate for the total number of fungal cells is ≈%.0e.' %(soil_fungi_num+marine_fungi_num))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Feed soil fungi results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Fungi','Terrestrial'), \n", - " col=['Biomass [Gt C]', 'Uncertainty','Total uncertainty'],\n", - " values=[soil_fungi_biomass/1e15,soil_fungi_biomass_CI, mul_CI],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed marine fungi results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Fungi','Marine'), \n", - " col=['Biomass [Gt C]', 'Uncertainty'],\n", - " values=[marine_fungi/1e15,marine_fungi_CI],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed soil fungi results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Fungi','Terrestrial'), \n", - " col=['Number of individuals'],\n", - " values=soil_fungi_num,\n", - " path='../results.xlsx')\n", - "\n", - "# Feed marine fungi results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Fungi','Marine'), \n", - " col=['Number of individuals'],\n", - " values=marine_fungi_num,\n", - " path='../results.xlsx')\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb b/fungi/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb deleted file mode 100644 index 2fd6442..0000000 --- a/fungi/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/.ipynb_checkpoints/soil_biomass-checkpoint.ipynb b/fungi/.ipynb_checkpoints/soil_biomass-checkpoint.ipynb deleted file mode 100644 index 4ce805e..0000000 --- a/fungi/.ipynb_checkpoints/soil_biomass-checkpoint.ipynb +++ /dev/null @@ -1,77 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of soil microbes\n", - "\n", - "In order to estimate the total biomass of soil microbes, we rely on two recent studies - [Xu et 
al.](http://dx.doi.org/10.1111/geb.12029) and [Serna-Chavez et al.](http://dx.doi.org/10.1111/geb.12070)\n", - "\n", - "We use the final estimates in each of the studies as two independent estimates of the biomass of soil microbes. Xu et al. estimate a biomass of ≈23 Gt C of soil microbes, whereas Serna-Chavez et al. estimate ≈15 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from scipy.stats import gmean\n", - "\n", - "# Define the values for the estimates of the biomass of soil microbes from Xu et al. and Serna-Chavez et al.\n", - "xu = 23e15\n", - "serna_chavez = 14.6e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the biomass of soil microbes, we use the geometric mean of the values from Xu et al. and Serna-Chavez et al." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Our best estimate is the geometric mean of values from Xu et al. and Serna-Chavez et al.\n", - "best_estimate = gmean([xu,serna_chavez])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The estimates reported in Xu et al. and Serna-Chavez et al. are for microbial biomass in the top 1 meter of soil. To take into account microbial biomass in depths lower than 1 meter, we try to estimate the fraction of microbial biomass in the top 1 meter out of the total biomass of soil microbes.\n", - "\n", - "Xu et al. 
uses regression equations from $$y = \\sqrt{a^2 + b^2}$$" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb b/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb deleted file mode 100644 index fc7bc38..0000000 --- a/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-checkpoint.ipynb +++ /dev/null @@ -1,486 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from fraction_helper import *\n", - "pd.options.display.float_format = '{:,.3f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of fungi out of the biomass of soil microbes\n", - "Our estimate for the fraction of fungi out of the biomass of soil microbes is based on a study by [Joergensen & Wichern](http://dx.doi.org/10.1016/j.soilbio.2008.08.017). Joergensen & Wichern survey the fraction of fungi out of the total microbial biomass using several independent methods. The data in Joergensen & Wichern contains measurements of the fraction of fungi out of the total biomass of soil microbes in four different soil types - arable soil, forest soil, grassland soil and litter. 
We rely on measurement collected in these four soil types using two independent methods - microscopy and measurement of cell wall components.\n", - "\n", - "Here is a sample of the data from Joergensen & Wichern:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceFractionNMethodType
0Shields et al. (1973)0.86015MicroscopyArable
1West (1986)0.7505MicroscopyArable
2West (1986)0.58010MicroscopyForest
3Ingham and Horton (1987)0.09010MicroscopyArable
4Neely et al. (1991)0.64072MicroscopyLitter
\n", - "
" - ], - "text/plain": [ - " Reference Fraction N Method Type\n", - "0 Shields et al. (1973) 0.860 15 Microscopy Arable\n", - "1 West (1986) 0.750 5 Microscopy Arable\n", - "2 West (1986) 0.580 10 Microscopy Forest\n", - "3 Ingham and Horton (1987) 0.090 10 Microscopy Arable\n", - "4 Neely et al. (1991) 0.640 72 Microscopy Litter" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "data = pd.read_excel('fungi_fraction_data.xlsx',skiprows=1)\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our general methodology for calculating the fraction of fungi out of the biomass of soil microbes is the following. We calculate the geometric mean of all values reported from the same soil type using the same method. This gives us estimates for characteric fraction of fungi in each soil type for each method. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MethodMicroscopyglucosamine and muramic acid
Type
Arable0.3120.747
Forest0.4760.714
Grassland0.2510.687
Litter0.6430.784
\n", - "
" - ], - "text/plain": [ - "Method Microscopy glucosamine and muramic acid\n", - "Type \n", - "Arable 0.312 0.747\n", - "Forest 0.476 0.714\n", - "Grassland 0.251 0.687\n", - "Litter 0.643 0.784" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def groupby_geo_frac_mean(input):\n", - " return frac_mean(input['Fraction'],weights=input['N'])\n", - "\n", - "type_method_mean = data.groupby(['Method','Type']).apply(groupby_geo_frac_mean).unstack(level=0)\n", - "type_method_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the geometric mean of the characteristic fractions from different soil types using the same method. This gives us a characteristic fraction of fungi for each of the two methods." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Microscopy 0.414\n", - "glucosamine and muramic acid 0.735\n", - "dtype: float64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "method_mean = type_method_mean.apply(frac_mean)\n", - "method_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the last stage, we calculate the geometric mean of the characteristic values from the two methods. We use the geometric mean as our best estimate for the fraction of fungi out of the total biomass of soil microbes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of fungi out of the total biomass of fungi is ≈58%\n" - ] - } - ], - "source": [ - "best_estimate = frac_mean(method_mean)\n", - "print('Our best estimate for the fraction of fungi out of the total biomass of fungi is ≈' + '{:,.0f}%'.format(best_estimate*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "To calculate the uncertainty associated with the estimate for the fraction of fungi out of the total biomass of soil microbes, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty.\n", - "\n", - "**Variability of studies using the same method and done in the same soil type**
\n", - "We calculate the 95% confidence interval of the values reported by studies performed in the same soil type and using the same method.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MethodMicroscopyglucosamine and muramic acid
Type
Arable2.8111.049
Forest1.2701.182
Grassland2.4931.096
Litter1.2131.155
\n", - "
" - ], - "text/plain": [ - "Method Microscopy glucosamine and muramic acid\n", - "Type \n", - "Arable 2.811 1.049\n", - "Forest 1.270 1.182\n", - "Grassland 2.493 1.096\n", - "Litter 1.213 1.155" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def groupby_frac_CI(input):\n", - " return frac_CI(input['Fraction'])\n", - "\n", - "type_method_CI = data.groupby(['Method','Type']).apply(groupby_frac_CI).unstack(level=0)\n", - "type_method_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Variability of fractions from different soil types measured using the same method**
\n", - "We calculate the 95% confidence interval of the characteristic values from each soil type measured in the same method." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Microscopy 1.534\n", - "glucosamine and muramic acid 1.058\n", - "dtype: float64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "intra_method_CI = type_method_mean.apply(frac_CI)\n", - "intra_method_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Variability of fraction measured using different methods**
\n", - "We calculate the 95% confidence interval of the characteristic values from each method." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95% confidence interval of the characteristic values from each method is ≈1.8-fold\n" - ] - } - ], - "source": [ - "inter_method_CI = frac_CI(method_mean)\n", - "print('The 95' + '%'+' confidence interval of the characteristic values from each method is ≈%.1f-fold' % inter_method_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We choose the highest uncertainty among the uncertianties we collected which is ≈3-fold, as our projection for the uncertainty of the fraction of fungi out of the total biomass of soil microbes.\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of fungi out of the total biomass of microbes:58.3%\n", - "Uncertainty associated with the estimate of the total biomass of soil microbes ≈2.8-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([type_method_CI.values.flatten().max(),intra_method_CI.max(),inter_method_CI])\n", - "print('Fraction of fungi out of the total biomass of microbes:' +'{:.1f}%'.format(best_estimate*100))\n", - "print('Uncertainty associated with the estimate of the total biomass of soil microbes ≈%.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Fraction of fungi ou out the total biomass of soil microbes',\n", - " 'Value': '{0:.1f}'.format(best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-median-checkpoint.ipynb b/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-median-checkpoint.ipynb deleted file mode 100644 index fc7bc38..0000000 --- a/fungi/fungi_fraction/.ipynb_checkpoints/fungi_fraction-median-checkpoint.ipynb +++ /dev/null @@ -1,486 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from fraction_helper import *\n", - "pd.options.display.float_format = '{:,.3f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of fungi out of the biomass of soil microbes\n", - "Our estimate for the fraction of fungi out of the biomass of soil microbes is based on a study by [Joergensen & Wichern ](http://dx.doi.org/10.1016/j.soilbio.2008.08.017). Joergensen & Wichern survey the fraction of fungi out of the total microbial biomass using several independent methods. The data in Joergensen & Wichern contains measurements of the fraction of fungi out of the total biomass of soil microbes in four differennt soil types - arable soil, forest soil, grassland soil and litter. 
We rely on measurement collected in these four soil types using two independent methods - microscopy and measurement of cell wall components.\n", - "\n", - "Here is a sample of the data from Joergensen & Wichern:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceFractionNMethodType
0Shields et al. (1973)0.86015MicroscopyArable
1West (1986)0.7505MicroscopyArable
2West (1986)0.58010MicroscopyForest
3Ingham and Horton (1987)0.09010MicroscopyArable
4Neely et al. (1991)0.64072MicroscopyLitter
\n", - "
" - ], - "text/plain": [ - " Reference Fraction N Method Type\n", - "0 Shields et al. (1973) 0.860 15 Microscopy Arable\n", - "1 West (1986) 0.750 5 Microscopy Arable\n", - "2 West (1986) 0.580 10 Microscopy Forest\n", - "3 Ingham and Horton (1987) 0.090 10 Microscopy Arable\n", - "4 Neely et al. (1991) 0.640 72 Microscopy Litter" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "data = pd.read_excel('fungi_fraction_data.xlsx',skiprows=1)\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our general methodology for calculating the fraction of fungi out of the biomass of soil microbes is the following. We calculate the geometric mean of all values reported from the same soil type using the same method. This gives us estimates for characteric fraction of fungi in each soil type for each method. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MethodMicroscopyglucosamine and muramic acid
Type
Arable0.3120.747
Forest0.4760.714
Grassland0.2510.687
Litter0.6430.784
\n", - "
" - ], - "text/plain": [ - "Method Microscopy glucosamine and muramic acid\n", - "Type \n", - "Arable 0.312 0.747\n", - "Forest 0.476 0.714\n", - "Grassland 0.251 0.687\n", - "Litter 0.643 0.784" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def groupby_geo_frac_mean(input):\n", - " return frac_mean(input['Fraction'],weights=input['N'])\n", - "\n", - "type_method_mean = data.groupby(['Method','Type']).apply(groupby_geo_frac_mean).unstack(level=0)\n", - "type_method_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then calculate the geometric mean of the characteristic fractions from different soil types using the same method. This gives us a characteristic fraction of fungi for each of the two methods." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Microscopy 0.414\n", - "glucosamine and muramic acid 0.735\n", - "dtype: float64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "method_mean = type_method_mean.apply(frac_mean)\n", - "method_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the last stage, we calculate the geometric mean of the characteristic values from the two methods. We use the geometric mean as our best estimate for the fraction of fungi out of the total biomass of soil microbes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of fungi out of the total biomass of fungi is ≈58%\n" - ] - } - ], - "source": [ - "best_estimate = frac_mean(method_mean)\n", - "print('Our best estimate for the fraction of fungi out of the total biomass of fungi is ≈' + '{:,.0f}%'.format(best_estimate*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "\n", - "To calculate the uncertainty associated with the estimate for the fraction of fungi out of the total biomass of number of of bacteria and archaea, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty.\n", - "\n", - "**Variability of studies using the same method and done in the same soil type**
\n", - "We calculate the 95% confidence confidence interval of the values reported by studies performed in the same soil type and using the same method.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MethodMicroscopyglucosamine and muramic acid
Type
Arable2.8111.049
Forest1.2701.182
Grassland2.4931.096
Litter1.2131.155
\n", - "
" - ], - "text/plain": [ - "Method Microscopy glucosamine and muramic acid\n", - "Type \n", - "Arable 2.811 1.049\n", - "Forest 1.270 1.182\n", - "Grassland 2.493 1.096\n", - "Litter 1.213 1.155" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def groupby_frac_CI(input):\n", - " return frac_CI(input['Fraction'])\n", - "\n", - "type_method_CI = data.groupby(['Method','Type']).apply(groupby_frac_CI).unstack(level=0)\n", - "type_method_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Variability of fractions from different soil types measured using the same method**
\n", - "We calculate the 95% confidence interval of the characteristic values from each soil type measured in the same method." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Microscopy 1.534\n", - "glucosamine and muramic acid 1.058\n", - "dtype: float64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "intra_method_CI = type_method_mean.apply(frac_CI)\n", - "intra_method_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Variability of fraction measured using different methods**
\n", - "We calculate the 95% confidence interval of the characteristic values from each method." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95% confidence interval of the characteristic values from each method is ≈1.8-fold\n" - ] - } - ], - "source": [ - "inter_method_CI = frac_CI(method_mean)\n", - "print('The 95' + '%'+' confidence interval of the characteristic values from each method is ≈%.1f-fold' % inter_method_CI)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We choose the highest uncertainty among the uncertianties we collected which is ≈3-fold, as our projection for the uncertainty of the fraction of fungi out of the total biomass of soil microbes.\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraction of fungi out of the total biomass of microbes:58.3%\n", - "Uncertainty associated with the estimate of the total biomass of soil microbes ≈2.8-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([type_method_CI.values.flatten().max(),intra_method_CI.max(),inter_method_CI])\n", - "print('Fraction of fungi out of the total biomass of microbes:' +'{:.1f}%'.format(best_estimate*100))\n", - "print('Uncertainty associated with the estimate of the total biomass of soil microbes ≈%.1f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Fraction of fungi ou out the total biomass of soil microbes',\n", - " 'Value': '{0:.1f}'.format(best_estimate),\n", - " 'Units': 'Unitless',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-checkpoint.ipynb b/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-checkpoint.ipynb deleted file mode 100644 index d4a5b2a..0000000 --- a/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-checkpoint.ipynb +++ /dev/null @@ -1,774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of marine fungi\n", - "To estimate the total biomass of marine fungi, we consider different locations in which marine fungi might reside, and estimate the total biomass of fungi in each region. 
The main regions we consider are epipelagic and deep-sea planktonic fungi, and particle-attached fungi.\n", - "\n", - "## Epipelagic fungi\n", - "To estimate the total biomass of epipelagic free-living fungi, we rely on studies using two independent methods: qPCR and direct counts of the concentration of fungi.\n", - "\n", - "### qPCR-based method\n", - "Our qPCR-based estimate measured the ratio between DNA copy numbers for bacteria and fugni in the West Pacific Warm Pool ([Wang et al.](https://doi.org/10.1371/journal.pone.0101523)). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StationDepth [m]Basidiomycota [ng µl^-1]Ascomycota [ng µl^-1]Bacteria [ng µl^-1]
0S251.5e-013.8e-032.8e+00
1S2251.0e-013.5e-032.5e+00
2S2501.7e-011.3e-032.5e+00
3S2751.8e-012.9e-039.8e-01
4S21002.5e-011.3e-031.6e+00
\n", - "
" - ], - "text/plain": [ - " Station Depth [m] Basidiomycota [ng µl^-1] Ascomycota [ng µl^-1] \\\n", - "0 S2 5 1.5e-01 3.8e-03 \n", - "1 S2 25 1.0e-01 3.5e-03 \n", - "2 S2 50 1.7e-01 1.3e-03 \n", - "3 S2 75 1.8e-01 2.9e-03 \n", - "4 S2 100 2.5e-01 1.3e-03 \n", - "\n", - " Bacteria [ng µl^-1] \n", - "0 2.8e+00 \n", - "1 2.5e+00 \n", - "2 2.5e+00 \n", - "3 9.8e-01 \n", - "4 1.6e+00 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qPCR_data = pd.read_excel('marine_fungi_data.xlsx','Wang',skiprows=1)\n", - "qPCR_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the ratio of fungal DNA copy number to bacterial DNA copy number:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal DNA copy number and bacterial DNA copy number is ≈12%\n" - ] - } - ], - "source": [ - "# Calculate the total DNA copy number of fungi\n", - "fungal_DNA = qPCR_data['Basidiomycota [ng µl^-1]']+qPCR_data['Ascomycota [ng µl^-1]']\n", - "\n", - "# Calculate the mean ratio of fungal DNA copy number and bacterial DNA copy number\n", - "qPCR_fungal_fraction = (fungal_DNA/qPCR_data['Bacteria [ng µl^-1]']).mean()\n", - "\n", - "print('The ratio of fungal DNA copy number and bacterial DNA copy number is ≈%.0f' %(qPCR_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Direct count method\n", - "As an independent method for estimating the total biomass of marine fungi in the epipelagic layer, we use a study which measured the carbon concentration of fungi in the epipelagic layer in upwelling ecosystem off Chile using direct counts ([Gutiérrez et al.]( https://doi.org/10.1007/s00227-010-1552-z)). 
We calculate the average concentration of fungal carbon in relation to the carbon concentration of prokaryotes in the same site:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal carbon and bacterial carbon is ≈27%\n" - ] - } - ], - "source": [ - "# Load data on direct counts of fungal carbon concentration\n", - "direct_data = pd.read_excel('marine_fungi_data.xlsx','Gutiérrez',skiprows=1)\n", - "\n", - "# Calculate the mean fungal carbon concentration\n", - "mean_fungal_conc = direct_data['Fungi carbon concentration [µg C L^-1]'].mean()\n", - "\n", - "# Calculate the mean carbon concentration of prokaryotes\n", - "mean_prok_conc = direct_data['Prokaryote carbon concentration [µg C L^-1]'].mean()\n", - "\n", - "direct_fungal_fraction = mean_fungal_conc/mean_prok_conc\n", - "\n", - "print('The ratio of fungal carbon and bacterial carbon is ≈%.0f' %(direct_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the ratio of fungal and prokaryote carbon, we use the geometric mean of the ratios estimated based on qPCR and direct counts:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal carbon and bacterial carbon is ≈18%\n" - ] - } - ], - "source": [ - "best_fungal_fraction = gmean([qPCR_fungal_fraction,direct_fungal_fraction])\n", - "print('The ratio of fungal carbon and bacterial carbon is ≈%.0f' %(best_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of fungi using qPCR, we rely on our estimate from the total biomass of bacteria and archaea in the top 200 meters, which we estimate in the marine bacteria and archaea section:" - ] - }, - { - "cell_type": 
"code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of bacteria and archaea in the epipelagic layer is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Load total biomass of marine bacteria and archaea\n", - "marine_prok_biomass = pd.read_excel('../../bacteria_archaea/marine/marine_prok_biomass_estimate.xlsx')\n", - "\n", - "# Load our estimate of the fraction of prokaryote biomass in the epipelagic layer\n", - "epi_frac = pd.read_excel('marine_fungi_data.xlsx','Bacteria biomass')\n", - "\n", - "# Calculate the biomass of prokaryotes in the epipelagic realm\n", - "epi_prok_biomass = marine_prok_biomass.iloc[0:2,1].prod()*epi_frac['Value']*1e-15\n", - "\n", - "print('Our estimate for the total biomass of bacteria and archaea in the epipelagic layer is ≈%.1f Gt C' %(epi_prok_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total biomass of fungi in the epipelagic layer by multiplying the total biomass of prokaryotes by the ratio of fungal and prokaryote biomass we calculated:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of free-living epipelagic fungi based of qPCR is ≈0.07 Gt C\n" - ] - } - ], - "source": [ - "best_epi_fungi = epi_prok_biomass*best_fungal_fraction\n", - "\n", - "print('Our estimate for the total biomass of free-living epipelagic fungi based of qPCR is ≈%.2f Gt C' %(best_epi_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deep-sea fungi\n", - "In the deep ocean, recent studies have quantified the contribution of fungi to the total 18S rDNA of microbial eukaryotes ([Pernice et al.](http://dx.doi.org/10.1038/ismej.2015.170)). Pernice et al. estimate ≈15% of the 18S rDNA sequences are fungal. 
The biomass concentration of deep-sea microbial eukaryotes was measured by [Pernice et al.](https://dx.doi.org/10.1038/ismej.2014.168). Here are the results of the measurements:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Min depth [m]Max depth [m]Microbial eukaryotes biomass density [pg C mL^-1]Protist SEProkaryote biomass density [pg C mL^-1]Prokaryote SE
020045028046837152
145170015023661160
2701140011228534106
314014000501430959
\n", - "
" - ], - "text/plain": [ - " Min depth [m] Max depth [m] \\\n", - "0 200 450 \n", - "1 451 700 \n", - "2 701 1400 \n", - "3 1401 4000 \n", - "\n", - " Microbial eukaryotes biomass density [pg C mL^-1] Protist SE \\\n", - "0 280 46 \n", - "1 150 23 \n", - "2 112 28 \n", - "3 50 14 \n", - "\n", - " Prokaryote biomass density [pg C mL^-1] Prokaryote SE \n", - "0 837 152 \n", - "1 661 160 \n", - "2 534 106 \n", - "3 309 59 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pernice_data = pd.read_excel('marine_fungi_data.xlsx','Pernice',skiprows=1)\n", - "pernice_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total biomass of microbial eukaryotes by multiplying the the measured biomass densities by the depth range of the measurements, and applying the concentrations to the total ≈3.6×10$^{14}\\ m^2$ of ocean." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of deep-sea microbial eukaryotes is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "depth_range = pernice_data['Max depth [m]']-pernice_data['Min depth [m]']\n", - "\n", - "# Convert units: mL to m^3, pg C to g C\n", - "unit_conversion = 1e6*1e-12\n", - "ocean_area = 3.6e14\n", - "\n", - "# Calculate the total biomass of deep-sea microbial eukaryotes\n", - "miceuk_biomass = (depth_range*pernice_data['Microbial eukaryotes biomass density [pg C mL^-1]']).sum()*unit_conversion*ocean_area\n", - "\n", - "print('Our best estimate for the biomass of deep-sea microbial eukaryotes is ≈%.1f Gt C' %(miceuk_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the biomass of deep-sea fungi, we multiply our estimate of the total biomass of deep-sea microbial eukaryotes with the estimate by Pernice et al. 
of the fraction of the 18S rDNA sequences of deep-sea microbial eukaryotes contributed by fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the biomass of deep-sea fungi based on 18S rDNA sequencing is ≈0.02 Gt C\n" - ] - } - ], - "source": [ - "deep_sea_fungi = miceuk_biomass*0.15\n", - "\n", - "print('Our estimate of the biomass of deep-sea fungi based on 18S rDNA sequencing is ≈%.2f Gt C' %(deep_sea_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pernice et al. were mainly focused on measuring the biomass of heterotrophic protists, and thus they might capture only unicellular fungi and not filamentous fungi. To take into account the possibility of deep-sea filamentous fungi, we extend our estimate of the ratio between planktonic fungi and prokaryotes to the mesopelagic and bathypelagic realms." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the biomass of deep-sea fungi based on the ratio between fungal and prokaryote biomass is ≈0.17 Gt C\n" - ] - } - ], - "source": [ - "# Estimate the total biomass of prokaryotes in the mesopelagic and bathypelagic layers\n", - "meso_bathy_prok_biomass = marine_prok_biomass.iloc[0:2,1].prod()*1e-15*(1-epi_frac['Value'])\n", - "\n", - "# Apply the ratio between fungal and prokaryote biomass to the mesopelagic and bathypelagic layers\n", - "meso_bathy_fungi = best_fungal_fraction*meso_bathy_prok_biomass\n", - "\n", - "print('Our estimate of the biomass of deep-sea fungi based on the ratio between fungal and prokaryote biomass is ≈%.2f Gt C' %(meso_bathy_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate of the biomass of deep-sea planktonic fungi, we use the geometric mean 
of the two estimates based on 18S rDNA sequencing and the ratio between fungal and prokaryote biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the biomass of deep-sea fungi is ≈0.05 Gt C\n" - ] - } - ], - "source": [ - "best_deep_fungi = gmean([deep_sea_fungi,meso_bathy_fungi])\n", - "print('Our best estimate of the biomass of deep-sea fungi is ≈%.2f Gt C' %(best_deep_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Particle-attached fungi\n", - "To estimate the total biomass of particle-attached fungi, we rely on measurements of the biomass ratio between fungi and prokaryotes on marine particles in the bathypelagic layer ([Bochdansky et al.](http://dx.doi.org/10.1038/ismej.2016.113)). Bochdansky et al. use several different methods to estimate the biomass of fungi on particles, and provide a range of estimates for the ratio between the biomass of fungi and prokaryotes for each method. Here are the estimates provided in Bochdansky et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Min biomass ratioMax biomass ratio
Method
Newell and Statzell-Tallman3.5e-011.4e+00
Menden-Deuer and Lessard4.4e-012.0e+00
van Veen and Paul5.3e-015.3e-01
\n", - "
" - ], - "text/plain": [ - " Min biomass ratio Max biomass ratio\n", - "Method \n", - "Newell and Statzell-Tallman 3.5e-01 1.4e+00\n", - "Menden-Deuer and Lessard 4.4e-01 2.0e+00\n", - "van Veen and Paul 5.3e-01 5.3e-01" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "poc_fungi_biomass_data = pd.read_excel('marine_fungi_data.xlsx','Bochdansky',skiprows=1,index_col=0)\n", - "poc_fungi_biomass_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer, we first calculate the geometric mean of the range provided by Bochdansky et al. for each method:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Newell and Statzell-Tallman 7.0e-01\n", - "Menden-Deuer and Lessard 9.3e-01\n", - "van Veen and Paul 5.3e-01\n", - "dtype: float64" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "method_mean_fungi_ratio = poc_fungi_biomass_data.apply(gmean,axis=1)\n", - "method_mean_fungi_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate of the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer, we use the geometric mean of the mean estimates from each method used in Bochdansky et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer is ≈0.7\n" - ] - } - ], - "source": [ - "best_poc_fungi_ratio = gmean(method_mean_fungi_ratio)\n", - "print('Our best estimate of the ratio between the biomass of particle-attached fungi 
and prokaryotes in the bathypelagic layer is ≈%.1f' %best_poc_fungi_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We could not find reliable data on the ratio between the biomass of particle-attached fungi and prokaryotes in shallower layers of the ocean, and thus we apply this ratio throughout all the layers of the ocean. We estimate the total biomass of particle-attached fungi in the ocean by using our estimate of the total biomass of particle-attached prokaryotes, and multiplying it by our best estimate for the ratio between the biomass of fungi and prokaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of particle-attached fungi is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "# Use our estimate of the total biomass of particle-attached prokaryotes\n", - "poc_prok_biomass = marine_prok_biomass.iloc[[0,1,4],1].prod()*1e-15\n", - "\n", - "# Calculate the total biomass of particle-attached fungi\n", - "poc_fungi_biomass = poc_prok_biomass*best_poc_fungi_ratio\n", - "\n", - "print('Our best estimate of the total biomass of particle-attached fungi is ≈%.1f Gt C' %(poc_fungi_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate of the total biomass of marine fungi is a sum of our estimates for the biomass of epipelagic planktonic fungi, deep-sea planktonic fungi and particle-attached fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of marine fungi is ≈0.3 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = poc_fungi_biomass + best_epi_fungi + best_deep_fungi\n", - "print('Our best estimate of the total biomass of marine fungi is ≈%.1f Gt C' %(best_estimate/1e15))" - ] 
- }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "The available data on the biomass of marine fungi is scarce, and thus we chose to use a crude estimate of an order of magnitude as our projection for the uncertainty associated with the estimate of the total biomass of marine fungi. Our final parameters are" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of marine fungi: 0.3 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of marine fungi ≈10-fold\n" - ] - } - ], - "source": [ - "mul_CI = 10\n", - "\n", - "\n", - "print('Biomass of marine fungi: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of marine fungi ≈%.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Biomass of marine fungi',\n", - " 'Value': float(best_estimate),\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-median-checkpoint.ipynb b/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-median-checkpoint.ipynb deleted file mode 100644 index d4a5b2a..0000000 --- 
a/fungi/marine_fungi/.ipynb_checkpoints/marine_fungi-median-checkpoint.ipynb +++ /dev/null @@ -1,774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of marine fungi\n", - "To estimate the total biomass of marine fungi, we consider different locations in which marine fungi might reside, and estimate the total biomass of fungi in each region. The main regions we consider are epipelagic and deep-sea planktonic fungi, and particle-attached fungi.\n", - "\n", - "## Epipelagic fungi\n", - "To estimate the total biomass of epipelagic free-living fungi, we rely on studies using two independent methods: qPCR and direct counts of the concentration of fungi.\n", - "\n", - "### qPCR-based method\n", - "Our qPCR-based estimate measured the ratio between DNA copy numbers for bacteria and fugni in the West Pacific Warm Pool ([Wang et al.](https://doi.org/10.1371/journal.pone.0101523)). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StationDepth [m]Basidiomycota [ng µl^-1]Ascomycota [ng µl^-1]Bacteria [ng µl^-1]
0S251.5e-013.8e-032.8e+00
1S2251.0e-013.5e-032.5e+00
2S2501.7e-011.3e-032.5e+00
3S2751.8e-012.9e-039.8e-01
4S21002.5e-011.3e-031.6e+00
\n", - "
" - ], - "text/plain": [ - " Station Depth [m] Basidiomycota [ng µl^-1] Ascomycota [ng µl^-1] \\\n", - "0 S2 5 1.5e-01 3.8e-03 \n", - "1 S2 25 1.0e-01 3.5e-03 \n", - "2 S2 50 1.7e-01 1.3e-03 \n", - "3 S2 75 1.8e-01 2.9e-03 \n", - "4 S2 100 2.5e-01 1.3e-03 \n", - "\n", - " Bacteria [ng µl^-1] \n", - "0 2.8e+00 \n", - "1 2.5e+00 \n", - "2 2.5e+00 \n", - "3 9.8e-01 \n", - "4 1.6e+00 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qPCR_data = pd.read_excel('marine_fungi_data.xlsx','Wang',skiprows=1)\n", - "qPCR_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the ratio of fungal DNA copy number to bacterial DNA copy number:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal DNA copy number and bacterial DNA copy number is ≈12%\n" - ] - } - ], - "source": [ - "# Calculate the total DNA copy number of fungi\n", - "fungal_DNA = qPCR_data['Basidiomycota [ng µl^-1]']+qPCR_data['Ascomycota [ng µl^-1]']\n", - "\n", - "# Calculate the mean ratio of fungal DNA copy number and bacterial DNA copy number\n", - "qPCR_fungal_fraction = (fungal_DNA/qPCR_data['Bacteria [ng µl^-1]']).mean()\n", - "\n", - "print('The ratio of fungal DNA copy number and bacterial DNA copy number is ≈%.0f' %(qPCR_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Direct count method\n", - "As an independent method for estimating the total biomass of marine fungi in the epipelagic layer, we use a study which measured the carbon concentration of fungi in the epipelagic layer in upwelling ecosystem off Chile using direct counts ([Gutiérrez et al.]( https://doi.org/10.1007/s00227-010-1552-z)). 
We calculate the average concentration of fungal carbon in relation to the carbon concentration of prokaryotes in the same site:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal carbon and bacterial carbon is ≈27%\n" - ] - } - ], - "source": [ - "# Load data on direct counts of fungal carbon concentration\n", - "direct_data = pd.read_excel('marine_fungi_data.xlsx','Gutiérrez',skiprows=1)\n", - "\n", - "# Calculate the mean fungal carbon concentration\n", - "mean_fungal_conc = direct_data['Fungi carbon concentration [µg C L^-1]'].mean()\n", - "\n", - "# Calculate the mean carbon concentration of prokaryotes\n", - "mean_prok_conc = direct_data['Prokaryote carbon concentration [µg C L^-1]'].mean()\n", - "\n", - "direct_fungal_fraction = mean_fungal_conc/mean_prok_conc\n", - "\n", - "print('The ratio of fungal carbon and bacterial carbon is ≈%.0f' %(direct_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the ratio of fungal and prokaryote carbon, we use the geometric mean of the ratios estimated based on qPCR and direct counts:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The ratio of fungal carbon and bacterial carbon is ≈18%\n" - ] - } - ], - "source": [ - "best_fungal_fraction = gmean([qPCR_fungal_fraction,direct_fungal_fraction])\n", - "print('The ratio of fungal carbon and bacterial carbon is ≈%.0f' %(best_fungal_fraction*100) + '%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of fungi using qPCR, we rely on our estimate from the total biomass of bacteria and archaea in the top 200 meters, which we estimate in the marine bacteria and archaea section:" - ] - }, - { - "cell_type": 
"code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of bacteria and archaea in the epipelagic layer is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Load total biomass of marine bacteria and archaea\n", - "marine_prok_biomass = pd.read_excel('../../bacteria_archaea/marine/marine_prok_biomass_estimate.xlsx')\n", - "\n", - "# Load our estimate of the fraction of prokaryote biomass in the epipelagic layer\n", - "epi_frac = pd.read_excel('marine_fungi_data.xlsx','Bacteria biomass')\n", - "\n", - "# Calculate the biomass of prokaryotes in the epipelagic realm\n", - "epi_prok_biomass = marine_prok_biomass.iloc[0:2,1].prod()*epi_frac['Value']*1e-15\n", - "\n", - "print('Our estimate for the total biomass of bacteria and archaea in the epipelagic layer is ≈%.1f Gt C' %(epi_prok_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total biomass of fungi in the epipelagic layer by multiplying the total biomass of prokaryotes by the ratio of fungal and prokaryote biomass we calculated:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total biomass of free-living epipelagic fungi based of qPCR is ≈0.07 Gt C\n" - ] - } - ], - "source": [ - "best_epi_fungi = epi_prok_biomass*best_fungal_fraction\n", - "\n", - "print('Our estimate for the total biomass of free-living epipelagic fungi based of qPCR is ≈%.2f Gt C' %(best_epi_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Deep-sea fungi\n", - "In the deep ocean, recent studies have quantified the contribution of fungi to the total 18S rDNA of microbial eukaryotes ([Pernice et al.](http://dx.doi.org/10.1038/ismej.2015.170)). Pernice et al. estimate ≈15% of the 18S rDNA sequences are fungal. 
The biomass concentration of deep-sea microbial eukaryotes was measured by [Pernice et al.](https://dx.doi.org/10.1038/ismej.2014.168). Here are the results of the measurements:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Min depth [m]Max depth [m]Microbial eukaryotes biomass density [pg C mL^-1]Protist SEProkaryote biomass density [pg C mL^-1]Prokaryote SE
020045028046837152
145170015023661160
2701140011228534106
314014000501430959
\n", - "
" - ], - "text/plain": [ - " Min depth [m] Max depth [m] \\\n", - "0 200 450 \n", - "1 451 700 \n", - "2 701 1400 \n", - "3 1401 4000 \n", - "\n", - " Microbial eukaryotes biomass density [pg C mL^-1] Protist SE \\\n", - "0 280 46 \n", - "1 150 23 \n", - "2 112 28 \n", - "3 50 14 \n", - "\n", - " Prokaryote biomass density [pg C mL^-1] Prokaryote SE \n", - "0 837 152 \n", - "1 661 160 \n", - "2 534 106 \n", - "3 309 59 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pernice_data = pd.read_excel('marine_fungi_data.xlsx','Pernice',skiprows=1)\n", - "pernice_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We estimate the total biomass of microbial eukaryotes by multiplying the the measured biomass densities by the depth range of the measurements, and applying the concentrations to the total ≈3.6×10$^{14}\\ m^2$ of ocean." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of deep-sea microbial eukaryotes is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "depth_range = pernice_data['Max depth [m]']-pernice_data['Min depth [m]']\n", - "\n", - "# Convert units: mL to m^3, pg C to g C\n", - "unit_conversion = 1e6*1e-12\n", - "ocean_area = 3.6e14\n", - "\n", - "# Calculate the total biomass of deep-sea microbial eukaryotes\n", - "miceuk_biomass = (depth_range*pernice_data['Microbial eukaryotes biomass density [pg C mL^-1]']).sum()*unit_conversion*ocean_area\n", - "\n", - "print('Our best estimate for the biomass of deep-sea microbial eukaryotes is ≈%.1f Gt C' %(miceuk_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the biomass of deep-sea fungi, we multiply our estimate of the total biomass of deep-sea microbial eukaryotes with the estimate by Pernice et al. 
of the fraction of the 18S rDNA sequences of deep-sea microbial eukaryotes contributed by fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the biomass of deep-sea fungi based on 18S rDNA sequencing is ≈0.02 Gt C\n" - ] - } - ], - "source": [ - "deep_sea_fungi = miceuk_biomass*0.15\n", - "\n", - "print('Our estimate of the biomass of deep-sea fungi based on 18S rDNA sequencing is ≈%.2f Gt C' %(deep_sea_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pernice et al. were mainly focused on measuring the biomass of heterotrophic protists, and thus they might capture only unicellular fungi and not filamentous fungi. To take into account the possibility of deep-sea filamentous fungi, we extend our estimate of the ratio between planktonic fungi and prokaryotes to the mesopelagic and bathypelagic realms." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the biomass of deep-sea fungi based on the ratio between fungal and prokaryote biomass is ≈0.17 Gt C\n" - ] - } - ], - "source": [ - "# Estimate the total biomass of prokaryotes in the mesopelagic and bathypelagic layers\n", - "meso_bathy_prok_biomass = marine_prok_biomass.iloc[0:2,1].prod()*1e-15*(1-epi_frac['Value'])\n", - "\n", - "# Apply the ratio between fungal and prokaryote biomass to the mesopelagic and bathypelagic layers\n", - "meso_bathy_fungi = best_fungal_fraction*meso_bathy_prok_biomass\n", - "\n", - "print('Our estimate of the biomass of deep-sea fungi based on the ratio between fungal and prokaryote biomass is ≈%.2f Gt C' %(meso_bathy_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate of the biomass of deep-sea planktonic fungi, we use the geometric mean 
of the two estimates based on 18S rDNA sequencing and the ratio between fungal and prokaryote biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the biomass of deep-sea fungi is ≈0.05 Gt C\n" - ] - } - ], - "source": [ - "best_deep_fungi = gmean([deep_sea_fungi,meso_bathy_fungi])\n", - "print('Our best estimate of the biomass of deep-sea fungi is ≈%.2f Gt C' %(best_deep_fungi/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Particle-attached fungi\n", - "To estimate the total biomass of particle-attached fungi, we rely on measurements of the biomass ratio between fungi and prokaryotes on marine particles in the bathypelagic layer ([Bochdansky et al.](http://dx.doi.org/10.1038/ismej.2016.113)). Bochdansky et al. use several different methods to estimate the biomass of fungi on particles, and provide a range of estimates for the ratio between the biomass of fungi and prokaryotes for each method. Here are the estimates provided in Bochdansky et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Min biomass ratioMax biomass ratio
Method
Newell and Statzell-Tallman3.5e-011.4e+00
Menden-Deuer and Lessard4.4e-012.0e+00
van Veen and Paul5.3e-015.3e-01
\n", - "
" - ], - "text/plain": [ - " Min biomass ratio Max biomass ratio\n", - "Method \n", - "Newell and Statzell-Tallman 3.5e-01 1.4e+00\n", - "Menden-Deuer and Lessard 4.4e-01 2.0e+00\n", - "van Veen and Paul 5.3e-01 5.3e-01" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "poc_fungi_biomass_data = pd.read_excel('marine_fungi_data.xlsx','Bochdansky',skiprows=1,index_col=0)\n", - "poc_fungi_biomass_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer, we first calculate the geometric mean of the range provided by Bochdansky et al. for each method:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Method\n", - "Newell and Statzell-Tallman 7.0e-01\n", - "Menden-Deuer and Lessard 9.3e-01\n", - "van Veen and Paul 5.3e-01\n", - "dtype: float64" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "method_mean_fungi_ratio = poc_fungi_biomass_data.apply(gmean,axis=1)\n", - "method_mean_fungi_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate of the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer, we use the geometric mean of the mean estimates from each method used in Bochdansky et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the ratio between the biomass of particle-attached fungi and prokaryotes in the bathypelagic layer is ≈0.7\n" - ] - } - ], - "source": [ - "best_poc_fungi_ratio = gmean(method_mean_fungi_ratio)\n", - "print('Our best estimate of the ratio between the biomass of particle-attached fungi 
and prokaryotes in the bathypelagic layer is ≈%.1f' %best_poc_fungi_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We could not find reliable data on the ratio between the biomass of particle-attached fungi and prokaryotes in shallower layers of the ocean, and thus we apply this ratio throughout all the layers of the ocean. We estimate the total biomass of particle-attached fungi in the ocean by using our estimate of the total biomass of particle-attached prokaryotes, and multiplying it by our best estimate for the ratio between the biomass of fungi and prokaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of particle-attached fungi is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "# Use our estimate of the total biomass of particle-attached prokaryotes\n", - "poc_prok_biomass = marine_prok_biomass.iloc[[0,1,4],1].prod()*1e-15\n", - "\n", - "# Calculate the total biomass of particle-attached fungi\n", - "poc_fungi_biomass = poc_prok_biomass*best_poc_fungi_ratio\n", - "\n", - "print('Our best estimate of the total biomass of particle-attached fungi is ≈%.1f Gt C' %(poc_fungi_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate of the total biomass of marine fungi is a sum of our estimates for the biomass of epipelagic planktonic fungi, deep-sea planktonic fungi and particle-attached fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of marine fungi is ≈0.3 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = poc_fungi_biomass + best_epi_fungi + best_deep_fungi\n", - "print('Our best estimate of the total biomass of marine fungi is ≈%.1f Gt C' %(best_estimate/1e15))" - ] 
- }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "The available data on the biomass of marine fungi is scarce, and thus we chose to use a crude estimate of an order of magnitude as our projection for the uncertainty associated with the estimate of the total biomass of marine fungi. Our final parameters are" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of marine fungi: 0.3 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of marine fungi ≈10-fold\n" - ] - } - ], - "source": [ - "mul_CI = 10\n", - "\n", - "\n", - "print('Biomass of marine fungi: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of marine fungi ≈%.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Biomass of marine fungi',\n", - " 'Value': float(best_estimate),\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': \"{0:.1f}\".format(mul_CI)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-checkpoint.ipynb b/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-checkpoint.ipynb deleted file mode 100644 index 960b631..0000000 --- 
a/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-checkpoint.ipynb +++ /dev/null @@ -1,553 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "from scipy.stats import gmean\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from CI_helper import *\n", - "from fraction_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of soil microbes\n", - "\n", - "In order to estimate the total biomass of soil microbes, we rely on two recent studies - [Xu et al.](http://dx.doi.org/10.1111/geb.12029) and [Serna-Chavez et al.](http://dx.doi.org/10.1111/geb.12070)\n", - "\n", - "We use the final estimates in each of the studies as two independent estimates of the biomass of soil microbes. Xu et al. estimate a biomass of ≈23 Gt C of soil microbes, whereas Serna-Chavez et al. estimate ≈15 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Define the values for the estimates of the biomass of soil microbes from Xu et al. and Serna-Chavez et al.\n", - "xu = 23.2e15\n", - "serna_chavez = 14.6e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the biomass of soil microbes, we use the geometric mean of the values from Xu et al. and Serna-Chavez et al." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil microbes is ≈18 Gt C\n" - ] - } - ], - "source": [ - "# Our best estimate is the geometric mean of values from Xu et al. 
and Serna-Chavez et al.\n", - "best_estimate = gmean([xu,serna_chavez])\n", - "print('Our best estimate for the biomass of soil microbes is ≈%.0f Gt C' % (best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cells in deeper layers\n", - "The estimates reported in Xu et al. and Serna-Chavez et al. are for microbial biomass in the top 1 meter of soil. To take into account microbial biomass in depths lower than 1 meter, we try to estimate the fraction of microbial biomass in the top 1 meter out of the total biomass of soil microbes.\n", - "\n", - "Xu et al. extrapolate the microbial biomass across the soil profile based on empirical equations for the distribution of root biomass along soil depth from [Jackson et al.](http://dx.doi.org/10.1007/BF00333714). The empirical equations are biome-specific, and follow the general form: $$Y = 1-\\beta^d$$ Where Y is the cumulative fraction of roots, d is depth in centimeters, and $\\beta$ is a coefficient fitted for each biome. This means the $\\beta^d$ represents the fraction of roots present in layers lower deeper than d centimeters.\n", - "\n", - "We use the fitted $\\beta$ coefficients from Jackson et al., along with estimates for the total microbial biomass in the top meter fo soils in each biome from Xu et al. to estimate the amount of biomass present in soil layers deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of microbial biomass in layers deeper than 1 meter based on Xu et al. 
is 2%\n" - ] - } - ], - "source": [ - "# Load data on the microbial biomass from each biome and the coefficients for the depth distribution of roots\n", - "# in each biome\n", - "data = pd.read_excel('soil_microbial_biomass_data.xlsx',skiprows=1)\n", - "\n", - "# Calculate the fraction of biomass deeper than 100 centimeters for each biome\n", - "biome_deeper_frac = (data['beta']**100)\n", - "\n", - "# Calculate the relative fraction of total microbial biomass that is present in layers deeper than 1 meter\n", - "total_deeper_relative_fraction = (data['Total microbial biomass 100 cm (g C)']*biome_deeper_frac).sum()/xu\n", - "print('The fraction of microbial biomass in layers deeper than 1 meter based on Xu et al. is ' + '{:,.0f}%'.format(total_deeper_relative_fraction*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an additional source for estimating the fraction of biomass of microbes in soil layers deeper than 1 meter, we use the concentration of bacterial cells present in layers deeper than 1 meter reported in [Whitman et al.](https://www.ncbi.nlm.nih.gov/pubmed/9618454). Whitman et al. estimate that in forests there are ≈$4×10^7$ cells per gram in the top 1 meter and ≈$10^6$ cells per gram in depths of 1-8 meters. For other soils, Whitman et al. estimate ≈$2×10^9$ cells per gram in the top 1 meterand ≈$10^8$ cells per gram in depth of 1-8 meters. 
Assuming cells in deeper layers are similar in size to cells in the top 1 meter, this is equivalent to: " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of cells found in soil layers deeper than 1 meter is 15% in forests and 26% in other soils.\n", - "The average fraction of cells found in deeper layers is 20%\n" - ] - } - ], - "source": [ - "# Concentration of cells in top 1 meter of forest soils\n", - "forest_upper = 4e7\n", - "# Top 1 meter is 1 meter in depth\n", - "forest_upper_depth = 1\n", - "# Concentration of cells in depths of 1-7 meters of forest soils\n", - "forest_lower = 1e6\n", - "# The deeper layer of soils is 1-8 meters - 7 meters in depth\n", - "forest_lower_depth = 7\n", - "# Concentration of cells in top 1 meter of other soils\n", - "other_upper = 2e9\n", - "# Top 1 meter is 1 meter in depth\n", - "other_upper_depth = 1\n", - "# Concentration of cells in depths of 1-7 meters of other soils\n", - "other_lower = 1e8\n", - "# The deeper layer of soils is 1-8 meters - 7 meters in depth\n", - "other_lower_depth = 7\n", - "\n", - "#Calculate the fraction of cells present in deeper layers of soil in forests and other soils\n", - "forest_lower_frac = forest_lower*forest_lower_depth/(forest_lower*forest_lower_depth + forest_upper*forest_upper_depth)\n", - "other_lower_frac = other_lower*other_lower_depth/(other_lower*other_lower_depth + other_upper*other_upper_depth)\n", - "whitman_mean_frac = frac_mean(np.array([forest_lower_frac,other_lower_frac]))\n", - "print('The fraction of cells found in soil layers deeper than 1 meter is ' + '{:,.0f}%'.format(forest_lower_frac*100) + ' in forests and ' '{:,.0f}%'.format(other_lower_frac*100) + ' in other soils.')\n", - "print('The average fraction of cells found in deeper layers is ' + '{:,.0f}%'.format(100*whitman_mean_frac))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "As our estimate for the fraction of biomass present in layers deeper than 1 meter, we take the geometric mean of the fractions estimated by Xu et al. and by Whitman et al." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of biomass present in layers deeper than 1 meter is 7%\n", - "Our best estimate for the biomass of soil microbes, including contributions from microbes present in layers deeper than 1 meter is 20 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates by Xu et al. and Whitman et al.\n", - "mean_deep_frac = frac_mean(np.array([total_deeper_relative_fraction,whitman_mean_frac]))\n", - "print('Our best estimate for the fraction of biomass present in layers deeper than 1 meter is ' + '{:,.0f}%'.format(100*mean_deep_frac))\n", - "\n", - "# Correct out best estimate to account for the biomass of microbes in soil layers deeper than 1 meter\n", - "best_estimate_corrected = best_estimate*(1+mean_deep_frac)\n", - "print('Our best estimate for the biomass of soil microbes, including contributions from microbes present in layers deeper than 1 meter is %.0f Gt C' % (best_estimate_corrected/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To calculate the uncertainty associated with the estimate for the total number of of bacteria and archaea, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty.\n", - "\n", - "## Total biomass of microbes in the top 1 meter\n", - "\n", - "### Intra-study uncertainty\n", - "As noted above, our estimate is based on two studies which report the total biomass of soil microbes - [Xu et al.](http://dx.doi.org/10.1111/geb.12029) and [Serna-Chavez et al.](http://dx.doi.org/10.1111/geb.12070). Xu et al. 
does not report uncertainties associated with the total estimate of microbial biomass. However, Xu et al. report 95% confidence intervals for the average microbial biomass densities in each biome. We use these ranges as a measure of the intra-study uncertainty in Xu et al. The highest uncertainty across biomes is ≈1.5-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biome95% confidence interval
0Boreal forest1.5
1Temperate coniferous forest1.2
2Temperate broadleaf forest1.2
3Tropical/subtropical forest1.2
4Mixed forest1.1
5Grassland1.1
6Shrub1.3
7Tundra1.4
8Desert1.3
9Natural wetlands1.3
10Cropland1.0
11Pasture1.1
\n", - "
" - ], - "text/plain": [ - " Biome 95% confidence interval\n", - "0 Boreal forest 1.5\n", - "1 Temperate coniferous forest 1.2\n", - "2 Temperate broadleaf forest 1.2\n", - "3 Tropical/subtropical forest 1.2\n", - "4 Mixed forest 1.1\n", - "5 Grassland 1.1\n", - "6 Shrub 1.3\n", - "7 Tundra 1.4\n", - "8 Desert 1.3\n", - "9 Natural wetlands 1.3\n", - "10 Cropland 1.0\n", - "11 Pasture 1.1" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We calculate the upper and lower multiplicative 95% confidence interval of the average microbial \n", - "# biomass density for each biome\n", - "xu_upper_CI = data['upper 95% confidence interval of Cmic']/data['Cmic (0-30 cm) [mmol C kg^-1]']\n", - "xu_lower_CI = data['Cmic (0-30 cm) [mmol C kg^-1]']/data['lower 95% confidence interval of Cmic']\n", - "\n", - "# Our estimate for the final uncertainty is the average of the upper and lower confidence intervals.\n", - "data['95% confidence interval'] = (pd.concat([xu_upper_CI,xu_lower_CI],axis=1).mean(axis=1))\n", - "data[['Biome','95% confidence interval']]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The maximal intra-study uncertainty in Xu et al. across biomes is 1.5-fold\n" - ] - } - ], - "source": [ - "print('The maximal intra-study uncertainty in Xu et al. across biomes is %.1f-fold' % data['95% confidence interval'].max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Serna-Chavez et al. report 95% confidence internal of the final estimate for the total biomass of soil microbes. It is not clear from the paper what does this uncertainty represents, but it most probably represents 95% range of values from bootstrapping the parameters of the model used to estimate the local biomass density of soil microbes at each location. 
The reported 95% confidence interval is ≈0.007 Gt C, which represents an uncertainty of ≈1.0005-fold.\n", - "\n", - "### Interstudy uncertainty\n", - "We estimate the 95% multiplicative error of the geometric mean of the estimates from Xu et al. and Serna-Chavez et al. " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty is ≈1.6-fold\n" - ] - } - ], - "source": [ - "mul_CI_top = geo_CI_calc([xu,serna_chavez])\n", - "print('The interstudy uncertainty is ≈%.1f-fold' % mul_CI_top)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximal uncertainty out of the collection of intra-study and interstudy uncertainties as our projection for the uncertainty associated with the biomass of soil microbes. The maximal uncertainty is the interstudy uncertainty of ≈1.6-fold. \n", - "\n", - "## Fraction of biomass in soil layers deeper than 1 meter\n", - "### Intra-study uncertainty\n", - "We estimate the intra-study uncetainty in the fraction of microbial biomass located in soil layers deeper than 1 meter in Xu et al. and in Whitman et al. For Xu et al. we calculating the 95% confidence interval of the $\\beta^d$ measure across biomes. For Whitman et al. we calculate the 95% confidence interval of the estimates for the fraction of bacterial cells in depth of 1-8 meters out of the total number of cells in forests and other soil types." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈2.7-fold for Xu et al. 
and ≈1.7-fold for Whitman et al.\n" - ] - } - ], - "source": [ - "xu_deep_frac_CI = frac_CI(data['beta']**100)\n", - "whitman_deep_frac_CI = frac_CI(np.array([forest_lower_frac,other_lower_frac]))\n", - "print('The intra-study uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈%.1f-fold for Xu et al. and ≈%.1f-fold for Whitman et al.' %(xu_deep_frac_CI,whitman_deep_frac_CI))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "### Interstudy uncertainty\n", - "We calculate the 95% confidence interval from the average estimates of Xu et al. and Whitman et al. and use them as a measure of the interstudy uncertainty." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈8.2-fold.\n" - ] - } - ], - "source": [ - "inter_deep_frac_CI = frac_CI(np.array([total_deeper_relative_fraction,whitman_mean_frac]))\n", - "print('The interstudy uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈%.1f-fold.' %(inter_deep_frac_CI))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We take the highest uncertainty of the intra-study and interstudy uncertainty of ≈7.5-fold. This uncertainty relates to the fraction of biomass present in soil layers deeper than 1 meter. 
In order to propagate this uncertainty to the estimate of the total biomass of soil microbes, we sample 1000 times from a lognormal distribution with a mean and std of the fraction biomass from layers deeper than 1 meter, and add 1 to each sample to estimate the coefficient by which the total biomass of soil microbes should be corrected.\n", - "The 97.5% and 2.5% percentiles of the resulting distribution of coefficients will be used as an estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the estimate of the fraction of the biomass of microbes in soil layers deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the esti,ate of the fraction of the biomass of microbes in soil layers deeper than 1 meter is 1.3-fold\n" - ] - } - ], - "source": [ - "# Calculate the maximal uncertainty between the intra-study and interstudy uncertainty\n", - "best_deep_frac_CI = np.max([xu_deep_frac_CI,whitman_deep_frac_CI,inter_deep_frac_CI])\n", - "# Sample the fraction of biomass in soil layers deeper than 1 meter from a lognormal distribution \n", - "deep_frac_dist = np.random.lognormal(np.log(mean_deep_frac),np.log(best_deep_frac_CI**(1./1.96)),1000)\n", - "# Calculate the distribution of coefficients by which the total biomass of soil microbes should be corrected\n", - "cor_coeff_dist = 1 + deep_frac_dist\n", - "\n", - "# Calculate the 97.5% and 2.5% percentiles of the correction coefficient distribution, relative to the mean\n", - "cor_coeff_upper_CI = np.percentile(cor_coeff_dist,97.5)/(1+mean_deep_frac)\n", - "cor_coeff_lower_CI = (1+mean_deep_frac)/np.percentile(cor_coeff_dist,2.5)\n", - "# our estimate for the 95% confidence interval is the average between the 97.5% and 2.5% fold changes\n", - 
"cor_coeff_CI = np.mean([cor_coeff_upper_CI,cor_coeff_lower_CI])\n", - "print('Our estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the esti,ate of the fraction of the biomass of microbes in soil layers deeper than 1 meter is %.1f-fold' % cor_coeff_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total uncertainty\n", - "To estimate the total uncertainty of the biomass of soil microbes, we combine the ucnertainty assoicated with the estiamte of biomass of soil microbes in the top 1 meter of soil, and the uncertainty of the correction coefficient to include biomass contribution from soil layer deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total uncertainty for the biomass of soil microbes is 1.7-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(np.array([mul_CI_top,cor_coeff_CI]))\n", - "print('The total uncertainty for the biomass of soil microbes is %.1f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We also take into account additional sources of uncertainty which are difficult to quantify, as detailed in the section about soil fungi in the Supplementary Information. We thus project an uncertainty of ≈2-fold for the biomass of soil microbes. 
Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total biomass of soil microbes: 18 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of soil microbes ≈2-fold\n" - ] - } - ], - "source": [ - "print('Total biomass of soil microbes: %.0f Gt C' % (best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of soil microbes ≈2-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total biomass of soil microbes',\n", - " 'Value': int(best_estimate_corrected),\n", - " 'Units': 'g C',\n", - " 'Uncertainty': \"{0:.1f}\".format(2)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-median-checkpoint.ipynb b/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-median-checkpoint.ipynb deleted file mode 100644 index 42f2bab..0000000 --- a/fungi/soil_microbial_biomass/.ipynb_checkpoints/soil_microbial_biomass-median-checkpoint.ipynb +++ /dev/null @@ -1,553 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "from scipy.stats import gmean\n", - "import pandas as pd\n", 
- "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper')\n", - "from CI_helper import *\n", - "from fraction_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of soil microbes\n", - "\n", - "In order to estimate the total biomass of soil microbes, we rely on two recent studies - [Xu et al.](http://dx.doi.org/10.1111/geb.12029) and [Serna-Chavez et al.](http://dx.doi.org/10.1111/geb.12070)\n", - "\n", - "We use the final estimates in each of the studies as two independent estimates of the biomass of soil microbes. Xu et al. estimate a biomass of ≈23 Gt C of soil microbes, whereas Serna-Chavez et al. estimate ≈15 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Define the values for the estimates of the biomass of soil microbes from Xu et al. and Serna-Chavez et al.\n", - "xu = 23.2e15\n", - "serna_chavez = 14.6e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the biomass of soil microbes, we use the geometric mean of the values from Xu et al. and Serna-Chavez et al." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil microbes is ≈18 Gt C\n" - ] - } - ], - "source": [ - "# Our best estimate is the geometric mean of values from Xu et al. and Serna-Chavez et al.\n", - "best_estimate = gmean([xu,serna_chavez])\n", - "print('Our best estimate for the biomass of soil microbes is ≈%.0f Gt C' % (best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cells in deeper layers\n", - "The estimates reported in Xu et al. and Serna-Chavez et al. 
are for microbial biomass in the top 1 meter of soil. To take into account microbial biomass in depths lower than 1 meter, we try to estimate the fraction of microbial biomass in the top 1 meter out of the total biomass of soil microbes.\n", - "\n", - "Xu et al. extrapolate the microbial biomass across the soil profile based on empirical equations for the distribution of root biomass along soil depth from [Jackson et al.](http://dx.doi.org/10.1007/BF00333714). The empirical equations are biome-specific, and follow the general form: $$Y = 1-\\beta^d$$ Where Y is the cumulative fraction of roots, d is depth in centimeters, and $\\beta$ is a coefficient fitted for each biome. This means the $\\beta^d$ represents the fraction of roots present in layers lower deeper than d centimeters.\n", - "\n", - "We use the fitted $\\beta$ coefficients from Jackson et al., along with estimates for the total microbial biomass in the top meter fo soils in each biome from Xu et al. to estimate the amount of biomass present in soil layers deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of microbial biomass in layers deeper than 1 meter based on Xu et al. 
is 2%\n" - ] - } - ], - "source": [ - "# Load data on the microbial biomass from each biome and the coefficients for the depth distribution of roots\n", - "# in each biome\n", - "data = pd.read_excel('soil_microbial_biomass_data.xlsx',skiprows=1)\n", - "\n", - "# Calculate the fraction of biomass deeper than 100 centimeters for each biome\n", - "biome_deeper_frac = (data['beta']**100)\n", - "\n", - "# Calculate the relative fraction of total microbial biomass that is present in layers deeper than 1 meter\n", - "total_deeper_relative_fraction = (data['Total microbial biomass 100 cm (g C)']*biome_deeper_frac).sum()/xu\n", - "print('The fraction of microbial biomass in layers deeper than 1 meter based on Xu et al. is ' + '{:,.0f}%'.format(total_deeper_relative_fraction*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an additional source for estimating the fraction of biomass of microbes in soil layers deeper than 1 meter, we use the concentration of bacterial cells present in layers deeper than 1 meter reported in [Whitman et al.](https://www.ncbi.nlm.nih.gov/pubmed/9618454). Whitman et al. estimate that in forests there are ≈$4×10^7$ cells per gram in the top 1 meter and ≈$10^6$ cells per gram in depths of 1-8 meters. For other soils, Whitman et al. estimate ≈$2×10^9$ cells per gram in the top 1 meterand ≈$10^8$ cells per gram in depth of 1-8 meters. 
Assuming cells in deeper layers are similar in size to cells in the top 1 meter, this is equivalent to: " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The fraction of cells found in soil layers deeper than 1 meter is 15% in forests and 26% in other soils.\n", - "The average fraction of cells found in deeper layers is 20%\n" - ] - } - ], - "source": [ - "# Concentration of cells in top 1 meter of forest soils\n", - "forest_upper = 4e7\n", - "# Top 1 meter is 1 meter in depth\n", - "forest_upper_depth = 1\n", - "# Concentration of cells in depths of 1-7 meters of forest soils\n", - "forest_lower = 1e6\n", - "# The deeper layer of soils is 1-8 meters - 7 meters in depth\n", - "forest_lower_depth = 7\n", - "# Concentration of cells in top 1 meter of other soils\n", - "other_upper = 2e9\n", - "# Top 1 meter is 1 meter in depth\n", - "other_upper_depth = 1\n", - "# Concentration of cells in depths of 1-7 meters of other soils\n", - "other_lower = 1e8\n", - "# The deeper layer of soils is 1-8 meters - 7 meters in depth\n", - "other_lower_depth = 7\n", - "\n", - "#Calculate the fraction of cells present in deeper layers of soil in forests and other soils\n", - "forest_lower_frac = forest_lower*forest_lower_depth/(forest_lower*forest_lower_depth + forest_upper*forest_upper_depth)\n", - "other_lower_frac = other_lower*other_lower_depth/(other_lower*other_lower_depth + other_upper*other_upper_depth)\n", - "whitman_mean_frac = frac_mean(np.array([forest_lower_frac,other_lower_frac]))\n", - "print('The fraction of cells found in soil layers deeper than 1 meter is ' + '{:,.0f}%'.format(forest_lower_frac*100) + ' in forests and ' '{:,.0f}%'.format(other_lower_frac*100) + ' in other soils.')\n", - "print('The average fraction of cells found in deeper layers is ' + '{:,.0f}%'.format(100*whitman_mean_frac))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "As our estimate for the fraction of biomass present in layers deeper than 1 meter, we take the geometric mean of the fractions estimated by Xu et al. and by Whitman et al." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of biomass present in layers deeper than 1 meter is 7%\n", - "Our best estimate for the biomass of soil microbes, including contributions from microbes present in layers deeper than 1 meter is 20 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the estimates by Xu et al. and Whitman et al.\n", - "mean_deep_frac = frac_mean(np.array([total_deeper_relative_fraction,whitman_mean_frac]))\n", - "print('Our best estimate for the fraction of biomass present in layers deeper than 1 meter is ' + '{:,.0f}%'.format(100*mean_deep_frac))\n", - "\n", - "# Correct out best estimate to account for the biomass of microbes in soil layers deeper than 1 meter\n", - "best_estimate_corrected = best_estimate*(1+mean_deep_frac)\n", - "print('Our best estimate for the biomass of soil microbes, including contributions from microbes present in layers deeper than 1 meter is %.0f Gt C' % (best_estimate_corrected/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To calculate the uncertainty associated with the estimate for the total number of of bacteria and archaea, we first collect all available uncertainties and then take the largest value as our best projection for the uncertainty.\n", - "\n", - "## Total biomass of microbes in the top 1 meter\n", - "\n", - "### Intra-study uncertainty\n", - "As noted above, our estimate is based on two studies which report the total biomass of soil microbes - [Xu et al.](http://dx.doi.org/10.1111/geb.12029) and [Serna-Chavez et al.](http://dx.doi.org/10.1111/geb.12070). Xu et al. 
does not report uncertainties associated with the total estimate of microbial biomass. However, Xu et al. report 95% confidence intervals for the average microbial biomass densities in each biome. We use these ranges as a measure of the intra-study uncertainty in Xu et al. The highest uncertainty across biomes is ≈1.5-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Biome95% confidence interval
0Boreal forest1.5
1Temperate coniferous forest1.2
2Temperate broadleaf forest1.2
3Tropical/subtropical forest1.2
4Mixed forest1.1
5Grassland1.1
6Shrub1.3
7Tundra1.4
8Desert1.3
9Natural wetlands1.3
10Cropland1.0
11Pasture1.1
\n", - "
" - ], - "text/plain": [ - " Biome 95% confidence interval\n", - "0 Boreal forest 1.5\n", - "1 Temperate coniferous forest 1.2\n", - "2 Temperate broadleaf forest 1.2\n", - "3 Tropical/subtropical forest 1.2\n", - "4 Mixed forest 1.1\n", - "5 Grassland 1.1\n", - "6 Shrub 1.3\n", - "7 Tundra 1.4\n", - "8 Desert 1.3\n", - "9 Natural wetlands 1.3\n", - "10 Cropland 1.0\n", - "11 Pasture 1.1" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We calculate the upper and lower multiplicative 95% confidence interval of the average microbial \n", - "# biomass density for each biome\n", - "xu_upper_CI = data['upper 95% confidence interval of Cmic']/data['Cmic (0-30 cm) [mmol C kg^-1]']\n", - "xu_lower_CI = data['Cmic (0-30 cm) [mmol C kg^-1]']/data['lower 95% confidence interval of Cmic']\n", - "\n", - "# Our estimate for the final uncertainty is the average of the upper and lower confidence intervals.\n", - "data['95% confidence interval'] = (pd.concat([xu_upper_CI,xu_lower_CI],axis=1).mean(axis=1))\n", - "data[['Biome','95% confidence interval']]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The maximal intra-study uncertainty in Xu et al. across biomes is 1.5-fold\n" - ] - } - ], - "source": [ - "print('The maximal intra-study uncertainty in Xu et al. across biomes is %.1f-fold' % data['95% confidence interval'].max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Serna-Chavez et al. report 95% confidence internal of the final estimate for the total biomass of soil microbes. It is not clear from the paper what does this uncertainty represents, but it most probably represents 95% range of values from bootstrapping the parameters of the model used to estimate the local biomass density of soil microbes at each location. 
The reported 95% confidence interval is ≈0.007 Gt C, which represents an uncertainty of ≈1.0005-fold.\n", - "\n", - "### Interstudy uncertainty\n", - "We estimate the 95% multiplicative error of the geometric mean of the estimates from Xu et al. and Serna-Chavez et al. " - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty is ≈1.6-fold\n" - ] - } - ], - "source": [ - "mul_CI_top = geo_CI_calc([xu,serna_chavez])\n", - "print('The interstudy uncertainty is ≈%.1f-fold' % mul_CI_top)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximal uncertainty out of the collection of intra-study and interstudy uncertainties as our projection for the uncertainty associated with the biomass of soil microbes. The maximal uncertainty is the interstudy uncertainty of ≈1.6-fold. \n", - "\n", - "## Fraction of biomass in soil layers deeper than 1 meter\n", - "### Intra-study uncertainty\n", - "We estimate the intra-study uncetainty in the fraction of microbial biomass located in soil layers deeper than 1 meter in Xu et al. and in Whitman et al. For Xu et al. we calculating the 95% confidence interval of the $\\beta^d$ measure across biomes. For Whitman et al. we calculate the 95% confidence interval of the estimates for the fraction of bacterial cells in depth of 1-8 meters out of the total number of cells in forests and other soil types." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈2.7-fold for Xu et al. 
and ≈1.7-fold for Whitman et al.\n" - ] - } - ], - "source": [ - "xu_deep_frac_CI = frac_CI(data['beta']**100)\n", - "whitman_deep_frac_CI = frac_CI(np.array([forest_lower_frac,other_lower_frac]))\n", - "print('The intra-study uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈%.1f-fold for Xu et al. and ≈%.1f-fold for Whitman et al.' %(xu_deep_frac_CI,whitman_deep_frac_CI))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "### Interstudy uncertainty\n", - "We calculate the 95% confidence interval from the average estimates of Xu et al. and Whitman et al. and use them as a measure of the interstudy uncertainty." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈8.2-fold.\n" - ] - } - ], - "source": [ - "inter_deep_frac_CI = frac_CI(np.array([total_deeper_relative_fraction,whitman_mean_frac]))\n", - "print('The interstudy uncertainty of the fraction of microbial biomass present in soil layers deeper than 1 meter is ≈%.1f-fold.' %(inter_deep_frac_CI))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We take the highest uncertainty of the intra-study and interstudy uncertainty of ≈7.5-fold. This uncertainty relates to the fraction of biomass present in soil layers deeper than 1 meter. 
In order to propagate this uncertainty to the estimate of the total biomass of soil microbes, we sample 1000 times from a lognormal distribution with a mean and std of the fraction biomass from layers deeper than 1 meter, and add 1 to each sample to estimate the coefficient by which the total biomass of soil microbes should be corrected.\n", - "The 97.5% and 2.5% percentiles of the resulting distribution of coefficients will be used as an estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the estimate of the fraction of the biomass of microbes in soil layers deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the esti,ate of the fraction of the biomass of microbes in soil layers deeper than 1 meter is 1.2-fold\n" - ] - } - ], - "source": [ - "# Calculate the maximal uncertainty between the intra-study and interstudy uncertainty\n", - "best_deep_frac_CI = np.max([xu_deep_frac_CI,whitman_deep_frac_CI,inter_deep_frac_CI])\n", - "# Sample the fraction of biomass in soil layers deeper than 1 meter from a lognormal distribution \n", - "deep_frac_dist = np.random.lognormal(np.log(mean_deep_frac),np.log(best_deep_frac_CI**(1./1.96)),1000)\n", - "# Calculate the distribution of coefficients by which the total biomass of soil microbes should be corrected\n", - "cor_coeff_dist = 1 + deep_frac_dist\n", - "\n", - "# Calculate the 97.5% and 2.5% percentiles of the correction coefficient distribution, relative to the mean\n", - "cor_coeff_upper_CI = np.percentile(cor_coeff_dist,97.5)/(1+mean_deep_frac)\n", - "cor_coeff_lower_CI = (1+mean_deep_frac)/np.percentile(cor_coeff_dist,2.5)\n", - "# our estimate for the 95% confidence interval is the average between the 97.5% and 2.5% fold changes\n", - 
"cor_coeff_CI = np.mean([cor_coeff_upper_CI,cor_coeff_lower_CI])\n", - "print('Our estimate for the uncertainty of the total biomass of soil microbes contributed by the uncertainty in the esti,ate of the fraction of the biomass of microbes in soil layers deeper than 1 meter is %.1f-fold' % cor_coeff_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Total uncertainty\n", - "To estimate the total uncertainty of the biomass of soil microbes, we combine the ucnertainty assoicated with the estiamte of biomass of soil microbes in the top 1 meter of soil, and the uncertainty of the correction coefficient to include biomass contribution from soil layer deeper than 1 meter." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The total uncertainty for the biomass of soil microbes is 1.7-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(np.array([mul_CI_top,cor_coeff_CI]))\n", - "print('The total uncertainty for the biomass of soil microbes is %.1f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We also take into account additional sources of uncertainty which are difficult to quantify, as detailed in the section about soil fungi in the Supplementary Information. We thus project an uncertainty of ≈2-fold for the biomass of soil microbes. 
Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total biomass of soil microbes: 18 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of soil microbes ≈2-fold\n" - ] - } - ], - "source": [ - "print('Total biomass of soil microbes: %.0f Gt C' % (best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of soil microbes ≈2-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../fungi_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Total biomass of soil microbes',\n", - " 'Value': int(best_estimate_corrected),\n", - " 'Units': 'g C',\n", - " 'Uncertainty': \"{0:.1f}\".format(2)\n", - " })\n", - "\n", - "result.to_excel('../fungi_biomass_estimate.xlsx',index=False)\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/plants/.ipynb_checkpoints/plants-checkpoint.ipynb b/plants/.ipynb_checkpoints/plants-checkpoint.ipynb deleted file mode 100644 index 5ffe568..0000000 --- a/plants/.ipynb_checkpoints/plants-checkpoint.ipynb +++ /dev/null @@ -1,331 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "#Load dependencies\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../statistics_helper/')\n", - "from CI_helper import *\n", - 
"from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the biomass of plants\n", - "In order to estimate the biomass of plants, we rely on data generated by [Erb et al.](https://doi.org/10.1038/nature25138), which generated seven different estimates of the global biomass of plants. The seven estimates are:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MapTotal biomass estimate [g C]
0Cell-based maxima of remote sensing maps5.4e+17
1Ruesch & Gibbs 20084.9e+17
2Baccini-based map4.9e+17
3Pan-based map4.7e+17
4Saatchi-based map4.5e+17
5FRA-based map4.0e+17
6Cell-based minima of remote sensing maps3.9e+17
\n", - "
" - ], - "text/plain": [ - " Map Total biomass estimate [g C]\n", - "0 Cell-based maxima of remote sensing maps 5.4e+17\n", - "1 Ruesch & Gibbs 2008 4.9e+17\n", - "2 Baccini-based map 4.9e+17\n", - "3 Pan-based map 4.7e+17\n", - "4 Saatchi-based map 4.5e+17\n", - "5 FRA-based map 4.0e+17\n", - "6 Cell-based minima of remote sensing maps 3.9e+17" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('plant_data.xlsx', skiprows=1)\n", - "data['Total biomass estimate [g C]'] = data['Total biomass estimate [g C]'].astype(float)\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As best estimate of the total biomass of plants, we use the value reported in Erb et al. of 450 Gt C." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of plants is ≈450 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = 450e15\n", - "print('Our best estimate for the biomass of plants is ≈%.0f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Marine plants\n", - "We estimate the total biomass of seagrass from data reported in [Fourqurean et al.](https://doi.org/10.1038/ngeo1477). Fourqurean report an average carbon density in living biomass of seagrass at ≈250 g C $m^{-2}$. They also estimate the total area of seagrass is ≈300,000-600,000 $km^2$. We multiply the average biomass denisty of plants by the average area covered by seagrass to generate our best estimate of the total biomass of seagrass." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of seagrasses is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "# Mean biomass concentration of seagrasses\n", - "mean_biomass_conc_seagrass =250\n", - "\n", - "# Mean area covered by seagrasses\n", - "mean_area = np.average([3e11,6e11])\n", - "\n", - "best_seagrass = mean_biomass_conc_seagrass*mean_area\n", - "print('Our best estimate for the total biomass of seagrasses is ≈%.1f Gt C' %(best_seagrass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For macroalgea, we rely on a range of estimates for the total biomass of macroalgae from De Vooys, which reports 0.0075 Gt C and Cherpy-Roubaud & Sournia which report 2.55 Gt C. We use the geometric mean of this range as a crude estimate of the biomass of macroalgae:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of seagrasses is ≈0.1 Gt C\n" - ] - } - ], - "source": [ - "best_macroalgae = gmean([0.0075e15,2.55e15])\n", - "print('Our best estimate for the total biomass of seagrasses is ≈%.1f Gt C' %(best_macroalgae/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "As noted in the plants section in the Supplementary Information, one possible strategy to assess the uncertainty associated with the estimate of the total biomass of plants is to calculate the 95% confidence interval around the best estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval around our best estimate for the total biomass of plants is ≈1.1-fold\n" - ] - } - ], - 
"source": [ - "estimate_CI = geo_CI_calc(data['Total biomass estimate [g C]'])\n", - "print('The 95 percent confidence interval around our best estimate for the total biomass of plants is ≈%.1f-fold' %estimate_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to account for additional sources of uncertainty not captured by calculating the 95% confidence interval, we use the ratio between uper and lower most estimates relative to our best estimate as our best projection for the uncertainty associated with our estimate of the total biomass of plants:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the total biomass of plants is ≈1.2-fold\n" - ] - } - ], - "source": [ - "upper_CI = data['Total biomass estimate [g C]'].max()/best_estimate\n", - "lower_CI = best_estimate/data['Total biomass estimate [g C]'].min()\n", - "\n", - "mul_CI = np.max([upper_CI,lower_CI])\n", - "\n", - "print('Our best projection for the uncertainty associated with the estimate of the total biomass of plants is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Total number of invididuals\n", - "We estimate the total number of trees, based on a recent study ([Crowther et al.](http://dx.doi.org/10.1038/nature16178)). Include all plant species will definitely increase the estimate dramatically, but due to the high diversity of species and characteristic sizes of different plant species, it is very difficult to estimate the total number of plants in the biosphere. Crowther et al. estimate ≈$3×10^{12}$ trees." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "tot_tree_num = 3e12" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Plants','Plants'), \n", - " col=['Biomass [Gt C]', 'Uncertainty','Total uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI,mul_CI],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed results to Fig. 2C\n", - "# Feed seagrass biomass\n", - "update_fig2c(row=21,col=1,values=best_seagrass/1e15, path='../results.xlsx')\n", - "\n", - "# Feed macroalgae biomass\n", - "update_fig2c(row=22,col=1,values=best_macroalgae/1e15, path='../results.xlsx')\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Plants','Plants'), \n", - " col=['Number of individuals'],\n", - " values=tot_tree_num,\n", - " path='../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/plants/non_wood_biomass/.ipynb_checkpoints/non_wood_biomass-checkpoint.ipynb b/plants/non_wood_biomass/.ipynb_checkpoints/non_wood_biomass-checkpoint.ipynb deleted file mode 100644 index ee45162..0000000 --- a/plants/non_wood_biomass/.ipynb_checkpoints/non_wood_biomass-checkpoint.ipynb +++ /dev/null @@ -1,747 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas 
as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the fraction of plant biomass which is not woody\n", - "To estimate the total non-woody plant biomass, we rely on two methods. The first is to estimate the global average leaf and root mass fractions, and the second is by estimating the total biomass of roots and leaves.\n", - "\n", - "## Method1 - fraction of leaves and roots\n", - "To estimate the global average leaf and root mass fractions, we rely on a recent meta-analysis which collected data on the leaf, shoot and root mass fractions in several different biomes ([Poorter et al.](http://dx.doi.org/10.1111/j.1469-8137.2011.03952.x)). Here are the mean leaf, shoot, and root mass fractions in each biome:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
LMFSMFRMF
Biome
Tundra0.090.370.69
Grassland0.170.270.77
Boreal forest0.040.750.21
Temperate forest0.030.760.21
Tropical forest0.020.820.16
Woodland0.060.570.36
Shrubland0.090.490.47
Desert0.090.280.56
\n", - "
" - ], - "text/plain": [ - " LMF SMF RMF\n", - "Biome \n", - "Tundra 0.09 0.37 0.69\n", - "Grassland 0.17 0.27 0.77\n", - "Boreal forest 0.04 0.75 0.21\n", - "Temperate forest 0.03 0.76 0.21\n", - "Tropical forest 0.02 0.82 0.16\n", - "Woodland 0.06 0.57 0.36\n", - "Shrubland 0.09 0.49 0.47\n", - "Desert 0.09 0.28 0.56" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data from Poorter et al.\n", - "fractions = pd.read_excel('non_wood_biomass_data.xlsx','Poorter',skiprows=1,index_col=0)\n", - "fractions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate weighted mean of leaf and root mass fractions. We use the fraction of total plant biomass in each biome as our weights from [Erb et al.](http://dx.doi.org/10.1038/ngeo2782) for the weighted mean. Here is the data from Erb et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BiomeTotal biomass [Gt C]Categories included in Poorter
0(Sub)tropical humid-moist forest205Tropical forest
1(Sub)tropical dry forest25Tropical forest
2(Sub)Tropical shrubland, desert, mountain62Shrubland
3Temperate forest22Temperate forest
4Temperate steppe, desert, mountain27Grassland, shrubland
5Boreal forest28Boreal forest
6Boreal tundra, mountain, other33Tundra
\n", - "
" - ], - "text/plain": [ - " Biome Total biomass [Gt C] \\\n", - "0 (Sub)tropical humid-moist forest 205 \n", - "1 (Sub)tropical dry forest 25 \n", - "2 (Sub)Tropical shrubland, desert, mountain 62 \n", - "3 Temperate forest 22 \n", - "4 Temperate steppe, desert, mountain 27 \n", - "5 Boreal forest 28 \n", - "6 Boreal tundra, mountain, other 33 \n", - "\n", - " Categories included in Poorter \n", - "0 Tropical forest \n", - "1 Tropical forest \n", - "2 Shrubland \n", - "3 Temperate forest \n", - "4 Grassland, shrubland \n", - "5 Boreal forest \n", - "6 Tundra " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data on the total plant biomass in each biome from Erb et al.\n", - "biomes = pd.read_excel('non_wood_biomass_data.xlsx','Erb',skiprows=1)\n", - "biomes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The specific biomes in Erb et al. are not fully matching the biomes in Poorter et al., and thus we traslate between the biomes in the two studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our global average for non-woody mass fraction is ≈32 percent\n" - ] - } - ], - "source": [ - "# Calculate the sum of the mass fractions of leaves and roots\n", - "non_wood_frac = (fractions['LMF']+fractions['RMF'])/fractions.sum(axis=1)\n", - "\n", - "# Calculate the total biomass of each biome by the biomes reported in Poorter et al.\n", - "tot_biomass = biomes.groupby('Categories included in Poorter').sum()\n", - "\n", - "# For the temperate steppe, desert and mountain, we use the mean values from grassland and shrubland in Poorter et al.\n", - "non_wood_frac.loc['Grassland, shrubland'] = frac_mean(np.array([non_wood_frac.loc['Grassland'],non_wood_frac.loc['Shrubland']]))\n", - "\n", - "\n", - "# Set the non-woody fraction as a column in the biome data\n", - 
"tot_biomass['Non wood fraction'] = non_wood_frac\n", - "\n", - "# Calculate the weighed average of the non-woody biomass fraction\n", - "mean_non_wood_frac = np.average(tot_biomass['Non wood fraction'], weights= tot_biomass['Total biomass [Gt C]'])\n", - "print('Our global average for non-woody mass fraction is ≈%.0f percent' %(mean_non_wood_frac*100))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our estimate of the total non-woody plant biomass is the product of our best estimate of the total plant biomass and our estimate of the global average non-woody mass fraction:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total non-wood plant biomass based on the fraction of roots and leaves is ≈142 Gt C\n" - ] - } - ], - "source": [ - "\n", - "# Our best estimate for the total biomass\n", - "tot_plant_biomass = 450e15\n", - "\n", - "# Multiply our estimate for the non-woody mass fraction by our estimate\n", - "# of the total plant biomass\n", - "method1_non_wood_biomass = mean_non_wood_frac*tot_plant_biomass\n", - "\n", - "print('Our best estimate for the total non-wood plant biomass based on the fraction of roots and leaves is ≈%.0f Gt C' %(method1_non_wood_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Method2 - total biomass of leaves and roots\n", - "Our second method for estimating the total non-woody plant biomass is based on estimating the total biomass of roots and leaves. 
For roots, we rely on the estimate made by [Jackson et al.](http://dx.doi.org/10.1007/BF00333714):" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "roots_jackson = 146e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of leaves, we rely on biome averages on the leaf area index (LAI) from [Asner et al.](http://dx.doi.org/10.1046/j.1466-822X.2003.00026.x). Here is the data from Asner et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
LAI [m^2 m^-2]
Biome
Desert1.3
Grassland1.7
Tundra1.9
Shrubland2.1
Boreal DBL2.6
Boreal ENL2.7
Crops3.6
Tropical DBL3.9
Boreal/Temperate DNL4.6
Tropical EBL4.8
Temperate DBL5.1
Temperate ENL5.5
Temperate EBL5.7
Wetlands6.3
Plantations8.7
\n", - "
" - ], - "text/plain": [ - " LAI [m^2 m^-2]\n", - "Biome \n", - "Desert 1.3\n", - "Grassland 1.7\n", - "Tundra 1.9\n", - "Shrubland 2.1\n", - "Boreal DBL 2.6\n", - "Boreal ENL 2.7\n", - "Crops 3.6\n", - "Tropical DBL 3.9\n", - "Boreal/Temperate DNL 4.6\n", - "Tropical EBL 4.8\n", - "Temperate DBL 5.1\n", - "Temperate ENL 5.5\n", - "Temperate EBL 5.7\n", - "Wetlands 6.3\n", - "Plantations 8.7" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "biome_LAI = pd.read_excel('non_wood_biomass_data.xlsx','Asner',skiprows=1,index_col=0)\n", - "biome_LAI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use data on the area on each biome from the book \"Biogeochemistry\", and multiply the LAI in each biome by the total area of each biome to estimate the global leaf area:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total leaf area is ≈3.1e+14 m^2\n" - ] - } - ], - "source": [ - "# Load biome area data\n", - "biome_area = pd.read_excel('non_wood_biomass_data.xlsx','Biome area',skiprows=1,index_col=0)\n", - "\n", - "# Calculate the mean LAI for boreal forests\n", - "biome_LAI.loc['Boreal forest'] = gmean(biome_LAI.loc[['Boreal DBL','Boreal ENL']])\n", - "\n", - "# Calculate the mean LAI for temperate forests\n", - "biome_LAI.loc['Temperate forest'] = gmean(biome_LAI.loc[['Temperate DBL','Temperate EBL','Temperate ENL']])\n", - "\n", - "# Calculate the mean LAI for tropical forests\n", - "biome_LAI.loc['Tropical forest'] = gmean(biome_LAI.loc[['Tropical DBL','Tropical EBL']])\n", - "\n", - "# Calculate the mean LAI for temperate grasslands\n", - "biome_LAI.loc['Temperate grassland'] = biome_LAI.loc['Grassland']\n", - "\n", - "# Calculate the mean LAI for tropical savanna\n", - "biome_LAI.loc['Tropical savanna'] = 
gmean(biome_LAI.loc[['Grassland','Shrubland']])\n", - "\n", - "# Multiply the mean LAI in each biome by the total area of each biome\n", - "tot_leaf_area = (biome_LAI['LAI [m^2 m^-2]']*biome_area['Area [m^2]']).sum()\n", - "print('Our estimate for the total leaf area is ≈%.1e m^2' % tot_leaf_area)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the total leaf area into total biomass of leaves, we use an estimate for the average leaf mass per area (LMA) from the Glopnet database [Wright et al.](http://dx.doi.org/10.1038/nature02403):" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the global leaf biomass is ≈16.6 Gt C\n" - ] - } - ], - "source": [ - "# Load the glopnet data\n", - "glopnet_data = pd.read_excel('non_wood_biomass_data.xlsx','glopnet_data',skiprows=1)\n", - "\n", - "# Calculate the geometric mean of the LMA\n", - "geomean_LMA = 10**glopnet_data.loc[glopnet_data['GF']=='T',['log LMA']].mean()\n", - "\n", - "# Convert the global leaf area to global leaf biomass\n", - "tot_leaf_biomass = tot_leaf_area*geomean_LMA/2\n", - "\n", - "print('Our estimate for the global leaf biomass is ≈%.1f Gt C' %(tot_leaf_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum our estimates for the total biomass of roots and leaves to produce our estimate of the total non-woody plant biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total non-wood plant biomass based on estimates of the total biomass of roots and leaves is ≈163 Gt C\n" - ] - } - ], - "source": [ - "method2_non_wood_biomass = tot_leaf_biomass + roots_jackson\n", - "print('Our best estimate for the total non-wood plant biomass based on estimates of the total 
biomass of roots and leaves is ≈%.0f Gt C' %(method2_non_wood_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geometric mean of our estimates from the two methods as our best estimate for the total non-woody plant biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total non-wood plant biomass is ≈152 Gt C\n" - ] - } - ], - "source": [ - "best_non_wood_biomass = gmean([method1_non_wood_biomass,method2_non_wood_biomass])\n", - "print('Our best estimate for the total non-wood plant biomass is ≈%.0f Gt C' %(best_non_wood_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total belowground plant biomass\n", - "To estimate the total belowground plant biomass, we use the same procedure as for estimating the total non-woody plant biomass. We rely on two methods - the first is based on calculating the mean root mass fraction.\n", - "## Method1 - fraction of roots\n", - "To estimate the global average root mass fractions, we rely on a recent meta-analysis which collected data on the leaf, shoot and root mass fractions in several different biomes ([Poorter et al.](http://dx.doi.org/10.1111/j.1469-8137.2011.03952.x)). We calculate the global average root mass fraction by taking into account the relative plant biomass present in each biome, based on data from [Erb et al.](http://dx.doi.org/10.1038/ngeo2782)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the global average root mass fraction is ≈27.2 percent\n" - ] - } - ], - "source": [ - "# Calculate the root mass fraction in each biome based on data from Poorter et al.\n", - "root_frac = (fractions['RMF'])/fractions.sum(axis=1)\n", - "\n", - "# For the temperate steppe, desert and mountain, we use the mean values from grassland and shrubland in Poorter et al.\n", - "root_frac.loc['Grassland, shrubland'] = frac_mean(np.array([root_frac.loc['Grassland'],root_frac.loc['Shrubland']]))\n", - "\n", - "\n", - "# Set the root fraction as a column in the biome data\n", - "tot_biomass['Root fraction'] = root_frac\n", - "\n", - "# Calculate the weighted average root mass fraction\n", - "mean_root_frac = np.average(tot_biomass['Root fraction'], weights= tot_biomass['Total biomass [Gt C]'])\n", - "\n", - "print('Our estimate for the global average root mass fraction is ≈%.1f percent' %(mean_root_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of roots, we multiply the global average root mass fraction by our best estimate for the total plant biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the total root biomass based on the global average root mass fraction is ≈122.6 Gt C\n" - ] - } - ], - "source": [ - "method1_root_biomass = mean_root_frac*tot_plant_biomass\n", - "\n", - "print('Our estimate of the total root biomass based on the global average root mass fraction is ≈%0.1f Gt C' %(method1_root_biomass/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a second source for estimating the global biomass of roots, we rely on the estimate in [Jackson et 
al.](http://dx.doi.org/10.1007/BF00333714). We use the geometric mean of the estimate from the two methods as our best estimate of the total biomass of roots, which we use as our best estimate for the total belowground plant biomass:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total belowground plant biomass is ≈133.8 Gt C\n" - ] - } - ], - "source": [ - "best_root_biomass = gmean([method1_root_biomass,roots_jackson])\n", - "\n", - "print('Our best estimate for the total belowground plant biomass is ≈%0.1f Gt C' %(best_root_biomass/1e15))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# Feed results to Fig S1\n", - "update_results(sheet='FigS1', \n", - " row=('Plants','Plants'), \n", - " col=['Biomass [Gt C]'],\n", - " values=best_non_wood_biomass/1e15,\n", - " path='../../results.xlsx')\n", - "\n", - "# Feed results to Data mentioned in MS\n", - "update_MS_data(row='Biomass of roots',\n", - " values=best_root_biomass/1e15,\n", - " path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/.ipynb_checkpoints/protist_biomass-checkpoint.ipynb b/protists/.ipynb_checkpoints/protist_biomass-checkpoint.ipynb deleted file mode 100644 index 6d73456..0000000 --- a/protists/.ipynb_checkpoints/protist_biomass-checkpoint.ipynb +++ /dev/null @@ -1,250 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - 
}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of protists\n", - "To estimate the total biomass of protists, we combine our estimates for the total biomass of marine and terrestrial protists, which we have generated in the dedicated sections for each group. Our estimates for the biomass of the marine and terrestrial protists are presented below:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Biomass of terrestrial protists1.6e+00Gt C3.7e+00
1Biomass of marine protists2.1e+00Gt C1.0e+01
2Biomass of pico-nanoprotists3.8e-01Gt Cnan
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units Uncertainty\n", - "0 Biomass of terrestrial protists 1.6e+00 Gt C 3.7e+00\n", - "1 Biomass of marine protists 2.1e+00 Gt C 1.0e+01\n", - "2 Biomass of pico-nanoprotists 3.8e-01 Gt C nan" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('protists_biomass_estimate.xlsx')\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of protists, we sum up the contributions from terrestrial and marine protists. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of protists is ≈4 Gt C\n", - "Our projection for the uncertainty of our estimate of the total biomass of archaea is ≈4-fold\n" - ] - } - ], - "source": [ - "best_estimate = data.loc[[0,1],'Value'].sum()\n", - "mul_CI = CI_sum_prop(estimates=data.loc[[0,1],'Value'], mul_CIs=data.loc[[0,1],'Uncertainty'])\n", - "\n", - "print('Our best estimate for the biomass of protists is ≈%.0f Gt C' %best_estimate)\n", - "print('Our projection for the uncertainty of our estimate of the total biomass of archaea is ≈%.0f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of protists\n", - "To estimate the total number of individual protists, we estimate the total number of nano-pico eukaryotes, as they are the smallest eukaryotes and still have significant biomass. The diameter range of pico-nanoplankton is 0.8-5 µm. We use the geometric mean of the radius range, which is ≈1 µm. This means that the mean cell volume is ≈4 $µm^3$. 
We use a conversion equation from biovolume to carbon content reported in [Pernice et al.](https://dx.doi.org/10.1038%2Fismej.2014.168) of: $$carbon\\ content\\ [pg\\ C\\ cell^{-1}] = 0.216×V^{0.939} $$" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We estimate a pico-nanoprotists has a carbon content of ≈1 pg C\n" - ] - } - ], - "source": [ - "# Conversion equation from Pernice et al.\n", - "conversion_eq = lambda x: 0.216*x**0.939\n", - "\n", - "# We estimate a biovolume of ≈4 µm^3 per pico-nano eukaryote\n", - "pico_nano_vol = 4\n", - "\n", - "# Convert biovolume to carbon content\n", - "pico_nano_cc = conversion_eq(pico_nano_vol)\n", - "\n", - "print('We estimate a pico-nanoprotists has a carbon content of ≈%.0f pg C' %pico_nano_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We divide our estimate of the total biomass of pico-nanoprotists by our estimate of the carbon content of a single pico-nano protist. This gives us an estimate for the total number of individual protists." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate of the total number of individual protists is ≈5e+26 \n" - ] - } - ], - "source": [ - "# Load our estimate of the total biomass of pico-nanoprotists\n", - "pico_nano_biomass = data.loc[2,'Value']\n", - "\n", - "# Calculate the total number of individual protists\n", - "protist_num = pico_nano_biomass*1e15/(pico_nano_cc/1e12)\n", - "print('Our estimate of the total number of individual protists is ≈%.0e ' %protist_num)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Feed total marine protists results to Table 1 & Fig. 
1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Protists','Marine'), \n", - " col=['Biomass [Gt C]','Uncertainty','Total uncertainty'],\n", - " values=[data.loc[1,'Value'],data.loc[1,'Uncertainty'],mul_CI],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed total terrestrial protists results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Protists','Terrestrial'), \n", - " col=['Biomass [Gt C]','Uncertainty'],\n", - " values=[data.loc[0,'Value'],data.loc[0,'Uncertainty']],\n", - " path='../results.xlsx')\n", - "\n", - "# Feed total protist results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Protists','Protists'), \n", - " col=['Number of individuals'],\n", - " values=protist_num,\n", - " path='../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/marine_protists/.ipynb_checkpoints/marine_protists-checkpoint.ipynb b/protists/marine_protists/.ipynb_checkpoints/marine_protists-checkpoint.ipynb deleted file mode 100644 index a5a3bf7..0000000 --- a/protists/marine_protists/.ipynb_checkpoints/marine_protists-checkpoint.ipynb +++ /dev/null @@ -1,826 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"# Estimating the biomass of marine protists\n", - "Our estimate of the total biomass of marine protists relies on estimates of global biomass for many plankton groups. We included estimates of all plankton groups that are dominated by protists. The main groups with a significant biomass contribution were picoeukaryotes, microzooplankton (defined not to include copepod biomass), diatoms, *Phaeocystis* and Rhizaria. The estimates for all plankton groups except Rhizaria are based on [Buitenhuis et al.](http://search.proquest.com/openview/0e8e5672fa28111df473268e13f2f757/1?pq-origsite=gscholar&cbl=105729), which used data from the MAREDAT database. The protist group Rhizaria is underrepresented in the MAREDAT database, and thus our estimate for the total biomass of Rhizaria is based on *in situ* imaging work by [Biard et al.](http://dx.doi.org/10.1038/nature17652).\n", - "\n", - "For the estimates based on MAREDAT data, Buitenhuis et al. estimates the total biomass of a specific plankton group by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth. Buitenhuis et al. generates two types of estimates for the global biomass of each plankton group: a “minimum” estimate which uses the median concentration of biomass from the database, and a “maximum” estimate which uses the average biomass concentration. Because the distributions of values in the database are usually highly skewed by asymmetrically high values the median and mean are loosely associated by the authors of the MAREDAT study with a minimum and maximum estimate. The estimate based on the average value is more susceptible to biases in oversampling singular locations such as blooms of plankton species, or of coastal areas in which biomass concentrations are especially high, which might lead to an overestimate. 
On the other hand, the estimate based on the median biomass concentration might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. Therefore, here and in all estimates based on MAREDAT data, we take the geometric mean of the “minimum” and “maximum” estimates (actually median and mean values of the distribution) as our best estimate, which will increase our robustness to the effects discussed above. \n", - "\n", - "We now discuss the estimates for each of the groups of protists.\n", - "\n", - "## Picoeukaryotes\n", - "We estimate the total biomass of picoeukaryotes by first estimating the total biomass of picophytoplankton, and then calculating the fraction of eukaryotes out of the total biomass of picophytoplankton. Buitenhuis et al. reports a \"minimum\" estimate of 0.28 Gt C and a \"maximum\" estimate of 0.64 Gt C for the biomass of picophytoplankton. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for picophytoplankton\n", - "picophyto_biomsss = gmean([0.28e15,0.64e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the fraction of eukaryotes out of the total biomass of picophytoplankton, we rely on [Buitenhuis et al.](https://ueaeprints.uea.ac.uk/40778/) which estimates that they represent 49-69% of the global biomass of picophytoplankton. We use the geometric mean of this range as our best estimate of the fraction eukaryotes out of the total biomass of picophytoplankton." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "euk_frac = frac_mean(np.array([0.49,0.69]))\n", - "auto_picoeuk_biomass = picophyto_biomsss*euk_frac" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Picoeukaryotes contain both protists and plant species (like chlorophytes). It seems that, from the available literature, the biomass distribution between them is not strongly favored towards one class ([Li et al.](http://dx.doi.org/10.1016/0198-0149(92)90085-8)). We thus estimate the protist fraction at about 50% of the biomass of picoeukaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "auto_pico_protists_fraction = 0.5\n", - "auto_pico_protists_biomass = auto_picoeuk_biomass*auto_pico_protists_fraction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Protists in the picoplankton to nanoplankton size range (0.8-5 µm in diameter) include not only autotrophic, but also heterotrophic organisms. As we could not find a reliable resource for estimating the biomass of heterotrophic pico-nanoplankton we use a recent global 18S ribosomal DNA sequencing effort that was part of the Tara Oceans campaign ([de Vargas et al.](http://dx.doi.org/10.1126/science.1261605)). \n", - "\n", - "We extracted data from Fig. 5A in de Vargas et al., which quantifies the ratio between autotrophic and heterotrophic picoplankton and nanoplankton:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SiteOceanPhototrophic protistsHeterotrophic protist
04NAO0.30.4
17MS0.20.4
29MS0.20.4
311MS0.10.3
416MS0.20.4
\n", - "
" - ], - "text/plain": [ - " Site Ocean Phototrophic protists Heterotrophic protist\n", - "0 4 NAO 0.3 0.4\n", - "1 7 MS 0.2 0.4\n", - "2 9 MS 0.2 0.4\n", - "3 11 MS 0.1 0.3\n", - "4 16 MS 0.2 0.4" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# Load data from de Vargas on the ratio between autotrophic and heterotrophic protists\n", - "pico_nano_data = pd.read_excel('marine_protists_data.xlsx',skiprows=1)\n", - "pico_nano_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of phototrophic and heterotrophic protists out of the total amount of 18S rDNA sequences. We use the ratio between these geometric means as our best estimate for the ratio between photosynthetic and heterotrophic protists." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the ratio between heterotrophic and phototrophic protists in pico-nanoplankton is ≈2-fold\n" - ] - } - ], - "source": [ - "hetero_photo_ratio = gmean(pico_nano_data['Heterotrophic protist'])/gmean(pico_nano_data['Phototrophic protists'])\n", - "print('Our best estimate of the ratio between heterotrophic and phototrophic protists in pico-nanoplankton is ≈%.f-fold' %hetero_photo_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We add the contribution of heterotrophic pico-nanoprotists to our estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "pico_protists_biomass = (1+hetero_photo_ratio)*auto_pico_protists_biomass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Relying on 18S sequence abundance as a proxy for biomass is not a well established practice, and has various 
biases, but for lack of any other alternative we could find to perform the estimate, we chose to use it. Yet, we note that this plays a minor role in our analysis and in any case will not affect any of the major conclusions of our study.\n", - "\n", - "## Microzooplankton\n", - "The estimate of microzooplankton in Buitenhuis et al. does not include copepod biomass by definition, and thus is suitable in order to estimate the total biomass of microzooplankton protists. Buitenhuis et al. reports a \"minimum\" estimate of 0.48 Gt C and a \"maximum\" estimate of 0.73 Gt C for the biomass of microzooplankton. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for microzooplankton\n", - "microzoo_biomsss = gmean([0.48e15,0.73e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Diatoms\n", - "For diatoms, Buitenhuis et al. reports a \"minimum\" estimate of 0.1 Gt C and a \"maximum\" estimate of 0.94 Gt C for the biomass of diatoms. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for diatoms\n", - "diatom_biomsss = gmean([0.1e15,0.94e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Phaeocystis\n", - "For Phaeocystis, Buitenhuis et al. reports a \"minimum\" estimate of 0.11 Gt C and a \"maximum\" estimate of 0.71 Gt C for the biomass of Phaeocystis. 
We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for Phaeocystis\n", - "phaeocystis_biomsss = gmean([0.11e15,0.71e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As stated in Buitenhuis et al., the data from the MAREDAT initiative doesn’t contain the biomass of nanophytoplankton (phytoplankton between 2 and 20 µm) and autotrophic dinoflagellates. Nevertheless, this omission might be compensated by overestimation of Phaeocystis biomass because of sampling bias, so overall the sum of all the different phytoplankton fits well with total chlorophyll measurements from the WOA 2005.\n", - "\n", - "## Rhizaria\n", - "For rhizaria, our estimate relies on data from Biard et al. Biard et al. divided the data into three depth layers (0-100 m, 100-200 m, and 200-500 m), and multiplied median biomass concentrations at each depth layer across the entire volume of water at that layer to generate global estimate. The biomass of Rhizaria in the top 500 meters of the ocean is estimated at ≈0.2 Gt C. " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "rhizaria_biomass = 0.2e15" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "The estimates based on the MAREDAT database include measurements only for the top 200 meters of the water column. For rhizaria, our estimate includes the top 500 meters of the water column. 
For more details on possible contributions from deeper ocean layers, see the marine protists section in the Supplementary information.\n", - "\n", - "## Particle-attached protists\n", - "To estimate the total biomass of particle-attached protists, we estimate the ratio between the biomass of particle-attached protists and prokaryotes. We rely on three studies which have measured this ratio in the epipelagic, mesopelagic and bathypelagic layers ([Bochdansky et al.](http://dx.doi.org/10.1038/ismej.2016.113), [Turley & Mackie](http://www.jstor.org/stable/24849742), [Herndl](http://www.jstor.org/stable/24827742)). We first calculate the mean ratio between protists and prokaryotes in each study. \n", - "\n", - "### Bochdansky et al.\n", - "Bochdansky et al. provide a mean number of particle-attached eukaryotes normalized by the amount of particle-attached prokaryotes in the bathypelagic realm. They also measure the amount of fungi out of the total population of eukaryotes. We remove the amount of fungal cells from the total number of eukaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The mean ratio between the number of particle-attached protists and prokaryotes reported in Bochdansky et al. is ≈0.004\n" - ] - } - ], - "source": [ - "# Load the data from Bochdansky et al.\n", - "bochdansky_data = pd.read_excel('marine_protists_data.xlsx','Bochdansky ratio',skiprows=1,index_col=0)\n", - "protists = bochdansky_data.loc['All Eukaryotes'] - bochdansky_data.loc['Fungi']\n", - "print('The mean ratio between the number of particle-attached protists and prokaryotes reported in Bochdansky et al. 
is ≈%.3f' %protists)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Out of the total number of eukaryotes, we exclude the group of *Labyrinthulomycetes*, as their biomass was estimated separately by Bochdansky et al., as we will see later. This leaves us with protists which are not *Labyrinthulomycetes*, mainly flagellates. To estimate the ratio of the biomass of these remaining protists, we rely on a study which has measured the carbon content of free-living protists in the deep sea ([Pernice et al.](https://dx.doi.org/10.1038%2Fismej.2014.168)). We use the carbon content of free-living protists in the deep sea as our best estimate of the carbon content of particle-attached protists in the deep sea. For the carbon content of particle-attached prokaryotes, we use our best estimate from the particle-attached prokaryotes section." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate the non-Labyrinthulomycetes protists\n", - "flagellates = protists-bochdansky_data.loc['Labyrinthulomycetes']\n", - "\n", - "# Load data on the carbon content of flagellates from the deep-sea\n", - "flagellates_cc_data = pd.read_excel('marine_protists_data.xlsx','Pernice',skiprows=1,index_col=0)\n", - "deep_flagellates_cc = flagellates_cc_data.loc['1401-4000','Protist carbon content [pg C cell^-1]']\n", - "\n", - "# Load our estimate of the carbon content of particle-attached prokaryotes\n", - "prok_cc = pd.read_excel('marine_protists_data.xlsx', 'POC prokaryotes').iloc[0,1]\n", - "\n", - "# Calculate the ratio of biomass of non-Labyrinthulomycetes protists to prokaryotes\n", - "biomass_ratio_flagellates = flagellates*(deep_flagellates_cc*1000/prok_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Bochdansky et al. also provide estimates for the biomass ratio between particle-attached *Labyrinthulomycetes* and prokaryotes in the bathypelagic layer. 
They provide several estimates for the biomass ratio. We use the geometric mean of those estimates as our best estimate of the biomass ratio between particle-attached *Labyrinthulomycetes* and prokaryotes." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the data on the biomass ratio of Labyrinthulomycetes\n", - "lab_biomass_data = pd.read_excel('marine_protists_data.xlsx','Bochdansky biomass',skiprows=1)\n", - "\n", - "# Calculate the geometric mean of the different estimates for the biomass ratio of\n", - "# Labyrinthulomycetes and prokaryotes\n", - "lab_biomass_ratio = gmean(gmean(lab_biomass_data.iloc[:,1:],axis=1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum our estimate for the biomass ratio of non-*Labyrinthulomycetes* and *Labyrinthulomycetes* particle-attached protists and prokaryotes as our best estimate from Bochdansky et al. of the biomass ratio between particle-attached protists and bacteria in the deep ocean:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass ratio between particle-attached protists and prokaryotes in the bathypelagic layer is ≈1.3 \n" - ] - } - ], - "source": [ - "best_bochdansky = biomass_ratio_flagellates + lab_biomass_ratio\n", - "\n", - "print('Our best estimate for the biomass ratio between particle-attached protists and prokaryotes in the bathypelagic layer is ≈%.1f ' %best_bochdansky)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Herndl\n", - "\n", - "For Turley & Mackie and Herndl, we have data on the concentrations of prokaryotes and protists on particles. 
For each study, we first calculate the mean concentration of prokaryotes and protists, and divide the two mean concentrations to generate an estimate for the ratio between the number of cells of protists and prokaryotes. To estimate the mean concentrations of prokaryotes and protists, we generate two types of estimates: an estimate which uses the arithmetic mean of the different measurements, and an estimate which uses the geometric mean of the different measurements. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high population densities. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Reference\n", - "Herndl 0.0\n", - "Turley & Mackie 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data from Turley & Mackie and Herndl\n", - "poc_ratio = pd.read_excel('marine_protists_data.xlsx','POC')\n", - "\n", - "# Calculate the arithmetic mean of the measurements in each study\n", - "arith_poc_mean_conc = poc_ratio.iloc[:,[1,2,3]].groupby('Reference').mean()\n", - "\n", - "# Calculate the geometric mean of the measurements in each study\n", - "geo_poc_mean_conc =poc_ratio.dropna().groupby('Reference').apply(lambda x: pd.Series(gmean(x.iloc[:,[1,2]])))\n", - "geo_poc_mean_conc.columns = poc_ratio.columns[1:3]\n", - "\n", - "# Calculate the geometric mean between the mean estimate based on arithmetic and\n", - "# geometric means\n", - "best_mean_conc = np.sqrt(arith_poc_mean_conc*geo_poc_mean_conc)\n", - "\n", - "# Calculate the ratio between the number of cells of protists and prokaryotes\n", - "best_ratio_conc = best_mean_conc['Concentration of Flagellates [cells mL^-1]']/best_mean_conc['Concentration of Bacteria [cells mL^-1]']\n", - "best_ratio_conc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Herndl has measured the biovolume of protists and prokaryotes on particles, so we can generate an estimate for the carbon content ratio between protists and prokaryotes. For flagellates, Herndl has measured a mean volume of ≈11 $µm^3$, and he uses a conversion ratio between biovolume and carbon content of 220 fg C $µm^3$. He thus estimates flagellates will have a carbon content of 2.4 pg C cell$^{-1}$. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "herndl_flagellates_vol = 11\n", - "herndl_flagellates_cc = herndl_flagellates_vol*0.22" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For prokaryotes, he measures a biovolume of ≈0.25 $µm^3$ for rod cells and 0.067 $µm^3$ for coccoid cells. We convert these volumes to carbon content using the following conversion equation from [Gundersen et al.](https://onlinelibrary.wiley.com/doi/10.4319/lo.2002.47.5.1525/abstract): $$ carbon\\ content = 108.8×V^{0.898}$$" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "rod_vol = 0.25\n", - "coccoid_vol = 0.067\n", - "conversion_eq = lambda x: 108.8*x**0.898\n", - "rod_cc = conversion_eq(rod_vol)\n", - "coccoid_cc = conversion_eq(coccoid_vol)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From his experiments, it seems as if the relative fraction of rod and coccoid cells is similar, so we use the average of the carbon contents of rod cells and coccoid cells:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "herndl_prok_cc = np.mean([rod_cc,coccoid_cc])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the ratio in carbon contents between protists and prokaryotes as reported by Herndl, and multiply it by the ratio of the number of cells of protists and prokaryotes. This gives us an estimate for the ratio of the biomass of particle-attached protists and prokaryotes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the ratio between the biomass of particle-attached protists and prokaryotes based on Herndl is ≈2.7\n" - ] - } - ], - "source": [ - "herndl_cc_ratio = herndl_flagellates_cc*1000/herndl_prok_cc\n", - "herndl_biomass_ratio = best_ratio_conc.loc['Herndl']*herndl_cc_ratio\n", - "print('Our estimate for the ratio between the biomass of particle-attached protists and prokaryotes based on Herndl is ≈%.1f' %herndl_biomass_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Turley & Mackie\n", - "For Turley & Mackie, we only have measurements of the ratio between the number of cells of particle-attached protists and prokaryotes. Turley & Mackie report measurements both in the epipelagic layer and in the mesopelagic layer. We calculate the mean concentrations of protists and prokaryotes in each layer. As we noted above, we first calculate the arithmetic and geometric means of the measurements in each layer, and then use the geometric mean between the two values generated by using the arithmetic mean and geometric mean as our best estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/lib/python3/dist-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " \"\"\"\n" - ] - }, - { - "data": { - "text/plain": [ - "Epipelagic\n", - "False 0.002\n", - "True 0.004\n", - "dtype: float64" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Take the data in Turley & Mackie\n", - "tm_data = poc_ratio[poc_ratio['Reference'] =='Turley & Mackie']\n", - "\n", - "# Divide the data to data in the epipelagic and mesopelagic layers\n", - "tm_data['Epipelagic'] = tm_data['Depth [m]'] < 200\n", - "\n", - "# Calculate the arithmetic mean of the measurements in each layer\n", - "arith_tm_mean_conc = tm_data.iloc[:,[1,2,6]].groupby('Epipelagic').mean()\n", - "\n", - "# Calculate the geometric mean of the measurements in each layer\n", - "geo_tm_mean_conc =tm_data.dropna().groupby('Epipelagic').apply(lambda x: pd.Series(gmean(x.iloc[:,[1,2]])))\n", - "geo_tm_mean_conc.columns = tm_data.columns[1:3]\n", - "\n", - "# Calculate the geometric mean between the mean estimate based on arithmetic and\n", - "# geometric means\n", - "best_tm_mean_conc = np.sqrt(arith_tm_mean_conc*geo_tm_mean_conc)\n", - "\n", - "# Calculate the ratio between the number of cells of protists and prokaryotes\n", - "best_tm_ratio_conc = best_tm_mean_conc['Concentration of Flagellates [cells mL^-1]']/best_tm_mean_conc['Concentration of Bacteria [cells mL^-1]']\n", - "pd.options.display.float_format = '{:,.3f}'.format\n", - "best_tm_ratio_conc" - ] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "To estimate the ratio between the biomass of particle-attached protists and prokaryotes based on the ratios of cell concentrations in Turley & Mackie, we need to estimate the ratios in the carbon contents of particle-attached protists and prokaryotes. For the samples collected in the epipelagic layer, we use the data measured in Herndl, which was also measured in the epipelagic layer. For the mesopelagic layer, we use data from Pernice et al. measured in the mesopelagic layer." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# For the epipelagic layer multiply cell concentration ratios by the carbon\n", - "# content ratios measured by Herndl\n", - "epi_tm_biomass = best_tm_ratio_conc.loc[True]*herndl_cc_ratio\n", - "\n", - "# For the mesopelagic layer, calculate the mean carbon content of protists\n", - "# From data in Pernice et al.\n", - "meso_protist_cc = np.average(flagellates_cc_data['Protist carbon content [pg C cell^-1]'],weights=[250,250,700,0])\n", - "\n", - "# Calculate the ratio of carbon content in the mesopelagic layer using\n", - "# our best estimate for the carbon content of particle-attached prokaryotes \n", - "meso_cc_ratio = meso_protist_cc*1000/prok_cc\n", - "\n", - "# Calculate the biomass ratio between particle-attached protists and\n", - "# prokaryotes in the mesopelagic layer\n", - "meso_biomass_ratio = best_tm_ratio_conc.loc[False]*meso_cc_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the biomass ratio of particle-attached protists and prokaryotes\n", - "We integrate the estimates from the three different studies in the following manner: We estimate a biomass ratio between particle-attached protists and bacteria in each layer of the ocean - the epipelagic, mesopelagic and bathypelagic layers. 
For the epipelagic layer, we use the geometric mean of the estimates based on Herndl and Turley & Mackie. For the mesopelagic layer we use the estimates by Turley & Mackie, and for the bathypelagic layer we use the estimates by Bochdansky et al." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass ratio between particle-attached protists and prokaryotes is ≈1.1\n" - ] - } - ], - "source": [ - "best_biomass_ratio = np.average([gmean([herndl_biomass_ratio,epi_tm_biomass]),meso_biomass_ratio,best_bochdansky], weights=[200,800,3000])\n", - "print('Our best estimate for the biomass ratio between particle-attached protists and prokaryotes is ≈%.1f' %best_biomass_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use our best estimate for the total biomass of particle attached prokaryotes and multiply it by our estimate of the biomass ratio between particle-attached protists and prokaryotes to estimate the total biomass of particle-attached protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of particle-attached protists is ≈0.3 Gt C\n" - ] - } - ], - "source": [ - "poc_prok = pd.read_excel('../../bacteria_archaea/marine/marine_prok_biomass_estimate.xlsx').loc[[0,1,4],'Value'].prod()*1e-15\n", - "\n", - "best_poc_protists = poc_prok*best_biomass_ratio\n", - "\n", - "print('Our best estimate for the total biomass of particle-attached protists is ≈%.1f Gt C' %(best_poc_protists/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the total biomass of protists\n", - "To estimate the total biomass of marine protists, we sum up all of our estimates of the biomass of the different groups of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of marine protists is ≈2.1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = rhizaria_biomass + phaeocystis_biomsss + diatom_biomsss + microzoo_biomsss + pico_protists_biomass + best_poc_protists\n", - "\n", - "print('Our best estimate for the total biomass of marine protists is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Uncertainty analysis\n", - "We discuss the uncertainty of estimates based on the MAREDAT database in a dedicated section in the Supplementary Information. We crudely project an uncertainty of about an order of magnitude. We project the same uncertainty for the biomass of particle-attached protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "# We crudely estimate and uncertainty of an order of magnitude\n", - "mul_CI = 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of marine protists: 2.1 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of marine protists: ≈10-fold\n" - ] - } - ], - "source": [ - "\n", - "print('Biomass of marine protists: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of marine protists: ≈%.0f-fold' % mul_CI)\n", - "\n", - "\n", - "old_results = pd.read_excel('../protists_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(1), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Biomass of marine protists',\n", - " 'Value': float(best_estimate)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Biomass of pico-nanoprotists',\n", - " 'Value': float(pico_protists_biomass)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': None\n", - " })\n", - "\n", - "result.to_excel('../protists_biomass_estimate.xlsx',index=False)\n", - "\n", - "\n", - "# Feed results to Fig. 
2C\n", - "\n", - "# Feed green algae picophytoplankton biomass\n", - "update_fig2c(row=23,col=1,values=auto_picoeuk_biomass*(1-auto_pico_protists_fraction)/1e15, path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed bacterial picophytoplankton biomass\n", - "update_fig2c(row=24,col=1,values=picophyto_biomsss*(1-euk_frac)/1e15, path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed protist picophytoplankton biomass\n", - "update_fig2c(row=25,col=1,values=auto_picoeuk_biomass*auto_pico_protists_fraction/1e15, path='../../results.xlsx')\n", - "\n", - "# Feed diatom biomass\n", - "update_fig2c(row=26,col=1,values=diatom_biomsss/1e15, path='../../results.xlsx')\n", - "\n", - "# Feed Phaeocystis biomass\n", - "update_fig2c(row=27,col=1,values=phaeocystis_biomsss/1e15, path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/marine_protists/.ipynb_checkpoints/marine_protists-median-checkpoint.ipynb b/protists/marine_protists/.ipynb_checkpoints/marine_protists-median-checkpoint.ipynb deleted file mode 100644 index 96c5b0b..0000000 --- a/protists/marine_protists/.ipynb_checkpoints/marine_protists-median-checkpoint.ipynb +++ /dev/null @@ -1,878 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "# Estimating the biomass of marine protists\n", - "Our estimate of the total biomass of marine protists relies on estimates of global biomass for many plankton groups. We included estimates of all plankton groups that are dominated by protists. The main groups with a significant biomass contribution were picoeukaryotes, microzooplankton (defined not to include copepod biomass), diatoms, *Phaeocystis* and Rhizaria. The estimates for all plankton groups except Rhizaria are based on [Buitenhuis et al.](http://search.proquest.com/openview/0e8e5672fa28111df473268e13f2f757/1?pq-origsite=gscholar&cbl=105729), which used data from the MAREDAT database. The protist group Rhizaria is underrepresented in the MAREDAT database, and thus our estimate for the total biomass of Rhizaria is based on *in situ* imaging work by [Biard et al.](http://dx.doi.org/10.1038/nature17652).\n", - "\n", - "For the estimates based on MAREDAT data, Buitenhuis et al. estimates the total biomass of a specific plankton group by using a characteristic biomass concentration for each depth (either a median or average of the values in the database) and applying it across the entire volume of ocean at that depth. Buitenhuis et al. supplies two types of estimates for the global biomass of each plankton group: a “minimum” estimate which uses the median concentration of biomass from the database, and a “maximum” estimate which uses the average biomass concentration. Because the distributions of values in the database are usually highly skewed by asymmetrically high values, the median and mean are loosely associated by the authors of the MAREDAT study with a minimum and maximum estimate. The estimate based on the average value is more susceptible to biases in oversampling singular locations such as blooms of plankton species, or of coastal areas in which biomass concentrations are especially high, which might lead to an overestimate. 
On the other hand, the estimate based on the median biomass concentration might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. Therefore, here and in all estimates based on MAREDAT data, we take the geometric mean of the “minimum” and “maximum” estimates (actually median and mean values of the distribution) as our best estimate, which will increase our robustness to the effects discussed above. \n", - "\n", - "We now discuss the estimates for each of the groups of protists.\n", - "\n", - "## Picoeukaryotes\n", - "We estimate the total biomass of picoeukaryotes by first estimating the total biomass of picophytoplankton, and then calculating the fraction of eukaryotes out of the total biomass of picophytoplankton. Buitenhuis et al. reports a \"minimum\" estimate of 0.28 Gt C and a \"maximum\" estimate of 0.64 Gt C for the biomass of picophytoplankton. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "460000000000000.0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for picophytoplankton\n", - "picophyto_biomsss = gmean([0.28e15,0.64e15])\n", - "\n", - "#Median\n", - "picophyto_biomsss = 0.28e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the fraction of eukaryotes out of the total biomass of picophytoplankton, we rely on [Buitenhuis et al.](https://ueaeprints.uea.ac.uk/40778/) which estimates that they represent 49-69% of the global biomass of picophytoplankton. We use the geometric mean of this range as our best estimate of the fraction eukaryotes out of the total biomass of picophytoplankton." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "euk_frac = gmean([0.49,0.69])\n", - "\n", - "#Median\n", - "euk_frac = np.median([0.49,0.69])\n", - "auto_picoeuk_biomass = picophyto_biomsss*euk_frac" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Picoeukaryotes contain both protists and plant species (like chlorophytes). It seems that, from the available literature, the biomass distribution between them is not strongly favored towards one class ([Li et al.](http://dx.doi.org/10.1016/0198-0149(92)90085-8)). We thus estimate the protist fraction at about 50% of the biomass of picoeukaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "auto_pico_protists_fraction = 0.5\n", - "auto_pico_protists_biomass = auto_picoeuk_biomass*auto_pico_protists_fraction" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Protists in the picoplankton to nanoplankton size range (0.8-5 µm in diameter) include not only autotrophic, but also heterotrophic organisms. As we could not find a reliable resource for estimating the biomass of heterotrophic pico-nanoplankton we use a recent global 18S ribosomal DNA sequencing effort that was part of the Tara Oceans campaign ([de Vargas et al.](http://dx.doi.org/10.1126/science.1261605)). \n", - "\n", - "We extracted data from Fig. 5A in de Vargas et al., which quantifies the ratio between autotropic and heterotrophic picoplankton and nanoplankton:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SiteOceanPhototrophic protistsHeterotrophic protist
04NAO0.30.4
17MS0.20.4
29MS0.20.4
311MS0.10.3
416MS0.20.4
\n", - "
" - ], - "text/plain": [ - " Site Ocean Phototrophic protists Heterotrophic protist\n", - "0 4 NAO 0.3 0.4\n", - "1 7 MS 0.2 0.4\n", - "2 9 MS 0.2 0.4\n", - "3 11 MS 0.1 0.3\n", - "4 16 MS 0.2 0.4" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# Load data from de Vargas on the ratio between autotrophic and heterotrophic protists\n", - "pico_nano_data = pd.read_excel('marine_protists_data.xlsx',skiprows=1)\n", - "pico_nano_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the fraction of phototrophic and heterotrophic protists out of the total amount of 18S rDNA sequences. We use the ratio between these geometric means as our best estimate for the ratio between photosynthetic and heterotrophic protists." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the ratio between heterotrophic and phototrophic protists in pico-nanoplankton is ≈2-fold\n" - ] - } - ], - "source": [ - "hetero_photo_ratio = gmean(pico_nano_data['Heterotrophic protist'])/gmean(pico_nano_data['Phototrophic protists'])\n", - "\n", - "#Median\n", - "hetero_photo_ratio = pico_nano_data['Heterotrophic protist'].median()/pico_nano_data['Phototrophic protists'].median()\n", - "print('Our best estimate of the ratio between heterotrophic and phototrophic protists in pico-nanoplankton is ≈%.f-fold' %hetero_photo_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We add the contribution of heterotrophic pico-nanoprotists to our estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pico_protists_biomass = (1+hetero_photo_ratio)*auto_pico_protists_biomass" - ] - }, - 
{ - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Relying on 18S sequence abundance as a proxy for biomass is not a well-established practice, and has various biases, but for lack of any other alternative we could find to perform the estimate, we chose to use it. Yet, we note that this plays a minor role in our analysis and in any case will not affect any of the major conclusions of our study.\n", - "\n", - "## Microzooplankton\n", - "The estimate of microzooplankton in Buitenhuis et al. does not include copepod biomass by definition, and thus is suitable in order to estimate the total biomass of microzooplankton protists. Buitenhuis et al. reports a \"minimum\" estimate of 0.48 Gt C and a \"maximum\" estimate of 0.73 Gt C for the biomass of microzooplankton. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for microzooplankton\n", - "microzoo_biomsss = gmean([0.48e15,0.73e15])\n", - "\n", - "#Median\n", - "microzoo_biomsss = 0.48e15" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Diatoms\n", - "For diatoms, Buitenhuis et al. reports a \"minimum\" estimate of 0.1 Gt C and a \"maximum\" estimate of 0.94 Gt C for the biomass of diatoms. 
We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for diatoms\n", - "diatom_biomsss = gmean([0.1e15,0.94e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Phaeocystis\n", - "For Phaeocystis, Buitenhuis et al. reports a \"minimum\" estimate of 0.11 Gt C and a \"maximum\" estimate of 0.71 Gt C for the biomass of Phaeocystis. We calculate the geometric mean of those estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the geometric mean of the \"minimum\" and \"maximum\" estimates from Buitenhuis et al.\n", - "# for Phaeocystis\n", - "phaeocystis_biomsss = gmean([0.11e15,0.71e15])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As stated in Buitenhuis et al., the data from the MAREDAT initiative doesn’t contain the biomass of nanophytoplankton (phytoplankton between 2 and 20 µm) and autotrophic dinoflagellates. Nevertheless, this omission might be compensated for by an overestimation of Phaeocystis biomass because of sampling bias, so overall the sum of all the different phytoplankton fits well with total chlorophyll measurements from the WOA 2005.\n", - "\n", - "## Rhizaria\n", - "For rhizaria, our estimate relies on data from Biard et al. Biard et al. divided the data into three depth layers (0-100 m, 100-200 m, and 200-500 m), and multiplied median biomass concentrations at each depth layer across the entire volume of water at that layer to generate a global estimate. The biomass of Rhizaria in the top 500 meters of the ocean is estimated at ≈0.2 Gt C. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "rhizaria_biomass = 0.2e15" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "The estimates based on the MAREDAT database include measurements only for the top 200 meters of the water column. For rhizaria, our estimate includes the top 500 meters of the water column. For more details on possible contributions from deeper ocean layers, see the marine protists section in the Supplementary information.\n", - "\n", - "## Particle-attached protists\n", - "To estimate the total biomass of particle-attached protists, we estimate the ratio between the biomass of particle-attached protists and prokaryotes. We rely on three studies which have measured this ratio in the epipelagic, mesopelagic and bathypelagic layers ([Bochdansky et al.](http://dx.doi.org/10.1038/ismej.2016.113), [Turley & Mackie](http://www.jstor.org/stable/24849742), [Herndl](http://www.jstor.org/stable/24827742)). We first calculate the mean ratio between protists and prokaryotes in each study. \n", - "\n", - "### Bochdansky et al.\n", - "Bochdansky et al. provide a mean number of particle-attached eukaryotes normalized by the amount of particle-attached prokaryotes in the bathypelagic realm. They also measure the amount of fungi out of the total population of eukaryotes. We remove the amount of fungal cells from the total number of eukaryotes:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The mean ratio between the number of particle-attached protists and prokaryotes reported in Bochdansky et al. 
is ≈0.004\n" - ] - } - ], - "source": [ - "# Load the data from Bochdansky et al.\n", - "bochdansky_data = pd.read_excel('marine_protists_data.xlsx','Bochdansky ratio',skiprows=1,index_col=0)\n", - "protists = bochdansky_data.loc['All Eukaryotes'] - bochdansky_data.loc['Fungi']\n", - "print('The mean ratio between the number of particle-attached protists and prokaryotes reported in Bochdansky et al. is ≈%.3f' %protists)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Out of the total number of eukaryotes, we exclude the group of *Labyrinthulomycetes*, as their biomass was estimated separately by Bochdansky et al., as we will see later. This leaves us with protists which are not *Labyrinthulomycetes*, mainly flagellates. To estimate the ratio of the biomass of these remaining protists, we rely on a study which has measured the carbon content of free-living protists in the deep sea ([Pernice et al.](https://dx.doi.org/10.1038%2Fismej.2014.168)). We use the carbon content of free-living protists in the deep sea as our best estimate of the carbon content of particle-attached protists in the deep sea. For the carbon content of particle-attached prokaryotes, we use our best estimate from the particle-attached prokaryotes section." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the non-Labyrinthulomycetes protists\n", - "flagellates = protists-bochdansky_data.loc['Labyrinthulomycetes']\n", - "\n", - "# Load data on the carbon content of flagellates from the deep-sea\n", - "flagellates_cc_data = pd.read_excel('marine_protists_data.xlsx','Pernice',skiprows=1,index_col=0)\n", - "deep_flagellates_cc = flagellates_cc_data.loc['1401-4000','Protist carbon content [pg C cell^-1]']\n", - "\n", - "# Load our estimate of the carbon content of particle-attached prokaryotes\n", - "prok_cc = pd.read_excel('marine_protists_data.xlsx', 'POC prokaryotes').iloc[0,1]\n", - "\n", - "# Calculate the ratio of biomass of non-Labyrinthulomycetes protists to prokaryotes\n", - "biomass_ratio_flagellates = flagellates*(deep_flagellates_cc*1000/prok_cc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Bochdansky et al. also provide estimates for the biomass ratio between particle-attached *Labyrinthulomycetes* and prokaryotes in the bathypelagic layer. They provide several estimates for the biomass ratio. We use the geometric mean of those estimates as our best estimate of the biomass ratio between particle-attached *Labyrinthulomycetes* and prokaryotes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load the data on the biomass ratio of Labyrinthulomycetes\n", - "lab_biomass_data = pd.read_excel('marine_protists_data.xlsx','Bochdansky biomass',skiprows=1)\n", - "\n", - "# Calculate the geometric mean of the different estimates for the biomass ratio of\n", - "# Labyrinthulomycetes and prokaryotes\n", - "lab_biomass_ratio = gmean(gmean(lab_biomass_data.iloc[:,1:],axis=1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We sum our estimate for the biomass ratio of non-*Labyrinthulomycetes* and *Labyrinthulomycetes* particle-attached protists and prokaryotes as our best estimate from Bochdansky et al. of the biomass ratio between particle-attached protists and bacteria in the deep ocean:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass ratio between particle-attached protists and prokaryotes in the bathypelagic layer is ≈1.3 \n" - ] - } - ], - "source": [ - "best_bochdansky = biomass_ratio_flagellates + lab_biomass_ratio\n", - "\n", - "print('Our best estimate for the biomass ratio between particle-attached protists and prokaryotes in the bathypelagic layer is ≈%.1f ' %best_bochdansky)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Herndl\n", - "\n", - "For Turley & Mackie and Herndl, we have data on the concentrations of prokaryotes and protists on particles. For each study, we fist calculate the mean concentration of prokaryotes and protists, and divide the two mean concentrations to generate an estimate for the ratio between the number of cells of protists and prokaryotes. 
To estimate the mean concentrations of prokaryotes and protists, we generate two types of estimates: an estimate which uses the arithmetic mean of the different measurements, and an estimate which uses the geometric mean of the different measuremetns. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high population densities. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Reference\n", - "Herndl 0.0\n", - "Turley & Mackie 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data from Turley & Mackie and Herndl\n", - "poc_ratio = pd.read_excel('marine_protists_data.xlsx','POC')\n", - "\n", - "# Calculate the arithmetic mean of the measurements in each study\n", - "arith_poc_mean_conc = poc_ratio.iloc[:,[1,2,3]].groupby('Reference').mean()\n", - "\n", - "# Calculate the geometric mean of the measurements in each study\n", - "geo_poc_mean_conc =poc_ratio.dropna().groupby('Reference').apply(lambda x: pd.Series(gmean(x.iloc[:,[1,2]])))\n", - "geo_poc_mean_conc.columns = poc_ratio.columns[1:3]\n", - "\n", - "# Calculate the geometric mean between the mean estimate based on arithmetic and\n", - "# geometric means\n", - "best_mean_conc = np.sqrt(arith_poc_mean_conc*geo_poc_mean_conc)\n", - "\n", - "# Calculate the ratio between the number of cells of protists and 
prokaryotes\n", - "best_ratio_conc = best_mean_conc['Concentration of Flagellates [cells mL^-1]']/best_mean_conc['Concentration of Bacteria [cells mL^-1]']\n", - "best_ratio_conc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Herndl has measured the biovolume of protists and prokaryotes on particles, so we can generate an estimate for the carbon content ratio between protists and prokaryotes. For flagellates, Herndl has measured a mean volume of ≈11 $µm^3$, and he uses a conversion ratio between biovolume and carbon content of 220 fg C $µm^3$. He thus estimates flagellates will have a carbon content of 2.4 pg C cell$^{-1}$. " - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "herndl_flagellates_vol = 11\n", - "herndl_flagellates_cc = herndl_flagellates_vol*0.22" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For prokaryotes, he measures a biovolume of ≈0.25 $µm^3$ for rod cells and 0.067 $µm^3$ for coccoid cells. 
We convert these volumes to carbon content using the following conversion equation from [Gundersen et al.](https://onlinelibrary.wiley.com/doi/10.4319/lo.2002.47.5.1525/abstract): $$ \\text{carbon content} = 108.8 \\times V^{0.898}$$" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "rod_vol = 0.25\n", - "coccoid_vol = 0.067\n", - "conversion_eq = lambda x: 108.8*x**0.898\n", - "rod_cc = conversion_eq(rod_vol)\n", - "coccoid_cc = conversion_eq(coccoid_vol)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From his experiments, it seems as if the relative fraction of rod and coccoid cells is similar, so we use the average of the carbon contents of rod cells and coccoid cells:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "herndl_prok_cc = np.mean([rod_cc,coccoid_cc])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the ratio in carbon contents between protists and prokaryotes as reported by Herndl, and multiply it by the ratio of the number of cells of protists and prokaryotes. This gives us an estimate for the ratio of the biomass of particle-attached protists and prokaryotes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the ratio between the biomass of particle-attached protists and prokaryotes based on Herndl is ≈2.7\n" - ] - } - ], - "source": [ - "herndl_cc_ratio = herndl_flagellates_cc*1000/herndl_prok_cc\n", - "herndl_biomass_ratio = best_ratio_conc.loc['Herndl']*herndl_cc_ratio\n", - "print('Our estimate for the ratio between the biomass of particle-attached protists and prokaryotes based on Herndl is ≈%.1f' %herndl_biomass_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Turley & Mackie\n", - "For Turley & Mackie, we only have measurements of the ratio between the number of cells of particle-attached protists and prokaryotes. Turley & Mackie report measurements both in the epipelagic layer and in the mesopelagic layer. We calculate the mean concentrations of protists and prokaryotes in each layer. As we noted above, we first calculate the arithmetic and geometric means of the measurements in each layer, and then use the geometric mean between the two values generated by using the arithmetic mean and geometric mean as our best estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " \"\"\"\n" - ] - }, - { - "data": { - "text/plain": [ - "Epipelagic\n", - "False 0.002\n", - "True 0.004\n", - "dtype: float64" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Take the data in Turley & Mackie\n", - "tm_data = poc_ratio[poc_ratio['Reference'] =='Turley & Mackie']\n", - "\n", - "# Divide the data to data in the epipelagic and mesopelagic layers\n", - "tm_data['Epipelagic'] = tm_data['Depth [m]'] < 200\n", - "\n", - "# Calculate the arithmetic mean of the measurements in each layer\n", - "arith_tm_mean_conc = tm_data.iloc[:,[1,2,6]].groupby('Epipelagic').mean()\n", - "\n", - "# Calculate the geometric mean of the measurements in each layer\n", - "geo_tm_mean_conc =tm_data.dropna().groupby('Epipelagic').apply(lambda x: pd.Series(gmean(x.iloc[:,[1,2]])))\n", - "geo_tm_mean_conc.columns = tm_data.columns[1:3]\n", - "\n", - "# Calculate the geometric mean between the mean estimate based on arithmetic and\n", - "# geometric means\n", - "best_tm_mean_conc = np.sqrt(arith_tm_mean_conc*geo_tm_mean_conc)\n", - "\n", - "# Calculate the ratio between the number of cells of protists and prokaryotes\n", - "best_tm_ratio_conc = best_tm_mean_conc['Concentration of Flagellates [cells mL^-1]']/best_tm_mean_conc['Concentration of Bacteria [cells mL^-1]']\n", - "pd.options.display.float_format = '{:,.3f}'.format\n", - "best_tm_ratio_conc" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "To estimate the ratio between the biomass of particle-attached protists and prokaryotes based on the ratios of cell concentrations in Turley & Mackie, we need to estimate the ratios in the carbon contents of particle-attached protists and prokaryotes. For the sampled collected in the epipelagic layer, we use the data measured in Herndl, which was also measured in the epipelagic layer. For the mesopelagic layer, we use data from Pernice et al. measured in the mesopelagic layer." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "# For the epipelagic layer multiply cell concentration ratios by the carbon\n", - "# content ratios measured by Herndl\n", - "epi_tm_biomass = best_tm_ratio_conc.loc[True]*herndl_cc_ratio\n", - "\n", - "# For the mesopelagic layer, calculate the mean carbon content of protists\n", - "# From data in Pernice et al.\n", - "meso_protist_cc = np.average(flagellates_cc_data['Protist carbon content [pg C cell^-1]'],weights=[250,250,700,0])\n", - "\n", - "# Calculate the ratio of carbon content in the mesopelagic layer using\n", - "# our best estiamte for the carbon content of particle-attached prokaryotes \n", - "meso_cc_ratio = meso_protist_cc*1000/prok_cc\n", - "\n", - "# Calculate the biomass ratio between particle-attached protists and\n", - "# prokaryotes in the mesopelagic layer\n", - "meso_biomass_ratio = best_tm_ratio_conc.loc[False]*meso_cc_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the biomass ratio of particle-attached protists and prokaryotes\n", - "We integrate the estimates from the three different studies in the following manner: We estimate a biomass ratio between particle-attached protists and bacteria in each layer of the ocean - the epipelagic, mesopelagic and bathypelagic layers. 
For the epipelagic layer, we use the geometric mean of the estimates based on Herndl and Turley & Mackie. For the mesopelagic layer we use the estimates by Turley & Mackie, and for the bathypelagic layer we use the estimates by Bochdansky et al." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass ratio between particle-attached protists and prokaryotes is ≈1.1\n" - ] - } - ], - "source": [ - "best_biomass_ratio = np.average([gmean([herndl_biomass_ratio,epi_tm_biomass]),meso_biomass_ratio,best_bochdansky], weights=[200,800,3000])\n", - "print('Our best estimate for the biomass ratio between particle-attached protists and prokaryotes is ≈%.1f' %best_biomass_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use our best estimate for the total biomass of particle attached prokaryotes and multiply it by our estimate of the biomass ratio between particle-attached protists and prokaryotes to estimate the total biomass of particle-attached protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of particle-attached protists is ≈0.3 Gt C\n" - ] - } - ], - "source": [ - "poc_prok = pd.read_excel('../../bacteria_archaea/marine/marine_prok_biomass_estimate.xlsx').loc[[0,1,4],'Value'].prod()*1e-15\n", - "\n", - "best_poc_protists = poc_prok*best_biomass_ratio\n", - "\n", - "print('Our best estimate for the total biomass of particle-attached protists is ≈%.1f Gt C' %(best_poc_protists/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the total biomass of protists\n", - "To estimate the total biomass of marine protists, we sum up all of our estimates of the biomass of the different groups of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of marine protists is ≈2.1 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = rhizaria_biomass + phaeocystis_biomsss + diatom_biomsss + microzoo_biomsss + pico_protists_biomass + best_poc_protists\n", - "\n", - "print('Our best estimate for the total biomass of marine protists is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "# Uncertainty analysis\n", - "We discuss the uncertainty of estimates based on the MAREDAT database in a dedicated section in the Supplementary Information. We crudely project an uncertainty of about an order of magnitude. We project the same uncertainty for the biomass of particle-attached protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# We crudely estimate and uncertainty of an order of magnitude\n", - "mul_CI = 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of marine protists: 2.1 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of marine protists: ≈10-fold\n" - ] - } - ], - "source": [ - "\n", - "print('Biomass of marine protists: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of marine protists: ≈%.0f-fold' % mul_CI)\n", - "\n", - "\n", - "old_results = pd.read_excel('../protists_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "\n", - "\n", - "if (result.shape[0]==0):\n", - " result = pd.DataFrame(index= range(1), columns=['Parameter','Value','Units','Uncertainty'])\n", - "\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Biomass of marine protists',\n", - " 'Value': float(best_estimate)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Biomass of pico-nanoprotists',\n", - " 'Value': float(pico_protists_biomass)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': None\n", - " })\n", - "\n", - "result.to_excel('../protists_biomass_estimate.xlsx',index=False)\n", - "\n", - "\n", - "# Feed results to Fig. 
2C\n", - "\n", - "# Feed green algae picophytoplankton biomass\n", - "update_fig2c(row=23,col=1,values=auto_picoeuk_biomass*(1-auto_pico_protists_fraction)/1e15, path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed bacterial picophytoplankton biomass\n", - "update_fig2c(row=24,col=1,values=picophyto_biomsss*(1-euk_frac)/1e15, path='../../results.xlsx')\n", - "\n", - "\n", - "# Feed protist picophytoplankton biomass\n", - "update_fig2c(row=25,col=1,values=auto_picoeuk_biomass*auto_pico_protists_fraction/1e15, path='../../results.xlsx')\n", - "\n", - "# Feed diatom biomass\n", - "update_fig2c(row=26,col=1,values=diatom_biomsss/1e15, path='../../results.xlsx')\n", - "\n", - "# Feed Phaeocystis biomass\n", - "update_fig2c(row=27,col=1,values=phaeocystis_biomsss/1e15, path='../../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/protists_biomass_estimate_OLD.xlsx b/protists/protists_biomass_estimate_OLD.xlsx deleted file mode 100644 index 5ba1967..0000000 Binary files a/protists/protists_biomass_estimate_OLD.xlsx and /dev/null differ diff --git a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-checkpoint.ipynb b/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-checkpoint.ipynb deleted file mode 100644 index 1a26307..0000000 --- a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-checkpoint.ipynb +++ /dev/null @@ -1,2169 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - 
"import pandas as pd\n", - "import numpy as np\n", - "import gdal\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of terrestrial protists\n", - "After searching the literature, we could not find a comprehensive account of the biomass of protists in soils. We generated a crude estimate of the total biomass of protists in soil based on estimating the total number of individual protists in the soil, and on the characteristic carbon content of a single protist.\n", - "\n", - "In order to calculate the total biomass of soil protists we calculate a characteristic number of individual protists for each one of the morphological groups of protists (flagellates, ciliates, and naked and testate ameobae). We combine these estimates with estimates for the carbon content of each morphological group.\n", - "\n", - "## Number of protists\n", - "To estimate the total number of protists, we assembled data on the number of protists in soils which contains 160 measurements from 42 independent studies. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOIHabitatSiteNumber of naked amoebae [# g^-1]Number of ciliates [# g^-1]Number of testate amoebae [# g^-1]Number of flagellates [# g^-1]RemarksSampling Depth [cm]
0Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia6.1e+031.5e+024.9e+03nanSamples from termite mound dropped, taken from...nan
1Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia1.3e+041.5e+022.7e+03nanSamples from termite mound dropped, taken from...nan
2Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia4.3e+036.0e+012.3e+03nanSamples from termite mound dropped, taken from...nan
3Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia3.0e+042.7e+022.2e+03nanSamples from termite mound dropped, taken from...nan
4Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia4.0e+033.8e+024.0e+03nanSamples from termite mound dropped, taken from...nan
\n", - "
" - ], - "text/plain": [ - " Reference DOI Habitat \\\n", - "0 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "1 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "2 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "3 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "4 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "\n", - " Site Number of naked amoebae [# g^-1] Number of ciliates [# g^-1] \\\n", - "0 Australia 6.1e+03 1.5e+02 \n", - "1 Australia 1.3e+04 1.5e+02 \n", - "2 Australia 4.3e+03 6.0e+01 \n", - "3 Australia 3.0e+04 2.7e+02 \n", - "4 Australia 4.0e+03 3.8e+02 \n", - "\n", - " Number of testate amoebae [# g^-1] Number of flagellates [# g^-1] \\\n", - "0 4.9e+03 nan \n", - "1 2.7e+03 nan \n", - "2 2.3e+03 nan \n", - "3 2.2e+03 nan \n", - "4 4.0e+03 nan \n", - "\n", - " Remarks Sampling Depth [cm] \n", - "0 Samples from termite mound dropped, taken from... nan \n", - "1 Samples from termite mound dropped, taken from... nan \n", - "2 Samples from termite mound dropped, taken from... nan \n", - "3 Samples from termite mound dropped, taken from... nan \n", - "4 Samples from termite mound dropped, taken from... nan " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data\n", - "data = pd.read_excel('terrestrial_protist_data.xlsx','Density of Individuals')\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total number of protists, we group our samples to different habitats and to the study in which they were taken. We calculate the characteristic number of each of the groups of protists per gram of soil. To do this we first derive a representative value for each study in case there was more than one measurement done in it. 
We calculate the representative value for each study in each habitat. Then we calculate the average of different representative values from different studies within the same habitat. We calculate the averages either by using the arithmetic mean or the geometric mean." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:10: RuntimeWarning: Mean of empty slice\n", - " # Remove the CWD from sys.path while we load stuff.\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: Mean of empty slice\n", - " # This is added back by InteractiveShellApp.init_path()\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:13: RuntimeWarning: Mean of empty slice\n", - " del sys.path[0]\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:12: RuntimeWarning: Mean of empty slice\n", - " if sys.path[0] == '':\n" - ] - } - ], - "source": [ - "# Define the function to calculate the geometric mean of number of each group of protists per gram\n", - "def groupby_gmean(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': gmean(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': gmean(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': gmean(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': gmean(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Define the function to 
calculate the arithmetic mean of number of each group of protists per gram\n", - "def groupby_mean(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': np.nanmean(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': np.nanmean(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': np.nanmean(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': np.nanmean(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Group the samples by habitat and study, and calculate the geometric mean\n", - "grouped_data_gmean = data.groupby(['Habitat','DOI']).apply(groupby_gmean)\n", - "\n", - "# Group the samples by habitat and study, and calculate the arithmetic mean\n", - "grouped_data_mean = data.groupby(['Habitat','DOI']).apply(groupby_mean)\n", - "\n", - "# Group the representative values by habitat, and calculate the geometric mean\n", - "habitat_gmean = grouped_data_gmean.groupby('Habitat').apply(groupby_gmean)\n", - "\n", - "# Group the representative values by habitat, and calculate the arithmetic mean\n", - "habitat_mean = grouped_data_mean.groupby('Habitat').apply(groupby_mean)\n", - "\n", - "habitat_gmean.set_index(habitat_gmean.index.droplevel(1),inplace=True)\n", - "habitat_mean.set_index(habitat_mean.index.droplevel(1),inplace=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is the calculated geometric mean number of cells per gram for each habitat and each group of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forestnannannan9.2e+02
Cropland2.5e+023.6e+036.2e+036.7e+02
Desert1.6e+02nan8.9e+033.3e+03
Forest4.3e+01nannan8.2e+03
Generalnan1.0e+061.0e+05nan
Grassland4.8e+024.2e+041.8e+043.4e+03
Shrubland7.2e+01nannan9.6e+03
Temperate Forest3.9e+021.8e+058.2e+041.4e+04
Tropical Forestnannannan4.2e+04
Tundra4.2e+011.1e+06nan1.3e+03
Woodland2.6e+02nannan1.8e+04
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan nan \n", - "Cropland 2.5e+02 3.6e+03 \n", - "Desert 1.6e+02 nan \n", - "Forest 4.3e+01 nan \n", - "General nan 1.0e+06 \n", - "Grassland 4.8e+02 4.2e+04 \n", - "Shrubland 7.2e+01 nan \n", - "Temperate Forest 3.9e+02 1.8e+05 \n", - "Tropical Forest nan nan \n", - "Tundra 4.2e+01 1.1e+06 \n", - "Woodland 2.6e+02 nan \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan \n", - "Cropland 6.2e+03 \n", - "Desert 8.9e+03 \n", - "Forest nan \n", - "General 1.0e+05 \n", - "Grassland 1.8e+04 \n", - "Shrubland nan \n", - "Temperate Forest 8.2e+04 \n", - "Tropical Forest nan \n", - "Tundra nan \n", - "Woodland nan \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 9.2e+02 \n", - "Cropland 6.7e+02 \n", - "Desert 3.3e+03 \n", - "Forest 8.2e+03 \n", - "General nan \n", - "Grassland 3.4e+03 \n", - "Shrubland 9.6e+03 \n", - "Temperate Forest 1.4e+04 \n", - "Tropical Forest 4.2e+04 \n", - "Tundra 1.3e+03 \n", - "Woodland 1.8e+04 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_gmean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For some groups, not all habitats have values. We fill values for missing data by the following scheme. For missing values in the boreal forest biome, we use values from the temperate forest biome. If we have data for the group of protists from the \"General\" habitat, which is based on expert assessment of the characteristic number of individuals for that group per gram of soil, we fill the missing values with the value for the \"General\" habitat.\n", - "\n", - "The only other missing data was for ciliates in tropical forests and tundra. For tropical forest, we used the values from temperate forests. 
For tundra, we use the mean over all the different habitats to fill the value:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forest3.9e+021.8e+058.2e+049.2e+02
Cropland2.5e+023.6e+036.2e+036.7e+02
Desert1.6e+021.0e+068.9e+033.3e+03
Forest4.3e+01nannan8.2e+03
Generalnan1.0e+061.0e+05nan
Grassland4.8e+021.0e+061.8e+043.4e+03
Shrubland7.2e+011.0e+061.0e+059.6e+03
Temperate Forest3.9e+021.8e+058.2e+041.4e+04
Tropical Forest3.9e+021.0e+061.0e+054.2e+04
Tundra3.7e+021.1e+061.0e+051.3e+03
Woodland2.6e+021.0e+061.0e+051.8e+04
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 3.9e+02 1.8e+05 \n", - "Cropland 2.5e+02 3.6e+03 \n", - "Desert 1.6e+02 1.0e+06 \n", - "Forest 4.3e+01 nan \n", - "General nan 1.0e+06 \n", - "Grassland 4.8e+02 1.0e+06 \n", - "Shrubland 7.2e+01 1.0e+06 \n", - "Temperate Forest 3.9e+02 1.8e+05 \n", - "Tropical Forest 3.9e+02 1.0e+06 \n", - "Tundra 3.7e+02 1.1e+06 \n", - "Woodland 2.6e+02 1.0e+06 \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 8.2e+04 \n", - "Cropland 6.2e+03 \n", - "Desert 8.9e+03 \n", - "Forest nan \n", - "General 1.0e+05 \n", - "Grassland 1.8e+04 \n", - "Shrubland 1.0e+05 \n", - "Temperate Forest 8.2e+04 \n", - "Tropical Forest 1.0e+05 \n", - "Tundra 1.0e+05 \n", - "Woodland 1.0e+05 \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 9.2e+02 \n", - "Cropland 6.7e+02 \n", - "Desert 3.3e+03 \n", - "Forest 8.2e+03 \n", - "General nan \n", - "Grassland 3.4e+03 \n", - "Shrubland 9.6e+03 \n", - "Temperate Forest 1.4e+04 \n", - "Tropical Forest 4.2e+04 \n", - "Tundra 1.3e+03 \n", - "Woodland 1.8e+04 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fill missing values for boreal forests\n", - "habitat_mean.loc['Boreal Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']] = habitat_mean.loc['Temperate Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']]\n", - "habitat_gmean.loc['Boreal Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']] = habitat_gmean.loc['Temperate Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']]\n", - "\n", - "# Fill missing values for naked amoebae\n", - 
"habitat_mean.loc[['Shrubland','Tropical Forest','Tundra','Woodland'],'Number of naked amoebae [# g^-1]'] = habitat_mean.loc['General','Number of naked amoebae [# g^-1]']\n", - "habitat_gmean.loc[['Shrubland','Tropical Forest','Tundra','Woodland'],'Number of naked amoebae [# g^-1]'] = habitat_gmean.loc['General','Number of naked amoebae [# g^-1]']\n", - "\n", - "# Fill missing values for flagellates\n", - "habitat_gmean.loc[['Desert','Grassland','Shrubland','Tropical Forest','Woodland'],'Number of flagellates [# g^-1]'] = habitat_gmean.loc['General','Number of flagellates [# g^-1]']\n", - "habitat_mean.loc[['Desert','Grassland','Shrubland','Tropical Forest','Woodland'],'Number of flagellates [# g^-1]'] = habitat_mean.loc['General','Number of flagellates [# g^-1]']\n", - "\n", - "# Fill missing values for ciliates\n", - "habitat_gmean.loc['Tropical Forest','Number of ciliates [# g^-1]'] = habitat_gmean.loc['Temperate Forest','Number of ciliates [# g^-1]']\n", - "habitat_mean.loc['Tropical Forest','Number of ciliates [# g^-1]'] = habitat_mean.loc['Temperate Forest','Number of ciliates [# g^-1]']\n", - "habitat_gmean.loc['Tundra','Number of ciliates [# g^-1]'] = gmean(habitat_mean['Number of ciliates [# g^-1]'].dropna())\n", - "habitat_mean.loc['Tundra','Number of ciliates [# g^-1]'] = habitat_mean['Number of ciliates [# g^-1]'].dropna().mean()\n", - "habitat_gmean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have estimates for the total number of individual protists per gram of soil. In order to calculate the total number of individual protists we need to first convert the data to number of individuals per $m^2$. 
To convert number of individuals per gram of soil to number of individuals per $m^2$, we calculate a global average soil density in the top 15 cm based on [Hengl et al.](https://dx.doi.org/10.1371%2Fjournal.pone.0105992).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the global mean bulk density of soil in the top 15 cm is ≈1.2e+06 g m^3\n" - ] - } - ], - "source": [ - "# Load soil density map from Hengl et al. (in the top 15 cm, reduced in resolution to 1 degree resolution)\n", - "gtif = gdal.Open('bulk_density_data.tif')\n", - "bulk_density_map = np.array(gtif.GetRasterBand(1).ReadAsArray())\n", - "\n", - "# Fill missing values with NaN\n", - "bulk_density_map[bulk_density_map == bulk_density_map[0,1]] = np.nan\n", - "# Mean soil bulk density from Hengl et al. [in g per m^3]\n", - "bulk_density = np.nanmean(bulk_density_map[:])*1000\n", - "print('Our best estimate for the global mean bulk density of soil in the top 15 cm is ≈%.1e g m^3' %bulk_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Measuring the density of individuals per gram of soil does not take into account the distribution of biomass along the soil profile. Most of the measurements of the number of individual protists per gram of soil are done in shallow soil depths. 
We calculate the average sampling depth across studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average sampling depth of soil protists is ≈8 cm\n" - ] - } - ], - "source": [ - "# Calculate the average sampling depth \n", - "sampling_depth = data.groupby('DOI').mean().mean()['Sampling Depth [cm]']\n", - "\n", - "print('The average sampling depth of soil protists is ≈%.0f cm' %sampling_depth)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is not obvious what is the fraction of the total biomass of soil protists that is found in the top 8 cm of soil. To estimate the fraction of the biomass of soil protists found in the top 8 cm, we rely on two methodologies. The first is based on the distribution of microbial biomass with depth as discussed in Xu et al. Xu et al. extrapolate the microbial biomass across the soil profile based on empirical equations for the distribution of root biomass along soil depth from [Jackson et al.](http://dx.doi.org/10.1007/BF00333714). The empirical equations are biome-specific, and follow the general form: $$Y = 1-\\beta^d$$ Where Y is the cumulative fraction of roots, d is depth in centimeters, and $\\beta$ is a coefficient fitted for each biome. On a global scale, the best fit for $\\beta$ as reported in Jackson et al., is ≈0.966. We use this coefficient to calculate the fraction of total biomass of soil protists found in the top 8 cm: " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Jackson et al. 
is ≈23 percent\n" - ] - } - ], - "source": [ - "# The beta coefficient from Jackson et al.\n", - "jackson_beta = 0.966\n", - "\n", - "# Calculate the fraction of the biomass of soil protists found in the top 8 cm\n", - "jackson_fraction = 1 - jackson_beta** sampling_depth\n", - "\n", - "print('Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Jackson et al. is ≈%.0f percent' %(jackson_fraction*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a second estimate for the fraction of the total biomass of soil protists found in the top 8 cm, we rely on an empirical equation from [Fierer et al.](http://dx.doi.org/10.1111/j.1461-0248.2009.01360.x), which estimates the fraction of microbial biomass found below sampling depth d:\n", - "$$ f = [-0.132×ln(d) + 0.605]×B$$\n", - "Where f is the fraction of microbial biomass found below sampling depth d (in cm). We use this equation to calculate the fraction of the total biomass of soil protists found in the top 8 cm:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Fierer et al. is ≈66 percent\n" - ] - } - ], - "source": [ - "# The fraction of microbial biomass found in layer shallower than depth x based on Fierer et al.\n", - "fierer_eq = lambda x: 1-(-0.132*np.log(x)+0.605)\n", - "fierer_frac = fierer_eq(sampling_depth)\n", - "print('Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Fierer et al. is ≈%.0f percent' %(fierer_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the fraction of the total biomass of soil protists found in layers shallower than 8 cm, we use the geometric mean of the estimates based on Jackson et al. 
and Fierer et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of biomass of soil protists found in soil layers sampled is ≈44 percent\n" - ] - } - ], - "source": [ - "best_depth_frac = frac_mean(np.array([jackson_fraction,fierer_frac]))\n", - "print('Our best estimate for the fraction of biomass of soil protists found in soil layers sampled is ≈%.0f percent' %(best_depth_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the measurements per gram of soil to number of individuals per $m^2$, we calculate the average sampling depth across studies. We calculate the volume of soil held within this sampling depth. We use the bulk density to calculate the total weight of soil within one $m^2$ of soil with depth equal to the sampling depth. We multiply the estimates per gram of soil by the total weight of soil per $m^2$. To account for biomass present in lower layers, we divide the total number of individual protists per $m^2$ by our best estimate for the fraction of the total biomass of soil protists found in layers shallower than 8 cm." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# convert number of individuals per gram soil to number of individuals per m^2\n", - "habitat_per_m2_gmean = (habitat_gmean*bulk_density*sampling_depth/100/best_depth_frac)\n", - "habitat_per_m2_mean = (habitat_mean*bulk_density*sampling_depth/100/best_depth_frac)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of protists we multiply the total number of individuals per unit area of each type of protist in each habitat by the total area of each habitat taken from the book [Biogeochemistry: An analysis of Global Change](https://www.sciencedirect.com/science/book/9780123858740) by Schlesinger & Bernhardt. The areas of each habitat are:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Area [m^2]Unnamed: 2
Biome
Boreal Forest13700000000000Temperate forest, Tropical rainforest
Desert27700000000000Desert
Temperate Forest10400000000000Temperate forest
Grassland15000000000000Grassland
Tropical Forest17500000000000Tropical rainforest
Tundra5600000000000Tundra
Tropical Savanna27700000000000Scrubland,grassland,Temprate Forest, Tropical ...
Cropland15500000000000Cropland
\n", - "
" - ], - "text/plain": [ - " Area [m^2] \\\n", - "Biome \n", - "Boreal Forest 13700000000000 \n", - "Desert 27700000000000 \n", - "Temperate Forest 10400000000000 \n", - "Grassland 15000000000000 \n", - "Tropical Forest 17500000000000 \n", - "Tundra 5600000000000 \n", - "Tropical Savanna 27700000000000 \n", - "Cropland 15500000000000 \n", - "\n", - " Unnamed: 2 \n", - "Biome \n", - "Boreal Forest Temperate forest, Tropical rainforest \n", - "Desert Desert \n", - "Temperate Forest Temperate forest \n", - "Grassland Grassland \n", - "Tropical Forest Tropical rainforest \n", - "Tundra Tundra \n", - "Tropical Savanna Scrubland,grassland,Temprate Forest, Tropical ... \n", - "Cropland Cropland " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_area = pd.read_excel('terrestrial_protist_data.xlsx','Biome area', skiprows=1,index_col=0)\n", - "habitat_area" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One habitat for which we do not have data is the savanna. We use the mean of the values for the tropical forest, woodland, shrubland and grassland as an estimate of the total biomass in the savanna." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "habitat_per_m2_gmean.loc['Tropical Savanna'] = gmean(habitat_per_m2_gmean.loc[['Tropical Forest','Woodland','Shrubland','Grassland']])\n", - "habitat_per_m2_mean.loc['Tropical Savanna'] = habitat_per_m2_mean.loc[['Tropical Forest','Woodland','Shrubland','Grassland']].mean(axis=0)\n", - "\n", - "tot_num_gmean = habitat_per_m2_gmean.mul(habitat_area['Area [m^2]'],axis=0)\n", - "tot_num_mean = habitat_per_m2_mean.mul(habitat_area['Area [m^2]'],axis=0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We generated two types of estimates for the total number of soil protists: an estimate which uses the arithmetic mean of the number of individuals at each habitat, and an estimate which uses the geometric mean of the number of individuals at each habitat. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are contaminated with organic carbon sources, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "tot_num_protist = gmean([tot_num_mean.sum(),tot_num_gmean.sum()])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Carbon content of protists\n", - "We estimate the characteristic carbon content of a single protist from each of the morphological groups of protists based on data from several sources. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOICarbon content of naked amoebae [g C cell^-1]Carbon content of ciliates [g C cell^-1]Carbon content of testate amoebae [g C cell^-1]Carbon content of flagellates [g C cell^-1]Remarks
0Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan1.5e-09nanCalculated from table 2, assuming 15% carbon c...
1Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan1.1e-09nanCalculated from table 2, assuming 15% carbon c...
2Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan2.1e-09nanCalculated from table 2, assuming 15% carbon c...
3Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan3.4e-09nanCalculated from table 2, assuming 15% carbon c...
4Foissnerhttp://dx.doi.org/10.1016/0167-8809(92)90093-Qnan3.8e-094.7e-09nanCalculated from table 2, assuming 15% carbon c...
\n", - "
" - ], - "text/plain": [ - " Reference DOI \\\n", - "0 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "1 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "2 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "3 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "4 Foissner http://dx.doi.org/10.1016/0167-8809(92)90093-Q \n", - "\n", - " Carbon content of naked amoebae [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 nan \n", - "\n", - " Carbon content of ciliates [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 3.8e-09 \n", - "\n", - " Carbon content of testate amoebae [g C cell^-1] \\\n", - "0 1.5e-09 \n", - "1 1.1e-09 \n", - "2 2.1e-09 \n", - "3 3.4e-09 \n", - "4 4.7e-09 \n", - "\n", - " Carbon content of flagellates [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 nan \n", - "\n", - " Remarks \n", - "0 Calculated from table 2, assuming 15% carbon c... \n", - "1 Calculated from table 2, assuming 15% carbon c... \n", - "2 Calculated from table 2, assuming 15% carbon c... \n", - "3 Calculated from table 2, assuming 15% carbon c... \n", - "4 Calculated from table 2, assuming 15% carbon c... " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cc_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Carbon content')\n", - "cc_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine this data with an additional source from [Finlay & Fenchel](http://dx.doi.org/10.1078/1434-4610-00060). We calculate the average cell length for each group. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load data from Finlay & Fenchel\n", - "ff_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Finlay & Fenchel', skiprows=1)\n", - "\n", - "# Define the function to calculate the weighted average for each group of protists\n", - "def weighted_av_groupby(input):\n", - " return np.average(input['Length [µm]'],weights=input['Abundance [# g^-1]'])\n", - "\n", - "cell_lengths = ff_data.groupby('Protist type').apply(weighted_av_groupby)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert the cell length to biovolume according to the allometric relation described in Figure 10 of Finlay & Fenchel. The relation between cell volume and cell length is given by the equation: \n", - "$$V = 0.6×L^{2.36}$$\n", - "Where V is the cell volume in $µm^3$ and L is the cell length in µm." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "Ciliate 5.4e+03\n", - "Flagellate 1.2e+02\n", - "Naked amoebae 1.4e+03\n", - "Testate amoebae 3.6e+03\n", - "dtype: float64" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cell_volumes = 0.6*cell_lengths**2.36\n", - "cell_volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert cell volumes to carbon content assuming ≈150 fg C µm$^{-3}$:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "Ciliate 8.1e-10\n", - "Flagellate 1.8e-11\n", - "Naked amoebae 2.0e-10\n", - "Testate amoebae 5.5e-10\n", - "dtype: float64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ff_carbon_content = cell_volumes*150e-15\n", - 
"pd.options.display.float_format = '{:,.1e}'.format\n", - "ff_carbon_content" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We add these numbers as an additional source for calculating the carbon content of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "cc_data.loc[cc_data.index[-1]+1] = pd.Series({'Reference': 'Finlay & Fenchel',\n", - " 'DOI': 'http://dx.doi.org/10.1078/1434-4610-00060',\n", - " 'Carbon content of ciliates [g C cell^-1]': ff_carbon_content.loc['Ciliate'],\n", - " 'Carbon content of naked amoebae [g C cell^-1]': ff_carbon_content.loc['Naked amoebae'],\n", - " 'Carbon content of testate amoebae [g C cell^-1]': ff_carbon_content.loc['Testate amoebae'],\n", - " 'Carbon content of flagellates [g C cell^-1]': ff_carbon_content.loc['Flagellate']\n", - " })\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of carbon contents first for values within each study and then for the average values between studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - } - ], - "source": [ - "def groupby_gmean(input):\n", - " return pd.DataFrame({'Carbon content of ciliates [g C cell^-1]': gmean(input['Carbon content of ciliates [g C cell^-1]'].dropna()),\n", - " 'Carbon content of naked amoebae [g C cell^-1]': gmean(input['Carbon content of naked amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of testate amoebae [g C cell^-1]': 
gmean(input['Carbon content of testate amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of flagellates [g C cell^-1]': gmean(input['Carbon content of flagellates [g C cell^-1]'].dropna())},index=[0])\n", - "\n", - "\n", - "study_mean_cc = cc_data.groupby('DOI').apply(groupby_gmean)\n", - "mean_cc = study_mean_cc.reset_index().groupby('level_1').apply(groupby_gmean)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
level_10
0
Carbon content of ciliates [g C cell^-1]8.7e-10
Carbon content of flagellates [g C cell^-1]2.2e-11
Carbon content of naked amoebae [g C cell^-1]2.0e-10
Carbon content of testate amoebae [g C cell^-1]1.6e-09
\n", - "
" - ], - "text/plain": [ - "level_1 0\n", - " 0\n", - "Carbon content of ciliates [g C cell^-1] 8.7e-10\n", - "Carbon content of flagellates [g C cell^-1] 2.2e-11\n", - "Carbon content of naked amoebae [g C cell^-1] 2.0e-10\n", - "Carbon content of testate amoebae [g C cell^-1] 1.6e-09" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gmean(study_mean_cc['Carbon content of flagellates [g C cell^-1]'].dropna())\n", - "mean_cc.T" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of soil protists based on the total number of individuals and their carbon content, we multiply our estimate for the total number of individuals for each morphological type by its characteristic carbon content. We sum over all morphological types of protists to generate our best estimate for the global biomass of soil protists." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of soil protists is ≈1.6 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of protists\n", - "best_estimate = (tot_num_protist*mean_cc).sum(axis=1)\n", - "\n", - "print('Our best estimate of the total biomass of soil protists is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with our estimate of the total biomass of terrestrial protists, we collect available uncertainties for the values reported within studies and between studies. 
We use the highest uncertainty out of this collection of uncertainties as our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists.\n", - "\n", - "## Number of individuals\n", - "We assemble different measures of uncertainty at different levels - for values within the same study, for studies within the same habitat, and between habitats.\n", - "\n", - "### Intra-study uncertainty\n", - "For each study which reports more than one value, we calculate 95% confidence interval around the geometric mean of those values. We take the maximal uncertainty in each habitat as our measure of the intra-study uncertainty" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "\n", - "# Define the function to calculate the 95% confidence interval around the\n", - "# geometric mean of number of each group of protists per gram\n", - "def groupby_geo_CI(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': geo_CI_calc(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': geo_CI_calc(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': geo_CI_calc(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': geo_CI_calc(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Group the samples by habitat and study, and calculate the 95% confidence\n", - "# interval around the geometric mean of values within each study\n", - "intra_study_num_CI = data.groupby(['Habitat','DOI']).apply(groupby_geo_CI)\n", - "\n", - "# Use the maximal uncertainty in each habitat as a measure of the intra-study uncertainty\n", - "intra_num_CI = intra_study_num_CI.groupby('Habitat').max()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"### Interstudy uncertainty\n", - "We calculate 95% confidence interval around the geometric mean of the average values from different studies." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forestnannannan457.0
Cropland3.313.1nan4.8
Desertnannannan1.4
Forest17.3nannan4.2
Generalnannannannan
Grassland10.4nan1.42.4
Shrubland1.0nannannan
Temperate Forest2.642.2nan4.6
Tropical Forestnannannannan
Tundra2.75.1nan1.7
Woodland1.0nannan1.0
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan nan \n", - "Cropland 3.3 13.1 \n", - "Desert nan nan \n", - "Forest 17.3 nan \n", - "General nan nan \n", - "Grassland 10.4 nan \n", - "Shrubland 1.0 nan \n", - "Temperate Forest 2.6 42.2 \n", - "Tropical Forest nan nan \n", - "Tundra 2.7 5.1 \n", - "Woodland 1.0 nan \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan \n", - "Cropland nan \n", - "Desert nan \n", - "Forest nan \n", - "General nan \n", - "Grassland 1.4 \n", - "Shrubland nan \n", - "Temperate Forest nan \n", - "Tropical Forest nan \n", - "Tundra nan \n", - "Woodland nan \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 457.0 \n", - "Cropland 4.8 \n", - "Desert 1.4 \n", - "Forest 4.2 \n", - "General nan \n", - "Grassland 2.4 \n", - "Shrubland nan \n", - "Temperate Forest 4.6 \n", - "Tropical Forest nan \n", - "Tundra 1.7 \n", - "Woodland 1.0 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Group the representative values by habitat, and calculate the 95% confidence interval\n", - "# around the geometric mean of values within habitat\n", - "inter_study_habitat_num_CI = grouped_data_gmean.groupby('Habitat').apply(groupby_geo_CI)\n", - "inter_study_habitat_num_CI.set_index(inter_study_habitat_num_CI.index.droplevel(level=1),inplace=True)\n", - "inter_study_habitat_num_CI\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-habitat uncertainty\n", - "We first use the maximum of the intra-study and interstudy uncertainty in each habitat as our best projection for the uncertainty associated with the estimate of the total number of protists in the habitat. 
For habitats with missing uncertainty projections, we use the geometric mean of the uncertainties for the same group of protists in other habitats." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forest4.514.11.5457.0
Cropland19.213.11.24.8
Desert1.914.11.81.4
Forest17.314.11.54.2
General4.514.11.54.2
Grassland10.414.11.42.4
Shrubland2.114.11.52.6
Temperate Forest4.242.21.84.6
Tropical Forest4.514.11.51.3
Tundra2.75.11.51.7
Woodland1.014.11.52.1
Tropical Savanna4.514.11.54.2
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 4.5 14.1 \n", - "Cropland 19.2 13.1 \n", - "Desert 1.9 14.1 \n", - "Forest 17.3 14.1 \n", - "General 4.5 14.1 \n", - "Grassland 10.4 14.1 \n", - "Shrubland 2.1 14.1 \n", - "Temperate Forest 4.2 42.2 \n", - "Tropical Forest 4.5 14.1 \n", - "Tundra 2.7 5.1 \n", - "Woodland 1.0 14.1 \n", - "Tropical Savanna 4.5 14.1 \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 1.5 \n", - "Cropland 1.2 \n", - "Desert 1.8 \n", - "Forest 1.5 \n", - "General 1.5 \n", - "Grassland 1.4 \n", - "Shrubland 1.5 \n", - "Temperate Forest 1.8 \n", - "Tropical Forest 1.5 \n", - "Tundra 1.5 \n", - "Woodland 1.5 \n", - "Tropical Savanna 1.5 \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 457.0 \n", - "Cropland 4.8 \n", - "Desert 1.4 \n", - "Forest 4.2 \n", - "General 4.2 \n", - "Grassland 2.4 \n", - "Shrubland 2.6 \n", - "Temperate Forest 4.6 \n", - "Tropical Forest 1.3 \n", - "Tundra 1.7 \n", - "Woodland 2.1 \n", - "Tropical Savanna 4.2 " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Use the maximum of the intra-study and interstudy uncertainty as our best projection of the uncertainty \n", - "# of the number of protists in each habitat\n", - "tot_num_habitat_CI = inter_study_habitat_num_CI.where(inter_study_habitat_num_CI > intra_num_CI, intra_num_CI).fillna(inter_study_habitat_num_CI)\n", - "\n", - "# Fill missing values for each habitat with the geometric mean of the uncertainties for the same group of \n", - "# protists in the other habitats\n", - "tot_num_habitat_CI['Number of ciliates [# g^-1]'].fillna(gmean(tot_num_habitat_CI['Number of ciliates [# g^-1]'].dropna()),inplace=True)\n", - "tot_num_habitat_CI['Number of flagellates [# g^-1]'].fillna(gmean(tot_num_habitat_CI['Number of flagellates [# 
g^-1]'].dropna()),inplace=True)\n", - "tot_num_habitat_CI['Number of naked amoebae [# g^-1]'].fillna(gmean(tot_num_habitat_CI['Number of naked amoebae [# g^-1]'].dropna()),inplace=True)\n", - "tot_num_habitat_CI['Number of testate amoebae [# g^-1]'].fillna(gmean(tot_num_habitat_CI['Number of testate amoebae [# g^-1]'].dropna()),inplace=True)\n", - "\n", - "# Fill the uncertainty of the values for the tropical savanna with the geometric mean the uncertainties \n", - "# for the same group of protists in the other habitats\n", - "tot_num_habitat_CI.loc['Tropical Savanna'] = gmean(tot_num_habitat_CI)\n", - "tot_num_habitat_CI\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We propagate the uncertainties associated with the estimates of the total number of protists per gram soil in each habitat to the estimate of the sum across all habitats:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 2.4\n", - "Number of flagellates [# g^-1] 2.4\n", - "Number of naked amoebae [# g^-1] 1.2\n", - "Number of testate amoebae [# g^-1] 2.6\n", - "dtype: float64" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tot_num_habitat_CI = tot_num_habitat_CI.loc[tot_num_gmean.dropna().index.values]\n", - "\n", - "ciliate_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of ciliates [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of ciliates [# g^-1]'])\n", - "flagellate_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of ciliates [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of ciliates [# g^-1]'])\n", - "naked_amoebea_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of naked amoebae [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of naked amoebae [# g^-1]'])\n", - "testate_amoebea_num_per_g_CI = 
CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of testate amoebae [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of testate amoebae [# g^-1]'])\n", - "num_per_g_CI = pd.Series([ciliate_num_per_g_CI,flagellate_num_per_g_CI,naked_amoebea_num_per_g_CI,testate_amoebea_num_per_g_CI], index= tot_num_habitat_CI.columns)\n", - "num_per_g_CI\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "We generated two types of estimates for the total number of individual protists per gram of soil - one based on the arithmetic mean and one based on the geometric mean of values. As our best estimate we used the geometric mean of the arithmetic mean and geometric mean-based estimates. We calculate the 95% confidence interval around the geometric mean of the two types of estimates as a measure of the uncertainty this procedure introduces into the estimate of the total number of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 3.0\n", - "Number of flagellates [# g^-1] 1.9\n", - "Number of naked amoebae [# g^-1] 1.2\n", - "Number of testate amoebae [# g^-1] 1.5\n", - "dtype: float64" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "inter_method_num_CI = geo_CI_calc(pd.DataFrame([tot_num_mean.sum(),tot_num_gmean.sum()]))\n", - "inter_method_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the uncertainty stemming from the intra-study and interstudy variability and the inter-method uncertainty as our best projection of the uncertainty associated with our estimate of the number of individual protists per gram of soil:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 3.0\n", - 
"Number of flagellates [# g^-1] 2.4\n", - "Number of naked amoebae [# g^-1] 1.2\n", - "Number of testate amoebae [# g^-1] 2.6\n", - "dtype: float64" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_num_CI = np.max([num_per_g_CI,inter_method_num_CI],axis=0)\n", - "best_num_CI = pd.Series(best_num_CI,index= inter_method_num_CI.index)\n", - "best_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the total number of individual protists per gram soil to an estimate of the total number of protists per $m^2$, we rely on the bulk density of soil and on an estimate of the fraction of the total biomass of soil protists in the top 8 cm of soil. We now assess the uncertainty associated with each of those parameters.\n", - "\n", - "### Bulk density of soil\n", - "We do not have a good estimate for the uncertainty associated with the bulk density of soils. We thus use a crude uncertainty of ≈2-fold as a measure of the uncertainty associated with the bulk density of soils.\n", - "\n", - "### Fraction of biomass of protists in top 8 cm\n", - "To estimate the fraction of the total biomass of soil protists present in the top 8 cm of soils, we rely on two estimates - one based on data from Jackson et al. and one based on data from Fierer et al. 
As a measure of the uncertainty associated with the estimate of the fraction of the total biomass of soil protists present in the top 8 cm of soils, we calculate the 95% confidence interval around the geometric mean of the two estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty associated with our estimate of the fraction of the total biomass of soil protists found in the top 8 cm of soil is ≈2.9-fold\n" - ] - } - ], - "source": [ - "# We use a crude estimate of ≈2-fold as our measure of the uncertainty associated with\n", - "# the average bulk density of soils\n", - "bulk_density_CI = 2\n", - "\n", - "# Calculate the 95% confidence interval around the geometric mean of our estimates for\n", - "# the fraction of the total soil biomass present in the top 8 cm\n", - "depth_frac_CI = frac_CI(np.array([jackson_fraction,fierer_frac]))\n", - "print('Our projection for the uncertainty associated with our estimate of the fraction of the total biomass of soil protists found in the top 8 cm of soil is ≈%.1f-fold' %depth_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the uncertainties associated with the total number of individual protists per gram soil with the uncertainties associated with the average bulk density of soil and the uncertainty associated with the fraction of the total biomass of soil protists found in the top 8 cm of soil:" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 5.4\n", - "Number of flagellates [# g^-1] 4.7\n", - "Number of naked amoebae [# g^-1] 3.6\n", - "Number of testate amoebae [# g^-1] 5.0\n", - "dtype: float64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ciliate_num_CI 
= CI_prod_prop(np.array([best_num_CI['Number of ciliates [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "flagellates_num_CI = CI_prod_prop(np.array([best_num_CI['Number of flagellates [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "naked_amoebae_num_CI = CI_prod_prop(np.array([best_num_CI['Number of naked amoebae [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "testate_amoebae_num_CI = CI_prod_prop(np.array([best_num_CI['Number of testate amoebae [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "tot_num_CI = pd.Series([ciliate_num_CI,flagellates_num_CI,naked_amoebae_num_CI,testate_amoebae_num_CI], index= tot_num_habitat_CI.columns)\n", - "tot_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Carbon content\n", - "We assemble different measures of uncertainty at different levels - for values within the same study and for values between studies.\n", - "### Intra-study uncertainty\n", - "For studies which report more than one measurement, we calculate the 95% confidence interval around the mean of the values. We use the maximal uncertainty as a measure of the intra-study uncertainty associated with the carbon content of protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def groupby_geo_CI(input):\n", - " return pd.DataFrame({'Carbon content of ciliates [g C cell^-1]': geo_CI_calc(input['Carbon content of ciliates [g C cell^-1]'].dropna()),\n", - " 'Carbon content of naked amoebae [g C cell^-1]': geo_CI_calc(input['Carbon content of naked amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of testate amoebae [g C cell^-1]': geo_CI_calc(input['Carbon content of testate amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of flagellates [g C cell^-1]': geo_CI_calc(input['Carbon content of flagellates [g C cell^-1]'].dropna())},index=[0])\n", - "\n", - "\n", - "cc_intra_CI = cc_data.groupby('DOI').apply(groupby_geo_CI).max()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interstudy uncertainty\n", - "We calculate the 95% confidence interval around the mean carbon content from different studies. We use the maximal uncertainty as a measure of the interstudy uncertainty associated with the carbon content of protists." - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Carbon content of ciliates [g C cell^-1] 2.0\n", - "Carbon content of flagellates [g C cell^-1] 3.3\n", - "Carbon content of naked amoebae [g C cell^-1] 1.6\n", - "Carbon content of testate amoebae [g C cell^-1] 1.9\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cc_inter_CI = geo_CI_calc(study_mean_cc)\n", - "cc_inter_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the intra-study and interstudy uncertainties as our best projection of the uncertainty associated with the estimate of the carbon content of protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Carbon content of ciliates [g C cell^-1] 2.9\n", - "Carbon content of flagellates [g C cell^-1] 3.3\n", - "Carbon content of naked amoebae [g C cell^-1] 8.1\n", - "Carbon content of testate amoebae [g C cell^-1] 2.2\n", - "dtype: float64" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_cc_CI = np.max([cc_intra_CI,cc_inter_CI],axis=0)\n", - "best_cc_CI = pd.Series(best_cc_CI,index=cc_inter_CI.index)\n", - "best_cc_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculating the total uncertainty\n", - "We propagate the uncertainty in the total number of protists and in the carbon content of protists to the total estimate of the biomass of protists. We first calculate the uncertainty associated with the estimate of biomass of each of the groups of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ciliate_biomass_CI = CI_prod_prop(np.array([ciliate_num_CI,best_cc_CI['Carbon content of ciliates [g C cell^-1]']]))\n", - "flagellates_biomass_CI = CI_prod_prop(np.array([flagellates_num_CI,best_cc_CI['Carbon content of flagellates [g C cell^-1]']]))\n", - "naked_amoebae_biomass_CI = CI_prod_prop(np.array([naked_amoebae_num_CI,best_cc_CI['Carbon content of naked amoebae [g C cell^-1]']]))\n", - "testate_amoebae_biomass_CI = CI_prod_prop(np.array([testate_amoebae_num_CI,best_cc_CI['Carbon content of testate amoebae [g C cell^-1]']]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then propagate the uncertainty associated with the biomass of each protist group to the estimate of the total biomass of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists is ≈4-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(estimates=(tot_num_protist*mean_cc).values.squeeze(), mul_CIs= np.array([ciliate_biomass_CI, flagellates_biomass_CI, naked_amoebae_biomass_CI, testate_amoebae_biomass_CI]))\n", - "print('Our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists is ≈%0.f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of terrestrial protists: 1.6 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of terrestrial protists: ≈4-fold\n" - ] - } - ], - "source": [ - "\n", - "print('Biomass of terrestrial protists: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of terrestrial protists: ≈%.0f-fold' % mul_CI)\n", - "\n", - "\n", - "old_results = pd.read_excel('../protists_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Biomass of terrestrial protists',\n", - " 'Value': float(best_estimate)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.to_excel('../protists_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - 
"nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-median-checkpoint.ipynb b/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-median-checkpoint.ipynb deleted file mode 100644 index 890ed1f..0000000 --- a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists-median-checkpoint.ipynb +++ /dev/null @@ -1,2248 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import gdal\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "from excel_utils import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of terrestrial protists\n", - "After searching the literature, we could not find a comprehensive account of the biomass of protists in soils. We generated a crude estimate of the total biomass of protists in soil based on estimating the total number of individual protists in the soil, and on the characteristic carbon content of a single protist.\n", - "\n", - "In order to calculate the total biomass of soil protists we calculate a characteristic number of individual protists for each one of the morphological groups of protists (flagellates, ciliates, and naked and testate ameobae). We combine these estimates with estimates for the carbon content of each morphological group.\n", - "\n", - "## Number of protists\n", - "To estimate the total number of protists, we assembled data on the number of protists in soils which contains 160 measurements from 42 independent studies. 
Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOIHabitatSiteNumber of naked amoebae [# g^-1]Number of ciliates [# g^-1]Number of testate amoebae [# g^-1]Number of flagellates [# g^-1]RemarksSampling Depth [cm]
0Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia6.1e+031.5e+024.9e+03nanSamples from termite mound dropped, taken from...nan
1Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia1.3e+041.5e+022.7e+03nanSamples from termite mound dropped, taken from...nan
2Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia4.3e+036.0e+012.3e+03nanSamples from termite mound dropped, taken from...nan
3Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia3.0e+042.7e+022.2e+03nanSamples from termite mound dropped, taken from...nan
4Robinson et al.http://dx.doi.org/10.1111/j.1550-7408.2002.tb0...DesertAustralia4.0e+033.8e+024.0e+03nanSamples from termite mound dropped, taken from...nan
\n", - "
" - ], - "text/plain": [ - " Reference DOI Habitat \\\n", - "0 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "1 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "2 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "3 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "4 Robinson et al. http://dx.doi.org/10.1111/j.1550-7408.2002.tb0... Desert \n", - "\n", - " Site Number of naked amoebae [# g^-1] Number of ciliates [# g^-1] \\\n", - "0 Australia 6.1e+03 1.5e+02 \n", - "1 Australia 1.3e+04 1.5e+02 \n", - "2 Australia 4.3e+03 6.0e+01 \n", - "3 Australia 3.0e+04 2.7e+02 \n", - "4 Australia 4.0e+03 3.8e+02 \n", - "\n", - " Number of testate amoebae [# g^-1] Number of flagellates [# g^-1] \\\n", - "0 4.9e+03 nan \n", - "1 2.7e+03 nan \n", - "2 2.3e+03 nan \n", - "3 2.2e+03 nan \n", - "4 4.0e+03 nan \n", - "\n", - " Remarks Sampling Depth [cm] \n", - "0 Samples from termite mound dropped, taken from... nan \n", - "1 Samples from termite mound dropped, taken from... nan \n", - "2 Samples from termite mound dropped, taken from... nan \n", - "3 Samples from termite mound dropped, taken from... nan \n", - "4 Samples from termite mound dropped, taken from... nan " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data\n", - "data = pd.read_excel('terrestrial_protist_data.xlsx','Density of Individuals')\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total number of protists, we group our samples to different habitats and to the study in which they were taken. We calculate the characteristic number of each of the groups of protists per gram of soil. To do this we first derive a representative value for each study in case there was more than one measurement done in it. 
We calculate the representative value for each study in each habitat. Then we calculate the average of different representative values from different studies within the same habitat. We calculate the averages either by using the arithmetic mean or the geometric mean." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:10: RuntimeWarning: Mean of empty slice\n", - " # Remove the CWD from sys.path while we load stuff.\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:11: RuntimeWarning: Mean of empty slice\n", - " # This is added back by InteractiveShellApp.init_path()\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:13: RuntimeWarning: Mean of empty slice\n", - " del sys.path[0]\n", - "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:12: RuntimeWarning: Mean of empty slice\n", - " if sys.path[0] == '':\n", - "/usr/local/lib/python3.5/dist-packages/numpy/lib/nanfunctions.py:1018: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n" - ] - } - ], - "source": [ - "# Define the function to calculate the geometric mean of number of each group of protists per gram\n", - "def groupby_gmean(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': gmean(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': gmean(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': gmean(input['Number of testate 
amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': gmean(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Define the function to calculate the arithmetic mean of number of each group of protists per gram\n", - "def groupby_mean(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': np.nanmean(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': np.nanmean(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': np.nanmean(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': np.nanmean(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Define the function to calculate the median of number of each group of protists per gram\n", - "def groupby_median(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': np.nanmedian(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': np.nanmedian(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': np.nanmedian(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': np.nanmedian(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "\n", - "# Group the samples by habitat and study, and calculate the geometric mean\n", - "grouped_data_gmean = data.groupby(['Habitat','DOI']).apply(groupby_gmean)\n", - "\n", - "# Group the samples by habitat and study, and calculate the arithmetic mean\n", - "grouped_data_mean = data.groupby(['Habitat','DOI']).apply(groupby_mean)\n", - "\n", - "# Group the samples by habitat and study, and calculate the arithmetic mean\n", - "grouped_data_median = data.groupby(['Habitat','DOI']).apply(groupby_median)\n", - "\n", - "# Group the representative values by habitat, and calculate the geometric mean\n", - "habitat_gmean = 
grouped_data_gmean.groupby('Habitat').apply(groupby_gmean)\n", - "\n", - "# Group the representative values by habitat, and calculate the arithmetic mean\n", - "habitat_mean = grouped_data_mean.groupby('Habitat').apply(groupby_mean)\n", - "\n", - "# Group the representative values by habitat, and calculate the median\n", - "habitat_median = grouped_data_median.groupby('Habitat').apply(groupby_median)\n", - "\n", - "\n", - "habitat_gmean.set_index(habitat_gmean.index.droplevel(1),inplace=True)\n", - "habitat_mean.set_index(habitat_mean.index.droplevel(1),inplace=True)\n", - "habitat_median.set_index(habitat_median.index.droplevel(1),inplace=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is the calculated geometric mean number of cells per gram for each habitat and each group of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forestnannannan1.1e+04
Cropland2.7e+022.4e+046.2e+033.5e+02
Desert2.4e+02nan1.1e+043.4e+03
Forest1.2e+02nannan1.1e+04
Generalnan1.0e+061.0e+05nan
Grassland1.1e+034.2e+041.8e+043.3e+03
Shrubland5.9e+01nannan1.1e+04
Temperate Forest3.6e+029.0e+051.0e+052.4e+04
Tropical Forestnannannan4.5e+04
Tundra4.8e+012.0e+06nan1.5e+03
Woodland2.6e+02nannan1.9e+04
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan nan \n", - "Cropland 2.7e+02 2.4e+04 \n", - "Desert 2.4e+02 nan \n", - "Forest 1.2e+02 nan \n", - "General nan 1.0e+06 \n", - "Grassland 1.1e+03 4.2e+04 \n", - "Shrubland 5.9e+01 nan \n", - "Temperate Forest 3.6e+02 9.0e+05 \n", - "Tropical Forest nan nan \n", - "Tundra 4.8e+01 2.0e+06 \n", - "Woodland 2.6e+02 nan \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan \n", - "Cropland 6.2e+03 \n", - "Desert 1.1e+04 \n", - "Forest nan \n", - "General 1.0e+05 \n", - "Grassland 1.8e+04 \n", - "Shrubland nan \n", - "Temperate Forest 1.0e+05 \n", - "Tropical Forest nan \n", - "Tundra nan \n", - "Woodland nan \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 1.1e+04 \n", - "Cropland 3.5e+02 \n", - "Desert 3.4e+03 \n", - "Forest 1.1e+04 \n", - "General nan \n", - "Grassland 3.3e+03 \n", - "Shrubland 1.1e+04 \n", - "Temperate Forest 2.4e+04 \n", - "Tropical Forest 4.5e+04 \n", - "Tundra 1.5e+03 \n", - "Woodland 1.9e+04 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_median" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For some groups, not all habitats have values. We fill values for missing data by the following scheme. For missing values in the boreal forest biome, we use values from the temperate forest biome. If we have data for the group of protists from the \"General\" habitat, which is based on expert assessment of the characteristic number of individuals for that group per gram of soil, we fill the missing values with the value for the \"General\" habitat.\n", - "\n", - "The only other missing data was for ciliates in tropical forests and tundra. For tropical forest, we used the values from temperate forests forests. 
For tundra, we use the mean over all the different habitats to fill the value:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forest3.6e+029.0e+051.0e+051.1e+04
Cropland2.7e+022.4e+046.2e+033.5e+02
Desert2.4e+021.0e+061.1e+043.4e+03
Forest1.2e+02nannan1.1e+04
Generalnan1.0e+061.0e+05nan
Grassland1.1e+031.0e+061.8e+043.3e+03
Shrubland5.9e+011.0e+061.0e+051.1e+04
Temperate Forest3.6e+029.0e+051.0e+052.4e+04
Tropical Forest3.6e+021.0e+061.0e+054.5e+04
Tundra3.1e+022.0e+061.0e+051.5e+03
Woodland2.6e+021.0e+061.0e+051.9e+04
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 3.6e+02 9.0e+05 \n", - "Cropland 2.7e+02 2.4e+04 \n", - "Desert 2.4e+02 1.0e+06 \n", - "Forest 1.2e+02 nan \n", - "General nan 1.0e+06 \n", - "Grassland 1.1e+03 1.0e+06 \n", - "Shrubland 5.9e+01 1.0e+06 \n", - "Temperate Forest 3.6e+02 9.0e+05 \n", - "Tropical Forest 3.6e+02 1.0e+06 \n", - "Tundra 3.1e+02 2.0e+06 \n", - "Woodland 2.6e+02 1.0e+06 \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 1.0e+05 \n", - "Cropland 6.2e+03 \n", - "Desert 1.1e+04 \n", - "Forest nan \n", - "General 1.0e+05 \n", - "Grassland 1.8e+04 \n", - "Shrubland 1.0e+05 \n", - "Temperate Forest 1.0e+05 \n", - "Tropical Forest 1.0e+05 \n", - "Tundra 1.0e+05 \n", - "Woodland 1.0e+05 \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 1.1e+04 \n", - "Cropland 3.5e+02 \n", - "Desert 3.4e+03 \n", - "Forest 1.1e+04 \n", - "General nan \n", - "Grassland 3.3e+03 \n", - "Shrubland 1.1e+04 \n", - "Temperate Forest 2.4e+04 \n", - "Tropical Forest 4.5e+04 \n", - "Tundra 1.5e+03 \n", - "Woodland 1.9e+04 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fill missing values for boreal forests\n", - "habitat_mean.loc['Boreal Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']] = habitat_mean.loc['Temperate Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']]\n", - "habitat_gmean.loc['Boreal Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']] = habitat_gmean.loc['Temperate Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']]\n", - "habitat_median.loc['Boreal Forest',['Number of ciliates [# g^-1]','Number of flagellates [# 
g^-1]','Number of naked amoebae [# g^-1]']] = habitat_median.loc['Temperate Forest',['Number of ciliates [# g^-1]','Number of flagellates [# g^-1]','Number of naked amoebae [# g^-1]']]\n", - "\n", - "# Fill missing values for naked amoebae\n", - "habitat_mean.loc[['Shrubland','Tropical Forest','Tundra','Woodland'],'Number of naked amoebae [# g^-1]'] = habitat_mean.loc['General','Number of naked amoebae [# g^-1]']\n", - "habitat_gmean.loc[['Shrubland','Tropical Forest','Tundra','Woodland'],'Number of naked amoebae [# g^-1]'] = habitat_gmean.loc['General','Number of naked amoebae [# g^-1]']\n", - "habitat_median.loc[['Shrubland','Tropical Forest','Tundra','Woodland'],'Number of naked amoebae [# g^-1]'] = habitat_median.loc['General','Number of naked amoebae [# g^-1]']\n", - "\n", - "# Fill missing values for flagellates\n", - "habitat_gmean.loc[['Desert','Grassland','Shrubland','Tropical Forest','Woodland'],'Number of flagellates [# g^-1]'] = habitat_gmean.loc['General','Number of flagellates [# g^-1]']\n", - "habitat_mean.loc[['Desert','Grassland','Shrubland','Tropical Forest','Woodland'],'Number of flagellates [# g^-1]'] = habitat_mean.loc['General','Number of flagellates [# g^-1]']\n", - "habitat_median.loc[['Desert','Grassland','Shrubland','Tropical Forest','Woodland'],'Number of flagellates [# g^-1]'] = habitat_median.loc['General','Number of flagellates [# g^-1]']\n", - "\n", - "# Fill missing values for ciliates\n", - "habitat_gmean.loc['Tropical Forest','Number of ciliates [# g^-1]'] = habitat_gmean.loc['Temperate Forest','Number of ciliates [# g^-1]']\n", - "habitat_mean.loc['Tropical Forest','Number of ciliates [# g^-1]'] = habitat_mean.loc['Temperate Forest','Number of ciliates [# g^-1]']\n", - "habitat_median.loc['Tropical Forest','Number of ciliates [# g^-1]'] = habitat_median.loc['Temperate Forest','Number of ciliates [# g^-1]']\n", - "\n", - "habitat_gmean.loc['Tundra','Number of ciliates [# g^-1]'] = gmean(habitat_mean['Number of ciliates [# 
g^-1]'].dropna())\n", - "habitat_mean.loc['Tundra','Number of ciliates [# g^-1]'] = habitat_mean['Number of ciliates [# g^-1]'].dropna().mean()\n", - "habitat_median.loc['Tundra','Number of ciliates [# g^-1]'] = habitat_median['Number of ciliates [# g^-1]'].dropna().mean()\n", - "habitat_median" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have estimates for the total number of individual protists per gram of soil. In order to calculate the total number of individual protists we need to first convert the data to number of individuals per $m^2$. To convert number of individuals per gram of soil to number of individuals per $m^2$, we calculate a global average soil density in the top 15 cm based on [Hengl et al.](https://dx.doi.org/10.1371%2Fjournal.pone.0105992).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the global mean bulk density of soil in the top 15 cm is ≈1.2e+06 g m^3\n" - ] - } - ], - "source": [ - "# Load soil density map from Hengl et al. (in the top 15 cm, reduced in resolution to 1 degree resolution)\n", - "gtif = gdal.Open('bulk_density_data.tif')\n", - "bulk_density_map = np.array(gtif.GetRasterBand(1).ReadAsArray())\n", - "\n", - "# Fill missing values with NaN\n", - "bulk_density_map[bulk_density_map == bulk_density_map[0,1]] = np.nan\n", - "# Mean soil bulk density from Hengl et al. [in g per m^3]\n", - "bulk_density = np.nanmean(bulk_density_map[:])*1000\n", - "print('Our best estimate for the global mean bulk density of soil in the top 15 cm is ≈%.1e g m^3' %bulk_density)\n", - "#of ≈1.3 g $cm^3$" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Measuring the density of individuals per gram of soil does not take into account the distribution of biomass along the soil profile. 
Most of the measurements of the number of individual protists per gram of soil are done in shallow soil depths. We calculate the average sampling depth across studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The average sampling depth of soil protists is ≈5 cm\n" - ] - } - ], - "source": [ - "# Calculate the average sampling depth \n", - "sampling_depth = data.groupby('DOI').mean().mean()['Sampling Depth [cm]']\n", - "\n", - "sampling_depth = data.groupby('DOI').median().median()['Sampling Depth [cm]']\n", - "\n", - "print('The average sampling depth of soil protists is ≈%.0f cm' %sampling_depth)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is not obvious what fraction of the total biomass of soil protists is found in the top 8 cm of soil. To estimate the fraction of the biomass of soil protists found in the top 8 cm, we rely on two methodologies. The first is based on the distribution of microbial biomass with depth as discussed in Xu et al. Xu et al. extrapolate the microbial biomass across the soil profile based on empirical equations for the distribution of root biomass along soil depth from [Jackson et al.](http://dx.doi.org/10.1007/BF00333714). The empirical equations are biome-specific, and follow the general form: $$Y = 1-\\beta^d$$ where $Y$ is the cumulative fraction of roots, $d$ is depth in centimeters, and $\\beta$ is a coefficient fitted for each biome. On a global scale, the best fit for $\\beta$ as reported in Jackson et al. is ≈0.966. 
We use this coefficient to calculate the fraction of total biomass of soil protists found in the top 8 cm: " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Jackson et al. is ≈16 percent\n" - ] - } - ], - "source": [ - "# The beta coefficient from Jackson et al.\n", - "jackson_beta = 0.966\n", - "\n", - "# Calculate the fraction of the biomass of soil protists found in the top 8 cm\n", - "jackson_fraction = 1 - jackson_beta** sampling_depth\n", - "\n", - "print('Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Jackson et al. is ≈%.0f percent' %(jackson_fraction*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a second estimate for the fraction of the total biomass of soil protists found in the top 8 cm, we rely on an empirical equation from [Fierer et al.](http://dx.doi.org/10.1111/j.1461-0248.2009.01360.x), which estimates the fraction of microbial biomass found below sampling depth d:\n", - "$$ f = [-0.132 \\times \\ln(d) + 0.605] \\times B$$\n", - "where $f$ is the fraction of microbial biomass found below sampling depth $d$ (in cm). We use this equation to calculate the fraction of the total biomass of soil protists found in the top 8 cm:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Fierer et al. 
is ≈61 percent\n" - ] - } - ], - "source": [ - "# The fraction of microbial biomass found in layer shallower than depth x based on Fierer et al.\n", - "fierer_eq = lambda x: 1-(-0.132*np.log(x)+0.605)\n", - "fierer_frac = fierer_eq(sampling_depth)\n", - "print('Our estimate for the fraction of biomass of soil protists found in soil layers sampled, based on Fierer et al. is ≈%.0f percent' %(fierer_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the fraction of the total biomass of soil protists found in layers shallower than 8 cm, we use the geometric mean of the estimates based on Jackson et al. and Fierer et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the fraction of biomass of soil protists found in soil layers sampled is ≈38 percent\n" - ] - } - ], - "source": [ - "best_depth_frac = frac_mean(np.array([jackson_fraction,fierer_frac]))\n", - "best_depth_frac = np.median([jackson_fraction,fierer_frac])\n", - "print('Our best estimate for the fraction of biomass of soil protists found in soil layers sampled is ≈%.0f percent' %(best_depth_frac*100))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the measurements per gram of soil to number of individuals per $m^2$, we calculate the average sampling depth across studies. We calculate the volume of soil held within this sampling depth. We use the bulk density to calculate the total weight of soil within one $m^2$ of soil with depth equal to the sampling depth. We multiply the estimates per gram of soil by the total weight of soil per $m^2$. To account for biomass present in lower layers, we divide the total number of individual protists per $m^2$ by our best estimate for the fraction of the total biomass of soil protists found in layer shallower than 8 cm." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# convert number of individuals per gram soil to number of individuals per m^2\n", - "habitat_per_m2_gmean = (habitat_gmean*bulk_density*sampling_depth/100/best_depth_frac)\n", - "habitat_per_m2_mean = (habitat_mean*bulk_density*sampling_depth/100/best_depth_frac)\n", - "habitat_per_m2_median = (habitat_median*bulk_density*sampling_depth/100/best_depth_frac)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of protists we multiply the total number of individuals per unit area of each type of protist in each habitat by the total area of each habitat taken from the book [Biogeochemistry: An analysis of Global Change](https://www.sciencedirect.com/science/book/9780123858740) by Schlesinger & Bernhardt. The areas of each habitat are:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Area [m^2]Unnamed: 2
Biome
Boreal Forest13700000000000Temperate forest, Tropical rainforest
Desert27700000000000Desert
Temperate Forest10400000000000Temperate forest
Grassland15000000000000Grassland
Tropical Forest17500000000000Tropical rainforest
Tundra5600000000000Tundra
Tropical Savanna27700000000000Scrubland,grassland,Temprate Forest, Tropical ...
Cropland15500000000000Cropland
\n", - "
" - ], - "text/plain": [ - " Area [m^2] \\\n", - "Biome \n", - "Boreal Forest 13700000000000 \n", - "Desert 27700000000000 \n", - "Temperate Forest 10400000000000 \n", - "Grassland 15000000000000 \n", - "Tropical Forest 17500000000000 \n", - "Tundra 5600000000000 \n", - "Tropical Savanna 27700000000000 \n", - "Cropland 15500000000000 \n", - "\n", - " Unnamed: 2 \n", - "Biome \n", - "Boreal Forest Temperate forest, Tropical rainforest \n", - "Desert Desert \n", - "Temperate Forest Temperate forest \n", - "Grassland Grassland \n", - "Tropical Forest Tropical rainforest \n", - "Tundra Tundra \n", - "Tropical Savanna Scrubland,grassland,Temprate Forest, Tropical ... \n", - "Cropland Cropland " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "habitat_area = pd.read_excel('terrestrial_protist_data.xlsx','Biome area', skiprows=1,index_col=0)\n", - "habitat_area" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One habitat for which we do not have data is the savanna. We use the mean of the values for the tropical forest, woodland, shrubland and grassland as an estimate of the total biomass in the savanna." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of ciliates [# g^-1] 2.0e+22\n", - "Number of flagellates [# g^-1] 3.1e+25\n", - "Number of naked amoebae [# g^-1] 1.3e+24\n", - "Number of testate amoebae [# g^-1] 3.3e+23\n", - "dtype: float64\n", - "Number of ciliates [# g^-1] 6.6e+21\n", - "Number of flagellates [# g^-1] 1.6e+25\n", - "Number of naked amoebae [# g^-1] 1.1e+24\n", - "Number of testate amoebae [# g^-1] 2.3e+23\n", - "dtype: float64\n", - "[1.15593687e+22 2.24138426e+25 1.19695371e+24 2.73770045e+23]\n", - "Number of ciliates [# g^-1] 8.4e+21\n", - "Number of flagellates [# g^-1] 2.0e+25\n", - "Number of naked amoebae [# g^-1] 1.3e+24\n", - "Number of testate amoebae [# g^-1] 2.8e+23\n", - "dtype: float64\n" - ] - } - ], - "source": [ - "habitat_per_m2_gmean.loc['Tropical Savanna'] = gmean(habitat_per_m2_gmean.loc[['Tropical Forest','Woodland','Shrubland','Grassland']])\n", - "habitat_per_m2_mean.loc['Tropical Savanna'] = habitat_per_m2_mean.loc[['Tropical Forest','Woodland','Shrubland','Grassland']].mean(axis=0)\n", - "habitat_per_m2_median.loc['Tropical Savanna'] = habitat_per_m2_median.loc[['Tropical Forest','Woodland','Shrubland','Grassland']].median(axis=0)\n", - "\n", - "tot_num_gmean = habitat_per_m2_gmean.mul(habitat_area['Area [m^2]'],axis=0)\n", - "tot_num_mean = habitat_per_m2_mean.mul(habitat_area['Area [m^2]'],axis=0)\n", - "tot_num_median = habitat_per_m2_median.mul(habitat_area['Area [m^2]'],axis=0)\n", - "print(tot_num_mean.sum())\n", - "print(tot_num_gmean.sum())\n", - "print(gmean([tot_num_mean.sum(),tot_num_gmean.sum()]))\n", - "print(tot_num_median.sum())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We generated two types of estimates for the total number of soil protists: an estimate which uses the arithmetic mean of the number of individuals at each habitat, and an estimate which 
uses the geometric mean of the number of individuals at each habitat. The estimate based on the arithmetic mean is more susceptible to sampling bias, as even a single measurement which is not characteristic of the global population (such as samples which are contaminated with organic carbon sources, or samples which have some technical biases associated with them) might shift the average concentration significantly. On the other hand, the estimate based on the geometric mean might underestimate global biomass as it will reduce the effect of biologically relevant high biomass concentrations. As a compromise between these two caveats, we chose to use as our best estimate the geometric mean of the estimates from the two methodologies." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 8.4e+21\n", - "Number of flagellates [# g^-1] 2.0e+25\n", - "Number of naked amoebae [# g^-1] 1.3e+24\n", - "Number of testate amoebae [# g^-1] 2.8e+23\n", - "dtype: float64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tot_num_protist = gmean([tot_num_mean.sum(),tot_num_gmean.sum()])\n", - "tot_num_protist = tot_num_median.sum()\n", - "tot_num_protist" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Carbon content of protists\n", - "We estimate the characteristic carbon content of a single protist from each of the morphological groups of protists based on data from several sources. Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOICarbon content of naked amoebae [g C cell^-1]Carbon content of ciliates [g C cell^-1]Carbon content of testate amoebae [g C cell^-1]Carbon content of flagellates [g C cell^-1]Remarks
0Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan1.5e-09nanCalculated from table 2, assuming 15% carbon c...
1Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan1.1e-09nanCalculated from table 2, assuming 15% carbon c...
2Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan2.1e-09nanCalculated from table 2, assuming 15% carbon c...
3Wanner et al.http://dx.doi.org/10.1007/s00248-007-9322-2nannan3.4e-09nanCalculated from table 2, assuming 15% carbon c...
4Foissnerhttp://dx.doi.org/10.1016/0167-8809(92)90093-Qnan3.8e-094.7e-09nanCalculated from table 2, assuming 15% carbon c...
\n", - "
" - ], - "text/plain": [ - " Reference DOI \\\n", - "0 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "1 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "2 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "3 Wanner et al. http://dx.doi.org/10.1007/s00248-007-9322-2 \n", - "4 Foissner http://dx.doi.org/10.1016/0167-8809(92)90093-Q \n", - "\n", - " Carbon content of naked amoebae [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 nan \n", - "\n", - " Carbon content of ciliates [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 3.8e-09 \n", - "\n", - " Carbon content of testate amoebae [g C cell^-1] \\\n", - "0 1.5e-09 \n", - "1 1.1e-09 \n", - "2 2.1e-09 \n", - "3 3.4e-09 \n", - "4 4.7e-09 \n", - "\n", - " Carbon content of flagellates [g C cell^-1] \\\n", - "0 nan \n", - "1 nan \n", - "2 nan \n", - "3 nan \n", - "4 nan \n", - "\n", - " Remarks \n", - "0 Calculated from table 2, assuming 15% carbon c... \n", - "1 Calculated from table 2, assuming 15% carbon c... \n", - "2 Calculated from table 2, assuming 15% carbon c... \n", - "3 Calculated from table 2, assuming 15% carbon c... \n", - "4 Calculated from table 2, assuming 15% carbon c... " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cc_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Carbon content')\n", - "cc_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine this data with an additional source from [Finlay & Fenchel](http://dx.doi.org/10.1078/1434-4610-00060). We calculate the average cell length for each group. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load data from Finlay & Fenchel\n", - "ff_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Finlay & Fenchel', skiprows=1)\n", - "\n", - "# Define the function to calculate the weighted average for each group of protists\n", - "def weighted_av_groupby(input):\n", - " return np.average(input['Length [µm]'],weights=input['Abundance [# g^-1]'])\n", - "\n", - "cell_lengths = ff_data.groupby('Protist type').apply(weighted_av_groupby)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert the cell length to biovolume according the the allometric relation decribed in Figure 10 of Finlay & Fenchel. The relation between cell volume and cell length is given by the equation: \n", - "$$V = 0.6×L^{2.36}$$\n", - "Where V is the cell volume in $µm^3$ and L is the cell length in µm." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "Ciliate 5.4e+03\n", - "Flagellate 1.2e+02\n", - "Naked amoebae 1.4e+03\n", - "Testate amoebae 3.6e+03\n", - "dtype: float64" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cell_volumes = 0.6*cell_lengths**2.36\n", - "cell_volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert cell volumes to carbon content assuming ≈150 fg C µm$^3$:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "Ciliate 8.1e-10\n", - "Flagellate 1.8e-11\n", - "Naked amoebae 2.0e-10\n", - "Testate amoebae 5.5e-10\n", - "dtype: float64" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ff_carbon_content = cell_volumes*150e-15\n", - 
"pd.options.display.float_format = '{:,.1e}'.format\n", - "ff_carbon_content" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We add these numbers as an additional source for calculating the carbon content of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "cc_data.loc[cc_data.index[-1]+1] = pd.Series({'Reference': 'Finlay & Fenchel',\n", - " 'DOI': 'http://dx.doi.org/10.1078/1434-4610-00060',\n", - " 'Carbon content of ciliates [g C cell^-1]': ff_carbon_content.loc['Ciliate'],\n", - " 'Carbon content of naked amoebae [g C cell^-1]': ff_carbon_content.loc['Naked amoebae'],\n", - " 'Carbon content of testate amoebae [g C cell^-1]': ff_carbon_content.loc['Testate amoebae'],\n", - " 'Carbon content of flagellates [g C cell^-1]': ff_carbon_content.loc['Flagellate']\n", - " })\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of carbon contents for first for values within each study and then for the average values between studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/scipy/stats/stats.py:315: RuntimeWarning: Mean of empty slice.\n", - " return np.exp(log_a.mean(axis=axis))\n", - "/usr/local/lib/python3.5/dist-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n", - "/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:4033: RuntimeWarning: All-NaN slice encountered\n", - " r = func(a, **kwargs)\n" - ] - } - ], - "source": [ - "def groupby_gmean(input):\n", - " return pd.DataFrame({'Carbon content of ciliates [g C cell^-1]': gmean(input['Carbon content of ciliates [g C cell^-1]'].dropna()),\n", - " 'Carbon content of naked amoebae [g C cell^-1]': 
gmean(input['Carbon content of naked amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of testate amoebae [g C cell^-1]': gmean(input['Carbon content of testate amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of flagellates [g C cell^-1]': gmean(input['Carbon content of flagellates [g C cell^-1]'].dropna())},index=[0])\n", - "\n", - "def groupby_median(input):\n", - " return pd.DataFrame({'Carbon content of ciliates [g C cell^-1]': np.nanmedian(input['Carbon content of ciliates [g C cell^-1]']),\n", - " 'Carbon content of naked amoebae [g C cell^-1]': np.nanmedian(input['Carbon content of naked amoebae [g C cell^-1]']),\n", - " 'Carbon content of testate amoebae [g C cell^-1]': np.nanmedian(input['Carbon content of testate amoebae [g C cell^-1]']),\n", - " 'Carbon content of flagellates [g C cell^-1]': np.nanmedian(input['Carbon content of flagellates [g C cell^-1]'])},index=[0])\n", - "\n", - "\n", - "study_mean_cc = cc_data.groupby('DOI').apply(groupby_gmean)\n", - "study_mean_cc = cc_data.groupby('DOI').apply(groupby_median)\n", - "mean_cc = study_mean_cc.reset_index().groupby('level_1').apply(groupby_gmean)\n", - "mean_cc = study_mean_cc.reset_index().groupby('level_1').apply(groupby_median)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
level_10
0
Carbon content of ciliates [g C cell^-1]7.8e-10
Carbon content of flagellates [g C cell^-1]1.8e-11
Carbon content of naked amoebae [g C cell^-1]2.0e-10
Carbon content of testate amoebae [g C cell^-1]1.9e-09
\n", - "
" - ], - "text/plain": [ - "level_1 0\n", - " 0\n", - "Carbon content of ciliates [g C cell^-1] 7.8e-10\n", - "Carbon content of flagellates [g C cell^-1] 1.8e-11\n", - "Carbon content of naked amoebae [g C cell^-1] 2.0e-10\n", - "Carbon content of testate amoebae [g C cell^-1] 1.9e-09" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mean_cc.T" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To estimate the total biomass of soil protists based on the total number of individuals and their carbon content, we multiply our estimate for the total number of individuals for each morphological type by its characteristic carbon content. We sum over all morophological types of protists to generate our best estimate for the global biomass of soil protists" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate of the total biomass of soil protists is ≈1.2 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the total biomass of protists\n", - "best_estimate = (tot_num_protist.values*mean_cc).sum(axis=1)\n", - "\n", - "print('Our best estimate of the total biomass of soil protists is ≈%.1f Gt C' %(best_estimate/1e15))\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with our estimate of the total biomass of terrestrial protists, we collect available uncertainties for the values reported within studies and between studies. 
We use the highest uncertainty out of this collection of uncertainties as our best projection for the uncertainty associated we the estimate of the total biomass of terrestrial protists.\n", - "\n", - "## Number of individuals\n", - "We assemble different measures of uncertainty at different levels - for values within the same study, for studies within the same habitat, and between habitats.\n", - "\n", - "### Intra-study uncertainty\n", - "For each study which reports more than one value, we calculate 95% confidence interval around the geometric mean of those values. We take the maximal uncertainty in each habitat as our measure of the intra-study uncertainty" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pd.options.display.float_format = '{:,.1f}'.format\n", - "\n", - "# Define the function ot calculate the 95% confidence interval around the\n", - "# geometric mean of number of each group of protists per gram\n", - "def groupby_geo_CI(input):\n", - " return pd.DataFrame({'Number of ciliates [# g^-1]': geo_CI_calc(input['Number of ciliates [# g^-1]'].dropna()),\n", - " 'Number of naked amoebae [# g^-1]': geo_CI_calc(input['Number of naked amoebae [# g^-1]'].dropna()),\n", - " 'Number of testate amoebae [# g^-1]': geo_CI_calc(input['Number of testate amoebae [# g^-1]'].dropna()),\n", - " 'Number of flagellates [# g^-1]': geo_CI_calc(input['Number of flagellates [# g^-1]'].dropna())},index=[0])\n", - "\n", - "# Group the samples by habitat and study, and calculate the 95% confidence\n", - "# interval around the geometric mean of values within each study\n", - "intra_study_num_CI = data.groupby(['Habitat','DOI']).apply(groupby_geo_CI)\n", - "\n", - "# Use the maximal uncertainty in each habitat as a measure of the intra-study uncertainty\n", - "intra_num_CI = intra_study_num_CI.groupby('Habitat').max()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"### Interstudy uncertainty\n", - "We calculate 95% confidence interval around the geometric mean of the average values from different studies." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forestnannannan457.0
Cropland3.313.1nan4.8
Desertnannannan1.4
Forest17.3nannan4.2
Generalnannannannan
Grassland10.4nan1.42.4
Shrubland1.0nannannan
Temperate Forest2.642.2nan4.6
Tropical Forestnannannannan
Tundra2.75.1nan1.7
Woodland1.0nannan1.0
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan nan \n", - "Cropland 3.3 13.1 \n", - "Desert nan nan \n", - "Forest 17.3 nan \n", - "General nan nan \n", - "Grassland 10.4 nan \n", - "Shrubland 1.0 nan \n", - "Temperate Forest 2.6 42.2 \n", - "Tropical Forest nan nan \n", - "Tundra 2.7 5.1 \n", - "Woodland 1.0 nan \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest nan \n", - "Cropland nan \n", - "Desert nan \n", - "Forest nan \n", - "General nan \n", - "Grassland 1.4 \n", - "Shrubland nan \n", - "Temperate Forest nan \n", - "Tropical Forest nan \n", - "Tundra nan \n", - "Woodland nan \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 457.0 \n", - "Cropland 4.8 \n", - "Desert 1.4 \n", - "Forest 4.2 \n", - "General nan \n", - "Grassland 2.4 \n", - "Shrubland nan \n", - "Temperate Forest 4.6 \n", - "Tropical Forest nan \n", - "Tundra 1.7 \n", - "Woodland 1.0 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Group the representative values by habitat, and calculate the 95% confidence interval\n", - "# around the geometric mean of values within habitat\n", - "inter_study_habitat_num_CI = grouped_data_gmean.groupby('Habitat').apply(groupby_geo_CI)\n", - "inter_study_habitat_num_CI.set_index(inter_study_habitat_num_CI.index.droplevel(level=1),inplace=True)\n", - "inter_study_habitat_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-habitat uncertainty\n", - "We first use the maximum of the intra-study and interstudy uncertainty in each habitat as our best projection for the uncertainty associated with the estimate of the total number of protists in the habitat. For habitats with missing uncertainty projections, we use the maximum of the uncertainties for the same group of protists in other habitats." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Number of ciliates [# g^-1]Number of flagellates [# g^-1]Number of naked amoebae [# g^-1]Number of testate amoebae [# g^-1]
Habitat
Boreal Forest19.242.21.8457.0
Cropland19.213.11.24.8
Desert1.942.21.81.4
Forest17.342.21.84.2
General19.242.21.8457.0
Grassland10.442.21.42.4
Shrubland2.142.21.82.6
Temperate Forest4.242.21.84.6
Tropical Forest19.242.21.81.3
Tundra2.75.11.81.7
Woodland1.042.21.82.1
Tropical Savanna19.242.21.8457.0
\n", - "
" - ], - "text/plain": [ - " Number of ciliates [# g^-1] Number of flagellates [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 19.2 42.2 \n", - "Cropland 19.2 13.1 \n", - "Desert 1.9 42.2 \n", - "Forest 17.3 42.2 \n", - "General 19.2 42.2 \n", - "Grassland 10.4 42.2 \n", - "Shrubland 2.1 42.2 \n", - "Temperate Forest 4.2 42.2 \n", - "Tropical Forest 19.2 42.2 \n", - "Tundra 2.7 5.1 \n", - "Woodland 1.0 42.2 \n", - "Tropical Savanna 19.2 42.2 \n", - "\n", - " Number of naked amoebae [# g^-1] \\\n", - "Habitat \n", - "Boreal Forest 1.8 \n", - "Cropland 1.2 \n", - "Desert 1.8 \n", - "Forest 1.8 \n", - "General 1.8 \n", - "Grassland 1.4 \n", - "Shrubland 1.8 \n", - "Temperate Forest 1.8 \n", - "Tropical Forest 1.8 \n", - "Tundra 1.8 \n", - "Woodland 1.8 \n", - "Tropical Savanna 1.8 \n", - "\n", - " Number of testate amoebae [# g^-1] \n", - "Habitat \n", - "Boreal Forest 457.0 \n", - "Cropland 4.8 \n", - "Desert 1.4 \n", - "Forest 4.2 \n", - "General 457.0 \n", - "Grassland 2.4 \n", - "Shrubland 2.6 \n", - "Temperate Forest 4.6 \n", - "Tropical Forest 1.3 \n", - "Tundra 1.7 \n", - "Woodland 2.1 \n", - "Tropical Savanna 457.0 " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Use the maximum of the intra-study and interstudy uncertainty as our best projection of the uncertainty \n", - "# of the number of protists in each habitat\n", - "tot_num_habitat_CI = inter_study_habitat_num_CI.where(inter_study_habitat_num_CI > intra_num_CI, intra_num_CI).fillna(inter_study_habitat_num_CI)\n", - "\n", - "# Fill missing values for each habitat with the mean of the uncertainties for the same group of \n", - "# protists in the other habitats\n", - "tot_num_habitat_CI['Number of ciliates [# g^-1]'].fillna(tot_num_habitat_CI['Number of ciliates [# g^-1]'].max(),inplace=True)\n", - "tot_num_habitat_CI['Number of flagellates [# g^-1]'].fillna(tot_num_habitat_CI['Number of flagellates [# g^-1]'].max(),inplace=True)\n", - 
"tot_num_habitat_CI['Number of naked amoebae [# g^-1]'].fillna(tot_num_habitat_CI['Number of naked amoebae [# g^-1]'].max(),inplace=True)\n", - "tot_num_habitat_CI['Number of testate amoebae [# g^-1]'].fillna(tot_num_habitat_CI['Number of testate amoebae [# g^-1]'].max(),inplace=True)\n", - "\n", - "# Fill the uncertainty of the values for the tropical savanna with the mean the uncertainties \n", - "# for the same group of protists in the other habitats\n", - "tot_num_habitat_CI.loc['Tropical Savanna'] = tot_num_habitat_CI.max()\n", - "tot_num_habitat_CI\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We propagate the uncertainties associated with the estimates of the total number of protists per gram soil in each habitat to the estimate of the sum across all habitats:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 3.6\n", - "Number of flagellates [# g^-1] 3.7\n", - "Number of naked amoebae [# g^-1] 1.3\n", - "Number of testate amoebae [# g^-1] 24.4\n", - "dtype: float64" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tot_num_habitat_CI = tot_num_habitat_CI.loc[tot_num_gmean.dropna().index.values]\n", - "\n", - "ciliate_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of ciliates [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of ciliates [# g^-1]'])\n", - "flagellate_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of ciliates [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of ciliates [# g^-1]'])\n", - "naked_amoebea_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of naked amoebae [# g^-1]'],mul_CIs=tot_num_habitat_CI['Number of naked amoebae [# g^-1]'])\n", - "testate_amoebea_num_per_g_CI = CI_sum_prop(estimates=tot_num_gmean.dropna()['Number of testate amoebae [# 
g^-1]'],mul_CIs=tot_num_habitat_CI['Number of testate amoebae [# g^-1]'])\n", - "num_per_g_CI = pd.Series([ciliate_num_per_g_CI,flagellate_num_per_g_CI,naked_amoebea_num_per_g_CI,testate_amoebea_num_per_g_CI], index= tot_num_habitat_CI.columns)\n", - "num_per_g_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inter-method uncertainty\n", - "We generated two types of estimates for the total number of individual protists per gram of soil - one based on the arithmetic mean and one based on the geometric mean of values. As our best estimate we used the geometric mean of the arithmetic mean and geometric mean-based estimates. We calculate the 95% confidence interval around the geometric mean of the two types of estimates as a measure of the uncertainty this procedure introduces into the estimate of the total number of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 2.6\n", - "Number of flagellates [# g^-1] 1.9\n", - "Number of naked amoebae [# g^-1] 1.2\n", - "Number of testate amoebae [# g^-1] 1.4\n", - "dtype: float64" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "inter_method_num_CI = geo_CI_calc(pd.DataFrame([tot_num_mean.sum(),tot_num_gmean.sum()]))\n", - "inter_method_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the uncertainty stemming from the intra-study and interstudy variability and the inter-method uncertainty as our best projection of the uncertainty associated with our estimate of the number of individual protists per gram of soil:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 3.6\n", - "Number of flagellates [# g^-1] 3.7\n", - "Number of naked amoebae [# g^-1] 
1.3\n", - "Number of testate amoebae [# g^-1] 24.4\n", - "dtype: float64" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_num_CI = np.max([num_per_g_CI,inter_method_num_CI],axis=0)\n", - "best_num_CI = pd.Series(best_num_CI,index= inter_method_num_CI.index)\n", - "best_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the total number of individual protists per gram soil to an estimate of the total number of protists per $m^2$, we rely on the bulk density of soil and on an estimate of the fraction of the total biomass of soil protists in the top 8 cm of soil. We now assess the uncertainty associated with each of those parameters.\n", - "\n", - "### Bulk density of soil\n", - "We do not have a good estimate for the uncertainy associated with the bulk density of soils. We thus use a crude uncertainty of ≈2-fold as a measure of the uncertainty associated with the bulk density of soils.\n", - "\n", - "### Fraction of biomass of protists in top 8 cm\n", - "To estimate the fraction of the total biomass of soil protists present in the top 8 cm of soils, we rely on two estimates - one based on data from Jackson et al. and one based on data from Fierer et al. 
As a measure of the uncertainty associated with the estimate of the fraction of the total biomass of soil protists present in the top 8 cm of soils, we calculate the 95% confidence interval around the geometric mean of the two estmates:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our projection for the uncertainty associated with our estimate of the fraction of the total biomass of soil protists found in the top 8 cm of soil is ≈2.9-fold\n" - ] - } - ], - "source": [ - "# We use a crude estimate of ≈2-fold as our measure of the uncertainty associated with\n", - "# the average bulk density of soils\n", - "bulk_density_CI = 2\n", - "\n", - "# Calculate the 95% confidence interval around the geometric mean of our estimates for\n", - "# the fraction of the total soil biomass present in the top 8 cm\n", - "depth_frac_CI = frac_CI(np.array([jackson_fraction,fierer_frac]))\n", - "print('Our projection for the uncertainty associated with our estimate of the fraction of the total biomass of soil protists found in the top 8 cm of soil is ≈%.1f-fold' %depth_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine the uncertainties associated with the total number of individual protists per gram soil with the uncertainties associated with the average bulk density of soil and the uncertainty associated with the fraction of the total biomass of soil protists found in the top 8 cm of soil:" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Number of ciliates [# g^-1] 6.2\n", - "Number of flagellates [# g^-1] 6.2\n", - "Number of naked amoebae [# g^-1] 3.7\n", - "Number of testate amoebae [# g^-1] 31.2\n", - "dtype: float64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"ciliate_num_CI = CI_prod_prop(np.array([best_num_CI['Number of ciliates [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "flagellates_num_CI = CI_prod_prop(np.array([best_num_CI['Number of flagellates [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "naked_amoebae_num_CI = CI_prod_prop(np.array([best_num_CI['Number of naked amoebae [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "testate_amoebae_num_CI = CI_prod_prop(np.array([best_num_CI['Number of testate amoebae [# g^-1]'],bulk_density_CI,depth_frac_CI]))\n", - "tot_num_CI = pd.Series([ciliate_num_CI,flagellates_num_CI,naked_amoebae_num_CI,testate_amoebae_num_CI], index= tot_num_habitat_CI.columns)\n", - "tot_num_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Carbon content\n", - "We assemble different measures of uncertainty at different levels - for values within the same study and for values between studies.\n", - "### Intra-study uncertainty\n", - "For studies which report more than one measurement, we calculate the 95% confidence interval around the mean of the values. We use the maximal uncertainty as a measure of the intra-study uncertainty associated with the carbon content of protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def groupby_geo_CI(input):\n", - " return pd.DataFrame({'Carbon content of ciliates [g C cell^-1]': geo_CI_calc(input['Carbon content of ciliates [g C cell^-1]'].dropna()),\n", - " 'Carbon content of naked amoebae [g C cell^-1]': geo_CI_calc(input['Carbon content of naked amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of testate amoebae [g C cell^-1]': geo_CI_calc(input['Carbon content of testate amoebae [g C cell^-1]'].dropna()),\n", - " 'Carbon content of flagellates [g C cell^-1]': geo_CI_calc(input['Carbon content of flagellates [g C cell^-1]'].dropna())},index=[0])\n", - "\n", - "\n", - "cc_intra_CI = cc_data.groupby('DOI').apply(groupby_geo_CI).max()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interstudy uncertainty\n", - "We calculate the 95% confidence interval around the mean carbon content from different studies. We use the maximal uncertainty as a measure of the interstudy uncertainty associated with the carbon content of protists." - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Carbon content of ciliates [g C cell^-1] 2.0\n", - "Carbon content of flagellates [g C cell^-1] 3.3\n", - "Carbon content of naked amoebae [g C cell^-1] 1.6\n", - "Carbon content of testate amoebae [g C cell^-1] 1.9\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cc_inter_CI = geo_CI_calc(study_mean_cc)\n", - "cc_inter_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the maximum of the intra-study and interstudy uncertainties as our best projection of the uncertainty associated with the estimate of the carbon content of protists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Carbon content of ciliates [g C cell^-1] 2.9\n", - "Carbon content of flagellates [g C cell^-1] 3.3\n", - "Carbon content of naked amoebae [g C cell^-1] 8.1\n", - "Carbon content of testate amoebae [g C cell^-1] 2.2\n", - "dtype: float64" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_cc_CI = np.max([cc_intra_CI,cc_inter_CI],axis=0)\n", - "best_cc_CI = pd.Series(best_cc_CI,index=cc_inter_CI.index)\n", - "best_cc_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculating the total uncertainty\n", - "We propagate the uncertainty in the total number of protists and in the carbon content of protists to the total estimate of the biomass of protists. We first calculate the uncertainty associated with the estimate of biomass of each of the groups of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ciliate_biomass_CI = CI_prod_prop(np.array([ciliate_num_CI,best_cc_CI['Carbon content of ciliates [g C cell^-1]']]))\n", - "flagellates_biomass_CI = CI_prod_prop(np.array([flagellates_num_CI,best_cc_CI['Carbon content of flagellates [g C cell^-1]']]))\n", - "naked_amoebae_biomass_CI = CI_prod_prop(np.array([naked_amoebae_num_CI,best_cc_CI['Carbon content of naked amoebae [g C cell^-1]']]))\n", - "testate_amoebae_biomass_CI = CI_prod_prop(np.array([testate_amoebae_num_CI,best_cc_CI['Carbon content of testate amoebae [g C cell^-1]']]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then propagate the uncertainty associated with the biomass of each protist group to the estimate of the total biomass of protists:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists is ≈7-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_sum_prop(estimates=(tot_num_protist*mean_cc).values.squeeze(), mul_CIs= np.array([ciliate_biomass_CI, flagellates_biomass_CI, naked_amoebae_biomass_CI, testate_amoebae_biomass_CI]))\n", - "print('Our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists is ≈%0.f-fold' % mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Biomass of terrestrial protists: 1.6 Gt C\n", - "Uncertainty associated with the estimate of the total biomass of terrestrial protists: ≈7-fold\n" - ] - } - ], - "source": [ - "\n", - "print('Biomass of terrestrial protists: %.1f Gt C' %(best_estimate/1e15))\n", - "print('Uncertainty associated with the estimate of the total biomass of terrestrial protists: ≈%.0f-fold' % mul_CI)\n", - "\n", - "\n", - "old_results = pd.read_excel('../protists_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Biomass of terrestrial protists',\n", - " 'Value': float(best_estimate)/1e15,\n", - " 'Units': 'Gt C',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.to_excel('../protists_biomass_estimate.xlsx',index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - 
"nbformat": 4, - "nbformat_minor": 2 -} diff --git a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists_old-checkpoint.ipynb b/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists_old-checkpoint.ipynb deleted file mode 100644 index 8e46f0f..0000000 --- a/protists/terrestrial_protists/.ipynb_checkpoints/terrestrial_protists_old-checkpoint.ipynb +++ /dev/null @@ -1,1781 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total biomass of terrestrial protists\n", - "After searching the literature, we could not find a comprehensive account of the biomass of protists in soils. We generated a crude estimate of the total biomass of protists in soil based on five methodologies. The first two methodologies are based on direct counts of protists in soils, where as the last three methods are based on molecular techniques. We detail below the calculation of the global protist biomass using each method. Our best estimate for the total biomass of soil protists is the geometric mean of the estimates from the five different methodologies.\n", - "\n", - "## Direct biomass density measurements\n", - "Our first method for estimating the total biomass of protists relies on measurement of the average biomass density of protists is soils. We collected data from several studies, which we list below:" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ReferenceDOIMethodGroupHabitatSiteBiomass density [g C m^-2]Remarks
0Schröter et al.http://dx.doi.org/10.1034/j.1600-0579.2003.120...Direct countsTestate amoebaeConiferous forestNorth Sweden0.2Taken from table 3
1Schröter et al.http://dx.doi.org/10.1034/j.1600-0579.2003.120...Direct countsTestate amoebaeConiferous forestSouth Sweden0.6Taken from table 3
2Schröter et al.http://dx.doi.org/10.1034/j.1600-0579.2003.120...Direct countsTestate amoebaeConiferous forestGermany1.0Taken from table 3
3Schröter et al.http://dx.doi.org/10.1034/j.1600-0579.2003.120...Direct countsTestate amoebaeConiferous forestFrance0.6Taken from table 3
4Zwart et al.http://dx.doi.org/10.1016/0167-8809(94)90043-4Direct countsAmeboa and FlagellatesCroplandNetherlands1.2Top 25 cm, Taken from table 1
5De Ruiter et al.http://dx.doi.org/10.2307/2404274Direct countsAmeboa and FlagellatesCroplandNetherlands0.6Top 25 cm (85% in top 10 cm), Taken from Table 1
6Schaeferhttp://dx.doi.org/10.1007/BF00318544Direct countsFlagellates, Ameboa, Testate amoebaeBeech forestGermany0.8Taken from table 1 assuming 50% carbon content
7Stapleton et al.http://dx.doi.org/10.1016/j.soilbio.2005.03.016Direct countsHeterotrophic flagellates and Testate amoebaTundraSvalbard8.3Values extracted from Figure 2 in the control ...
8Stapleton et al.http://dx.doi.org/10.1016/j.soilbio.2005.03.016Direct countsHeterotrophic flagellates and Testate amoebaTundraSvalbard8.5Values extracted from Figure 2 in the control ...
9Stapleton et al.http://dx.doi.org/10.1016/j.soilbio.2005.03.016Direct countsHeterotrophic flagellates and Testate amoebaTundraSvalbard2.5Values extracted from Figure 2 in the control ...
10Stapleton et al.http://dx.doi.org/10.1016/j.soilbio.2005.03.016Direct countsHeterotrophic flagellates and Testate amoebaTundraSvalbard3.6Values extracted from Figure 2 in the control ...
11Stapleton et al.http://dx.doi.org/10.1016/j.soilbio.2005.03.016Direct countsHeterotrophic flagellates and Testate amoebaTundraSvalbard10.1Values extracted from Figure 2 in the control ...
12Bouwman & Zwarthttp://dx.doi.org/10.1016/0167-8809(94)90040-XDirect countsAmeboa and FlagellatesCroplandNetherlands1.6From abstract. In top 25 cm.
\n", - "
" - ], - "text/plain": [ - " Reference DOI \\\n", - "0 Schröter et al. http://dx.doi.org/10.1034/j.1600-0579.2003.120... \n", - "1 Schröter et al. http://dx.doi.org/10.1034/j.1600-0579.2003.120... \n", - "2 Schröter et al. http://dx.doi.org/10.1034/j.1600-0579.2003.120... \n", - "3 Schröter et al. http://dx.doi.org/10.1034/j.1600-0579.2003.120... \n", - "4 Zwart et al. http://dx.doi.org/10.1016/0167-8809(94)90043-4 \n", - "5 De Ruiter et al. http://dx.doi.org/10.2307/2404274 \n", - "6 Schaefer http://dx.doi.org/10.1007/BF00318544 \n", - "7 Stapleton et al. http://dx.doi.org/10.1016/j.soilbio.2005.03.016 \n", - "8 Stapleton et al. http://dx.doi.org/10.1016/j.soilbio.2005.03.016 \n", - "9 Stapleton et al. http://dx.doi.org/10.1016/j.soilbio.2005.03.016 \n", - "10 Stapleton et al. http://dx.doi.org/10.1016/j.soilbio.2005.03.016 \n", - "11 Stapleton et al. http://dx.doi.org/10.1016/j.soilbio.2005.03.016 \n", - "12 Bouwman & Zwart http://dx.doi.org/10.1016/0167-8809(94)90040-X \n", - "\n", - " Method Group \\\n", - "0 Direct counts Testate amoebae \n", - "1 Direct counts Testate amoebae \n", - "2 Direct counts Testate amoebae \n", - "3 Direct counts Testate amoebae \n", - "4 Direct counts Ameboa and Flagellates \n", - "5 Direct counts Ameboa and Flagellates \n", - "6 Direct counts Flagellates, Ameboa, Testate amoebae \n", - "7 Direct counts Heterotrophic flagellates and Testate amoeba \n", - "8 Direct counts Heterotrophic flagellates and Testate amoeba \n", - "9 Direct counts Heterotrophic flagellates and Testate amoeba \n", - "10 Direct counts Heterotrophic flagellates and Testate amoeba \n", - "11 Direct counts Heterotrophic flagellates and Testate amoeba \n", - "12 Direct counts Ameboa and Flagellates \n", - "\n", - " Habitat Site Biomass density [g C m^-2] \\\n", - "0 Coniferous forest North Sweden 0.2 \n", - "1 Coniferous forest South Sweden 0.6 \n", - "2 Coniferous forest Germany 1.0 \n", - "3 Coniferous forest France 0.6 \n", - "4 Cropland Netherlands 1.2 \n", - 
"5 Cropland Netherlands 0.6 \n", - "6 Beech forest Germany 0.8 \n", - "7 Tundra Svalbard 8.3 \n", - "8 Tundra Svalbard 8.5 \n", - "9 Tundra Svalbard 2.5 \n", - "10 Tundra Svalbard 3.6 \n", - "11 Tundra Svalbard 10.1 \n", - "12 Cropland Netherlands 1.6 \n", - "\n", - " Remarks \n", - "0 Taken from table 3 \n", - "1 Taken from table 3 \n", - "2 Taken from table 3 \n", - "3 Taken from table 3 \n", - "4 Top 25 cm, Taken from table 1 \n", - "5 Top 25 cm (85% in top 10 cm), Taken from Table 1 \n", - "6 Taken from table 1 assuming 50% carbon content \n", - "7 Values extracted from Figure 2 in the control ... \n", - "8 Values extracted from Figure 2 in the control ... \n", - "9 Values extracted from Figure 2 in the control ... \n", - "10 Values extracted from Figure 2 in the control ... \n", - "11 Values extracted from Figure 2 in the control ... \n", - "12 From abstract. In top 25 cm. " - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0,'../../statistics_helper/')\n", - "from fraction_helper import *\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.1f}'.format\n", - "data = pd.read_excel('terrestrial_protist_data.xlsx')\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate based on this method, we first calculate the geometric mean of values for each study. 
We then calculate the geometric mean for each habitat, and then calculate the geometric mean of the average values from different habitats:" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass density of protists in soil based on direct biomass density measurements is ≈1.2 g C m^-2\n" - ] - } - ], - "source": [ - "# Define the function to calculate the geometric mean for each study\n", - "def groupby_gmean(input):\n", - " mean = gmean(input['Biomass density [g C m^-2]'])\n", - " habitat = np.unique(input['Habitat'])[0]\n", - " return pd.Series({'Habitat': habitat, 'Biomass density [g C m^-2]': mean})\n", - "\n", - "# Calculate the geometric mean for each study\n", - "study_mean = data.groupby('Reference').apply(groupby_gmean)\n", - "\n", - "# Calculate the geometric mean of the biomass density at each habitat\n", - "habitat_mean = data.groupby('Habitat')['Biomass density [g C m^-2]'].apply(gmean)\n", - "\n", - "# Calculate the geometric mean of biomass densities from different habitats\n", - "direct_biomass_mean = gmean(habitat_mean)\n", - "\n", - "print('Our best estimate for the biomass density of protists in soil based on direct biomass density measurements is ≈%.1f g C m^-2' % direct_biomass_mean)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our estimate for the total biomass of protists using the direct biomass density measurement method, we multiply our best estimate for the biomass density by the total area of ice-free land surface, which is ≈$1.3×10^{14} m^2$:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil protists using direct biomass density measurements is ≈0.2 Gt C\n" - ] - } - ], - "source": [ - "ice_free_area = 
1.3e14\n", - "\n", - "# Calculate the total biomass of soil protists\n", - "method1_estimate = direct_biomass_mean*ice_free_area \n", - "\n", - "print('Our best estimate for the biomass of soil protists using direct biomass density measurements is ≈%.1f Gt C' % (method1_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Number of individuals and carbon content\n", - "In this method, in order to calculate the total biomass of soil protists we calculate a characteristic number of individual protists in a gram of soil for each one of the morphological groups of protists (flagellates, ciliates, and naked and testate amoebae). We combine these estimates with estimates for the carbon content of each morphological group.\n", - "\n", - "### Characteristic carbon content of protists\n", - "We estimate the characteristic carbon content of a single protist from each of the morphological groups of protists based on data from three sources.\n", - "\n", - "The first source is [Finlay & Fenchel](http://dx.doi.org/10.1078/1434-4610-00060). We calculate the average cell length for each group. \n", - "\n", - "For flagellates, the estimates on the number of individuals per gram of soil distinguish between small and large flagellates (defined as flagellates below or above 15 µm in diameter). We thus estimate the average length of small and large flagellates by dividing them into these two size categories." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load data from Finlay & Fenchel\n", - "ff_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Finlay & Fenchel', skiprows=1)\n", - "\n", - "# Define the function to calculate the weighted average for each group of protists\n", - "def weighted_av_groupby(input):\n", - " return np.average(input['Length [µm]'],weights=input['Abundance [# g^-1]'])\n", - "\n", - "cell_lengths = ff_data.groupby('Protist type').apply(weighted_av_groupby)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert the cell length to biovolume according to the allometric relation described in Figure 10 of Finlay & Fenchel. The relation between cell volume and cell length is given by the equation: \n", - "$$V = 0.6×L^{2.36}$$\n", - "Where V is the cell volume in $µm^3$ and L is the cell length in µm." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "All protozoa 593.2\n", - "Ciliate 5,404.3\n", - "Large Flagellate 1,085.1\n", - "Naked amoebae 1,355.9\n", - "Small Flagellate 87.5\n", - "Testate amoebae 3,634.9\n", - "dtype: float64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cell_volumes = 0.6*cell_lengths**2.36\n", - "cell_volumes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We convert cell volumes to carbon content assuming ≈150 fg C µm$^{-3}$:" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Protist type\n", - "All protozoa 8.9e-11\n", - "Ciliate 8.1e-10\n", - "Large Flagellate 1.6e-10\n", - "Naked amoebae 2.0e-10\n", - "Small Flagellate 1.3e-11\n", - "Testate amoebae 5.5e-10\n", - "dtype: float64" - ] - }, - "execution_count": 37, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "ff_carbon_content = cell_volumes*150e-15\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "ff_carbon_content" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our second source for estimating the carbon content of soil protists is [Persson et al.](http://www.jstor.org/stable/20112829), which reports the dry weight of individuals from different morphological types:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Morphological typeMean body dry weight [g]
0Cilliates1.5e-09
1Flagellates4.0e-10
2Rhizopoda8.0e-10
\n", - "
" - ], - "text/plain": [ - " Morphological type Mean body dry weight [g]\n", - "0 Cilliates 1.5e-09\n", - "1 Flagellates 4.0e-10\n", - "2 Rhizopoda 8.0e-10" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "persson_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Persson', skiprows=1)\n", - "persson_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our third source is [Schaefer](http://dx.doi.org/10.1007/BF00318544), which reports the total number of cells and the total biomass for three morphological groups of protists - Flagellates, Amoebae and Testate amoebae. We calculate the characteristic carbon content for each group by dividing the total biomass by the total number of individuals:" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Group\n", - "Flagellates 1.0e-11\n", - "Naked amoebae 1.6e-10\n", - "Testate amoebae 2.0e-09\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data from Schaefer\n", - "schaefer_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Schaefer', skiprows=1,index_col='Group')\n", - "\n", - "# Calculate the characteristic carbon content for each of the groups of protists\n", - "schaefer_cc = schaefer_data['Biomass density [g C m^-2]']/schaefer_data['Number of individuals (# m^-2) ']\n", - "schaefer_cc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the carbon content of each morphological group is the geometric mean of the estimates from Finlay & Fenchel, Persson et al. and Schaefer. Persson reports values for Rhizopoda, which include naked amoebae, and does not report values for Testate amoebae. Schaefer reports values for naked and testate amoebae but not for ciliates. 
For flagellates, we use only the data from Finlay & Fenchel as it is able to distinguish between small and large Flagellates." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Carbon content [g C]
Protist type
Ciliate7.8e-10
Large Flagellate1.6e-10
Naked amoebae2.4e-10
Small Flagellate1.3e-11
Testate amoebae1.1e-09
\n", - "
" - ], - "text/plain": [ - " Carbon content [g C]\n", - "Protist type \n", - "Ciliate 7.8e-10\n", - "Large Flagellate 1.6e-10\n", - "Naked amoebae 2.4e-10\n", - "Small Flagellate 1.3e-11\n", - "Testate amoebae 1.1e-09" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "carbon_content = pd.DataFrame()\n", - "\n", - "ciliate_cc = gmean([ff_carbon_content['Ciliate'],persson_data.loc[0]['Mean body dry weight [g]']/2])\n", - "small_flagellate_cc = ff_carbon_content['Small Flagellate']\n", - "large_flagellate_cc = ff_carbon_content['Large Flagellate']\n", - "naked_amoebae_cc = gmean([ff_carbon_content['Naked amoebae'],persson_data.loc[2]['Mean body dry weight [g]']/2,schaefer_cc.loc['Naked amoebae']])\n", - "testate_amoebae_cc = gmean([ff_carbon_content['Testate amoebae'],schaefer_cc.loc['Testate amoebae']])\n", - "\n", - "carbon_content['Carbon content [g C]'] = pd.Series([ciliate_cc,large_flagellate_cc,naked_amoebae_cc,small_flagellate_cc,testate_amoebae_cc])\n", - "carbon_content.set_index(ff_carbon_content.index[1:],inplace=True)\n", - "carbon_content" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Number of individuals\n", - "We rely on two main sources for our estimate. The first is [Adl & Coleman](http://dx.doi.org/10.1007/s00374-005-0009-x). The second source is [Finlay & Fenchel](http://dx.doi.org/10.1078/1434-4610-00060). 
For each study, we calculate the geometric mean of measurements for each protist group:" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ac_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Adl & Coleman', skiprows=1)\n", - "\n", - "def groupby_mean(input):\n", - " return gmean(input.dropna())\n", - "ac_mean = ac_data[['Small flagellates','Large flagellates','Gymnamoebae', 'Ciliates']].apply(groupby_mean)\n", - "\n", - "ff_mean = ff_data.groupby('Protist type')['Abundance [# g^-1]'].apply(sum)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate we use the geometric mean of values from Adl & Coleman and from Finlay & Fenchel. For Flagellates, we use only the values from Adl & Coleman, as Finlay & Fenchel rely on the Most Probable Number method to measure the amount of flagellates. This method is based on culturing of protists, which is known to underestimate the actual number of protists. Adl & Coleman do not report a value for testate amoebae, and thus we rely on the values from Finlay & Fenchel." - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Abundance [# g^-1]
Protist type
Ciliate7.4e+02
Large Flagellate2.3e+04
Naked amoebae4.2e+04
Small Flagellate9.9e+05
Testate amoebae1.0e+04
\n", - "
" - ], - "text/plain": [ - " Abundance [# g^-1]\n", - "Protist type \n", - "Ciliate 7.4e+02\n", - "Large Flagellate 2.3e+04\n", - "Naked amoebae 4.2e+04\n", - "Small Flagellate 9.9e+05\n", - "Testate amoebae 1.0e+04" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abund_mean = pd.DataFrame()\n", - "ciliate_abun = gmean([ac_mean['Ciliates'], ff_mean['Ciliate']])\n", - "naked_amoebae_abun = gmean([ac_mean['Gymnamoebae'],ff_mean['Naked amoebae']])\n", - "abund_mean['Abundance [# g^-1]'] = pd.Series([ciliate_abun,ac_mean['Large flagellates'],naked_amoebae_abun,ac_mean['Small flagellates'], ff_mean['Testate amoebae']])\n", - "abund_mean.set_index(ff_mean.index[1:],inplace=True)\n", - "abund_mean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total biomass of protists per gram of soil, we multiply the total number of individuals of each group of protists by their respective carbon content, and sum over all protist groups:" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of protists per gram of soil is ≈3.8e-05 g C\n" - ] - } - ], - "source": [ - "tot_biomass_density = (carbon_content['Carbon content [g C]']*abund_mean['Abundance [# g^-1]']).sum()\n", - "print('Our best estimate for the biomass of protists per gram of soil is ≈%.1e g C' % tot_biomass_density)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To convert the biomass density per gram of soil to units of biomass per area, we use a soil bulk density of ≈1.5 g cm$^{-3}$. We assume that most biomass is concentrated in the top 20 cm of soil (see the section on terrestrial protists in the Supplementary Information for details)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of protists per m^2 of soil is ≈11 g C\n" - ] - }, - { - "data": { - "text/plain": [ - "Protist type\n", - "Ciliate 1.5e-02\n", - "Large Flagellate 9.9e-02\n", - "Naked amoebae 2.6e-01\n", - "Small Flagellate 3.4e-01\n", - "Testate amoebae 2.9e-01\n", - "dtype: float64" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bulk_density = 1.5e6\n", - "biomass_depth = 0.2\n", - "biomass_per_m2 = tot_biomass_density*bulk_density*biomass_depth\n", - "print('Our best estimate for the biomass of protists per m^2 of soil is ≈%.0f g C' % biomass_per_m2)\n", - "carbon_content['Carbon content [g C]']*abund_mean['Abundance [# g^-1]']*bulk_density*biomass_depth/biomass_per_m2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total biomass of protists based on measurements of number of individuals and characteristic carbon contents per individual, we multiply the biomass density per unit area by the total ice-free land surface, which is ≈$1.3×10^{14} m^2$:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil protists using measurements of number of individuals and carbon content is ≈1.4 Gt C\n" - ] - } - ], - "source": [ - "method2_estimate = biomass_per_m2*ice_free_area\n", - "print('Our best estimate for the biomass of soil protists using measurements of number of individuals and carbon content is ≈%.1f Gt C' % (method2_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The next three methods for estimating the total biomass of protists are based on molecular surveys of the abundance of protists in 
soils. The methods we use to estimate the total biomass of protists are 18S rDNA sequencing, 18S rRNA sequencing and metatranscriptomics. \n", - "\n", - "The molecular techniques we rely on measure the relative fraction of protists out of the total population of soil eukaryotes. Estimating the total biomass of eukaryotes based on molecular techniques assumes a correlation between the number of reads of a specific taxon and its biomass. Even though this procedure is not well established, we rely on it as one of our sources due to the scarcity of data. \n", - "\n", - "To generate our estimate of the total biomass of soil protists using these molecular techniques, we multiply the fraction of protists out of the total biomass of soil eukaryotes by our estimate for the total biomass of soil fungi, which we assume dominate the biomass of soil eukaryotes." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 18S rDNA sequencing\n", - "To estimate the total biomass of soil protists from 18S rDNA sequencing data, we calculate the fraction of protists out of the total population of soil eukaryotes based on data from forests ([Tedersoo et al.](http://dx.doi.org/10.1038/ismej.2015.116)), grasslands and croplands ([Chen et al.](http://dx.doi.org/10.3389/fmicb.2015.01149)). Below is a sample of the data:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SiteFungiProtistsHabitat
0G-07.0e-011.9e-01Grassland
1G-75.3e-012.9e-01Grassland
2G-306.1e-011.7e-01Grassland
3A-06.0e-012.7e-01Cropland
4A-76.4e-012.2e-01Cropland
5A-306.9e-011.4e-01Cropland
6G-F-05.8e-012.9e-01Grassland
7A-F-06.1e-012.4e-01Cropland
\n", - "
" - ], - "text/plain": [ - " Site Fungi Protists Habitat\n", - "0 G-0 7.0e-01 1.9e-01 Grassland\n", - "1 G-7 5.3e-01 2.9e-01 Grassland\n", - "2 G-30 6.1e-01 1.7e-01 Grassland\n", - "3 A-0 6.0e-01 2.7e-01 Cropland\n", - "4 A-7 6.4e-01 2.2e-01 Cropland\n", - "5 A-30 6.9e-01 1.4e-01 Cropland\n", - "6 G-F-0 5.8e-01 2.9e-01 Grassland\n", - "7 A-F-0 6.1e-01 2.4e-01 Cropland" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data from Chen et al.\n", - "chen_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Chen',skiprows=1)\n", - "chen_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We first calculate the geometric mean of the values in Chen et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "chen_mean = frac_mean(chen_data.groupby('Habitat')['Protists'].apply(frac_mean))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the fraction of protists out of the population of soil eukaryotes we use the geometric mean of the value from Chen et al. and the value reported in Tedersoo et al. of ≈6%. 
We calculate the total biomass of soil protists by multiplying the fraction of protists out of the total population of soil eukaryotes by our estimate of the total biomass of soil fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil protists based on 18S rDNA sequencing data is ≈1.4 Gt C\n" - ] - } - ], - "source": [ - "# The fraction of protists out of the population of soil eukaryotes reported in Tedersoo et al.\n", - "tedersoo_frac = 0.06\n", - "\n", - "# Calculate our best estimate for the fraction of soil protists\n", - "rDNA_frac = frac_mean(np.array([chen_mean,tedersoo_frac]))\n", - "\n", - "# Our best estimate for the biomass of soil fungi\n", - "fungi_biomass = 12e15\n", - "\n", - "# Calculate the total biomass of soil protists based on 18S rDNA sequencing data\n", - "method3_estimate = rDNA_frac*fungi_biomass\n", - "\n", - "print('Our best estimate for the biomass of soil protists based on 18S rDNA sequencing data is ≈%.1f Gt C' %(method3_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 18S rRNA sequencing\n", - "To estimate the total biomass of soil protists from 18S rRNA sequencing data, we calculate the fraction of protists out of the total population of soil eukaryotes based on data from beech and spruce forests ([Damon et al.](http://dx.doi.org/10.1371/journal.pone.0028967)). Below is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SampleFraction of protistsMethod
0Beech 1A1.2e-0118S rRNA
3Beech 1B1.2e-0118S rRNA
6Spruce 1A1.2e-0118S rRNA
9Spruce 1B1.2e-0118S rRNA
\n", - "
" - ], - "text/plain": [ - " Sample Fraction of protists Method\n", - "0 Beech 1A 1.2e-01 18S rRNA\n", - "3 Beech 1B 1.2e-01 18S rRNA\n", - "6 Spruce 1A 1.2e-01 18S rRNA\n", - "9 Spruce 1B 1.2e-01 18S rRNA" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data from Damon et al.\n", - "damon_data = pd.read_excel('terrestrial_protist_data.xlsx', 'Damon', skiprows=1)\n", - "\n", - "# Use the data based on 18S rRNA sequencing\n", - "rRNA_data = damon_data[damon_data['Method'] == '18S rRNA']\n", - "rRNA_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the values from Damon et al. as our best estimate for the fraction of protists out of the total population of soil eukaryotes. We calculate the total biomass of soil protists by multiplying the fraction of protists out of the total population of soil eukaryotes by our estimate of the total biomass of soil fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil protists based on 18S rRNA sequencing data is ≈1.5 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the values from Damon et al.\n", - "rRNA_frac = frac_mean(rRNA_data['Fraction of protists'])\n", - "\n", - "# Calculate the total biomass of soil protists based on 18S rRNA sequencing data\n", - "method4_estimate = rRNA_frac*fungi_biomass\n", - "\n", - "print('Our best estimate for the biomass of soil protists based on 18S rRNA sequencing data is ≈%.1f Gt C' %(method4_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Metatranscriptomics\n", - "To estimate the total biomass of soil protists from metatranscriptomics data, we calculate the fraction of protists out of the total population of soil eukaryotes based 
on data from beech and spruce forests ([Damon et al.](http://dx.doi.org/10.1371/journal.pone.0028967)). Below is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SampleFraction of protistsMethod
1Beech 2A3.6e-02Metatranscriptomics
2Beech 3A3.8e-02Metatranscriptomics
4Beech 2B5.2e-02Metatranscriptomics
5Beech 3B3.6e-02Metatranscriptomics
7Spruce 2A2.9e-02Metatranscriptomics
8Spruce 3A2.6e-02Metatranscriptomics
10Spruce 2B4.3e-02Metatranscriptomics
11Spruce 3B3.4e-02Metatranscriptomics
\n", - "
" - ], - "text/plain": [ - " Sample Fraction of protists Method\n", - "1 Beech 2A 3.6e-02 Metatranscriptomics\n", - "2 Beech 3A 3.8e-02 Metatranscriptomics\n", - "4 Beech 2B 5.2e-02 Metatranscriptomics\n", - "5 Beech 3B 3.6e-02 Metatranscriptomics\n", - "7 Spruce 2A 2.9e-02 Metatranscriptomics\n", - "8 Spruce 3A 2.6e-02 Metatranscriptomics\n", - "10 Spruce 2B 4.3e-02 Metatranscriptomics\n", - "11 Spruce 3B 3.4e-02 Metatranscriptomics" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Use the data based on metatranscriptomics\n", - "meta_trans_data = damon_data[damon_data['Method'] == 'Metatranscriptomics']\n", - "meta_trans_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the geometric mean of the values from Damon et al. as our best estimate for the fraction of protists out of the total population of soil eukaryotes. We calculate the total biomass of soil protists by multiplying the fraction of protists out of the total population of soil eukaryotes by our estimate of the total biomass of soil fungi:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of soil protists based on metatranscriptomics data is ≈0.4 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the values from Damon et al.\n", - "meta_trans_frac = frac_mean(meta_trans_data['Fraction of protists'])\n", - "\n", - "# Calculate the total biomass of soil protists based on metatranscriptomics data\n", - "method5_estimate = meta_trans_frac*fungi_biomass\n", - "\n", - "print('Our best estimate for the biomass of soil protists based on metatranscriptomics data is ≈%.1f Gt C' %(method5_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate for the biomass of soil protists, we use 
the geometric mean of the five estimates we generated from the five different methodologies:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the biomass of terrestrial protists is ≈0.7 Gt C\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of the five different estimates we generated\n", - "best_estimate = gmean([method1_estimate,method2_estimate,method3_estimate,method4_estimate,method5_estimate])\n", - "\n", - "print('Our best estimate for the biomass of terrestrial protists is ≈%.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the uncertainty associated with our estimate of the total biomass of terrestrial protists, we collect available uncertainties for the values reported within studies, between studies using the same method, and between methods. We use the highest uncertainty out of this collection of uncertainties as our best projection for the uncertainty associated with the estimate of the total biomass of terrestrial protists.\n", - "\n", - "## Intra-study uncertainty\n", - "For each study which reports more than one value, we calculate the 95% confidence interval of the geometric mean of those values.\n", - "\n", - "### Direct biomass measurement" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Reference\n", - "Bouwman & Zwart nan\n", - "De Ruiter et al. nan\n", - "Schaefer nan\n", - "Schröter et al. 1.8\n", - "Stapleton et al. 1.7\n", - "Zwart et al. 
nan\n", - "Name: Biomass density [g C m^-2], dtype: float64" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Calculate the 95% confidence interval geometric mean for each study\n", - "biomass_study_CI = data.groupby('Reference')['Biomass density [g C m^-2]'].apply(geo_CI_calc)\n", - "biomass_study_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Carbon content and number of individuals\n", - "We calculate the intra-study 95% confience interval around the estimate of the total number of protists per gram of soil from Adl & Coleman:" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Small flagellates 4.4\n", - "Large flagellates 2.8\n", - "Gymnamoebae 1.8\n", - "Ciliates 2.3\n", - "dtype: float64" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ac_CI = ac_data[['Small flagellates','Large flagellates','Gymnamoebae', 'Ciliates']].apply(geo_CI_calc)\n", - "ac_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 18S rDNA sequencing\n", - "We calculate the 95% confidence interval for the geometric mean of the values from Chen et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty of the value from Chen et al. is ≈1.2-fold\n" - ] - } - ], - "source": [ - "print('The intra-study uncertainty of the value from Chen et al. 
is ≈%.1f-fold' %frac_CI(chen_data['Protists']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 18S rRNA sequencing" - ] - }, - { - "cell_type": "code", - "execution_count": 156, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty associated with the fraction of protists based on 18S rRNA sequencing data of Damon et al. is ≈1.01-fold\n" - ] - } - ], - "source": [ - "print('The intra-study uncertainty associated with the fraction of protists based on 18S rRNA sequencing data of Damon et al. is ≈%.2f-fold' %frac_CI(rRNA_data['Fraction of protists']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 18S rRNA sequencing" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra-study uncertainty associated with the fraction of protists based on metatranscriptomics data of Damon et al. is ≈1.2-fold\n" - ] - } - ], - "source": [ - "print('The intra-study uncertainty associated with the fraction of protists based on metatranscriptomics data of Damon et al. is ≈%.1f-fold' %frac_CI(meta_trans_data['Fraction of protists']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Intra-methd uncertainty\n", - "For each method that relies on more than one study, we calculate the 95% confidence interval of the geometric mean of the values from the different studies. 
The methods which are based on more than one study are the direct biomass measurement-based method, the carbon content and number of individual based method and the 18S rDNA sequencing-based method.\n", - "\n", - "### Direct biomass measurement\n", - "To calculate our best estimate for the biomass of terrestrial protists based on direct biomass density measurements, we first calculated the geometric mean of values from the same habitat, generating characteristic values for each habitat.We then calculate the geomteric mean of the characteristic values for each habitat. \n", - "\n", - "As a measure of the interstudy uncertainty associated with the estimate based on direct biomass density measurements, we first calculate the 95% confidence interval of the characteristic values for each habitat, and then calculate the 95% confidence invertval around the geometric mean of the characteristic values from each habiat\n", - "\n", - "#### Uncertainty within habitats" - ] - }, - { - "cell_type": "code", - "execution_count": 152, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty for studies within the same habitat:\n" - ] - }, - { - "data": { - "text/plain": [ - "Habitat\n", - "Beech forest nan\n", - "Coniferous forest 1.8\n", - "Cropland 1.8\n", - "Tundra 1.7\n", - "Name: Biomass density [g C m^-2], dtype: float64" - ] - }, - "execution_count": 152, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "biomass_intra_habitat_CI = data.groupby('Habitat')['Biomass density [g C m^-2]'].apply(geo_CI_calc)\n", - "print('The interstudy uncertainty for studies within the same habitat:')\n", - "biomass_intra_habitat_CI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Uncertainty between habitats" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"The 95 percent confidence interval of the geometric mean of the characteristic biomass densities from each habitat is ≈2.8-fold\n" - ] - } - ], - "source": [ - "biomass_inter_habitat_CI = geo_CI_calc(habitat_mean)\n", - "print('The 95 percent confidence interval of the geometric mean of the characteristic biomass densities from each habitat is ≈%.1f-fold' %biomass_inter_habitat_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Carbon content and number of individuals\n", - "As a measure of the interstudy uncertainty associated with the estimate of the biomass of terrestrial protists based on the characteristic carbon content of soil protists and the density of number of individuals per unit area, we first calculate the interstudy uncertainty for the characteristic carbon content of each type of protist:\n", - "\n", - "#### Carbon content of groups of protists\n", - "For each group of protists, we calculate the 95% confidence interval around our estimate of the characteristic carbon content of single protists from that group. For flagellates, we rely only on a single source, and thus for the estimate of the carbon content of flagellates we are not able to project an uncertainty." 
- ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the carbon conent of a single protist is ≈3.6-fold\n" - ] - } - ], - "source": [ - "# Calculate the interstudy 95% confidence interval around the estimate of the carbon content of each group\n", - "ciliate_cc_CI = geo_CI_calc([ff_carbon_content['Ciliate'],persson_data.loc[0]['Mean body dry weight [g]']/2])\n", - "naked_amoebae_cc_CI = geo_CI_calc([ff_carbon_content['Naked amoebae'],persson_data.loc[2]['Mean body dry weight [g]']/2,schaefer_cc.loc['Naked amoebae']])\n", - "testate_amoebae_cc_CI = geo_CI_calc([ff_carbon_content['Testate amoebae'],schaefer_cc.loc['Testate amoebae']])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we calculate the interstudy uncertainty uncertainty associated with the estimate of the total number of individual protists per gram of soil:\n", - "\n", - "#### Number of individuals of \n", - "For each group of protists, we calculate the 95% confidence interval around our estimate of the density of number of individuals from that group per unit area. For flagellates and testate amoebae, we rely only on a single source, and thus we are not able to project an uncertainty." 
- ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Calculate the interstudy 95% confidence interval around the estimate of the number of individuals\n", - "# per gram of soil for each group\n", - "ciliate_abun_CI = geo_CI_calc([ac_mean['Ciliates'], ff_mean['Ciliate']])\n", - "naked_amoebae_abund_CI = geo_CI_calc([ac_mean['Gymnamoebae'],ff_mean['Naked amoebae']])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We propagate the uncertainties associated with the carbon content and number of individuals per gram soil for each group into our final estimate of the biomass of soil protists. In cases we could not calculate the uncertainty associated with the estimate, we use the mean of the uncertainties from the other groups." - ] - }, - { - "cell_type": "code", - "execution_count": 151, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the interstudy uncertainty associated with the estimate of the total biomass of soil protists based on estimates of carbon content and number of individuals is ≈9-fold\n" - ] - } - ], - "source": [ - "# Calculate the average uncertainty associated with the estimate of the carbon content \n", - "# and number of individuals per gram of soil\n", - "average_cc_CI = np.mean([ciliate_cc_CI,naked_amoebae_cc_CI, testate_amoebae_cc_CI])\n", - "average_abund_CI = np.mean([ciliate_abun_CI,naked_amoebae_abund_CI])\n", - "\n", - "# Propagate the uncertainty in the carbon content and number of individuals for each group\n", - "# For cased where no uncertainty projection is available, use the average uncertainty calculate\n", - "# above\n", - "ciliate_CI = CI_prod_prop(np.array([ciliate_cc_CI, ciliate_abun_CI]))\n", - "naked_amoebae_CI = CI_prod_prop(np.array([naked_amoebae_cc_CI,naked_amoebae_abund_CI]))\n", - "flagellate_CI = 
CI_prod_prop(np.array([average_cc_CI,average_abund_CI]))\n", - "testate_amoebae_CI = CI_prod_prop(np.array([testate_amoebae_cc_CI,average_abund_CI]))\n", - "\n", - "# Propagate the uncertainties for each group to the total estimate of the biomass of soil protists\n", - "method2_inter_CI = CI_sum_prop(estimates= (carbon_content['Carbon content [g C]']*abund_mean['Abundance [# g^-1]']), \n", - " mul_CIs=np.array([ciliate_CI,flagellate_CI,naked_amoebae_CI,flagellate_CI,testate_amoebae_CI]))\n", - "print('Our best projection for the interstudy uncertainty associated with the estimate of the total biomass of soil protists based on estimates of carbon content and number of individuals is ≈%.0f-fold' % method2_inter_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 18S-rDNA sequencing\n", - "Our estimate of the biomass of soil protists based on 18S rDNA sequencing relies on data from two studies (Tedersoo et al. and Chen et al.). We calculate the 95% confidence interval around the geometric mean of values from the two studies as our best projection of the interstudy uncertainty associated with the estimate of the total biomass of terrestrial protists based on 18S rDNA sequencing." 
- ] - }, - { - "cell_type": "code", - "execution_count": 153, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the interstudy uncertainty associated with the estimate of the total biomass of soil protists based on 18S rDNA sequencing is ≈3-fold\n" - ] - } - ], - "source": [ - "# Calculate the 95% confidence interval around the estimate for the fraction of soil protists\n", - "rDNA_frac_CI = frac_CI(np.array([chen_mean,tedersoo_frac]))\n", - "\n", - "print('Our best projection for the interstudy uncertainty associated with the estimate of the total biomass of soil protists based on 18S rDNA sequencing is ≈%.0f-fold' % rDNA_frac_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inter-method uncertainty\n", - "As our best estimate of the total biomass of soil protists we use the geometric mean of the estimates from the five independent methods. As a measure of the uncertainty associated with the geometric mean of estimates from different methods, we calculate the 95% confidence interval around the geometric mean of the estimates.\n", - "\n", - "Because we are less confident in our estimates based on molecular techmiques, we first calculate the geometric mean of the estimates based on the three molecular techniques, and then calculate the 95% confidence interval of the geometric mean of the estimates from the first two methods and the mean of the estimates based on molecular techniques:" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the inter-method uncertainty associated with the estimate of the total biomass of soil protists is ≈4-fold\n" - ] - } - ], - "source": [ - "# Calculate the geometric mean of estimates based on molecular techniques\n", - "mol_estimate = gmean([method3_estimate,method4_estimate,method5_estimate])\n", 
- "\n", - "# Calculate the 95% confidence interval around the geometric mean of values from \n", - "# the two estimates based on direct measurements and the mean value from molecular\n", - "# techniques\n", - "inter_method_CI = geo_CI_calc(np.array([method1_estimate,method2_estimate,mol_estimate]))\n", - "\n", - "print('Our best projection for the inter-method uncertainty associated with the estimate of the total biomass of soil protists is ≈%.0f-fold' % inter_method_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the estimate of the total biomass of soil protists, we use the highest uncertainty out of the collection of uncetainties we generated at the various levels (intra-study, interstudy, and inter-method):" - ] - }, - { - "cell_type": "code", - "execution_count": 169, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the estimate of the total biomass of soil protists is ≈9-fold\n" - ] - } - ], - "source": [ - "mul_CI = np.max([np.max(biomass_study_CI),\n", - " np.max(ac_CI),\n", - " frac_CI(chen_data['Protists']),\n", - " frac_CI(meta_trans_data['Fraction of protists']),\n", - " np.max(biomass_intra_habitat_CI),\n", - " biomass_inter_habitat_CI,\n", - " method2_inter_CI,\n", - " rDNA_frac_CI,\n", - " inter_method_CI])\n", - "\n", - "print('Our best projection for the uncertainty associated with the estimate of the total biomass of soil protists is ≈%.0f-fold' % mul_CI)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - 
"nbformat_minor": 2 -} diff --git a/results (copy).xlsx b/results (copy).xlsx deleted file mode 100644 index e4397d9..0000000 Binary files a/results (copy).xlsx and /dev/null differ diff --git a/results.xlsx b/results.xlsx index 3ee8a91..a7fce03 100644 Binary files a/results.xlsx and b/results.xlsx differ diff --git a/results_20180408.xlsx b/results_20180408.xlsx deleted file mode 100644 index c369057..0000000 Binary files a/results_20180408.xlsx and /dev/null differ diff --git a/viruses/.ipynb_checkpoints/phage_biomass_estimate-checkpoint.ipynb b/viruses/.ipynb_checkpoints/phage_biomass_estimate-checkpoint.ipynb deleted file mode 100644 index 723c833..0000000 --- a/viruses/.ipynb_checkpoints/phage_biomass_estimate-checkpoint.ipynb +++ /dev/null @@ -1,227 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import sys\n", - "sys.path.insert(0,'../statistics_helper/')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.0e}'.format\n", - "from excel_utils import *" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# Estimating the biomass of phages\n", - "Our estimate of the total biomass of phages relies upon the estimates for the total number of phages and the carbon content of a single phage which we derived in the relevant sections\n", - "\n", - "These are our best estimates for the different parameters required for the estimate, along with the associated uncertainties:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Carbon content of a single phage2e-17g C per individual2e+00
1Total number of phages1e+31Number of individuals1e+01
\n", - "
" - ], - "text/plain": [ - " Parameter Value Units Uncertainty\n", - "0 Carbon content of a single phage 2e-17 g C per individual 2e+00\n", - "1 Total number of phages 1e+31 Number of individuals 1e+01" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load estimates for the total number of phages and for the carbon cont\n", - "estimate = pd.read_excel('phage_biomass_estimate.xlsx')\n", - "estimate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to estimate the total biomass of phages, we multiply our estimate of the total number of phages by our estimate of the carbon content of a single phage." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total biomass of phages is 0.2 Gt C\n" - ] - } - ], - "source": [ - "best_estimate = estimate['Value'].prod()\n", - "\n", - "print('Our best estimate for the total biomass of phages is %.1f Gt C' %(best_estimate/1e15))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We propagate the uncertainties associated with each of the parameters to project the uncertainty of our estimate of the total biomass of phages:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estiamte of the biomass of phages is 15.6-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(estimate['Uncertainty'])\n", - "\n", - "print('Our best projection for the uncertainty associated with our estiamte of the biomass of phages is %.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Due to the scarcity of data on the different parameters used to estimate the total biomass of phages, we use 
a higher uncertainty projection of ≈20-fold." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py:196: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " res = shell.run_cell(code, store_history=store_history, silent=silent)\n", - "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py:2683: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "mul_CI = 20\n", - "\n", - "# Feed results to Table 1 & Fig. 1\n", - "update_results(sheet='Table1 & Fig1', \n", - " row=('Viruses','Viruses'), \n", - " col=['Biomass [Gt C]', 'Uncertainty','Total uncertainty'],\n", - " values=[best_estimate/1e15,mul_CI,mul_CI],\n", - " path='../results.xlsx')\n", - "\n", - "\n", - "# Feed results to Table S1\n", - "update_results(sheet='Table S1', \n", - " row=('Viruses','Viruses'), \n", - " col=['Number of individuals'],\n", - " values=estimate.loc[1,'Value'],\n", - " path='../results.xlsx')\n", - "\n", - "# Calculate non-deep subsurface virus biomass\n", - "phage_num = pd.read_excel('phage_num/phage_num_estimate.xlsx')\n", - "non_deep_phage = phage_num.loc[[0,2],'Value'].sum()*estimate.loc[0,'Value']\n", - "\n", - "# Feed results to Fig S1\n", - "update_results(sheet='FigS1', \n", - " row=('Viruses','Viruses'), \n", - " col=['Biomass [Gt C]'],\n", - " values=non_deep_phage/1e15,\n", - " path='../results.xlsx')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", 
- "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/carbon_content/.ipynb_checkpoints/phage_carbon_content-checkpoint.ipynb b/viruses/carbon_content/.ipynb_checkpoints/phage_carbon_content-checkpoint.ipynb deleted file mode 100644 index 7cbd09b..0000000 --- a/viruses/carbon_content/.ipynb_checkpoints/phage_carbon_content-checkpoint.ipynb +++ /dev/null @@ -1,411 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.0f}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the characteristic size of phages\n", - "In order to estimate the characteristic size of phages, we rely of data from quantitative transmission electron microscopy (qTEM) measurement of samples from 41 sites across the world's oceans, reported by [Brum et al.](http://dx.doi.org/10.1126/science.1261498). We extracted the data from figure 1 in Brum et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Site5% diameter [nm]Median diameter [nm]95% diameter [nm]
018_SRF355166
122_SRF344965
223_DCM485765
325_SRF384962
425_DCM385269
\n", - "
" - ], - "text/plain": [ - " Site 5% diameter [nm] Median diameter [nm] 95% diameter [nm]\n", - "0 18_SRF 35 51 66\n", - "1 22_SRF 34 49 65\n", - "2 23_DCM 48 57 65\n", - "3 25_SRF 38 49 62\n", - "4 25_DCM 38 52 69" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the data extracted from Brum et al.\n", - "data = pd.read_excel('phage_size_data.xlsx',skiprows=1)\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use the geometric mean of the median diameters from each site as our best estimate for the characteristic diameter of phages:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the radius of a phage is ≈26 nm.\n" - ] - } - ], - "source": [ - "phage_rad = gmean(data['Median diameter [nm]'])/2\n", - "print('Our best estimate for the radius of a phage is ≈%i nm.' %(phage_rad))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "r = (data['95% diameter [nm]']-data['Median diameter [nm]']).mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimating the carbon content of phages\n", - "To estimate the carbon content of phages, we rely on a biophysical model described in [Jover et al.](http://dx.doi.org/10.1038/nrmicro3289), which describes the carbon content of a phage as a function of its radius. The relation between the radius of phages and their total number of carbon atoms described in Jover et al. 
is: \n", - "$$ C_{head} = \\frac{4\\pi(r_c-h)^3C_{bp}fill}{3v_{bp}} + \\frac{4\\pi d_C(3r_c^2h-3h^2r_c+h^3)}{3}$$\n", - "Where $C_{head}$ is the total number of carbon atoms, $r_c$ is the radius of the phage, h is the thickness of the phage capsid, $C_{bp}$ is the number of carbon atoms in a single base pair of DNA, *fill* is that fraction of the phage volume that is filled with DNA, $v_{bp}$ is the volume of a single base pair of DNA, and $d_C$ is the number of carbon atoms per volume of protein.\n", - "\n", - "To get from the total number of carbon atoms to an estimate of the carbon content of a single phage, we can multiply the total number of atoms by the molecular weight of carbon, and divide by Avogadro's number.\n", - "\n", - "Jover et al. supply estimates for each of the parameters in the model, as well as their respective uncertainties. We plug into this model our estimates for the radius of phages in order to get an estimate for the carbon content of phages, as well as the uncertainty associated with this esitmate. We use 1.96 times the uncertainty reported in Jover et al. to calculate 95% confidence interval for the carbon content estimate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(2.31+/-0.25)e+14\n", - "Our best estimate for the carbon content of a single phage is ≈ 2.3e-17+/- 0.3e-17 g\n" - ] - } - ], - "source": [ - "# Import uncertainties library to deal with the error propagation\n", - "from uncertainties import ufloat\n", - "\n", - "##############################\n", - "# Define the model parameters#\n", - "##############################\n", - "\n", - "# The phage radius we calculated in the phage size section [nm]\n", - "r_c = phage_rad\n", - "\n", - "# The thickness of the phage capsid [nm]\n", - "h = ufloat(2.5,0.3*1.96)\n", - "\n", - "# The number of caron atoms in a single DNA base pair\n", - "C_bp = ufloat(19.5,0.1*1.96)\n", - "\n", - "# The fraction of the phage capsid filled with DNA\n", - "fill = ufloat(0.53,0.04*1.96)\n", - "\n", - "# The volume of a single base pair [nm^3]\n", - "v_bp = ufloat(0.34*np.pi,0)\n", - "\n", - "# The number of carbon atoms per volume of proteins [# nm^-3]\n", - "d_C = ufloat(31,1*1.96)\n", - "\n", - "#Avogadro's number [molecules per mol]\n", - "Na = 6e23 \n", - "\n", - "# Molecular weight of carbon [g per mol]\n", - "M_C = 12\n", - "\n", - "# Define the eqation for deriving the carbon content of a phage as a function of it's radius\n", - "func = lambda x: ((4*np.pi*(x-h)**3*C_bp*fill)/(3*v_bp) + 4*np.pi*d_C*(3*x**2*h-3*h**2*x+h**3)/3)*M_C/Na\n", - "\n", - "#C_head = (4*np.pi*(r_c-h)**3*C_bp*fill)/(3*v_bp) + 4*np.pi*d_C*(3*r_c**2*h-3*h**2*r_c+h**3)/3\n", - "\n", - "# Calculate our best estimate for the carbon content of a single phage\n", - "best_estimate = func(r_c)\n", - "\n", - "print(func(r_c)*1e31)\n", - "print('Our best estimate for the carbon content of a single phage is ≈{:10.1e} g'.format(best_estimate))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "To assess the 
uncertainty associated with the estimate of the radius of a single phage, we use the variability of capsid radii in the sites reported in Brum et al. We calculate the relative multiplicative uncertainty of the variation within a site and between sites. We take the higher uncertainty of the two as our best estimate for the uncertainty associated with the radius of phages." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The intra site uncertainty in the radius of phages is ≈1.4-fold\n", - "The intra sample uncertainty in the radius of phages is ≈1.02-fold\n" - ] - } - ], - "source": [ - "intra_CI = 1+gmean((data['95% diameter [nm]'] - data['5% diameter [nm]'])/data['Median diameter [nm]']/2)\n", - "inter_CI = geo_CI_calc(data['Median diameter [nm]'])\n", - "\n", - "print('The intra site uncertainty in the radius of phages is ≈%0.1f-fold' %intra_CI)\n", - "print('The intra sample uncertainty in the radius of phages is ≈%0.2f-fold' %inter_CI)\n", - "\n", - "rad_CI = np.max([intra_CI,inter_CI])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In each site Brum et al. sampled ≈100 phages, so the intra-site uncertainty should be much lower. Nevertheless, the uncertainty we calculated doesn’t take into consideration the fact that viruses from other environments might have different size ranges. Even though phages isolated from deep terrestrial deep subsurface seem to be in the same size range range ([Eydal et al.](http://dx.doi.org/10.1038/ismej.2009.66)), we chose to use the 95% variability within each site as a measure of the uncertainty in the radius of a single pahge, to take into account possible variability of phage sizes in other environments.\n", - "\n", - "To propagate the uncertainty in the radius of a single phage into the uncertainty in the carbon content of a single phage, we use a numerical approach. 
We chose to use this approach as propagating the uncertainty in the radius of phages creates a probability distribution of the carbon content of a single phage which is not gaussian, and thus the uncertainty ranges a normal error propagation procedure will output will be non-informative. Namely, the additive standard error that we will get will be higher than the nominal estimate, but in reality there is no phage with a netagive radius length.\n", - "Therefore, in order to quanitfy the uncertainty of the carbon content of a single phage stemming from the uncertainty in our estimate for the radius of a single phage, we sample 1000 times from a log-normal distribution of radii with a mean that is equal to our best estimate for the radius of a phage, and a multiplicative standard diviation which is equal to the uncertainty for the radius of a phage we project. We feed these sampled radii into our model and calculate a carbon content for each radius, resulting in a distribution of carbon content estimates. We take the multiplicative ratio between the 2.5% and 97.5% percentiles and our best estimate for the carbon content as our best estimate for the uncertainty of the carbon content of a single phage stemming from the uncertainty in our estimate for the radius of a single phage." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the uncertainty of the carbon content of a single phage stemming from the uncertainty in our estimate for the radius of a single phage is ≈2.3-fold\n" - ] - } - ], - "source": [ - "# Sample 1000 from a log-normal distribution of radii\n", - "rad_dist = np.random.lognormal(np.log(phage_rad),np.log(rad_CI)/1.96,1000)\n", - "\n", - "# Calculate the carbon content for each radius\n", - "cc_dist = func(rad_dist)\n", - "\n", - "# Calculate the upper and lower multiplicative ratios of the carbon content\n", - "upper_CI = np.percentile([x.nominal_value for x in cc_dist],97.5)/best_estimate\n", - "lower_CI = best_estimate/np.percentile([x.nominal_value for x in cc_dist],2.5)\n", - "\n", - "rad_cc_CI = np.mean([upper_CI,lower_CI]).nominal_value\n", - "\n", - "print('Our best estimate for the uncertainty of the carbon content of a single phage stemming from the uncertainty in our estimate for the radius of a single phage is ≈%.1f-fold' %rad_cc_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The uncertainty associated with the parameters of the model is ≈1.2-fold:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the parameters of the model is 1.2-fold\n" - ] - } - ], - "source": [ - "model_param_CI = 1+best_estimate.std_dev*1.96/best_estimate.nominal_value\n", - "print('The uncertainty associated with the parameters of the model is %.1f-fold' %model_param_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We combine these two uncertainties as our best projection for the uncertainty associated with the carbon content of a single phage:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the carbon content of a single phage is ≈2.3-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(np.array([rad_cc_CI,model_param_CI]))\n", - "print('Our best projection for the uncertainty associated with the carbon content of a single phage is ≈%.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the carbon content of a single phage: 2e-17 g\n", - "Uncertainty associated with the estiamte of the carbon content of a single phage: 2-fold\n" - ] - } - ], - "source": [ - "print('Our best estimate for the carbon content of a single phage: %.0e g' % best_estimate.nominal_value)\n", - "print('Uncertainty associated with the estiamte of the carbon content of a single phage: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../phage_biomass_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 'Parameter': 'Carbon content of a single phage',\n", - " 'Value': best_estimate.nominal_value,\n", - " 'Units': 'g C per individual',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.to_excel('../phage_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/phage_biomass_estimate_OLD.xlsx 
b/viruses/phage_biomass_estimate_OLD.xlsx deleted file mode 100644 index abef89f..0000000 Binary files a/viruses/phage_biomass_estimate_OLD.xlsx and /dev/null differ diff --git a/viruses/phage_num/.ipynb_checkpoints/tot_phage_num-checkpoint.ipynb b/viruses/phage_num/.ipynb_checkpoints/tot_phage_num-checkpoint.ipynb deleted file mode 100644 index 83608a6..0000000 --- a/viruses/phage_num/.ipynb_checkpoints/tot_phage_num-checkpoint.ipynb +++ /dev/null @@ -1,241 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import sys\n", - "import sys\n", - "sys.path.insert(0, '../../statistics_helper/')\n", - "from CI_helper import *\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of phages\n", - "In order to estimate the total number of phages world-wide, we sum our estimates for the total number of phages in all of the environments we cover: the marine environment, soils, the marine deep subsurface, and the terrestrial deep subsurface.\n", - "Here is a summary of our estimates for the total number of phages in each of the environments:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ParameterValueUnitsUncertainty
0Total number of marine phages2.0e+30Number of individualsnan
1Total number of phages in the marine deep subs...4.8e+30Number of individualsnan
2Total number of phages in soils6.2e+29Number of individuals3.2e+01
3Total number of phages in the terrestrial deep...2.1e+30Number of individuals6.4e+01
\n", - "
" - ], - "text/plain": [ - " Parameter Value \\\n", - "0 Total number of marine phages 2.0e+30 \n", - "1 Total number of phages in the marine deep subs... 4.8e+30 \n", - "2 Total number of phages in soils 6.2e+29 \n", - "3 Total number of phages in the terrestrial deep... 2.1e+30 \n", - "\n", - " Units Uncertainty \n", - "0 Number of individuals nan \n", - "1 Number of individuals nan \n", - "2 Number of individuals 3.2e+01 \n", - "3 Number of individuals 6.4e+01 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "estimate = pd.read_excel('phage_num_estimate.xlsx')\n", - "estimate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate of the total number of phages is the sum of our estimates for the number of phages in all the environments we cover:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages is 9.5e+30\n" - ] - } - ], - "source": [ - "# Calculate the total number of phages\n", - "best_estimate = estimate.sum()['Value']\n", - "\n", - "print('Our best estimate for the total number of phages is %.1e' %best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "We could only produce projections for the number of phages in soils and in the terrestrial deep subsurface. For the number of phages in the marine environment and in the marine deep subsurface, we did not have a methodology which we believe represents well the uncertainty associated with our estimate. We therefore chose to use an uncertainty of about one and a half orders of magnitude for both the number of phages in the marine environments and in the marine deep subsurface. 
We hope further studies could come up with a better methodology for assessing the uncertainty of the estimate of the total number of phages in those environments.\n", - "\n", - "We combine the uncertainties for the number of phages in each of the environments to produce our projection for the uncertainty associated with our estimate of the total number of phages:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with our estimate of the total number of phages is 13.4-fold\n" - ] - } - ], - "source": [ - "# Set the uncertainty associated with our estimate of the total number of phages\n", - "# in the marine environment and in the marine deep subsurface as one and a half\n", - "# orders of magnitude\n", - "estimate.loc[0,'Uncertainty'] = 10**1.5\n", - "estimate.loc[1,'Uncertainty'] = 10**1.5\n", - "\n", - "# Combine the uncertainties for all environments to produce our best projection\n", - "mul_CI = CI_sum_prop(estimates=estimate['Value'],mul_CIs=estimate['Uncertainty'])\n", - "\n", - "print('Our best projection for the uncertainty associated with our estimate of the total number of phages is %.1f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages : 1e+31\n", - "Uncertainty associated with the estimate of the total number of phages: 13-fold\n" - ] - } - ], - "source": [ - "print('Our best estimate for the total number of phages : %.0e' % best_estimate)\n", - "print('Uncertainty associated with the estimate of the total number of phages: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../phage_biomass_estimate.xlsx')\n", - 
"result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Total number of phages',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Number of individuals',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "\n", - "result.to_excel('../phage_biomass_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/phage_num/marine/.ipynb_checkpoints/marine_phage_num-checkpoint.ipynb b/viruses/phage_num/marine/.ipynb_checkpoints/marine_phage_num-checkpoint.ipynb deleted file mode 100644 index 57cc908..0000000 --- a/viruses/phage_num/marine/.ipynb_checkpoints/marine_phage_num-checkpoint.ipynb +++ /dev/null @@ -1,274 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline \n", - "from scipy.stats import gmean\n", - "from scipy.optimize import curve_fit\n", - "pd.options.display.float_format = '{:,.1e}'.format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of marine phages\n", - "To estimate the total number of phages, we rely on a recent study which characterized the abundance of marine phages and prokaryotes globally [Wigington et al.](http://dx.doi.org/10.1038/nmicrobiol.2015.24). Here is a sample of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBSDATEStudylonglatDEPTHBACTERIAVIRUS
018/28/2000ARCTICSBI-1.0e+026.8e+011.0e+001.2e+052.4e+07
128/28/2000ARCTICSBI-1.0e+026.8e+011.0e+001.7e+052.4e+07
238/28/2000ARCTICSBI-1.0e+026.8e+011.0e+001.6e+051.5e+07
348/28/2000ARCTICSBI-1.0e+026.8e+011.0e+001.2e+051.6e+07
458/30/2000ARCTICSBI-1.0e+027.0e+011.0e+001.2e+051.3e+07
\n", - "
" - ], - "text/plain": [ - " OBS DATE Study long lat DEPTH BACTERIA VIRUS\n", - "0 1 8/28/2000 ARCTICSBI -1.0e+02 6.8e+01 1.0e+00 1.2e+05 2.4e+07\n", - "1 2 8/28/2000 ARCTICSBI -1.0e+02 6.8e+01 1.0e+00 1.7e+05 2.4e+07\n", - "2 3 8/28/2000 ARCTICSBI -1.0e+02 6.8e+01 1.0e+00 1.6e+05 1.5e+07\n", - "3 4 8/28/2000 ARCTICSBI -1.0e+02 6.8e+01 1.0e+00 1.2e+05 1.6e+07\n", - "4 5 8/30/2000 ARCTICSBI -1.0e+02 7.0e+01 1.0e+00 1.2e+05 1.3e+07" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_excel('marine_phage_data.xls',skiprows=1)\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We bin the data based to ten depth bins so that each bin contains the same amount of data. The abundance of virions seems to follow an exponential deacy with depth:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[-0.4940688 17.61664779]\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAETCAYAAAAs4pGmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl4VOX5xvHvkwRkNWIArYIExFKw\nKpWIcUFxq7jgbhURl5+KVKhbbYutVVHbat2oSrUIuKK4VGURFbQsbhESQRZRwUAEl4IhRPYs8/7+\nOBMa4iQZkpk5cyb357pyyZycOfMkGXPnPe8572POOURERGpK87sAERFJTgoIERGJSAEhIiIRKSBE\nRCQiBYSIiESkgBARkYgUECIiEpECQkREIlJAiIhIRBl+F9AY7du3d9nZ2X6XISISKAUFBd875zrU\nt1+gAyI7O5v8/Hy/yxARCRQzK4pmv0CeYjKzgWY2trS01O9SRERSViADwjk31Tk3NDMz0+9SRERS\nViADQkRE4i+QcxBmNhAY2L179x99rry8nDVr1rBt27bEFya7rEWLFnTq1IlmzZr5XYqI1GDJ0g/C\nzFoD/wTKgNnOuYn1PScnJ8fVnKReuXIlbdu2JSsrCzOLT7ESE845iouL2bhxI127dvW7HJEmw8wK\nnHM59e0X11NMZjbBzNaa2ZIa2weY2edmtsLMRoY3nwO87Jy7CjijnuPWOkm9bds2hUNAmBlZWVka\n7YkkqXjPQTwJDKi+wczSgTHAKUAvYJCZ9QI6AavDu1XWddD6JqkVDsGhn5UkUkFRCWNmraCgqMTv\nUhpn7TIIheL+MnENCOfcXGB9jc19gRXOuULnXBkwCTgTWIMXEnGvS0SanoKiEgaPy+P+GZ8zeFxe\nMEOifBu8fTs8ehQseDruL+fHL+J9+d9IAbxg2Bd4BTjXzB4Fptb2ZDMbamb5Zpa/bt26+FbaQOnp\n6fTu3ZsDDzyQQw45hPvvv59QPWm/atUqnnvuuQRVKNL05BUWU1YRIuSgvCJEXmGx3yXtmq8+gseO\nhvcehN4XQa+z4v6SSXMVk3NuM3B5FPuNBcaCN0kdi9cuKCohr7CY3G5Z9OnSrtHHa9myJQsXLgRg\n7dq1XHTRRfzwww+MGjWq1udUBcRFF13U6NcXkR/L7ZZF84w0yitCNMtII7dblt8lRadsM7xzJ3z0\nGGR2hiGvwv7HJ+Sl/QiIr4HO1R53Cm/zRdWws6wiRPOMNCZemRuTkKjSsWNHxo4dy2GHHcbtt99O\nUVERQ4YMYfPmzQA88sgjHHnkkYwcOZJly5bRu3dvLr30Us4+++yI+4lIw/Tp0o6JV+bG9I/BuCuc\nA1N+AxuKoO9QOOE22K1Nwl7ej4CYDxxgZl3xguFCwLc/myMNO2P9xunWrRuVlZWsXbuWjh07MnPm\nTFq0aMHy5csZNGgQ+fn53H333dx3331MmzYNgC1btkTcT0Qark+XdsEIhm2lMPNWKHgS9twfLn8D\nuiT+D8S4BoSZPQ/0B9qb2RrgNufceDMbAbwFpAMTnHNL41lHXRI97CwvL2fEiBEsXLiQ9PR0vvji\ni0btJyIp5ou3YOr1sOk7OPJaOO6P0KylL6XENSCcc4Nq2T4dmB7P145WIoadhYWFpKen07FjR0aN\nGsVee+3FJ598QigUokWLFhGf8+CDD0a1n4ikiC3r4c2bYdEk6NATLngWOvXxtaSkmaT2UzyHnevW\nrWPYsGGMGDECM6O0tJROnTqRlpbGU089RWWld8tH27Zt2bhx447n1bafiKSgTyfD6zfB1vVw7B+g\n328hYze/q1JAxMPWrVvp3bs35eXlZGRkMGTIEG688UYArrnmGs4991yefvppBgwYQOvWrQE4+OCD\nSU9P55BDDuGyyy6rdT8RSSGb1sL0m7yA+MkhMOQV2Psgv6vaI
WnWYmqISGsxLVu2jJ49e/pUkTSE\nfmbS5DgHi16EN//gXcba/2ZvviE9MX+zR7sWk0YQIiKJVPo1TLsBlr8FnfrCmY9Ahx5+VxWRAkJE\nJBGcg4+fhhm3QGU5nPw3OPxqSEv3u7JaKSBEROKtZBVMuRZWzoHsfnDGQ7BnN7+rqpcCQkQkXkIh\nmP+4t8CepcPpo+HQSyEtGOuRKiBEROLh++UweQSszoPuJ8HA0ZDZqf7nJREFhIhILFVWwIcPw6y/\neXdAn/UYHHIhBLD3STDGOTXU1VEuGVQt9131cffdd/tdUq1Gjx7Nli1bdjw+9dRT2bBhQ53Pyc7O\n5vvvv//R9pdeeomePXty3HHHkZ+fz7XXXgvA7Nmz+eCDD2JbuEgy+u9SGHeCd0rpgJNg+DzoPSiQ\n4QABHUE456YCU3Nycq7yu5ZIqi/3nexGjx7NxRdfTKtWrQCYPr3hK6CMHz+exx9/nKOPPhqAnBzv\nMuvZs2fTpk0brUYrqauiDN57AObeBy0y4fwnvX4NAQ2GKoEMiKi9MRK+WxzbY+59EJyy6yOC0tJS\n+vbty5QpU+jRoweDBg3i+OOP56qrrqJNmzZcddVVzJgxg7333ptJkybRoUMHFi5cyLBhw9iyZQv7\n778/EyZMoF27dvTv35/DDz+cWbNmsWHDBsaPH0+/fv2orKxk5MiRzJ49m+3btzN8+HCuvvpqZs+e\nze2330779u1ZsmQJffr04dlnn+Xhhx/mm2++4bjjjqN9+/bMmjWL7Oxs8vPzad++PWeddRarV69m\n27ZtXHfddQwdOrTWr++OO+7gvffe44orruCMM87gtNNO47777uORRx7hscceIz09fcdr9uvXrzE/\nAZHk8vXH3lzD2qVw0K9gwN3QOiC9JuoRyFNMya5qqY2qjxdeeIHMzEweeeQRLrvsMiZNmkRJSQlX\nXeUNgDZv3kxOTg5Lly7l2GOP3dFY6JJLLuGee+5h0aJFHHTQQTs1HKqoqGDevHmMHj16x/bx48eT\nmZnJ/PnzmT9/Po8//jgrV64EYMGCBYwePZpPP/2UwsJC3n//fa699lr22WcfZs2axaxZs370dUyY\nMIGCggLy8/N56KGHKC6uvQPXrbfeSk5ODhMnTuTee+/dsT07O5thw4Zxww03sHDhQoWDpI7yrTDz\nNu+U0tb1MGgSnPt4yoQDpPoIogF/6cdCbaeYTjrpJF566SWGDx/OJ598smN7WloaF1xwAQAXX3wx\n55xzDqWlpWzYsIFjjz0WgEsvvZTzzz9/x3POOeccAPr06cOqVasAmDFjBosWLeLll18GvFHL8uXL\nad68OX379qVTJ+8Kit69e7Nq1aodp4Jq89BDD/Hqq68CsHr1apYvX05WVuq8+UUa7Ks8mDwcilfA\nL4bAL++Clnv4XVXMpXZAJJlQKMSyZcto1aoVJSUlO35h12RRnLfcbTdvpcf09HQqKioAcM7x8MMP\nc/LJJ++07+zZs3fsX/M5tZk9ezZvv/02H374Ia1ataJ///5s27at3rpEUlrZZnjnDvjoXwlv/+kH\nnWJKoAcffJCePXvy3HPPcfnll1NeXg54wVH1V/9zzz3H0UcfTWZmJu3atePdd98F4JlnntkxmqjN\nySefzKOPPrrjuF988cWOlqW1qbnMeJXS0lLatWtHq1at+Oyzz8jLy9vlr7e+1xAJlMLZ8M8jvN7Q\nfYfCNR+mdDiARhBxUTUHUWXAgAFcfvnljBs3jnnz5tG2bVuOOeYY7rrrLkaNGkXr1q2ZN28ed911\nFx07duSFF14A4KmnntoxSd2tWzeeeOKJOl/3yiuvZNWqVRx66KE45+jQoQOvvfZanc8ZOnQoAwYM\n2DEXUb3mxx57jJ49e9KjRw9yc3Mb/P0YOHAg5513HpMnT9YktQTPtlKY8Wf4+Clf23/6Qct9J4E2\nbdqwadMmv8vwTRB/ZtJEV
G//ecQIX9t/xpKW+xYRaagt6+HNkbDoBa/954XPwr7+tv/0QyADwswG\nAgO7d+/udykx0ZRHDyJJ59PJ8PpvYWtJUrX/9EMgJ6mdc1Odc0MzMzNr+3yCK5KG0s9KksamtfDC\nEHjxEth9Hxg62zul1ETDAQI6gqhLixYtKC4uJisrK6rLRcU/zjmKi4tp0aKF36VIU7ZT+88tcMJt\nCW3/mcxS7jvQqVMn1qxZw7p16/wuRaLQokWLWu8HEYm70q9h2vWwfEa4/ecY6PBTv6tKGikXEM2a\nNaNr165+lyEiycw5KHgSZt4KoQpv/aS+Q5O6/acfUi4gRETqtH4lTL0WVs4Nt/98GPbUH5WRBDIg\nUu0qJhFJgFAI5o2Fd0b9r/1nn8sCvyR3PKXkVUwiIjv5fjk8cYo3Ed3lKBieBzmXKxzqEcgRhIhI\nVGq2/zz7X3DwBQqGKCkgRCQ1fbfEW5L724XQcyCcej+03cvvqiIqKCohr7CY3G5Z9OnSzu9ydlBA\niEhqqSiDd++Hd++DFnvA+U/BgWf5XVWtCopKGDwuj7KKEM0z0ph4ZW7ShEQgA0KT1CIS0U7tP8+H\nAfckfYe3vMJiyipChByUV4TIKyxOmoDQJLWIBF/5Vu+ehh3tP1+Ac8clfTgA5HbLonlGGukGzTLS\nyO2WPDUHcgQhIrJD0YcwZYTX/vPQS+CkOwPV/rNPl3ZMvDJXcxAiIjGzfZPX/nPeWNijMwx5DfY/\nzu+qGqRPl3ZJFQxVAhkQmoMQaeK+nOXdDb1htbdExgm3wm5t/K4q5WgOQkSCY1spTPkNPHMWpDf3\n2n+e+neFQ5wEcgQhIk3Q52/CtBu89p9HXQf9b06J9p/JTAEhIslty3p44w+w+EXo2KvJtv/0QyAD\nQnMQIk3E0tdg+k3h9p8jw+0/m/tdVZOhOQgRST4b/wsvXAwvXQq77wtD58BxNyscEiyQIwgRSVHO\nwSeT4M2R3s1vJ94OR/xG7T99Esjvuk4xiaSg0jUw9XpYMRM6H+61/2x/gN9VNWk6xSQi/nIO8p+A\nMblQ9L63ftLlbygckkDSjCDMrBvwJyDTOXee3/WISAKsX+nd17DqXeh6DAx8SO0/k0hcRxBmNsHM\n1prZkhrbB5jZ52a2wsxGAjjnCp1zV0R53IFmNra0tDQeZYtIvIUqIe9RePRI+GYhDPwHXDJF4ZBk\n4n2K6UlgQPUNZpYOjAFOAXoBg8ys164cVKeYRAJs3RcwYYA3EZ19NAz/SL2hk1RcTzE55+aaWXaN\nzX2BFc65QgAzmwScCXwazTHNbCgwFGC//faLWa0iEmeVFfDBP2D2PdC8FZw9Fg7+lYIhifkxSb0v\nsLra4zXAvmaWZWaPAb8ws5tre7JzbqxzLsc5l9OhQ4d41yoisfDdYhh3vLf6ao8BMHweHKLe0Mku\naSapnXPFwDC/6xCRGKoo81p/vns/tGwHv3oaep3pd1USJT8C4mugc7XHncLbRCSVfF0Qbv/5KRx8\nIQz4G7Ta0++qZBf4ERDzgQPMrCteMFwIXORDHSISD+VbYdZf4cNHoM3ecNGL8NOT/a5KGiCuAWFm\nzwP9gfZmtga4zTk33sxGAG8B6cAE59zSeNYhIglS9IE3alj/JRx6KfzyTmihqw2DKt5XMQ2qZft0\nYHo8X1tEEmj7JnhnVLj9Zxe4ZDJ06+93VdJISTNJLSIB9eUsmHItlK6Gw38NJ/wZmrf2uyqJAQWE\niDTM1g0w4xZY8AxkHQD/9ybsl+t3VRJDCggR2XWfvxFu/7kWjr7Ba+bTrIXfVUmMKSBEJHqbi+HN\nP8Dil2Cvn8Og52GfX/hdlcSJAkJE6uccLH0Vpv8OtpVC/z96Iwd1eEtpCggRqdvG7+D138Jn07zR\nwplTYK8D/a5KEkABISKR/aj95yg4YoTafzYh+kmLyI9tWA3TrocVb0PnXDjzkXo7vBUUlZB
XWExu\ntyz6dGmXoEIlnhQQIvI/oRB8/CTMuBVcCE65Fw67EtLqXvi5oKiEwePyKKsI0TwjjYlX5iokUkAg\nA8LMBgIDu3fv7ncpIqljfaF3w9uqd6HrsXDGQ9AuO6qn5hUWU1YRIuSgvCJEXmGxAiIF+NEPotHU\nUU4khkKV8OEY+OeR8O0ncMbD3lIZUYYDQG63LJpnpJFu0CwjjdxuWfGrVxImkCMIEYmRdZ/D5OGw\nZj78dACc9gBk7rvLh+nTpR0Tr8zVHESKUUCINEWV5fDBQzD7bm/dpHMeh4POb1SHtz5d2ikYUowC\nQqSp+XaRN2r4bhH0OgtOvRfadPS7KklCCgiRpqJiO8y9D957AFruqfafUi8FhEhTsCbfGzWs+wwO\nGQQn/1XtP6VetQaEmR0axfPLnXOLY1iPiMRS2RaY/VfvKqW2P4GLXoKf/tLvqiQg6hpBzMHrH13X\nrFVXIDuWBYlIjKx6H6aM8O5v6HM5nHQHtNjd76okQOoKiPnOuePrerKZ/SfG9URFN8qJ1GH7Rnh7\nFMx/PNz+cwp0O9bvqiSAzDnndw0NlpOT4/Lz8/0uQyR5fPkfmHJduP3nMLX/lIjMrMA5l1PfflFN\nUpvZwXinknbs75x7pcHViUhsbd0AM/4EC54Nt/98C/Y73O+qJODqDQgzmwAcDCwFQuHNDlBAiCSD\nz6Z77T83r1P7T4mpaEYQuc65XnGvRER2zeZieOP3sORlr/3nRZPU/lNiKpqA+NDMejnnPo17NSJS\nP7X/lASJJiCexguJ74DteJe9OufcwXGtTER+bKf2n4fCmWNgLw3wJT6iCYjxwBBgMf+bg/CVLnOV\nJsc5WPgcvHWzt2TGSXdA7nC1/5S4iubdtc45NyXulewC59xUYGpOTs5VftciEncbVsPU6+DLd2C/\nI+CMR6C9/jiS+IsmIBaY2XPAVLxTTIAucxWJu1AICibAzNu8EUSU7T9FYiWagGiJFwzVF3DRZa4i\n8VT8pdf+s+i9XW7/WV1BUYma+EiD1RsQzrnLE1GIiOC1//zoMXjnTkhv5rX//MWQBjXyKSgqYfC4\nPMoqQjTPSGPilbkKCdkltY5VzWxofU+OZp94MLOBZja2tLTUj5cXiY+1n8GEk+GtP3prJ12TB4de\n0uAub3mFxZRVhAg5KK8IkVdYHOOCJdXVNYIYaWbf1/F5A64Dxsa2pPppklpSSmU5vP8PmHNPzNp/\nAuR2y6J5RhrlFSGaZaSR2y0rRgVLU1Hfct8D63n+zBjWItL0fLsIJl8D3y2OefvPPl3aMfHKXM1B\nSIPVGhCaexCJo4rtMPdeeO/BcPvPZ6DXGTF/mT5d2ikYpMECeZeNbpSTQFP7TwmIQF5Q7Zyb6pwb\nmpmZ6XcpItEr2wJv/QnGn+Q19Rn8Mpz9mMJBklYgRxAigbPqPZjyG7X/lECJph/EbsC5/Lhh0B3x\nK6vemnSKSYJh+0Z4+3aYP8670e3SqdD1GL+rEolKNKeYJgNnAhXA5mofvtEpJgmEFW/DP4+A+eMh\n9xr49QcKBwmUaE4xdXLODYh7JSKpYmsJvHULLFT7Twm2aALiAzM7yDm3OO7VREmnmCRpffY6TLtR\n7T8lJZhzLvInzBbjLcqXARwAFJJkDYNycnJcfn6+32WIwObvw+0//+21/zzzEbX/lKRlZgXOuZz6\n9qtrBHF6DOuJipmdBZwG7A6Md87NSHQNIrvEOS8U3vg9bPsBjvsTHHW92n9KSqh1kto5V+ScKwLu\nqvp39W3RvoCZTTCztWa2pMb2AWb2uZmtMLOR4dd8zTl3FTAMuKCOY2qxPvHfD9/CpMHw7ytgjy5w\n9Vw49vcRw6GgqIQxs1ZQUFTiQ6EiDRPNVUwHVn9gZulAn114jSeBnSa5w8cYA5wC9AIGmVn1xrq3\nhD8fka5iEl85BwuehTGHe13eTroTrphZa2/oqmW375/
xOYPH5SkkJDDqWu77ZjPbCBxsZj+Y2cbw\n47V4l75GxTk3F1hfY3NfYIVzrtA5VwZMAs40zz3AG865j2upa6iZ5ZtZ/rp166ItQyQ2NnwFz57j\nLZWxVy8Y9j4cdW2dvaG17LYEVV2nmP7mnGsL3Ouc29051zb8keWcu7mRr7svsLra4zXhbb8BTgTO\nM7NhtdQ11jmX45zL6dChQyPLEIlSKOTd7PbPI+Crj7z2n5dNj6o3dNWy2+mGlt2WQInmMtc/mtk5\nwNF4VzW965x7LR7FOOceAh6Kx7FFGqz4S2+ZjKL3oVt/GPgQtOsS9dO17LYEVTQBMQboDjwffjzM\nzE5yzg1vxOt+DXSu9rhTeJtI8ghVQt4/4T9/gfTmjWr/qWW3JYiiCYjjgZ4ufMOEmT0FLG3k684H\nDjCzrnjBcCFwUSOPKRI7az/z5hm+zoefngKnPwC77+N3VSIJFc1VTCuA/ao97hzeFhUzex74EOhh\nZmvM7ArnXAUwAngLWAa86JxrbOiINF5ludfI51/9vJVXzx0Pg55XOEiTFM0Ioi2wzMzm4c1B9AXy\nzWwKgHOuzjZYzrlBtWyfDkzftXJF4ujbT7xRw3eL4cCzvYnoNroQQpquaALi1rhXIeKniu0w5+9e\n+8/W7eGCZ6Fnfe3YRVJfvQHhnJtjZl2AA5xzb5tZSyDDObcx/uWJxNmafHjtGvj+c+g9GE7+C7Rs\nR0FRia46kiYvmoZBVwFDgT2B/fGuOHoMOCG+pYnEUdkWmPUX7yqltvvA4H/DAScC/7vzuawiRPOM\nNCZemauQkCYpmlNMw/HmHT4CcM4tN7OOca1KJJ5WvQeTR0DJSsj5Pzhx1E7tPyPd+ayAkKYomoDY\n7pwrs/C132aWgTdZLRIs2zfCzNsgfzy06wqXToOu/X60W9Wdz+UVId35LE1aNAExx8z+CLQ0s5OA\na4Cp8S1LJMZWvA1Tr4fSNXDECG9Z7uatIu6qO59FPNEExEjgCmAxcDXepanj4lmUSMxUb//Zvgdc\nMQM69633abrzWSS6q5hCZvYa8JpzTsunSnB89jpMu8Hr9tbvt3DM79X+U2QX1BoQ5k063IZ3x3Na\neFsl8LBz7o7ElCfSAJu/h+m/g6WvwF4HweCX4CeH+F2VSODUtdTGDcBRwGHOuT2dc3sChwNHmdkN\nCamuFuooJxE5B4tfhjF94bNpcPwtMHSWwkGkgSy8Bt+PP2G2ADjJOfd9je0dgBnOOd87sufk5Lj8\n/Hy/y5Bk8MO38Ppv4fPXYd8+cOYY6NjT76pEkpKZFTjncurbr645iGY1wwHAObfOzJo1qjqRWHEO\nFk6EN/8Ildvhl3dB7jWQlu53ZSKBV1dAlDXwcyI7iduyFRu+gqnXwZf/gS5Hef0asvaP3fFFmri6\nAuIQM/shwnYDdCmIRCUuy1aEQt7Nbm/f7j0+9T7IuQLSolm9XkSiVWtAOOc0RpdGi/myFdXbf+5/\nPAz8B+yxX/3PE5FdFs2NciINFrNlK2q2/zxzjLf6agPaf4pIdBQQElcxWbZi7TJvcb2v86HHqXDa\nA7D7T2JfrIjsRAEhcdfgZSsqy+G90TD377BbW6/958/P1ahBJEECGRBmNhAY2L17d79LkXjZqf3n\nOXDqvV63NxFJmEBe9uGcm+qcG5qZmel3KRJr5dvgnTtg7HGwaS1cMBHOf0LhIOKDQI4gJEWtnu+N\nGmq0/xQRfyggxH9V7T8/HAO77wsX/xu6n+h3VSJNngJC/LXyXe++hpKV3s1uJ96+U/tPEfGPAkL8\nse0HePs2yJ9QZ/tPEfGPAkISb/nb3hpKP3xdb/tPEfFPIANCl7kG1NYSeOtP3uqr7XvAFTOh82F+\nVyUitdBlrpIYy6bBmMPhk0nQ7yYY9q7CQSTJBXIEIQGi9p8igaWAaKLi1qOhinOw5N9eOJRt8tp/\nHnU9pKvXlEhQKCC
aoLj0aKjuh2/h9Rvh8+lq/ykSYIGcgzCzgWY2trS01O9SAilSj4aYcA4+fsab\na/jyP177zytmKhxEAiqQAaFJ6sap6tGQbjSuR0N1G76CZ86GKSNg75/Drz+AI3+j3tAiAaZTTE1Q\nTHo0VFH7T5GUpYBoohrco6G64i+9Rj5ffaD2nyIpKJABoRvlfBaq9BbWm/UXyNhN7T9FUlQgzwNo\nDsJHa5fB+JNg5p9h/xPgmo/gFxcrHERSUCBHEOKDynJ470GY83dvtdXzJnid3hQMIilLASH1+2ah\nN9fw38VeT+hT/q4ObyJNQCADQnMQCVK+DebcA+//wwuECyZCz9P9rkpEEkRzEBLZ6nnwr37w3gNw\nyCAY/pHCQaSJCeQIQuKobDP85y7IexQyO6n9p0gTFsiA0CmmOFk5N9z+cxUcdqXX/nO3tj4XJSJ+\n0Skm8dp/TrsBnhoIGFz2Opx2v8JBpIlLmhGEmfUErgPaA+845x6tY1+NIGJl+UyYej1s/EbtP0Vk\nJ3EdQZjZBDNba2ZLamwfYGafm9kKMxsJ4Jxb5pwbBvwKOKqu42oEEQNb1sOrv4aJ50Hz1vB/M+Dk\nvygcRGSHeJ9iehIYUH2DmaUDY4BTgF7AIDPrFf7cGcDrwPQ419W0LZvqLcm96AU45ndq/ykiEcU1\nIJxzc4H1NTb3BVY45wqdc2XAJODM8P5TnHOnAINrO6aZDTWzfDPLX7duXbxKT02b1sGLl8ILF0Pb\nvWDoLK/TW8ZuflcmIknIjzmIfYHV1R6vAQ43s/7AOcBu1DGCcM6NBcYC5OTkuPiVmUKcg8Uvwxu/\nD7f//DMcdZ3af4pInZJmkto5NxuY7XMZqeeHb2DajfDFG7BvTrj958/8rkpEAsCPgPga6Fztcafw\nNokl52DBM/DWLVBZBr/8C+T+Wh3eRCRqfgTEfOAAM+uKFwwXAhf5UEfqKimCqddC4WzocjSc8RBk\n7e93VSISMHENCDN7HugPtDezNcBtzrnxZjYCeAtIByY455bGs44mIxSC+eO89p9mcNoD0Odytf8U\nkQaJa0A45wbVsn06upQ1tr5fAVNGwFcfeo18Bv4D9uhc//NERGqRNJPU0kCVFZA3Bmb91btc9axH\nvdVX1chHRBpJARFk//0UJg+Hbz6GHqfB6Q9A2739rkpEUoQCIogqyrz2n3PvVftPEYkbBUTQfLMg\n3P5zCfz8PDjlHrX/FJG4UEAERfk2mHM3vP8QtO4AFz4PPzvV76pEJIUpIILgq4+8uYbi5fCLi72b\n3lru4XdVIpLiFBDJrGwzvHMnfPRYuP3nK9D9BL+rEpEmQgGRrFbOZfsrw9lt41es/dkldDz7r+rw\nJiIJpVtsk822Uph6HTw1kG8l7kBJAAAKWElEQVR/KOOCsj9zzNJTKfiuwu/KRKSJCeQIImVbjn4x\nA6ZdDxu/ZUGnIQz+8ni2uN1IdyHyCovp06Wd3xWKSBMSyBFEyrUc3bIeXrkanjsfdtsdrnib0El3\nEspoSbpBs4w0crtl+V2liDQxgRxBpJRPp8Drv4Wt6+GY38MxN0HGbvQBJl6ZS15hMbndsjR6EJGE\nU0D4ZdNamP47+PQ12PtguPjf8JODd9qlT5d2CgYR8Y0CItGcg8Uvhdt/boYTboUjr1X7TxFJOgqI\nRPrhG5h2A3zxJnQ6zGv/2aGH31WJiESkgEgE5+Djp2HGLVBZDif/DQ6/Wu0/RSSpKSDirWQVTLkW\nVs6B7H5e+889u/ldlYhIvRQQ8RIKwfzH4e1RYGlq/ykigRPIgEj6G+Wqt//sfiKcPlrtP0UkcAL5\n52zS3ihXWQHvjYZHj4S1n3rtPwe/rHAQkUAK5AgiKf13abj95wL42elw2v1q/ykigaaAaKyd2n9m\nwnlPwIFnq/2niASeAqIxvlkArw2HtUvhoPNhwD3QWmsmiUhqUEA0RPk2mP03+OBht
f8UkZQVyIDw\n9Sqmr/Jg8ohw+88h8Mu71P5TRFKSrmKKVtlmeGMkTBgAFdthyKtw5iMKBxFJWYEcQSRc4RyY8hvY\nUASHXQUn3qb2nyKS8hQQddlWCjNvhYInveUxLpsO2Uf5XZWISEIoIGrzxVsw9XrY9J23HPdxf4Rm\nLf2uSkQkYQIZEHGdpN6yHt68GRZNgg494YJnoVOf2L+OiEiS0yR1dZ9OhjGHw5KXvfafV89ROIhI\nkxXIEURjFRSV7NzredNamH6TFxB7HwxDXoG9D/K7TBERXzW5gCgoKmHwuDzKKkI0zzDeOO47us6/\nQ+0/RURqaHIBkVdYTFlFiJCDjIqttP/obuhwgHdPg9p/iojsEMiAaMwkdW63LJpnpFFeEaIioxWr\nBr7MQb0OVPtPEZEazDnndw0NlpOT4/Lz83f5eT+agxARaULMrMA5l1PffoEcQTRWny7tFAwiIvUI\n5GWuZjbQzMaWlpb6XYqISMoKZEAkbctREZEUEsiAEBGR+AtkQOgUk4hI/AUyIHSKSUQk/gIZECIi\nEn+BvMy16kY54AczW17tU5nArpx32pX9o923vv3q+nx74Pso60lWu/ozSMbXbOzx9D70X6Lfh/F4\nvcYcs77ndonqKM65lPkAxsZr/2j3rW+/uj4P5Pv9PUz0zyAZX7Oxx9P70P+PRL8P4/F6jTlmrOpJ\ntVNMU+O4f7T71rffrtYYNH58fbF+zcYeT+9D/yX664vH6zXmmDGpJ9BLbaQaM8t3Udz+LhJPeh9K\nlVQbQQTdWL8LEEHvQwnTCEJERCLSCEJERCJSQIiISEQKCBERiUgBISIiEQXyTuqmwszSgDuB3fFu\nXnrK55KkCTKzfsBgvN8XvZxzR/pckiSIRhAJZmYTzGytmS2psX2AmX1uZivMbGR485lAJ6AcWJPo\nWiV17cr70Dn3rnNuGDAN0B8pTYgCIvGeBAZU32Bm6cAY4BSgFzDIzHoBPYAPnHM3Ar9OcJ2S2p4k\n+vdhlYuA5xJVoPhPAZFgzrm5wPoam/sCK5xzhc65MmAS3uhhDVAS3qcycVVKqtvF9yFmth9Q6pzb\nmNhKxU8KiOSwL7C62uM14W2vACeb2cPAXD8KkyaltvchwBXAEwmvSHylSeok5pzbgvc/poivnHO3\n+V2DJJ5GEMnha6BztcedwttEEknvQ9mJAiI5zAcOMLOuZtYcuBCY4nNN0vTofSg7UUAkmJk9D3wI\n9DCzNWZ2hXOuAhgBvAUsA150zi31s05JbXofSjS0mquIiESkEYSIiESkgBARkYgUECIiEpECQkRE\nIlJAiIhIRAoIERGJSAEhIiIRKSBERCQiBYSIJA0z62Zm483sZb9rEQWEAGZWaWYLzWyJmb1kZq3M\nLLtmt7GmxMz2MLNrGrKfmX0Qg9fPNrOtZrYwin1rfb1Y1NJYZrYp2n3DvSh2rGBsZi3D780yM2sf\nnwqlNgoIAdjqnOvtnPs5UAYM87ugJLAHUG9ARNovhj2bv3TO9a5vp0ivZ560ZO0fbWYHmdm0Gh8d\na+7nnNsa/h5840OZTZ4CQmp6F+ge/ne6mT1uZkvNbIaZtQQws9fMrCC8fWjVE83sz+F+xu+Z2fNm\ndlN4+8VmNi/8l+C/wq0tf8TMLjGzRWb2iZk9E952Y3hks8TMrg9vyzazZZFqq+M4P6qhnuPcDewf\n3v/eOr7uSPttamjttTGzu81seLXHt1f7/la9Xnb4+/80sAToXFctddVjZq3N7PXw93CJmV1Qo55s\nM/vMzJ40sy/MbKKZnWhm75vZcjPrW9fX45xb7Jw7vcbH2vq+D5Jgzjl9NPEPYFP4vxnAZLz+19lA\nBdA7/LkXgYvD/94z/N+WeL+IsoDDgIVAC6AtsBy4CegJTAWahZ/zT+CSCDUcCHwBtK96DaAPsBho\nDbQBlgK/qKe2SMeJWEM9x8kGltSoMdLXHWm/T
Q2pvcYxdjpu+Llzqj3+FOhc4+eXDYSA3Ghqqfac\nH9UDnAs8Xu04mRHqqwAOwvtDswCYABhem9LXar6/onwvZgGPAV8CN1fbvqrqZ6qPxH2oo5wAtKx2\nrvtdYDywD7DSOVe1vQDvlwLAtWZ2dvjfnYEDgFxgsnNuG7DNzKaGP38C3i+o+WYG3i/XSH8pHg+8\n5Jz7HsA5t97MhgCvOuc2A5jZK0A/vB4FtdUW6TgX1VLD3DqOE0mkr/u7WvY9ugG118o5t8DMOprZ\nPkAHoMQ5tzrCrkXOubwoa1kQ/nykel4E7jeze4Bpzrl3I7zWSufc4vAxlwLvOOecmS2O5muq5ess\nRqc4k4YCQiA8B1F9Q/gX6fZqmyrxgqQ/cCJwhHNui5nNxhs11MaAp5xzN8e04gi17WoNZpYd7XEa\n8HXXZVdqr+4l4Dxgb+CFWvbZHIt6nHNfmNmhwKnAXWb2jnPujjqeF6r2OIR+t6QEzUHIrsrE++t1\ni5n9DG/kAPA+MNDMWphZG+D08PZ3gPOqJiDNbE8z6xLhuP8BzjezrKr98EYzZ5l3VVVr4OzwtrpE\nOk60NVS3Ee9UWX1fd839qjSk9vq8gNfl7Ty8sIjWLtcSHqlscc49C9wLHNqwkiXIlPKyq94EhpnZ\nMuBzIA/AOTffzKYAi4D/4p3zLnXOfWpmtwAzzCwNKAeGA0XVD+qcW2pmfwHmmFklsMA5d5mZPQnM\nC+82LnyqJbu24uo4TqQaajs9hHOuODzhugR4A7illq97p/2cc78Lb/94V2uvT/hrawt87Zz7dhee\nF7GWep52EHCvmYXwvl+/bkjNYa3MbE21xw845x5oxPEkQdRRTmLGzNo45zaZWSu88/tDnXMf+11X\nEIWDZJrzLj1u8sxsFZBTNbckiaFTTBJLY8OT3R8D/1Y4NEolkGlR3CiXysKX3C4EmuHNbUgCaQQh\nIiIRaQQhIiIRKSBERCQiBYSIiESkgBARkYgUECIiEpECQkREIlJAiIhIRAoIERGJ6P8Bg4UWJ4Bs\n5hIAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Bin data to 10 depth bins with equal data\n", - "data['Depth_bin_equal'], bins = pd.qcut(data['DEPTH'], 10, retbins=True)\n", - "\n", - "# Group data by depth bins\n", - "binned_data = data.groupby('Depth_bin_equal')\n", - "\n", - "# Calculate arithmetic and geometric means for each depth bin\n", - "depth_bins_mean = binned_data['VIRUS'].aggregate(np.mean)\n", - "\n", - "def func(x,a,b):\n", - " return a*x+b\n", - "\n", - "popt, pcov = curve_fit(func, np.log(bins[1:]), np.log(depth_bins_mean))\n", - "print(popt)\n", - "# Plot mean virion concentrations \n", - "plt.loglog(depth_bins_mean,bins[1:],'.',label='Data')\n", - "plt.loglog(np.exp(func(np.log(bins[1:]),*popt)),bins[1:],label='Exponential fit')\n", - "plt.gca().invert_yaxis()\n", - "plt.xlabel(r'Phage concentation [virions mL$^{-1}$]')\n", - "plt.ylabel(r'Depth [m]')\n", - "plt.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the total number of virions by multiplying the average concentration of virions in each depth bin by the total volume of each depth bin:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of marine phages is 2e+30\n" - ] - } - ], - "source": [ - "# Ocean surface area in m^2\n", - "ocean_surface_area = 3.6e14\n", - "\n", - "# m^3 in mL\n", - "ml_m3_conv = 1e6\n", - "\n", - "# Multiply mean concentrations by the total volume at each depth bin and sum over all bins\n", - "total_phage_mean = (depth_bins_mean*np.diff(bins)*ml_m3_conv*ocean_surface_area).sum()\n", - "\n", - "print('Our best estimate for the total number of marine phages is %.0e' %total_phage_mean)\n", - "\n", - "old_results = pd.read_excel('../phage_num_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[0] = pd.Series({\n", - " 
'Parameter': 'Total number of marine phages',\n", - " 'Value': total_phage_mean,\n", - " 'Units': 'Number of individuals',\n", - " 'Uncertainty': np.nan\n", - " })\n", - "\n", - "result.to_excel('../phage_num_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/phage_num/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subusrface_phage_num-checkpoint.ipynb b/viruses/phage_num/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subusrface_phage_num-checkpoint.ipynb deleted file mode 100644 index b9e3ab4..0000000 --- a/viruses/phage_num/marine_deep_subsurface/.ipynb_checkpoints/marine_deep_subusrface_phage_num-checkpoint.ipynb +++ /dev/null @@ -1,161 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper')\n", - "from CI_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the number of phages in subseafloor sediments\n", - "To estimate the total number of phages in subseafloor sediments, we rely on two recent papers which measured the ratio between the number of prokaryotes in subseafloor sediments and the number of phage like particles ([Engelhardt et al.](http://dx.doi.org/10.1038/ismej.2013.245) and [Middelboe et 
al.](https://doi.org/10.3354/ame01485). We extracted the data from figure 3 of Engelhardt et al. and from figure 2 of Middelboe et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEgCAYAAACjEpTiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd4VHX2+PH3IYUmAlJWlC4onUCA\ngICrIoiKumBF3BUVUMCC7FeX3fUnrq5tBVQQVsECKoqIiILo2kBQQCB0AkgNRlEhJBh6yvn9cWfi\nEFLmTmYyk+S8nidPcu/cci4J85lPPaKqGGOMMf6qEO4AjDHGlC5WcBhjjHHFCg5jjDGuWMFhjDHG\nFSs4jDHGuGIFhzHGGFes4DDGGOOKFRzGGGNcifbnIBE5y4/DclQ1vZjxGGOMiXDiz8xxETkO/ARI\nIYdFqWrDYAVmjDEmMvlV4wC2qGqHwg4QkbVBiMcYY0yE87fGUUlVjxf3GGOMMaWfXwWHMcYY41Xk\nqCoRuVtE3hCRm0VkgYgML4nAjDHGRCZ/huNeCtwG/FlV+wHtQxuSMcaYSOZPwZGqTnvWS57tEyGM\nxxhjTIQrso9DRFqo6laf7T+q6tchj8wYY0xE8rtzXERqq+qBEMdjjDEmwrlZcuS1kEVhjDGm1HBT\ncBQ2a9wYY0w54abgsAkfxhhjrMZhjDHGHTed421UdVOI4wmK2rVra+PGjcMdhjHGlCqJiYkHVLVO\nUcf5u8ghqrpJRDoB/wQaec4V5yVtF3CkIdC4cWNWr14d7jCMMaZUEZFkf47zu+DwmAk8CGwEctwG\nZYwxpvRzW3DsV9WPQhKJMcaYUsFtwTFWRF4BvsRn6RFVnRvUqIwxxkQstwXH7UALIIbfm6oUiPiC\nIzMzk5SUFI4ft5QhBipVqkT9+vWJiYkJdyjGlDpuC47OqnpBSCIJAhG5Gri6WbNmp72WkpJCtWrV\naNy4MSI2srg8U1VSU1NJSUmhSZMm4Q7HmFLHzTwOgGUi0iokkQSBqs5X1WHVq1c/7bXjx49Tq1Yt\nKzQMIkKtWrWs9mlMgNzWOLoC60RkN04fR0QOxy2IFRrGy/4WTFmQmJzGil2pdG1ai/hGNUvsvm5r\nHH2B5kAf4Gqgn+e78UNUVBRxcXG5X08//XTQ79G4cWMOHDjAnj17aNOmTUiunZ6ezpQpU/w659FH\nH2XcuHGFHjNv3jySkpKCEaIx5UZichqDXlnB+M+2MeiVFSQmp5XYvV3VOFTVr8khJn+VK1dm3bp1\n4Q7DL1lZWURH5//n4S04RowYEZR7zZs3j379+tGqVcS2ghoTcVbsSuVkVg45CplZOazYlVpitQ5X\nNQ4RmSEiNXy2a4pImV1uPTE5jcmLdoS8JG/cuDFjx46lY8eOtG3blq1bnbxZ+/fvp3fv3rRu3Zoh\nQ4bQqFEjDhxwUqK89dZbdOnShbi4OO666y6ys7NPu252djZDhw6ldevW9OnTh2PHjgEwbdo0Onfu\nTPv27bnuuus4evQoAIMHD+buu+8mISGBhx56iNTUVPr06ZN7f
+/yNGPGjGHnzp3ExcXx4IMPnnbf\nJ554gvPPP58ePXqwbdu23P353XfZsmV89NFHPPjgg8TFxbFz584C4zPG/K5r01rERlcgSiAmugJd\nm9YquZurqt9fwFp/9oX7Kz4+XvNKSko6bV9hVu85qBc8vFCbjFmgFzy8UFfvOejq/PxUqFBB27dv\nn/s1a9YsVVVt1KiRTpw4UVVVJ0+erHfeeaeqqo4cOVKffPJJVVX95JNPFND9+/drUlKS9uvXT0+e\nPKmqqsOHD9cZM2bkXmv//v26e/dujYqK0rVr16qq6g033KBvvvmmqqoeOHAgN6Z//vOfufe+7bbb\n9KqrrtKsrCxVVb333nv1X//6l6qqLliwIPf+u3fv1tatW+f/77Z6tbZp00aPHDmihw4d0vPOO0+f\nffbZIu/73nvv5b5W0HHB5vZvwphIs3rPQX3xq+26es/BU34OFLBa/XiPdds5XkFEaqpqGoCInIX7\nDnZXPKO4HgVSgS9VdU4o7+cVimpgYU1VAwYMACA+Pp65c51pMd988w0ffPABAH379qVmTef+X375\nJYmJiXTu3BmAY8eOUbdu3dOu2aRJE+Li4nKvu2fPHgA2bdrEww8/THp6OocPH+byyy/PPeeGG24g\nKioKgCVLluTGctVVV+XevzBLly6lf//+VKlSBYBrrrkm97XC7uvL3+OMKe/iG9UkvlHN3P6Ok1k5\nxEZXYOaQriFttnL7pj8eWC4i73m2bwCecHtTT/NWP+BXVW3js78v8AIQBbyiqk8DVwCTVHWpiHwE\nlEjB4a0GZmbllEg1sGLFioDTgZ6VlVXosarKbbfdxlNPPeXXNb3X9TZVDR48mHnz5tG+fXumT5/O\n4sWLc4+rWrVqgE9QtMLuG8hxxhinSf35L74v0f4OV30cqvoGMAD4xfM1QFXfDOC+03FGaOUSkShg\nMk5B0QoY6KltvAncLCLPAiXWiBffqCYzh3RldJ8LQl56F6R79+7Mnj0bgM8++4y0NKevpVevXsyZ\nM4dff/0VgIMHD5Kc7P+4hYyMDOrVq0dmZiYzZ84s8LiLLrqIt99+G4BPPvkk9/7VqlUjIyOjwHPm\nzZvHsWPHyMjIYP78+UXeN+/1/I3PmPLOW9P4ZvsBchQqlFB/h+tmJlVNAoo1dlJVl4hI4zy7uwA7\nVHUXgIjMAq5V1aeAkZ6CpUSXNvFWA4Pl2LFjuU1H4DQ/FTYkd+zYsQwcOJA333yTbt26cfbZZ1Ot\nWjVq167Nv//9b/r06UNOTg4xMTFMnjyZRo0a+RXH448/TkJCAnXq1CEhIaHAQsB7/9atW3PhhRfS\nsGFDAGrVqkX37t1p06YNV1xxBc8++2zuOR07duSmm26iffv21K1bN7c5rbD73nzzzQwdOpSJEycy\nZ84cv+MzprzzNqkrTi2ge7PajLrs/JB/0PU7kVPQb+wUHAu8TVUicj3QV1WHeLb/DCQA44B/AFWB\n/6rqNwVcbxgwDKBhw4bxeT+Bb9myhZYtW4bkWULlxIkTREVFER0dzfLlyxk+fHipGc5bGpTGvwlj\nfHlrHN4m9eK2johIoqp2Kuq4kHZsB4Oq7sFTIBRx3FRgKkCnTp3KRH70vXv3cuONN5KTk0NsbCzT\npk0Ld0jGmAjibVIv6dnjrgoOEbkB+FRVM0TkYaAj8G9VXROEWH4EGvhs1/fscxNfgYsclkbNmzdn\n7dq14Q7DGBPBgt2k7g+3S478P0+h0QO4DHgV+G+QYlkFNBeRJiISC9wMuEoapYUscmiMMSY43BYc\n3unJVwFTVfVjINbtTUXkHWA5cIGIpIjInaqaBdwD/A/YAsxW1c0ur3u1iEw9dOiQ25CMMcb4yW0f\nx48i8jLOIofPiEhF3Bc+qOrAAvYvBBa6vZ7P+fOB+Z06dRoa6DWMMcYUzu2b/o04NYI+qpoOnAWc\nvlhRmFiNwxgTyUpq/btQc
1twHMMZFuutMcQA6UGNqBgivY9DRLj11ltzt7OysqhTpw79+vUD4KOP\nPipwXscZZ5yR7/7BgwczZ07hk+l9j/EujR5KF198MatXry70mOeff94WLzTlSjiXQQ82twXHFJxk\nTt6CIwNntrfxQ9WqVdm0aVPu0h+ff/455557bu7r11xzDWPGjAlXeCXKCg5THvjWMPJb/y5Y1y5p\nbguOBFUdCRwH8Cx26LpzPFSC3lT1w0pYOt75HiRXXnklH3/8MQDvvPMOAwf+3t0zffp07rnnHgB2\n795Nt27daNu2LQ8//HDuMarKPffcwwUXXMBll12Wu+wIQGJiIn/84x+Jj4/n8ssvZ9++ffnG8J//\n/Ie2bdvSpUsXduzYAcCePXu49NJLadeuHb169WLv3r2As7T7ddddR+fOnencuTPffvvtadc7duwY\nN998My1btqR///65BSPA8OHD6dSpE61bt2bs2LEATJw4kZ9++olLLrmESy65pMDjjCnN8tYwalaJ\nDdoy6PnVXo5nHefDHR9SEpO63RYcmZ6lPxRAROoAOUGPKkBBbar6YSXMuAa+esL5HqTC4+abb2bW\nrFkcP36cDRs2kJCQkO9x999/P8OHD2fjxo3Uq1cvd/8HH3zAtm3bSEpK4o033mDZsmUAZGZmcu+9\n9zJnzhwSExO54447+Oc//5nvtatXr87GjRu55557GDVqFAD33nsvt912Gxs2bGDQoEHcd999uXE8\n8MADrFq1ivfff58hQ4acdr3//ve/VKlShS1btvCvf/2LxMTE3NeeeOIJVq9ezYYNG/j666/ZsGED\n9913H+eccw6LFi1i0aJFBR5nTGmWt4aRdvRk0Na/O/XaWbyx8X2unnc1D3/7MGt/Df3cL7ejqiYC\nHwB1ReQJ4Hrg4cJPKaX2LIXsk6DZzvc9S6FBl2Jftl27duzZs4d33nmHK6+8ssDjvv32W95//30A\n/vznP/O3v/0NcJY6HzhwIFFRUZxzzjlceumlAGzbto1NmzbRu3dvwEni5Fvg+PLWcgYOHMgDDzwA\nwPLly3OXUP/zn//MQw89BMAXX3xxSlrX3377jcOHD5/S57JkyZLcgqZdu3a0a/d7CvrZs2czdepU\nsrKy2LdvH0lJSae87vY4Y0qL/FbYDtZkPe+1s2O/p2LdT/gq9Uda1WrFkz2epOMfOgYh+sK5TR07\nU0QSgV6AAH9S1S0hiSwAQZ053rgnRMU6hUZUrLMdJNdccw3/93//x+LFi0lNLbidU0T8vqaq0rp1\na5YvX17ksb7XLeoeOTk5rFixgkqVKvkdi9fu3bsZN24cq1atombNmgwePJjjx48HfJwxkcbbd5Hf\nch+hXA6k+pmpxHeZy4aDK6hV8Q882OVprmhyBRXE9eyIgAQyB2Orqk5W1RcjqdCAIDdVNegCt30E\nl/7T+R6E2obXHXfcwdixY2nbtm2Bx3Tv3p1Zs2YBnLK0+EUXXcS7775LdnY2+/bty23queCCC9i/\nf39uwZGZmcnmzfnPn3z33Xdzv3fr1g2ACy+88JT79ezpFJR9+vRh0qRJuefmt8ii7/LrmzZtym1m\n+u2336hatSrVq1fnl19+4ZNPPsk9x3cp9cKOMyZS+TNKKr5RTUZe0ixohcb+o/t5dNmjXDf/OnZn\nbGZ0/Gj+d8PHXNX0qhIrNMDPGoeIZODp18j7EqCqemZQo4oUDboEtcDwql+/fm7TTkFeeOEFbrnl\nFp555hmuvfba3P39+/fnq6++olWrVjRs2DD3jT82NpY5c+Zw3333cejQIbKyshg1ahStW7c+7dpp\naWm0a9eOihUr8s477wAwadIkbr/9dp599lnq1KnD66+/Djgd2SNHjqRdu3ZkZWVx0UUX8dJLL51y\nveHDh3P77bfTsmVLWrZsSXx8PADt27enQ4cOtGjRggYNGtC9e/fcc4YNG0bfvn1z+zoKOs6
YSBWK\nLKEFOZp5lBmbZ/D65tfJzM5kYIuB3N3ubmpUqhGS+xUlbMuqh4JPU9XQ7du3n/KaLaFt8rK/CVMc\nwV7SPD9ZOVnM2zGPyesmc+DYAXo36s2ojqNoeGbDoN7Hq8wsq+6GLTlijCkphfVhFNb34Q9VZemP\nS3ku8Tl2pO8grk4cz138HHF144o+uQS4XVZ9BnC/Z7kRRKQmMF5V7whFcMYYE8nyGyXlrYmczMoh\nNoCaSFJqEuNXj2flzytpWK0hz138HL0a9nI1WCbU3NY42nkLDXAmAIpIhyDHZIwxpVagfR/7Du9j\n4tqJLNi1gBoVazCmyxhuPP9GYqJiSiBqd9wWHBVEpKZnxjgiclYA1zDGmFKvoOao/OZvFHb8byd/\n45WNrzAzaSYiwp1t7uTOtndSLbZaiT+Tv9y+6Y8HVojIbJwRVdcDTwY9qgCVtQyAxpjIVFhzVH59\nH/kd367+Gcz+fjYvrX+J9BPpXN30au7tcC/1zsh/4m4kcTsB8A0RWQ1c4tnVP5LmcljnuDGmJBTV\nHJW37+PU47N5e9MCHkmcxd6MvSScncDoTqNpVatVOB4lIH7NGBGRbzzfM4AVwNOer5Ui8lvowitb\noqKiiIuLy/0qaAn14vAum75nzx7atGkTkmunp6czZcoU1+c/+aT7yqk/S7Tv37+fhIQEOnTowNKl\nS7nyyitJT08POE5jiuJtjvJ3wULv8TGVk6nU6CU+O/AfYqNimdxrMtP6TCtVhQb4WeNQ1R6e75Hb\n6FYKVK5cOd+Z15EoKyuL6Oj8/zy8b8gjRoxwdc0nn3ySf/zjH8EI7xRffvklbdu25ZVXXgHInfW+\nZ8+egOI0pihulxOpUzODbt0Wsnr/YmrE1mJU/KNc2+xaoiuUzi5iV3PUReQZf/YZdxo3bszYsWPp\n2LEjbdu2ZevWrYDzSbp37960bt2aIUOG0KhRo9wkTG+99RZdunQhLi6Ou+66i+zs7NOum52dzdCh\nQ2ndujV9+vTJXe582rRpdO7cmfbt23Pdddfl5sUYPHgwd999NwkJCTz00EOkpqbSp0+f3Pt7J4uO\nGTOGnTt3EhcXx4MPnp4AMr/YxowZw7Fjx4iLi2PQoEGnnRPosurr1q3joYce4sMPPyQuLo5jx47l\n1oyKitOY4vBnOZG042k8vfJprv3wWjanfceI9iP43/ULue7860ptoQHuO8d7A3/Ls++KfPZFtGdW\nPsPWg1uDes0WZ7Xgb10K/2fwvnF6/f3vf+emm24CoHbt2qxZs4YpU6Ywbtw4XnnlFf71r39x6aWX\n8ve//51PP/2UV199FXBmPL/77rt8++23xMTEMGLECGbOnMlf/vKXU+63fft23nnnHaZNm8aNN97I\n+++/z6233sqAAQMYOtTpBnr44Yd59dVXuffeewFISUlh2bJlREVFcd9999GjRw8eeeQRPv7449z7\nP/3002zatCnf2lNBsT399NO8+OKLBda4nnjiCc466yyys7Pp1asXGzZs8Gt13Li4OB577DFWr17N\niy++eMprhcVpTCgdzzrOzC0zeXXjqxzJOsKA5gMY0X4EdarUCXdoQeHvWlXDgRFAUxHxTZRQDTg9\ns4/JV2FNVQMGDAAgPj4+d3nzb775hg8++ACAvn37UrOm88nmyy+/JDExkc6dOwNOgVS3bt3Trtmk\nSZPcgio+Pp49e/YAzkKEDz/8MOnp6Rw+fJjLL78895wbbriBqKgowFku3RvLVVddlXv/wvgbW162\nrLopC3I0h493fczEtRP5+cjP/LH+H3kg/gHOq3FeuEMLKn9rHG8DnwBPAb65TTNU9WDQowqQv8Nx\ni6oZhEPFihUBpwM9Kyur0GNVldtuu42nnnrKr2t6r+ttqho8eDDz5s2jffv2TJ8+ncWLF+ceV7Vq\n1QCfwF1svmxZdVMWfLfvO8avHs+Wg1toeVZLnuj+BF3
qBX+R1EjgVx+Hqh4CkoGHVDXZ5ytiCg0I\n8rLqEaB79+7Mnj0bgM8++4y0NGfZ5l69ejFnzpzctLEHDx4kOTnZ7+tmZGRQr149MjMzT1myPS/f\n5dI/+eST3Pv7LomeV2GxxcTEkJmZedo5oVpWvbA4jQmWnek7GfnlSIZ8NoT0E+k81fMpZvWbVWYL\nDXDROa5Oz+jCEMZS5nn7OLxfY8aMKfT4sWPH8tlnn9GmTRvee+89zj77bKpVq0arVq3497//TZ8+\nfWjXrh29e/cuML94fh5//HESEhLo3r07LVq0KPT+S5YsoXXr1sydO5eGDZ0VOWvVqkX37t1p06bN\naZ3OhcU2bNgw2rVrd1rnuO/y67fcckuBy6oPGTKkyKG5vgqL05ji8ubGGPDRANb8soYH4h9gfv/5\n9Gvar0RzY4SDq2XVPYscvqiqq0IXUvF16tRJ877BlMYltE+cOEFUVBTR0dEsX76c4cOHW0dvEJXG\nvwkTfnlzY9zc4maGtRtGzUqhycVRkkK1rHoCMEhEkoEj/J7IyXoxQ2Dv3r3ceOON5OTkEBsby7Rp\n08IdkjHlVknnxohkbguOy4s+xARL8+bNWbt2bbjDMKZci/TcGOHgdq2qZE8OjuZAJZ+X/O+ZNcaY\nUiIpNYkJqyfw3c/f0bBaQyZcPIHLGl4WUbkxwsFtIqchwP1AfWAd0BVYDlwa/NBy79kQmAgcBL5X\n1YAXeFLVcv8LN46ylDLZBF9pyo0RDm67/u8HOgPJqnoJ0AFIL/yU04nIayLyq4hsyrO/r4hsE5Ed\nIuIdctQWmOPJMhhw0qhKlSqRmppqbxgGVSU1NZVKlSoVfbApdRKT05i8aAeJyWmuz804mcFzic/R\n74N+fJ78OXe2uZOFAxYyqOUgKzR8uO3jOK6qx0UEEamoqltF5IIA7jsdeBF4w7tDRKKAyTjLmqQA\nq0TkI5zVeOeIyB3AmwHcC4D69euTkpLC/v37A72EKUMqVapE/fr1wx2GCTJ/0rbml1ApMzuz+Lkx\nflgJe5ZC457QoOzO4QD3BUeKiNQA5gGfi0gaAfRvqOoSEWmcZ3cXYIeq7gIQkVnAtUAmMNZzzhzg\ndbf3A2fyWZMmTQI51RhTShSVJyNvwfLWnQmkSSLPJz7v5Maol8Bf4/9Ky1ouh2n/sBJmXAPZJyEq\nFm77qEwXHm47x/t7fnxURBYB1YFPgxTLucAPPtspOMN/X/Lc7xZgT0Eni8gwYBiQO1HNGFO+FJS2\n1cu3YMmO3s2Y5a/xy8mtNKvRjMm9JtPz3J6B9YPuWeoUGprtfN+z1AoOLxGphLPYYQ9AgW9w30/i\niqpuwklRW9RxU4Gp4EwADGVMxpjIVFSejK5NaxFb+SBS8xOiz9zIcc7i0W5ByI3RuKdT0/DWOBr3\nLOaTRDa3/1JvABnAJM/2LTj9DjcEIZYfgQY+2/U9+/xmOceNMXnTtnqlHU/j819eplLjdxGiubrR\nHYy58C6qxFQp8pr59YucokEXp3mqnPRxuF1yJElVWxW1z89rNQYWqGobz3Y08D3QC6fAWAXcoqqb\n3V47vyVHjDHlkzc3xisbX+Fo1lEGNB/AyLiR1K5c26/z/elwLytCteTIGhHpqqorPDdJAFy/Q4vI\nO8DFQG0RScHp/H5VRO4B/gdEAa+5LTSsxmGM8fLmxpi0dhL7juwLODdGUR3u5ZHbgiMeWCYiez3b\nDYFtIrIRF2tWqerAAvYvpBgr8KrqfGB+p06dhgZ6DWNM6bdy30rGrR6Xmxvj8e6Pk1AvIaBrFdXh\nXh65LTj6hiSKILEahzHl2460HTy35jmWpCyhXtV6PNXzKa5scmWxljkvqsO9PHLVx1FaWB+HMeXL\n/qP7mbxuMh/s+ICq0VUZ0m4Ig1oOomJUxXyPL7Kzu5wKah+HiKxR1Y7FPcYYY4LpaOZRpm+ezvTN\n08nMzmRgi4Hc1e6
uQnNjlKfO7lDxt6mqpYhsKOR1wZkMGFbWVGVM+VCc3BjW2V18/hYcBecX/V12\ncQIJBuscNyZ8SqL5x5sbY8LqCew8tDOg3BjW2V18fhUcqmr5NowxBSqJ5p9g5cawzu7iK8Yc+8hj\nTVXGhEcom3/2Hd7HpLWTmL9rPpUqVGNQs/v4a9fBxVrmvKDZ5cY/ZargsKYqY8IjFM0/GSczeHXj\nq7yZ9KazKGHaxaTuv5jp31ehd/3D9sYfRm4XOawIXAc09j1XVR8LbljGmNIkmM0/3twYL69/mbQT\naVzd9GrOPH41U7ccdGo0OdahHW5uaxwfAoeAROBE8MMpHmuqMiZ8itv8o6p8sfeL33NjnJ3A6E6j\naVWrFVtXfUFszEcsy2rBpqgW1qEdZm4XOdzkXZQwktkEQGNKl3W/rmPc6nGs37+eZjWaMTp+ND3O\n7eF0fHuSJGn2CbIkhp1XvE2LzpfZJL4QCNUih8tEpK2qbgwwLmOMybX3t708v+Z5Pk/+nNqVa+ef\nG8OTJEk0hxiyaHF8PYnJ8X6N4rLCJTTcFhw9gMEishunqUpwsbihMcaAkxvj5Q0v8+7Wd4mJimFE\n+xHc1vq2/HNjeJIkafZJsiSanZXa+zWKy2aIh47bguOKkERhjCnTvJ/8OzY6gy1HF/LKhlc4knWE\n/s36MzJuJHWq1Cn45AZd2Hr5Wyyc/x7fZrVg80eZPNIvtshRXDZDPHTc5hyP6ImA1jluTORxPvkv\nI7vKWiru/B8Sk85F9S/igY4P0Kymf/9XvzzcmBczryFHIUpzSDt6sshRXDZDPHRcz+MQkfaAN6Hu\nUlVdH9yQAmfzOIyJPO8nLaJC/anEVPqRnOPncs25o3myV39X18ivEChqFJfNEA8dt6Oq7geGAnM9\nu/oDU1V1UsFnlTwbVWVMePh2Rlc/MzU3N4Zm1uDk/supcLQDM4dcSHyF7a7zc1tHd+j5O6rKbcGx\nAeimqkc821WB5ZHWOW4FhzElz9sZnanpVKr7BVHVV3NGjJMbo2WVK1mTfNh506+wHWZcA9knISoW\nbvvI78LDhFaohuMKp66Cm+3ZZ4wpIwL9ZL90Rwpa/TMq11oCkkWbalcy5cq/UaNSDQC6NfUe6Ayv\nRbOd73uWWsFRyrgtOF4HvhORD3AKjD8BrwU9KmNMSBVUOAQyhNWbG2POr5OIrXOQrN/aoGlX8tfb\n+uUWGqfwDK/NrXE07nn6MSaiuR1VNUFEFgPdPbtuU9V1QY/KGBMyhRUOboawenNjPJf4HDvSd9C+\nTnv61X+c1NRzCq+tNOjiNE+57OMI5DmtTyQ0/E0d+42q9hCRDEDxaZ4SEVXVM0MVoBs2HNeYohVW\nOPg7hHVL6hbGrx4feG6MBl1C2jxlk/9Cy99ETj0836uFNpziseG4xhStsMKhqCGs+w7vY+LaiSzY\ntYAaFWswpssYbjz/RmKiYiLqE75N/gstt8uqP6OqfytqnzEmchVVOOQ3PyLjZAavbHyFt5LeAuCO\nNndwZ9s7OTPWaWyItE/4NvkvtNx2jvcG8hYSV+SzzxgTwfxdAj0zJ5PZ22bz0vqXSD+RztVNr+be\nDvdS74x6pxwXjk/4hdVwbPJfaPnbxzEcGAE09czl8KoGLAtFYMaY8DktN0a9BP4a/1da1mqZ7/El\n/QnfnxqOpYcNHX9rHG8DnwBPAWN89meo6sGgR2WMCZv1+9czbtU41u1fR7MazZjSa8rvuTEKUNKf\n8K0PI7z87Rw/hJP5b6CI1ARlgW4ZAAAgAElEQVSaA5UARARVXRK6EI0xJcGv3BiFKMlP+NaHEV5u\nO8eHAPcD9YF1QFdgOXBp8EMzxpSEtONpTN0wlVnbZhFTIYbh7YczuPXg/HNjuBDKUVbWhxFebjvH\n7wc6AytU9RIRaQE8GfywficiPYFBOLG2UtULQ3k/Y8qLE9knmLllprvcGH4qiVFW1
ocRPm4LjuOq\nelxEEJGKqrpVRC5we1MReQ3oB/zqm8NcRPoCLwBRwCuq+rSqLgWWisifgFVu72WMOVWO5vDxro+Z\ntHYS+47sc50bwx9B74P4YWXIZ5ob/7ktOFJEpAYwD/hcRNKAQJI7TQdeBN7w7hCRKGAyzpDfFGCV\niHykqkmeQ24B7gzgXsYYj5X7VjJu9Ti2HNxCy7Na8nj3x0molxD0+wSzD2Lrqi8475NbiNZMJKqi\nraYbAfwuOMQZUnGfqqYDj4rIIqA68Knbm6rqEhFpnGd3F2CHqu7y3G8WcC2QJCINgUOqmlFIfMOA\nYQANGzZ0G5IxZdrO9J1MSJzAkpQl1Ktaj6d6PsWVTa6kglQIyf2C1QeRmJzG1/Pf4z7JRCQHzT6J\n2Gq6Yed3waGqKiILgbae7a+DHMu5wA8+2ymA96PQnTgr8xYW31RgKjj5OIIcmzGl0oFjB5i8bjJz\nt8+lanRVRnUcxa2tbqViVMWQ3zsYfRArdqXybVYLhsdEg2ahFaKJsdV0w85tU9UaEemsqiXa16Cq\nY/05zhY5NMZxNPMoMzbP4PXNr5OZncnAFgO5q91d1KxU8Bt5Sa015eY+XZvWYlJUC/6c+Q8ujN7K\nlVfcQAurbYSd24IjARgkIsnAEZxVcjVIGQB/BBr4bNf37PObLXJoyrvsnGzm7ZjH5HWT2X9sP70b\n9WZUx1E0PLPw5lu/RkH9sBLWvw0ItB8YUHOR29FWvzd5Nadr08G0sFFUEcFtwXF5SKJwrAKai0gT\nnALjZpwOcb9ZjcOUV/nlxphw8QTi6sb5dX6Ro6B+WAnTr3KSLwGsnQmDF7guPHzvcyIzh7lrUoqs\nddiw28jjtmdshKom+37hrGHlioi8gzNx8AIRSRGRO1U1C7gH+B+wBZitqpvdXFdV56vqsOrVq7sN\nyZh8JSanMXnRDhKT0yI2hqTUJIZ+NpSRX47k18NHGNn6Md684k2/Cw34fRRUlHDaKKjE5DSWfzUP\nzc78/QRvyleXujatRXSU87ajwHurfwjrv60JTFhWx1XVgQXsXwgsdBlTLqtxmGCKhKXCC4vhs21b\neXnDZL4/upgzoquTvf9a9qV25vmtsXSuk+4q1oJGQXnv3zq7Dm/HRBErWU4WtwBTvsY3qsn18fV5\n57u9KJCdo7bOVCnkV41DRIaLyEacGsIGn6/dwIaizi8pVuMwwZRf8004YjiR6cRwMtOJIeNkBn/7\n6ilGLxvItsPfkJ12MT0rj+d4ajdyNDrgWOMb1WTkJc1OeRP3/hsk5pzPoMyH2VTvOuh0R0DNVF7X\ndaxPxZj8azemdLDVcY0pQCQspFezSizeseU5ZLHz+KdcNXc2aSfSyM7owPFf+1AhuybRjaqEJFbf\nf4NNUS042XcwFLN2YOtMlX6iWnamPPg0VQ3dvn17uMMxZUA40qH63nPFrlTGf7aVCmdsomLdT6kQ\nm0rC2Qlcce4Q/vluWm5BMXNIV4CQxBpJKWFNaIlIoqp2KvI4NwWHiFQErgMa41NbUdXHAogxZDp1\n6qSrV68OdxjGuJa3T+OOXhWYsfVFKlTeg574A6M7jeb2DlcgIsV/Q7f1n0we/hYcbjvHP8TJy5EI\nnAgkMGNMwbx9ChqditT9lDeTN1Kz+lm0P2M4t7W5gS5Nfl+5tljDVH9YCTOucUZHRcXa+k/GFbcF\nR31V7RuSSILARlWZ0q51/SgqnT2fCtVXgEbxp8a38/cL7y52bozT7FnqFBqa/fvQWis4jJ/cFhzL\nRKStqm4MSTTFZDPHTWnlmxsjusYRWpxxGXe1vZte5zcPzQ0b93RqGt4ah63/ZFxwW3D0AG4XkV04\nTVXBXHLEmHInR3NYuHshE9dMZN+RffQ8tyej40cHNTdGvhp0cZqnrI/DBMBtwXFFSKIIEmuqMpGk\nqM7rVT+vYtzqcSSlJoU0N0aBGnTxq8CwUVUmL
7ejqgQnjWtTVX3MkyfjbFVdGaoAA2Gjqky4FTbj\ne2f6Tp5LfI6vU76mXtV63NvhXq5qelXIcmMURyTMnjclJ1SjqqYAOcClwGNABvA+Th5yY4xHfrPO\nG9XNPiU3xgPxDzCo5aASyY0RqKCngDVlgutl1VW1o4isBVDVNBGJDUFcxpRqp8w6j8nilwrz6Tvn\nHTJzTtLr3P78vx73F5obI1JEwux5E3ncFhyZntzgCiAidXBqIMYYH/GNavLGHZ2Ysek91h9+lw/2\nHCTncFuO/3I5C3fUZVBziG8U7iiLZsuDmPy4LTgmAh8AdUXkCeB64P8FPaoAWee4iQS5uTE2/J4b\no0mFB5m5VchRQEpXk4/lwzB5uSo4VHWmiCQCvXCG4v5JVbeEJLIA2DwOE25bUrcwfvV4vvv5OxpU\na8CEiydwWcPLWLM3nfe+XeGqycdGM5lI5argEJEZwP2qOtmzXVNEXlPVO0ISnTGlxL7D+5i0dhIL\ndi2gesXqjOkyhhvPv5GYqBgg/yafwgoGG81kIpnbpqp2qpru3fB0jncIckzGlBoZJzN4deOrvJn0\nJgC3t7mdO9veyZmxZ552rG+TT1EFg41mMpHMbcFRQURqqmoagIicFcA1jCn1MrMzmf39bF5e/zJp\nJ9Lo17Qf93a4l3POOMev84sqGGw0k4lkbt/0xwPLReQ9z/YNwBPBDcmYwoWz7V9V+WLvFzyf+Dx7\nM/aScHYCozuNplWtVq6uU1TBYKOZTCRznchJRFrhTAAE+EpVk4IeVTHZzPGyK5xt/+t+Xcf41eNZ\nt38d51U/j9GdRtPz3J44Cyq4Z53fJtKEauY4noIi4goLsOG45UE42v73/raX59c8z+fJn1O7cm3G\ndhtLw5g/smr7Iapmp+d7f38KBRvmakort6OqIjoDoA3HLftKsu0//Xg6L294mVnbZhFTIYbh7Ycz\nuPVgtvx0otBaj42IMmWdZQA0pUpJtP375sY4knWE/s36MzJuJHWqONn3Vuz6qdBaj42IMmVdmcoA\naMqHUDXx+Jsbo6haj42IMmVdmcoAaEwgEpPTeD9pMRuOvkXy4e+LzI1RVK3HRkSZss5tPo4koBmw\nmwjOAGijqoy/Pkpay98XP0mFqlvRzBrc1fYeRna5IeJyY9gILFMSQjWqKqIzABrjrwPHDjB53WTe\n/34uUimGE7/2JTutOxXOaxORhYZ1tptI4naRw2QRaQ94M9svVdX1wQ/LmNBYtusnpm14nY0ZH5Kt\nmVx2bn8WLm1D9snKEdsfYZ3tJtK4HY57PzAUmOvZ9ZaITFXVSUGP7Pd7VgAeB84EVqvqjFDdy5Rd\n2TnZvPDd27yW9F8kOoOcw235z6VjuKplOwY1j+xmIOtsN5HGbVPVnThZAI8AiMgzwHLAVcEhIq8B\n/YBfVbWNz/6+wAtAFPCKqj4NXAvUB1KBFJfxmnIuNzdGopMbIyezIcdTbkWON2LPz1Wg5emLD0Za\nIWKd7SbSuC04BMj22c727HNrOvAi8EbuhZ3MgpOB3jgFxCoR+Qi4AFimqi+LyBzgywDuZ8qhLalb\nGJ84nu/2ObkxRrZ6jOc/qohkab6f3CO5L8FmmZtI4rbgeB34TkQ+8Gz/CXjN7U1VdYmINM6zuwuw\nQ1V3AYjILJzaxg/ASc8x2RRARIYBwwAaNmzoNiRTyuRXM/Dua35OFot/fSPf3Bid6xZco7C+BGP8\n47ZzfIKILAZ6eHbdrqprgxTLuTiFhFcKkIDTdDVJRHoCSwqJbSowFZzhuEGKyUSg/GoGAINeW4RW\nX0TMnm+IiaqQb26Mwj65W1+CMf4JNAPgGs92yDMAqupRnL4Vf+KzRQ7LAd+awcmsHL7d+TNJhz8j\nuuE7SPQRsg514NaWd/NA/IWurmt9Ccb4J5IyAP4INPDZru/Z5zdb5LB8qFkllhwFUCqcsZk5P08k\nLfMn9OR5H
P/hSqKzG9D72pYBXdv6EowpWiRlAFwFNBeRJjgFxs3ALW4uYDWOsie/voy0oyeJqrSX\n2D98THSVZKABk3tNpkpWG77bfTAktYVIHG1lTLiEJQOgiLwDXAzUFpEUYKyqvioi9wD/wxmO+5qq\nbnZzXatxlC359WXUrXmYxGMTqdJkEZpVjaxfr2Pc9SPpUt9ZubZT47NKJA4rPEx55rZz/A0RWc3v\nGQAHBJIBUFUHFrB/IbDQ7fW8rMZRtpwyyikng/GJ/2HLkU+JqRBDj1q3cOiXC+l3URO6NKlTcnHY\naCtjylYGQKtxlC1dm9YiNiYbrfYNMbUWsSnjJAOa96dn7UGMfHMHJ7MOs2bPZi44u1pI38httJUx\npwpW/4QxQZWjOezL/pazW00k9cQvtD+rG4/2eIhmNZsxedGOEq0B2GgrY05VpgoOa6oqG1buW8n4\nxPEkpSbR8qyWPPPHJ07JjRGOGkBJjrbytyPeOuxNuLjKx1FaWD6O0mln+k6eS3yOr1O+5uyqZ3Nf\nh/u4qulV+S5zXlbfNP3tiLcOexMKIcnHISICDAKaqupjItIQOFtVVwYYpzEcOHaAKeumMHf7XCpH\nV2ZUx1EMajmIStGVCjynrM638Lcj3jrsTTi5baqaAuTgjKp6DMgA3gc6BzmugFhTVelyNPMoM5Jm\n8Pqm18nMzuTmFjdzV7u7qFmp/L4B+tsMZx32Jpzcpo5do6odRWStqnbw7Fuvqu1DFmEArKkqspty\nsnOymbdjHpPXTWb/sf30btSbUR1H0fBMW5wS4O3v9vLJpn1c0aYetyQU/G/i73HG+CtUqWMzPcuf\nq+cmdXBqICaCRGr7t6ryzY/fMCFxAjvSd9C+TnsmXDyBuLpx4Q4tYiQmp/HYgs2czMph1Z6DBQ41\n9vc4Y0LBbXLlicAHwB9E5AngW+CpoEdliiW/9u9w25K6haGfD2XElyM4mX2SCRdP4M0r3rRCIw9/\nf3eR+Ds25YfbmeMzRSQR6OXZdY2qbg1+WIEpy30cbpqeIqn9++cjPzNxzcR8c2OY01kfhykN/Orj\nEJEMPM1T3l0+P6uqnkkEKWt9HIE0PYW7jyPjZAavbXqNN5PeRFUZ1GoQQ9oOOSU3hsmfzeMw4RLU\nPg5VrVb8kEygAhl6Ga7hqpk5mby37T1eWv8SaSfS6Ne0H/d2uJdzzjgn5PcuK2+k/v7uyuqQZBP5\nytTM8bKqNDRLqCpf7v2S59c8T/JvyXQ5uwujO42mda3WJXL/SB0QYExZFGgGwHTPdk1gfCgzALpR\nVvs4In2tpPX71zN+9XjW/rqW86qfx+Rek+l5bk+c+aIlwybEGVNyIikDYLGV5dVxI7FZYu9ve3l+\nzfN8nvw5tSvXZmy3sfyp2Z+IrlDyFdnSUCszpqyIpAyAppRIP57OyxteZta2WcRUiGF4++EMbj2Y\nKjFVwhZTpNfKjClLwpIB0IRHcTuPT2Sf4O0tbzNtwzSOZB2hf7P+jIgbQd0qdUMQrXuRWCszpiwK\nJANgInCJZ1dAGQBNyStO53GO5rBw90ImrZnET0d+oue5PRkdP5pmNctWX5Ixxj+BZADcDLjKBW7C\nL9DO47y5Mf7V/V90rde1BCI2xkQqvwoOEflGVXsUMBEw4iYAmtO57TzOmxvjyR5PFpgbwxhTvpSp\nRE4+w3GHbt++PdzhhFV+/Rn+9HEcOHaAyesmM3f7XKpEV2FI2yFF5sYwxpQN/s4cd7us+jOq+rei\n9oVbWVtyxK1A+jOOZh5lxuYZvL7ZyY1xU4ubyn1uDGPKm1Atq94byFtIXJHPPhNGbvoz8suNcX/H\n+2l0ZqMSjtoYU1r428cxHBgBnCciG3xeqoaztLqJIL79GVEVhJ/Sj5GYnHZK4ZE3N0a7Ou0sN4Yx\nxi/+ro5bHagFvALc7vNShqoeDFFsASvvTVXgNFe9vyaFOYkpZGWf2mS1JXU
LExInsGLfChpUa8Co\njqPo3ah3iS4RYoyJPMFeHfcQcEhE6qpqcrGjM6cIxaqu8Y1qsmJXKlnZvzdZffH9Nub+MJ/5O+dz\nZsUzLTeGMSYgbvs4EkWks6quCkk05VBichoDpy4nM1uJiRLeGdYtaIVHbpNVzlEq1vmad3/+FgEG\ntxlsuTGMMQFzW3AkAINEJBk4wu/zONoFPbJy4v01KZzMdpoLT2Yr769JCVrB0a7BGdze90fm7Hqd\n4zm/cXnjksuNYYwpu9wWHJeHJIpCiMjFwOM4s9Vnqeriko4hlPL2KgSjl0FV+WLvFzyf+Dx7M/aW\neG4MY0zZ5natqmRPDo7mgO+MMFf9HiLyGtAP+FVV2/js7wu8AEQBr6jq0zgz1Q977pfi5j6RzNuv\n0fqc6qfM6B7QsX6xrrt+/3rGrRrHuv3rwpYbwxhTtrlN5DQEuB+oD6wDugLLgUtd3nc68CLwhs+1\no4DJOHNFUoBVIvIRsFRVvxaRPwATgEEu7xVx8k7Qe/Tq1qQdPVmszvEffvuB59c8z2fJn1GrUi0e\n6fYI/Zv1Py03RllJr2qMCR+3TVX3A52BFap6iYi0AJ50e1NVXSIijfPs7gLsUNVdACIyC7jWZ/Xd\nNKCi23tForwT9NKOnmTkJUWvNJvfm76b3BiWXtUYEwxuC47jqnpcRBCRiqq6VUQuCFIs5wI/+Gyn\nAAkiMgCnb6UGTi0lXyIyDBgG0LBhwyCF5D83n+QDyVaX903/9ds7sPXoJ65yY1h6VWNMMLgtOFJE\npAYwD/hcRNJw2b/hlqrOBeb6cdxUEdkHXB0bGxsfypjycvtJPpBsdb+/6eeQU2Uto5c9y+Hs/a5y\nY1h6VWNMMLjtHO/v+fFREVkEVAc+DVIsPwINfLbre/b5LVw5xwP5JJ83W11RNZauTWtRsdpu5KwF\nRFX+kVqVm/PchU+6yo1h6VWNMcHgtnO8Es6aVT1wRjt9AwQrQcMqoLmINMEpMG4GbnEZn3dZ9SCF\n5J+aVWKpIAKqAX2SL6rGsjN9J6/veI7oc7/mjKjaDGz+T+7pcqPr3BjWMW6MCQa3TVVvABnAJM/2\nLcCbOLnH/SYi7wAXA7VFJAUYq6qvisg9wP9whuO+5sk26Ldw1DgSk9N4bMFmclSpUEF4pF9r12/K\nBdVY8ubGGNVxVMC5Maxj3BgTLG4Ljjaq2spne5GIuM45rqoDC9i/EFjo9npexa1xBPKJ3PdNX1DS\njp50fd+8fQ8dGlXhv+v/y+ubnNwYN19wM3e3v7tYuTGsY9wYEyxuC441ItJVVVcAiEgCEDHL0Ban\nxhHoJ/JgdDh7+x6W7fyVzCoreXj1rUHPjWEd48aYYHFbcMQDy0Rkr2e7IbBNRDZSytesCvQTeTA6\nnFWVDakrmPPzixzM3Ev7Ou0Zf/F4OtTtEMijFBjnI/1a88mmfVzRpp7VNowxAXNbcPQNSRRBUpym\nquJ8Is87QsqNLalbePSbZ0hKTyTnZC1yUm9l1B+H0qHuWQFdryDevpiTWTms2nOQC86uZoWHMSYg\nrteqClUgwVCcpqqSHqr685GfmbR2EvN3zqdihTM48cvVnDyYQJRE893ug3RqHNyCw/o4jDHB4rbG\nUaYVp+bgr4yTGby68VXe2vIWqsrgNoPpVP16hs3YTJSErv/B+jiMMcHiV+rY0sKnqWro9u3bwx3O\nKTJzMnlv23u8uHYKGZmH6PaHPjza46+5uTFKYo6FzeMwxhTG39SxZarg8IqknOOqyld7v+K5Nc+R\n/FsyOUfP48QvVxCd3dDmUhhjIoq/BYerqcfiuFVEHvFsNxSRLoEGWdat37+e2z69jVGLRxEt0VxR\n558c2zuErOP1c/sZjDGmtHG7XMgUoBvgncCXgZNDw/j44bcf+Oviv3LrwlvZ+9teHun2CHOumcNN\nrS8nNjqKKIGY6ArUrBLL5EU7SExOC3f
IxhjjN9c5x1W1o4isBVDVNBGJDUFcAQnXWlVeReXG8B25\nVbNKbO7wWH8nHFofhTEmErgtODI9mfoUQETqADlBjypA4Vod90T2Cd7e8vYpuTFGxo2kTpU6px3r\nHbk1edEOV8Njba0pY0ykcFtwTAQ+AOqKyBPA9cDDQY+qlMjRHD7Z/QkT10zkpyM/BS03Rn41C5uH\nYYyJFG4Ljq3AQ0AvQIA/AecFO6jSYNXPqxi3ehxJqUm0PKslj3V/jIR6CX6fX9CEw4JqFjYPwxgT\nKdwWHNOAv6jqZAARGQiMAhYEO7BAlEQfx670XTyX+ByLUxZzdtWzebLHk1zV9CrXuTEg/wmHBdUs\nLAmTMSZSuC04rgfmiMgtQE/gL0CfoEcVoFD2cRw4doAp66Ywd/tcKkdXLlZujMIUVrMoiZntxhhT\nFLdrVe0SkZtxco7vBfqo6rGQRBYhjmYe5Y2kN3ht02tkZmdy0wU3FTs3RmHyq1nYaCpjTCTxq+Dw\nLpvus+ssnCx934kIpXk59YJk52Tz4c4PeXHti0HPjVEU35qFjaYyxkQaf2sc/UIaRQRRVb758Rsm\nJE5gR/oO2tdpz4SLJxBXNy4s8dhoKmNMpPGr4Ij05dSDZUvqFsYnjue7fd/RoFoDxv9xPL0b9UZE\nwhaTjaYyxkQaf5uqvlHVHiKSwalNVoKT+e/MkERXwl5Y8wLbDm5jTJcx3Hj+jcRExYQ7JBtNZYyJ\nOGVqddziLqu+7/A+qsZW5czYMlEOGmOMKyFZHTfSqep8VR1WvXr1gM6vd0Y9KzSMMaYI/jZVeZuo\nfBv7vdtlpqnKGGNM0fztHK8W6kCMMcaUDgE3VYnI2cEMxBhjTOlQnD6OhUGLwhhjTKlRnIIjfJMb\njDHGhE1xCo5pQYvCGGNMqeGq4BDHrSLyiKpOEZGGItIlVMH53LeqiKwWkXKz9IkxxkQqtzWOKUA3\nYKBnOwOY7PamIvKaiPwqIpvy7O8rIttEZIeIjPF56W/AbLf3McYYE3xuC44EVR0JHAdQ1TQgNoD7\nTgf6+u7w5DKfDFwBtAIGikgrEekNJAG/BnCfUiMxOY3Ji3aQmJwW7lCMMaZQbhM5ZXre4BVAROoA\nOW5vqqpLRKRxnt1dgB2qustz7VnAtcAZQFWcwuSYiCxUVdf3jGS2dLoxpjRxW3BMBD4A/iAiT+Bk\nBHw4SLGcC/zgs52CU8O5B0BEBgMHCio0RGQYMAygYcOGQQqpZNjS6caY0sRtBsCZIpII9PLs+pOq\nbgl+WPnee3oRr08VkX3A1bGxsfElEVOw2NLpxpjSxFXBISKj8+y6QkQuBBJVdV0xY/kRaOCzXd+z\nz2+hzDkeSrZ0ujGmNHHbVNXJ8zXfs90P2ADcLSLvqep/ihHLKqC5iDTBKTBuBm5xcwGfZdWLEUZ4\n+KaLNcaYSOZ2VFV9oKOq/lVV/wrEA3WBi4DB/l5ERN4BlgMXiEiKiNypqlnAPcD/gC3AbFXd7Ca4\n4i6rbowxpmhuaxx1gRM+25nAH1T1mIicKOCc06jqwAL2L8TWwDLGmIjmtuCYCXwnIh96tq8G3haR\nqjhzLcKqNDdVGWNMaeE6dayIdAK6eza/VdXVQY+qmDp16qSrV0dcWMYYE9FCmTp2J07/xFqgiohc\nFMA1QkJErhaRqYcOHQp3KMYYU2a5qnGIyBDgfpxO8nVAV2C5ql4amvACIyL7geQCXq4OFFWyFHZM\nfq/l3VfYtvdn3321gQNFxBRovP4cY890+nZZfibfn+2Z3MXrzzGl+ZlqqGqdIq+mqn5/ARuBSsA6\nz3YLYK6ba4T7C5hanGPyey3vvsK2vT/n2bfansmeqaSeKc/z2TPZM/l9jPfLbVPVcVU9DiAiFVV1\nK3CBy2uE2/yiDyn0mPxey7uvsO35BRxTHPZM/r1mz+QuFn/YM/n3Wml/plO4bar6ALgdGAVcCqQB\nMap
6pd8XMacRkdXqR4dUaWLPVDrYM5UOkfZMbteq6u/58VERWYTTJvZJ0KMqf6aGO4AQsGcqHeyZ\nSoeIeia3NY6KwHVAY3wKHVV9LOiRGWOMiUhuJwB+iNMzn8ipM8iNMcaUE25rHJtUtU0I4zHGGBPh\n3I6qWiYibUMSiTHGmFLBrxqHiGzESRcbDTQHduE0VQmgqtoulEEaY4yJHP4WHI0Ke11VC5qlbVwS\nkZY4s/NrA1+q6n/DHFJQeBbC/Bp4VFUXhDueYBCRi4HHgc3ALFVdHNaAgkBEKuA805k4k85mhDmk\nYhORnsAgnA++rVT1wjCHVGwi0hAnlfdB4HtVfbok7+9vU9UvQH/gQaAv8KOqJnu/QhZdGSEir4nI\nryKyKc/+viKyTUR2iMgYAFXdoqp3Azfy+2KSEcfNM3n8DZhdslG65/K5FDiMs5pCSknH6i+Xz3Qt\nzpJCmZSRZ1LVpZ7/UwuAiC0IXf6e2gJzVPUOoEOJB+vndPV3gbeAu4B5wAvFmf5e3r5wEl11BDb5\n7IvCWTCyKRALrMf5NARwDc78mFvCHXswngnojZPRcTDQL9yxB/G5Knhe/wMwM9yxB+mZxgB3eY6Z\nE+7Yg/FMPq/PBqqFO/Yg/Z5qAYuAr4DbSzpWf4fjtlLVtgAi8iqw0s/zDKCqS0SkcZ7dXYAdqroL\nQERm4XzaS1LVj4CPRORj4O2SjNVfLp/pDKAqzh/8MRFZqKo5JRiu39w8l6p6c9CkARVLLEiXXP6u\nfgBOeo7JLqkY3XL7f8rTtHNIVTNKNFAXXD5TJjDWc84c4PWSjNXfgiPT+4OqZolIiMIpV87F+U/q\nlQIkeNrNB+C8EZW2bIj5PpOq3gMgIoOBA5FaaBSioN/VAOByoAbwYjgCK4Z8nwl4AZjk6RdYEo7A\niqGgZwK4kxJ+cw2Sgp7pJZwVPG4B9pR0UP4WHO1F5DfPzwJU9mx7R1WdGZLoyiF1OlgXhzmMkFDV\n6eGOIZhUdS4wN9xxBEiI4EcAAATXSURBVJOqHsV5ky1TVHVsuGMIJlXdBFwfrvv7VXCoalSoAymH\nfgQa+GzX9+wrzcriM0HZfC57ptIhIp8pkAyAJjhWAc1FpImIxOJ0Hn8U5piKqyw+E5TN57JnKh0i\n8pms4CgBIvIOTrrdC0QkRUTuVNUs4B7gf8AWYLaqbg5nnG6UxWeCsvlc9kylQ2l6JldrVRljjDFW\n4zDGGOOKFRzGGGNcsYLDGGOMK1ZwGGOMccUKDmOMMa5YwWGMMcYVKziMMca4YgWHMSasRKSliLwk\nInNEZHi44zFFs4LDFJuInC0is0Rkp4gkishCETm/iHMO+36PZCJSQ0RGBOM8EVkWpJgai8gxEVkX\n4PmH8/s5HDRP8jIRqSwi60TkpIjUDmdsJn9WcJhiEWeN/Q+Axap6nqrGA3/HSW5UVtQA8i04xFHQ\n/6PTztPgpi3dqapxQbxeSIlIWxFZkOerrue1a4CPgYWqeszzXD+FNWBTsHBnvbKv0v0FXAosKeC1\nW3GSfq0DXgaifF477Ps9n3P/AmzAyXj2ps/+0cAmz9coz77GOOv4TMPJ//0ZULmI65wWW0HXAWYB\nxzzHPus5bhvwhue4RjiZMRM928M89zjlvLzP6/ZZ8vz7NMYnU1xBz1rQ7yBPHN7fRVWcN+/1nphu\n8uf34ollKzAd+B6YCVwGfAtsB7q4+Hv62OfnPUDtcP+N21c+v6dwB2BfpfsLuA94Lp/9LYH5QIxn\newrwF5/XCyw4gNaeN6Danu2zPN/jgY2eN7gzPG+sHTxvXFlAnOe42Z43zIKuk29shVznlDdpz3YO\n0NVnn/falT1vurUKeHM/HMiz5PNvlDem0561sN8B+Rcc1wHTfPZX9/P34o25LU4rRiLwGk6+nmuB\neUX8DV0MTMQp2Eb67N+DFRwR+eVvIidj3OqF8+a4ypMxsjLwq5/nX
gq8p6oHAFT1oGd/D+ADVT0C\nICJzgZ44y0zvVlVve38izptZzQKuU1BsSwq4zjf5xJisqit8tu8Tkf6enxsAzYGfC3lGt89SlNP+\nzTzZ4dz8DjYC40XkGWCBqi4t6h4+r+1W1Y2eZ9kMfKmqKiIbi4pfy3DysrLKCg5TXJvJPxOZADNU\n9e8lFMcJn5+zcd4kC5JvbJ58z/5e54jPeRfjNM10U9WjIrIYqORn3Plx8yyFcfU7UNXvRaQjcCXw\nbxH5UlUf8/NevjHn+GznYO8zZY51jpvi+gqoKCLDvDtEpB1OO/j1Pp2fZ4lIIxfXvEFEannP9exf\nCvxJRKqISFWgv2ef2+t86TK2DKBaIa9XB9I8hUYLoKsf57l9lqLk96yunlNEzgGOqupbOH05Hf24\nhymHrOAwxaKqivOmd5lnOO5m4CmcETEPA5+JyAbgc6Cen9fcDDwBfC0i64EJnv1rcDpgVwLfAa+o\n6toArpPkJjZVTQW+FZFNIvJsPod8CkSLyBbgaWBFUee5fZai5Pesbp8Tp49ipWeI71jg30XdI9B4\nTelmiZyMKYU8zWoLVLVNmEMJGRHZA3Ty9qmYyGE1DmNKp2ygeqATACOZdwIgEIPTR2IijNU4jDHG\nuGI1DmOMMa5YwWGMMcYVKziMMca4YgWHMcYYV6zgMMYY44oVHMYYY1yxgsMYY4wrVnAYY4xxxQoO\nY4wxrvx/raCxCZ2UUNAAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Load data extracted from Engelhardt et al.\n", - "data = pd.read_excel('marine_deep_subsurface_phage_data.xlsx',skiprows=1)\n", - "\n", - "# Load data extracted from Middelboe et al.\n", - "middelboe = pd.read_excel('marine_deep_subsurface_phage_data.xlsx','Middelboe',skiprows=1,index_col=0)\n", - "\n", - "# Plot the data\n", - "plt.loglog(data['Cells concentration [cells cm^-3]'],data['Phage concentration [virions cm^-3]'],'.',label='Engelhardt data')\n", - "plt.loglog(middelboe['Prokaryote abundance [cm^-3]'],middelboe['Viral abundance [cm^-3]'],'.',label='Middelboe data')\n", - "\n", - "# Plot the fit Engelhardt et al. used for the data\n", - "fit_xdata = 10**np.linspace(np.log10(data['Cells concentration [cells cm^-3]'].min()),np.log10(data['Cells concentration [cells cm^-3]'].max()),100)\n", - "plt.loglog(fit_xdata,271.8*fit_xdata**0.768,label='Engelhardt et al. fit')\n", - "plt.xlabel(r'Cell concentration [cells cm$^{-3}$]')\n", - "plt.ylabel(r'Phage-like particle concentration [particles cm$^{-3}$]')\n", - "plt.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As the data from the two studies seem to correspond well to the same fit used in Engelhardt et al., we combined the data from the two studies and calculated the geometric mean of the ratios between phage-like particles and prokaryotes across the measurements from both studies:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the ratio between the concentration of phage-like particles and cells in subseafloor sediments is ≈12.\n" - ] - } - ], - "source": [ - "# Merge data from Engelhardt et al. 
and Middelboe et al.\n", - "merged_data = pd.concat([(data['Phage concentration [virions cm^-3]']/data['Cells concentration [cells cm^-3]']),(middelboe['Viral abundance [cm^-3]']/middelboe['Prokaryote abundance [cm^-3]'])])\n", - "geo_mean_ratio = gmean(merged_data)\n", - "print('Our best estimate for the ratio between the concentration of phage-like particles and cells in subseafloor sediments is ≈%.0f.' %geo_mean_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of phages in subseafloor sediments, we multiply the ratio of phage-like particles to prokaryotes by our estimate for the total number of prokaryotes in subseafloor sediments." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in subseafloor sediments is ≈5e+30\n" - ] - } - ], - "source": [ - "prokaryote_estimate = pd.read_excel('../../../bacteria_archaea/marine_deep_subsurface/marine_deep_subsurface_prok_biomass_estimate.xlsx')\n", - "best_estimate = prokaryote_estimate.loc[0]['Value']*geo_mean_ratio\n", - "print('Our best estimate for the total number of phages in subseafloor sediments is ≈%.0e' %best_estimate)\n", - "\n", - "old_results = pd.read_excel('../phage_num_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[1] = pd.Series({\n", - " 'Parameter': 'Total number of phages in the marine deep subsurface',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Number of individuals',\n", - " 'Uncertainty': np.nan\n", - " })\n", - "\n", - "result.to_excel('../phage_num_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/phage_num/phage_num_estimate_OLD.xlsx b/viruses/phage_num/phage_num_estimate_OLD.xlsx deleted file mode 100644 index 394c780..0000000 Binary files a/viruses/phage_num/phage_num_estimate_OLD.xlsx and /dev/null differ diff --git a/viruses/phage_num/soil/.ipynb_checkpoints/soil_phage_num-checkpoint.ipynb b/viruses/phage_num/soil/.ipynb_checkpoints/soil_phage_num-checkpoint.ipynb deleted file mode 100644 index 2efba49..0000000 --- a/viruses/phage_num/soil/.ipynb_checkpoints/soil_phage_num-checkpoint.ipynb +++ /dev/null @@ -1,161 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "from scipy.stats import gmean\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper/')\n", - "from CI_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of phages in soils\n", - "We could not find much data on the abundance of phages in soil. Our estimates are based on the data available to us, which are mainly from [Williamson](http://dx.doi.org/10.1007/978-3-642-14512-4_4) and [Parikka et al.](http://dx.doi.org/10.1111/brv.12271). \n", - "\n", - "Based on these data, the values for the concentration of phages in soils appear to be ~$10^8-10^9$ phages per gram of soil. 
Assuming a bulk soil density of ≈1.5 g cm$^{-3}$, and a soil depth of 10 meters (based on [Shangguan et al.](http://dx.doi.org/10.1002/2016MS000686)), we estimate the total number of phages per $m^2$ to be:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The lower bound of the number of phages per m^2 of soil is ≈1.5e+15 phages\n", - "The upper bound of the number of phages per m^2 of soil is ≈1.5e+16 phages\n" - ] - } - ], - "source": [ - "# Lower and upper bounds for the concentration of phages per gram\n", - "lower_concentration = 1e8\n", - "upper_concentration = 1e9\n", - "\n", - "# Typical bulk density of soil in g per cm^3\n", - "bulk_density = 1.5\n", - "\n", - "# Total number of cm^3 in m^3\n", - "cm3_m3 = 1e6\n", - "\n", - "# Soil depth in meters\n", - "depth = 10\n", - "# Calculate the total number of phages per m^2\n", - "lower_phage_per_m2 = lower_concentration*bulk_density*cm3_m3*depth\n", - "upper_phage_per_m2 = upper_concentration*bulk_density*cm3_m3*depth\n", - "\n", - "print('The lower bound of the number of phages per m^2 of soil is ≈%.1e phages' %lower_phage_per_m2)\n", - "print('The upper bound of the number of phages per m^2 of soil is ≈%.1e phages' %upper_phage_per_m2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best estimate of the number of phages per $m^2$, we use the geometric mean of the lower and upper bounds. 
We apply this phage concentration across the entire ice-free land surface of the Earth, which is ≈$1.3×10^{14} m^2$, to estimate the total number of phages in soils:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in soils is ≈6.2e+29 phages\n" - ] - } - ], - "source": [ - "# Use the geometric mean of the lower and upper phage concentrations as our best estimate\n", - "best_phage_per_m2 = gmean([lower_phage_per_m2,upper_phage_per_m2])\n", - "\n", - "# The area of ice-free land surface in m^2\n", - "area = 1.3e14\n", - "\n", - "best_estimate = best_phage_per_m2*area\n", - "\n", - "print('Our best estimate for the total number of phages in soils is ≈%.1e phages' % best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty analysis\n", - "There are no good indicators for the uncertainty of our estimate of the total number of phages in soils. The range of ~$10^8-10^9$ phages per gram of soil introduces an uncertainty of about an order of magnitude. The specific values of the bulk density of soils, as well as the depth of soils, also have uncertainty associated with them, which is hard to quantify. 
Our estimate is likely to be an overestimate, as it is likely that the concentration of phages in deeper soil layers will be lower than in shallower layers, as is the case for prokaryotes.\n", - "We thus project very crudely an uncertainty of one and a half orders of magnitude associated with our estimate of the number of phages in soils.\n", - "\n", - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in soils: 6e+29 Gt C\n", - "Uncertainty associated with the estiamte of the total number of phages in soils: 32-fold\n" - ] - } - ], - "source": [ - "mul_CI = 10**1.5\n", - "\n", - "print('Our best estimate for the total number of phages in soils: %.0e Gt C' % best_estimate)\n", - "print('Uncertainty associated with the estiamte of the total number of phages in soils: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../phage_num_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[2] = pd.Series({\n", - " 'Parameter': 'Total number of phages in soils',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Number of individuals',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.to_excel('../phage_num_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/viruses/phage_num/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_phage_num-checkpoint.ipynb 
b/viruses/phage_num/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_phage_num-checkpoint.ipynb deleted file mode 100644 index ad0d973..0000000 --- a/viruses/phage_num/terrestrial_deep_subsurface/.ipynb_checkpoints/terrestrial_deep_subsurface_phage_num-checkpoint.ipynb +++ /dev/null @@ -1,848 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Load dependencies\n", - "import pandas as pd\n", - "pd.options.display.float_format = '{:,.1e}'.format\n", - "import numpy as np\n", - "import sys\n", - "sys.path.insert(0, '../../../statistics_helper/')\n", - "from CI_helper import *\n", - "from scipy.stats import gmean" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Estimating the total number of terrestrial deep subsurface phages\n", - "Estimating the total number of phages in the terrestrial deep subsurface is challenging as data on the abundance of phages in this environment is scarce. To generate an estimate for the total number of phages present in the terrestrial deep subsurface, we combined five different types of estimates for the ratio between the concentration of prokaryotes and phage-like particles. Below we detail these five different estimates. \n", - "\n", - "## Phage-like particle to prokaryotes concentration ratios\n", - "### Naive ratio of phage-like particles and prokaryotes\n", - "A common claim regarding the ratio between the concentration of phage-like particles and prokaryotes is that phage-like particles are about 10 times more abundant than prokaryotes. We use this ratio as our first estimate for the ratio between the concentration of phage-like particles and prokaryotes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "naive_ratio = 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Engelhardt et al. based ratio\n", - "For our second estimate of the ratio of the concentration of phage-like particles and prokaryotes, we use the relation measured in subseafloor sediments by [Engelhardt et al.](http://dx.doi.org/10.1038/ismej.2013.245). The ratio Engelhardt et al. measured is: $$V = 271.8\\times P ^{0.768}$$\n", - "Where V is the concentration of phage-like particles and P is the concentration of prokaryotes.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "engelhardt_ratio = lambda x: 271.8*x**0.768" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Kyle et al. based ratio\n", - "For our third estimate of the ratio of the concentration of phage-like particles and prokaryotes, we use the relation measured in the terrestrial deep subsurface by [Kyle et al.](http://dx.doi.org/10.1038/ismej.2008.18). The ratio Kyle et al. measured is: $$V = 10^{1.3\\times \\log_{10}(P)-0.62}$$\n", - "Where V is the concentration of phage-like particles and P is the concentration of prokaryotes." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "kyle_ratio = lambda x: 10**(1.3*np.log10(x)-0.62)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pan et al. based ratio\n", - "For our fourth estimate of the ratio of the concentration of phage-like particles and prokaryotes, we use the relation measured in the terrestrial deep subsurface by [Pan et al.](http://dx.doi.org/10.3389/fmicb.2017.01199). Pan et al. 
measured the concentration of phage-like particles and prokaryotes in an alluvial aquifer which is situated near a U.S. Department of Energy uranium ore-processing site. The measurements were done in aquifers that are inside or outside a uranium plume from the nearby site. We use the data from samples outside the plume and calculate a geometric mean of the ratio between the concentration of phage-like particles and prokaryotes.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the ratio between the concentration of phage-like particles and prokaryotes based on Pan et al. is ≈3.\n" - ] - } - ], - "source": [ - "pan_data = pd.read_excel('terrestrial_deep_subsurface_phage_num_data.xlsx','Pan',skiprows=1)\n", - "pan_ratio = gmean(pan_data['Virus-to-cell ratio (VCR)'])\n", - "print('Our estimate for the ratio between the concentration of phage-like particles and prokaryotes based on Pan et al. is ≈%.0f.' % pan_ratio)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Roundnew et al. based ratio\n", - "For our fifth estimate of the ratio of the concentration of phage-like particles and prokaryotes, we use the relation measured in the terrestrial deep subsurface by [Roundnew et al.](http://onlinelibrary.wiley.com/doi/10.1111/j.1745-6592.2011.01393.x/full). Roundnew et al. measured the concentration of phage-like particles and prokaryotes in groundwater along a depth profile. We use the data from samples outside the plume and calculate a geometric mean of the ratio between the concentration of phage-like particles and prokaryotes.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the ratio between the concentration of phage-like particles and prokaryotes based on Roundnew et al. 
is ≈2.\n" - ] - } - ], - "source": [ - "roundnew_data = pd.read_excel('terrestrial_deep_subsurface_phage_num_data.xlsx','Roundnew',skiprows=1)\n", - "roundnew_ratio = gmean(roundnew_data['Virus:Bacteria ratio'])\n", - "print('Our estimate for the ratio between the concentration of phage-like particles and prokaryotes based on Roundnew et al. is ≈%.0f.' % roundnew_ratio)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generating estimates for the total number of phages in the terrestrial deep subsurface\n", - "These estimates of the ratio of the concentration of phage-like particles and prokaryotes can be used to estimate the total number of phages by plugging into them an estimate for the total number of prokaryotes in the terrestrial deep subsurface. After detailing the five estimates of the ratio between the number of phage-like particles and prokaryotes, we discuss the estimate of the total number of prokaryotes in the terrestrial deep subsurface that we plug into the ratios to generate estimates of the total number of phages in the terrestrial deep subsurface.\n", - "\n", - "In general, it is not clear whether the measured ratios between the concentrations of phage-like particles and prokaryotes refer to attached or unattached cells. We take this factor into consideration in our estimate as a scaling factor that converts the estimated number of phages in groundwater to an estimate for the total number of phages. Our best estimate for this factor is a geometric mean of three estimates. 
The first takes into account only groundwater, and the other two assume that attached cells (and thus also phages) are ≈$10^2-10^3$-fold more abundant than cells in groundwater (as estimated in [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196)).\n", - "\n", - "The estimates of the ratio between the concentration of phage-like particles and prokaryotes can be divided into two categories: estimates that are invariant to the local concentration of prokaryotes and ratios that are dependent on the local concentration of prokaryotes.\n", - "\n", - "The first category of estimates includes the naive estimate and the estimates by Pan et al. and Roundnew et al. For those estimates, we can just plug in an estimate for the total number of prokaryotes in the terrestrial deep subsurface and get an estimate for the total number of phages. The second category includes the estimates by Engelhardt et al. and Kyle et al. For those estimates, we need to use the local concentrations of prokaryotes to generate local concentrations of phage-like particles, and then sum all the local concentrations.\n", - "\n", - "We start with generating the estimates for the first category of estimates of the ratio between the concentration of phage-like particles and prokaryotes. The total number of prokaryotes we use is based on our analysis of the biomass of terrestrial bacteria and archaea (see relevant section in the Supplementary Information). As we note in the section on the biomass of terrestrial deep subsurface prokaryotes, we generate two estimates for the total number of cells in groundwater - one based on arithmetic means of cell concentrations at several depth bins, and the other based on geometric means of cell concentrations at the same depth bins. Here is a view of the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
indexMean cell concentration [cells mL-1]Geometric mean cell concentration [cells mL-1]
Depth bin [m]
(0.0, 250.0]01.0e+064.2e+05
(250.0, 500.0]16.4e+052.2e+05
(500.0, 750.0]27.3e+054.1e+05
(750.0, 1000.0]31.6e+056.7e+04
(1000.0, 1250.0]41.6e+051.2e+05
(1250.0, 1500.0]51.4e+051.2e+04
(1500.0, 1750.0]62.9e+043.4e+03
(1750.0, 2000.0]72.1e+043.5e+03
\n", - "
" - ], - "text/plain": [ - " index Mean cell concentration [cells mL-1] \\\n", - "Depth bin [m] \n", - "(0.0, 250.0] 0 1.0e+06 \n", - "(250.0, 500.0] 1 6.4e+05 \n", - "(500.0, 750.0] 2 7.3e+05 \n", - "(750.0, 1000.0] 3 1.6e+05 \n", - "(1000.0, 1250.0] 4 1.6e+05 \n", - "(1250.0, 1500.0] 5 1.4e+05 \n", - "(1500.0, 1750.0] 6 2.9e+04 \n", - "(1750.0, 2000.0] 7 2.1e+04 \n", - "\n", - " Geometric mean cell concentration [cells mL-1] \n", - "Depth bin [m] \n", - "(0.0, 250.0] 4.2e+05 \n", - "(250.0, 500.0] 2.2e+05 \n", - "(500.0, 750.0] 4.1e+05 \n", - "(750.0, 1000.0] 6.7e+04 \n", - "(1000.0, 1250.0] 1.2e+05 \n", - "(1250.0, 1500.0] 1.2e+04 \n", - "(1500.0, 1750.0] 3.4e+03 \n", - "(1750.0, 2000.0] 3.5e+03 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data on the concentrations of prokaryotes in each depth bin from our analysis of the biomass\n", - "# of terrestrial deep subsurface prokaryotes\n", - "prok_concentration = pd.read_excel('terrestrial_deep_subsurface_prok_num.xlsx','Cell concentration')\n", - "prok_concentration = prok_concentration.reset_index().set_index('Depth bin [m]')\n", - "prok_concentration" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We multiply the concentraion by data on the total volume of groundwater at each depth bin. The data on the total volume at each depth bin is generated in our analysis of the biomass of terrestrial deep subsurface prokaryotes. Here is a view of the water volume data:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
indexWater volume [mL]
Depth bin [m]
(0.0, 250.0]05.4e+21
(250.0, 500.0]14.1e+21
(500.0, 750.0]23.2e+21
(750.0, 1000.0]32.6e+21
(1000.0, 1250.0]42.2e+21
(1250.0, 1500.0]51.9e+21
(1500.0, 1750.0]61.7e+21
(1750.0, 2000.0]71.6e+21
\n", - "
" - ], - "text/plain": [ - " index Water volume [mL]\n", - "Depth bin [m] \n", - "(0.0, 250.0] 0 5.4e+21\n", - "(250.0, 500.0] 1 4.1e+21\n", - "(500.0, 750.0] 2 3.2e+21\n", - "(750.0, 1000.0] 3 2.6e+21\n", - "(1000.0, 1250.0] 4 2.2e+21\n", - "(1250.0, 1500.0] 5 1.9e+21\n", - "(1500.0, 1750.0] 6 1.7e+21\n", - "(1750.0, 2000.0] 7 1.6e+21" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load data on the total volume of groundwater in each depth bin from our analysis of the biomass\n", - "# of terrestrial deep subsurface prokaryotes\n", - "water_vol = pd.read_excel('terrestrial_deep_subsurface_prok_num.xlsx','Water volume')\n", - "water_vol = water_vol.reset_index().set_index('Depth bin [m]')\n", - "water_vol" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To calculate the total number of phages based on the naive method and based on the data in Pan et al. and Roundnew et al., we calculate the total number of prokaryotes by multiplying the cell concentration at each depth bin by the total volume of water at that depth bin, and sum over depth bins:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Geometric mean estimate 4.9e+27\n", - "Arithmetic mean estimate 1.2e+28\n", - "dtype: float64" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tot_prok = pd.DataFrame()\n", - "\n", - "# Multiply the arithmetic of geometric mean concentrations of prokaryotes at each depth bin \n", - "# by the total volume of groundwater at each depth bin\n", - "tot_prok['Geometric mean estimate'] = prok_concentration['Geometric mean cell concentration [cells mL-1]'] * water_vol['Water volume [mL]']\n", - "tot_prok['Arithmetic mean estimate'] = prok_concentration['Mean cell concentration [cells mL-1]'] * water_vol['Water volume [mL]']\n", - "tot_prok.sum()" - ] - 
}, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the total number of prokaryotes in groundwater is the geometric mean of the total number of prokaryotes based on geometric and arithmetic mean concentrations (see the biomass of terrestrial deep subsurface prokaryotes section for details). " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of prokaryotes in groundwater for the calculation of the total number of phages in the terrestrial deep subsurface is 8e+27.\n" - ] - } - ], - "source": [ - "# Estimate the total number of prokaryotes in groundwater as the geometric mean of the estimates based on \n", - "# arithmetic and geometric mean cell concentrations\n", - "tot_prok_num_gw = gmean(tot_prok.sum())\n", - "\n", - "print('Our best estimate for the total number of prokaryotes in groundwater for the calculation of the total number of phages in the terrestrial deep subsurface is %.0e.' %tot_prok_num_gw)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have an estimate for the total number of prokaryotes, we can plug it into the ratios estimated based on the data in Pan et al., Roundnew et al., or use our naive estimate of ten times more phages than prokaryotes.\n", - "\n", - "As stated above, to go from the total number of phages in groundwater to our estimate for the total number of phages in the terrestrial deep subsurface, we multiply our estimate of the total number of phages by a scaling factor. As our best estimate for this scaling factor we use the geometric mean of three estimates. The first takes into account only groundwater (and thus the scaling factor is 1), and the other two assume attached-to-unattached ratios of $10^2-10^3$ as in [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our estimate for the total number of phages in the terrestrial deep subsurface based on the naive ratio of 10:1 is 3e+30\n", - "Our estimate for the total number of phages in the terrestrial deep subsurface based on Pan et al. is 9e+29\n", - "Our estimate for the total number of phages in the terrestrial deep subsurface based on Pan et al. is 6e+29\n" - ] - } - ], - "source": [ - "# Define the scaling factor from number of cells in groundwater to cells relevant for calculating the total\n", - "# Number of phages\n", - "scaling_factor = gmean([1,100,1000])\n", - "\n", - "\n", - "# Estimate the total number of phages based on the naive ratio of 10:1\n", - "tot_phage_naive = tot_prok_num_gw*naive_ratio*scaling_factor\n", - "print('Our estimate for the total number of phages in the terrestrial deep subsurface based on the naive ratio of 10:1 is %.0e' %tot_phage_naive)\n", - "\n", - "# Estimate the total number of phages based on Pan et al.\n", - "tot_phage_pan = tot_prok_num_gw*pan_ratio*scaling_factor\n", - "print('Our estimate for the total number of phages in the terrestrial deep subsurface based on Pan et al. is %.0e' %tot_phage_pan)\n", - "\n", - "# Estimate the total number of phages based on Roundnew et al.\n", - "tot_phage_roundnew = tot_prok_num_gw*roundnew_ratio*scaling_factor\n", - "print('Our estimate for the total number of phages in the terrestrial deep subsurface based on Pan et al. 
is %.0e' %tot_phage_roundnew)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For the two estimates of the ratio between the concentration of phage-like particles and prokaryotes which are dependent on the local concentrations of prokaryotes, we take the data on the arithmetic and geometric mean cell concentrations at each depth bin, and plug it into the relations described by either Engelhardt et al. or Kyle et al.:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "engelhardt_phage_conc_geo_mean = engelhardt_ratio(prok_concentration['Geometric mean cell concentration [cells mL-1]'])\n", - "engelhardt_phage_conc_mean = engelhardt_ratio(prok_concentration['Mean cell concentration [cells mL-1]'])\n", - "\n", - "kyle_phage_conc_mean = kyle_ratio(prok_concentration['Mean cell concentration [cells mL-1]'])\n", - "kyle_phage_conc_geo_mean = kyle_ratio(prok_concentration['Geometric mean cell concentration [cells mL-1]'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We calculate the total number of phages based on the arithmetic and geometric mean concentration in each depth bin by multiplying by the total volume of groundwater at each depth bin and by the scaling factor we used for the previous method to convert from number of phages in groundwater to total number of phages in the terrestrial deep subsurface." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "engelhardt_tot_phage_mean = (engelhardt_phage_conc_mean*water_vol['Water volume [mL]']).sum()*scaling_factor\n", - "engelhardt_tot_phage_geo_mean = (engelhardt_phage_conc_geo_mean*water_vol['Water volume [mL]']).sum()*scaling_factor\n", - "\n", - "kyle_tot_phage_mean = (kyle_phage_conc_mean*water_vol['Water volume [mL]']).sum()*scaling_factor\n", - "kyle_tot_phage_geo_mean = (kyle_phage_conc_geo_mean*water_vol['Water volume [mL]']).sum()*scaling_factor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our best estimate for the total number of phages is the geometric mean of the estimates based on the arithmetic and geometric means." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in the terrestrial deep subsurface based on the data from Engelhardt et al. on the relation between the number of phage-like particles and prokaryotes is 5e+30\n", - "Our best estimate for the total number of phages in the terrestrial deep subsurface based on the data from Kyle et al. on the relation between the number of phage-like particles and prokaryotes is 4e+30\n" - ] - } - ], - "source": [ - "engelhardt_tot_phage = gmean([engelhardt_tot_phage_geo_mean,engelhardt_tot_phage_mean])\n", - "kyle_tot_phage = gmean([kyle_tot_phage_geo_mean,kyle_tot_phage_mean])\n", - "\n", - "print('Our best estimate for the total number of phages in the terrestrial deep subsurface based on the data from Engelhardt et al. on the relation between the number of phage-like particles and prokaryotes is %.0e' %engelhardt_tot_phage)\n", - "print('Our best estimate for the total number of phages in the terrestrial deep subsurface based on the data from Kyle et al. 
on the relation between the number of phage-like particles and prokaryotes is %.0e' %kyle_tot_phage)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In summary, the results from our five different approaches are:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Naive estimate 3.5e+30\n", - "Pan et al. 9.3e+29\n", - "Roundnew et al. 5.8e+29\n", - "Engelhardt et al. 4.6e+30\n", - "Kyle et al. 4.3e+30\n", - "dtype: float64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "estimates = pd.Series([tot_phage_naive,tot_phage_pan,tot_phage_roundnew,engelhardt_tot_phage,kyle_tot_phage],\n", - " index = ['Naive estimate','Pan et al.','Roundnew et al.','Engelhardt et al.','Kyle et al.'])\n", - "estimates" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To generate our best estimate for the total number of phages in the terrestrial deep subsurface, we calculate the geometric mean of the estimates from our five different methods:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in the the terrestrial deep subsurface is 2e+30\n" - ] - } - ], - "source": [ - "best_estimate = gmean(estimates)\n", - "print('Our best estimate for the total number of phages in the the terrestrial deep subsurface is %.0e' % best_estimate)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Uncertainty estimate\n", - "To assess the uncertainty of our estimate of the total number of phages in the terrestrial deep subsurface, we calculate the uncertainty associated with each of the components of the estimate: The ratios between the concentration of phage-like particles and prokaryotes, the number of prokaryotes we use to 
derive the number of phages and the scaling factor between the number of prokaryotes in groundwater and the total number of prokaryotes.\n", - "\n", - "## Uncertainty of the ratio between the number of phage-like particles and prokaryotes\n", - "As a measure of the uncertainty associated with our estimate of the ratio between the concentration of phage-like particles and prokaryotes, we calculate both the intra-study uncertainty of this ratio and the interstudy uncertainty.\n", - "\n", - "### Intra-study uncertainty\n", - "The only cases in which we could calculate the intra-study uncertainty of the ratio between the concentration of phage-like particles and prokaryotes are in Pan et al. and Roundnew et al. We calculate the 95% confidence interval of the geometric mean of the measurements in each of the studies as a measure of the intra-study uncertainty:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The 95 percent confidence interval of the geometric mean of the values in Pan et al. is ≈1.5-fold\n", - "The 95 percent confidence interval of the geometric mean of the values in Roundnew et al. is ≈1.7-fold\n" - ] - } - ], - "source": [ - "pan_CI = geo_CI_calc(pan_data['Virus-to-cell ratio (VCR)'])\n", - "roundnew_CI = geo_CI_calc(roundnew_data['Virus:Bacteria ratio'])\n", - "print('The 95 percent confidence interval of the geometric mean of the values in Pan et al. is ≈%.1f-fold' % pan_CI)\n", - "print('The 95 percent confidence interval of the geometric mean of the values in Roundnew et al. is ≈%.1f-fold' % roundnew_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interstudy uncertainty\n", - "We calculate the 95% confidence interval of the geometric mean of the estimates from our five different methodologies for measuring the ratio between the concentration of phage-like particles and prokaryotes. 
We use this range as a measure of the interstudy uncertainty associated with the estimate of the ratio:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The interstudy uncertainty associated with our estimate of the ratio between the concentration of phage-like particles and prokaryotes is ≈2.3-fold\n" - ] - } - ], - "source": [ - "ratio_inter_CI = geo_CI_calc(estimates)\n", - "print('The interstudy uncertainty associated with our estimate of the ratio between the concentration of phage-like particles and prokaryotes is ≈%.1f-fold' % ratio_inter_CI)\n", - "\n", - "ratio_CI = np.max([ratio_inter_CI,pan_CI,roundnew_CI])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection for the uncertainty associated with the ratio between the concentration of phage-like particles and prokaryotes, we use the highest uncertainty of the intra-study and interstudy uncertainties. Thus, we project an uncertainty of ≈2.3-fold.\n", - "\n", - "## Uncertainty of the number of prokaryotes we use to derive the total number of phages\n", - "In order to use the ratios between the concentration of phage-like particles and prokaryotes to estimate the total number of phages, we need to use estimates for the total number of prokaryotes in groundwater. We use two different types of estimates - one based on the arithmetic mean cell concentration at each depth bin and one based on the geometric mean cell concentration at each depth bin. We plug either estimate into the five different ratios between the concentration of phages and prokaryotes and produce an estimate for the total number of phages in groundwater. As we have five estimates for the ratios, and two estimates for the number of prokaryotes, we generate ten different estimates for the total number of phages in groundwater. 
We then use the geometric mean of the two estimates for each ratio as our best estimate for that ratio. We now assess the uncertainty of the total number of phages associated with the uncertainty of the number of prokaryotes we use.\n", - "\n", - "We calculate the 95% confidence interval of the geometric mean of the estimates of the total number of phages using arithmetic and geometric mean concentrations of prokaryotes. This yields an uncertainty for each one of the five methods to estimate the ratio between the concentration of phage-like particles and prokaryotes. We use the maximal uncertainty of those five uncertainties as our best projection for the uncertainty associated with the total number of prokaryotes." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for uncertainty in the total number of phages in the terrestrial deep subsurface associated with the estimate of the total number of prokaryotes in the terrestrial deep subsurface is ≈2.3-fold\n" - ] - } - ], - "source": [ - "# For the naive estimate, the Pan et al. based ratio and the Roundnew et al. based ratio\n", - "# The uncertainty is the 95% confidence interval of the total number of prokaryotes in\n", - "# groundwater\n", - "tot_prok_CI = geo_CI_calc(tot_prok.sum())\n", - "\n", - "# For the estimates based on the relation in Engelhardt et al. 
and Kyle et al., we use \n", - "# calculate the 95% confidence interval betwee the estimates based on arithmetic and \n", - "# geometric mean concentrations of prokaryotes\n", - "engelhardt_CI = geo_CI_calc(np.array([engelhardt_tot_phage_mean,engelhardt_tot_phage_geo_mean]))\n", - "kyle_CI = geo_CI_calc(np.array([kyle_tot_phage_mean,kyle_tot_phage_geo_mean]))\n", - "\n", - "#\n", - "prok_num_CI = np.max([tot_prok_CI,engelhardt_CI,kyle_CI])\n", - "print('Our best projection for uncertainty in the total number of phages in the terrestrial deep subsurface associated with the estimate of the total number of prokaryotes in the terrestrial deep subsurface is ≈%.1f-fold' %tot_prok_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Uncertainty of the scaling factor between the number of phages in groundwater and the total number of phages\n", - "As we discussed above, it is not clear whether the measured ratios between the concentrations of phage-like particles and prokaryotes refer to attached or unattached cells. We take this factor into consideration in our estimate as a scaling factor that converts the measured number of phages in groundwater to an estimate for the total number of phages. Our best estimate for this factor is a geometric mean of three estimates - one which includes only phages in groundwater (and thus a scaling factor of 1), and the other two assume attached-to-unattached ratios of $10^2-10^3$ as in [McMahon & Parnell](http://dx.doi.org/10.1111/1574-6941.12196). 
To assess the uncertainty associated with this scaling factor, we calculate the 95% confidence interval of the geometric mean of the three estimates:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The uncertainty associated with the scaling factor from number of phages in groundwater to the total number of phages is ≈53.5-fold\n" - ] - } - ], - "source": [ - "scaling_factor_CI = geo_CI_calc(np.array([1,100,1000]))\n", - "print('The uncertainty associated with the scaling factor from number of phages in groundwater to the total number of phages is ≈%.1f-fold' %scaling_factor_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As our best projection of the uncertainty associated with our estimate of the total number of phages in the terrestrial deep subsurface, we combine the uncertainty projections for the three factors discussed above: the ratio between the concentration of phage-like particles and prokaryotes; the total number of prokaryotes we plug into the ratio between phages and prokaryotes; and the scaling factor between the number of phages in groundwater and the total number of phages:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best projection for the uncertainty associated with the total number of phages in the terrestrial deep subsurface is ≈64-fold\n" - ] - } - ], - "source": [ - "mul_CI = CI_prod_prop(np.array([ratio_CI,tot_prok_CI,scaling_factor_CI]))\n", - "print('Our best projection for the uncertainty associated with the total number of phages in the terrestrial deep subsurface is ≈%.0f-fold' %mul_CI)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our final parameters are:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Our best estimate for the total number of phages in the terrestrial deep subsurface: 2e+30\n", - "Uncertainty associated with the estiamte of the total number of phages in the terrestrial deep subsurface: 64-fold\n" - ] - } - ], - "source": [ - "\n", - "print('Our best estimate for the total number of phages in the terrestrial deep subsurface: %.0e' % best_estimate)\n", - "print('Uncertainty associated with the estiamte of the total number of phages in the terrestrial deep subsurface: %.0f-fold' % mul_CI)\n", - "\n", - "old_results = pd.read_excel('../phage_num_estimate.xlsx')\n", - "result = old_results.copy()\n", - "result.loc[3] = pd.Series({\n", - " 'Parameter': 'Total number of phages in the terrestrial deep subsurface',\n", - " 'Value': best_estimate,\n", - " 'Units': 'Number of individuals',\n", - " 'Uncertainty': mul_CI\n", - " })\n", - "\n", - "result.to_excel('../phage_num_estimate.xlsx',index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}