# Monte Carlo simulation

Steps to propagate biomass uncertainty through Monte Carlo simulation. Based on the proposal by Réjou-Méchain et al., 2017, Methods in Ecology and Evolution 8:1163-1167 and implemented in the R package *Biomass*.

The procedure to propagate above-ground biomass (AGB) uncertainty consists of the following steps:

1. Model tree diameter error. It could follow a normal distribution, with parameters fitted from quality control measurements.

2. Model wood density error. *Biomass* uses a truncated normal distribution based on the absolute range recorded in the wood density database (0.08-1.39 g/ml). A better approach could be based on ancestral ranges reconstructed on a seed plant phylogeny.

3. Tree height error. *Biomass* takes a truncated normal distribution over the range 1.3 to (maximum_height + 15). However, this kind of error seems to be exponentially distributed, as it is more likely to record wrong values with taller trees. Parameters could be fitted with quality control measurements.

4. Allometric equation uncertainty. Depending on the allometric equation employed, coefficient distributions are estimated. *Biomass* estimates a posterior distribution for each equation using a Markov chain Monte Carlo (MCMC) algorithm.

5. Above ground biomass estimates are simulated for each tree n times using all the parameter distributions presented above, plus a random error. 


In [None]:
import pandas as pd
import numpy as np
import sqlalchemy as al
import db_utils
import comm
import pymc3
import matplotlib.pyplot as plt

### First section
Bayesian inference of allometric equation coefficients.

In [None]:
# Read the direct-harvest tree records and the table of sampling localities
# distributed with Chave et al. 2014
har = pd.read_csv("../Chave_harvest_db/Chave_GCB_Direct_Harvest_Data.csv")
loc = pd.read_csv("../Chave_harvest_db/Localities.csv")

# Strip the ' forest' substring from the forest type labels
loc['Forest_type'] = loc['Forest_type'].str.replace(' forest', '')

In [None]:
# Helper function to estimate environmental E coefficient from Chave et al. 2014
def est_E(row, localities=None):
    """Return the environmental stress coefficient E for one harvested tree.

    The locality whose ``Abbreviation`` equals ``row.Site`` is looked up and
    E is computed as

        E = (0.178 * TS - 0.938 * CWD - 6.61 * PS) * 1e-3

    where TS, CWD and PS are read from the locality columns
    ``Temp_Seasonality``, ``CWD_mm_yr`` and ``Precip_Seasonality_perc``.

    Parameters
    ----------
    row : object with a ``Site`` attribute
        One record of the harvest table (e.g. a row passed by
        ``DataFrame.apply(..., axis=1)``).
    localities : pandas.DataFrame, optional
        Table of localities. Defaults to the module-level ``loc`` table.

    Returns
    -------
    float
        The E coefficient of the tree's site.

    Raises
    ------
    ValueError
        Raised by ``Series.item()`` when the site does not match exactly one
        locality.
    """
    if localities is None:
        localities = loc

    # Filter the locality table once instead of once per variable.
    site = localities[localities.Abbreviation == row.Site]
    ts = site['Temp_Seasonality'].item()
    cwd = site['CWD_mm_yr'].item()
    ps = site['Precip_Seasonality_perc'].item()
    return (0.178 * ts - 0.938 * cwd - 6.61 * ps) * 1e-3

# Evaluate est_E on every harvest record and keep the result as column 'E'
har['E'] = har.apply(est_E, axis='columns')

In [None]:
# Bayesian inference of allometric equation coefficients
trace = None
with pymc3.Model() as mymodel:
    # Normal priors for the five regression coefficients
    a = pymc3.Normal('a', mu=-2.109, sd=0.5)
    b = pymc3.Normal('b', mu=-0.896, sd=0.5)
    c = pymc3.Normal('c', mu=0.923, sd=0.5)
    d = pymc3.Normal('d', mu=2.794, sd=0.5)
    e = pymc3.Normal('e', mu=-0.046, sd=0.5)

    # Half-normal prior for the standard deviation of the likelihood
    sigma = pymc3.HalfNormal('sigma', sd=1)

    # Log-transformed predictors of the allometric equation (Chave et al. 2014)
    log_wd = np.log(har.Gravity)
    log_dbh = np.log(har.DBH_cm)

    # Expected value of log(AGB)
    y_exp = a + b * har.E + c * log_wd + d * log_dbh + e * log_dbh**2

    # Normal likelihood of the observed log(AGB)
    Y_obs = pymc3.Normal('Y_obs', mu=y_exp, sd=sigma, observed=np.log(har.AGB_kg))

    ###############################################################
    # Rejou-Mechain et al. (2017) sampled with a Metropolis kernel. Here the
    # step method is left to pymc3's default, NUTS (No U-Turn Sampler) for
    # continuous variables such as these, which reaches convergence faster.
    #
    # Metropolis alternative:
    #   mstep = pymc3.Metropolis()
    #   trace = pymc3.sample(50000, njobs=4, step=mstep)
    ###############################################################
    trace = pymc3.sample(5000, njobs=4)


In [None]:
# Estimates the effective sample size of each parameter in the posterior distribution
pymc3.diagnostics.effective_n(trace)

In [None]:
# Summary statistics of the posterior samples of each model parameter
pymc3.summary(trace)

In [None]:
# Draw the posterior distribution of each parameter from the sampled trace.
# Requires matplotlib.pyplot
pymc3.traceplot(trace)

### Second section
Estimation of diameter and density uncertainty from plot data. 

In [None]:
# Import plot data from IFN database

# Connection credentials (blank placeholders; fill in before running)
user = ''
password = ''
database = ''

# MySQL connection URL. NOTE: user, password and database are inserted
# verbatim, so any special character in them must already be URL-encoded.
url_template = 'mysql+mysqldb://{0}:{1}@localhost/{2}?charset=utf8&use_unicode=1&unix_socket=/var/run/mysqld/mysqld.sock'
engine = al.create_engine(url_template.format(user, password, database))

# Connection shared by all the queries below
conn = engine.connect()

# Taxonomic equivalence table with two columns: Taxon ID and Accepted Taxon ID
accnames = db_utils.acctax(conn)

# Single table holding the dasometric data, species names and densities
table = db_utils.dasotab('Quimera', conn, 1, accepted_taxa = accnames)

In [None]:
# Paths to the input files: the wood density table (CSV) and the raster layers
# passed to the Plot methods below (elevation, precipitation and Chave's E).
# NOTE(review): these are absolute paths on the author's machine; adjust them
# before running elsewhere.
densities_file = '/home/nelsonsalinas/Documents/IDEAM/GlobalWoodDensityDB/gwddb_20180113.csv'
elevation_raster = '/home/nelsonsalinas/Documents/IDEAM/cust_layers/alt.tif'
precipitation_raster = '/home/nelsonsalinas/Documents/IDEAM/cust_layers/precp.tif'
chave_E_raster = '/home/nelsonsalinas/Documents/IDEAM/Chave_E/E.bil'

In [None]:
# Plot data is handled through the Plot class. It contains several methods to procure
# all the basic data required for biomass calculation (forest types, wood densities,
# removal of herbaceous taxa, etc.). Check the source (`comm.py`) for further documentation.
# NOTE(review): the calls below are kept in their original order; whether any of
# them depends on a previous one is not visible from here -- confirm in `comm.py`.

myplot = comm.Plot(dataframe=table)
myplot.name = 1
myplot.purify()
myplot.coordinates = db_utils.coords('Quimera', conn, 1)
myplot.set_holdridge(elevation_raster, precipitation_raster)
myplot.set_chave_forest(precipitation_raster)
myplot.set_E(chave_E_raster)
myplot.densities_from_file(densities_file)

In [None]:
# Sample tree diameter and wood density uncertainty values

# Samples to draw from density and diameter distribution per tree
iters = 100

# Posterior samples of the allometric coefficients (burn-in removed, chains combined)
post_a = trace.get_values('a', burn = 1000, combine=True)
post_b = trace.get_values('b', burn = 1000, combine=True)
post_c = trace.get_values('c', burn = 1000, combine=True)
post_d = trace.get_values('d', burn = 1000, combine=True)
post_e = trace.get_values('e', burn = 1000, combine=True)

# Draw the coefficients JOINTLY: the same posterior draw index is used for all
# five coefficients so that their posterior correlation is preserved. Sampling
# each coefficient independently would combine values that never occurred
# together in the chain. Draws are taken with replacement, `iters` of them.
draw_idx = np.random.randint(0, len(post_a), iters)
myas = post_a[draw_idx]
mybs = post_b[draw_idx]
mycs = post_c[draw_idx]
myds = post_d[draw_idx]
myes = post_e[draw_idx]

# Multidimensional array of simulated data. Each row contains the AGB values
# simulated for a single tree; column i of every row uses coefficient draw i.
AGB = []

for tree in myplot.stems.itertuples():

    # Diameter sampling: normal error with sd equal to 1% of the diameter
    sdd = tree.Diameter / 100.0
    diams = np.random.normal(tree.Diameter, sdd, iters)

    # Wood density sampling: normal error with a fixed sd of 0.01
    wd = myplot.taxa[myplot.taxa.TaxonID == tree.TaxonID]['Density'].item()
    sdwd = 0.01
    wds = np.random.normal(wd, sdwd, iters)

    # Allometric equation evaluated for all draws at once. The model was
    # fitted on log(AGB), so the prediction is back-transformed with exp()
    # before being stored; summing log values across trees would not give a
    # biomass total.
    # NOTE(review): assumes myplot.E is a scalar -- confirm in `comm.py`.
    # NOTE(review): the residual error (sigma) of the model is not added here.
    log_diams = np.log(diams)
    log_agb = (myas + mybs * myplot.E + mycs * np.log(wds)
               + myds * log_diams + myes * log_diams**2)
    AGB.append(np.exp(log_agb))

AGB = np.array(AGB)

In [None]:
# Simulations to estimate plot AGB
path_sims = 5000

# Each simulation picks, independently for every tree, one of its simulated
# values (uniformly, with replacement) and adds them up. All the picks are
# drawn in a single call instead of path_sims * n_trees Python-level calls,
# which also avoids the Python-2-only `xrange`.
n_trees, n_draws = AGB.shape
picks = np.random.randint(0, n_draws, size=(path_sims, n_trees))

# AGB[t, picks[s, t]] for every simulation s and tree t, summed over trees
total_agb = list(AGB[np.arange(n_trees), picks].sum(axis=1))


In [None]:
# Plot the final AGB distribution: histogram (100 bins) of the simulated plot totals
plt.hist(total_agb, bins=100)