# Allometric equation uncertainty

In [2]:
import allometry
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

In [11]:
# Load harvest data from Chave et al 2014
har = pd.read_csv("../Chave_harvest_db/Chave_GCB_Direct_Harvest_Data.csv")
loc = pd.read_csv("../Chave_harvest_db/Localities.csv")
loc['Forest_type'] = loc.Forest_type.str.replace(' forest','')
print har.columns
print loc.columns

Index([u'Site', u'DBH_cm', u'Total_height_m', u'AGB_kg', u'Gravity'], dtype='object')
Index([u'Abbreviation', u'Tree_count', u'2005_avail', u'Latitude',
       u'Longitude', u'Locality', u'Forest_type', u'Successional_type',
       u'Mean_annual_temperature', u'Temp_Seasonality', u'Mean_Annual_Precip',
       u'Precip_Seasonality_perc', u'Altitude', u'Evapotranspiration_mm_yr',
       u'Dry_months', u'CWD_mm_yr', u'Ref'],
      dtype='object')


In [61]:
# allometric functions
def est_chaveI(row):
    fo = loc[loc.Abbreviation == row.Site]['Forest_type'].item()
    return allometry.chaveI(row.DBH_cm, row.Gravity, fo)

def est_chaveI_or(row):
    fo = loc[loc.Abbreviation == row.Site]['Forest_type'].item()
    return allometry.chaveI_original(row.DBH_cm, row.Gravity, fo)

def est_E(row):
    #E = ( 0.178 × TS-0.938 × CWD-6.61× PS ) ×10 −3
    ts = loc[loc.Abbreviation == row.Site]['Temp_Seasonality'].item()
    cwd = loc[loc.Abbreviation == row.Site]['CWD_mm_yr'].item()
    ps = loc[loc.Abbreviation == row.Site]['Precip_Seasonality_perc'].item()
    E = (0.178 * ts - 0.938 * cwd - 6.61 * ps) * 1e-3
    return E

def est_chaveII(row):
    return allometry.chaveII(row.DBH_cm, row.Gravity, e_value = row.E)

In [63]:
har['E'] = har.apply(est_E, axis=1)
har['ChaveI'] = har.apply(est_chaveI, axis=1)
har['ChaveI_or'] = har.apply(est_chaveI_or, axis=1)
har['ChaveII'] = har.apply(est_chaveII, axis=1)

In [38]:
# Estimate errors
har['ChaveI_error'] = (np.log(har.ChaveI) - np.log(har.AGB_kg))**2
har['ChaveI_or_error'] = (np.log(har.ChaveI_or) - np.log(har.AGB_kg))**2
har['ChaveII_error'] = (np.log(har.ChaveII) - np.log(har.AGB_kg))**2

In [54]:
# SEEs
print (har.ChaveI_error.sum() / (har.shape[0]-4))**0.5
print (har.ChaveI_or_error.sum() / (har.shape[0]-4))**0.5
print (har.ChaveII_error.sum() / (har.shape[0]-5))**0.5

0.597036288929
0.57808567889
0.420779217764


In [65]:
# Fitting function coefficients from scratch

def chave2005(X, a, b, c, d):
    dap, den = X
    out = a + b * np.log(dap) + c * np.log(dap)**2 + d * np.log(dap)**3 + np.log(den)
    return out

def chave2014(X, a, b, c, d, e):
    E, den, dap = X
    out = a + b * E + c * np.log(den) + d * np.log(dap) + e * np.log(dap)**2
    return out

In [59]:
curve_fit(chave2005, (har.DBH_cm, har.Gravity), np.log(har.AGB_kg))

(array([-3.35030644,  3.68832942, -0.29566994,  0.02574888]),
 array([[ 0.12942566, -0.13359906,  0.04347523, -0.00448497],
        [-0.13359906,  0.13954395, -0.04588297,  0.00477556],
        [ 0.04347523, -0.04588297,  0.01523812, -0.00160049],
        [-0.00448497,  0.00477556, -0.00160049,  0.00016959]]))

In [97]:
har['ChaveI_new'] = np.exp(chave2005((har.DBH_cm, har.Gravity), -3.350, 3.688, -0.296, 0.026))
har['ChaveI_new_error'] = (np.log(har.ChaveI_new) - np.log(har.AGB_kg))**2
print (har.ChaveI_new_error.sum() / (har.shape[0]-4)) ** 0.5

0.526537644398


In [66]:
curve_fit(chave2014, (har.E, har.Gravity, har.DBH_cm), np.log(har.AGB_kg))

(array([-2.10942259, -0.89647401,  0.92284733,  2.79430146, -0.04585078]),
 array([[  6.98923097e-03,  -2.73651475e-04,   2.46197208e-04,
          -4.58184091e-03,   7.17341406e-04],
        [ -2.73651475e-04,   3.65234170e-04,  -1.09504322e-04,
           6.04247529e-05,  -2.83776447e-06],
        [  2.46197208e-04,  -1.09504322e-04,   5.41010476e-04,
           1.22203572e-05,   1.06427576e-06],
        [ -4.58184091e-03,   6.04247529e-05,   1.22203572e-05,
           3.16582330e-03,  -5.07408790e-04],
        [  7.17341406e-04,  -2.83776447e-06,   1.06427576e-06,
          -5.07408790e-04,   8.33124928e-05]]))

In [67]:
har['ChaveII_new'] = np.exp(chave2014((har.E, har.Gravity, har.DBH_cm), -2.10942259, -0.89647401,  0.92284733,  2.79430146, -0.04585078))
har['ChaveII_new_error'] = (np.log(har.ChaveII_new) - np.log(har.AGB_kg))**2
print (har.ChaveII_new_error.sum() / (har.shape[0]-5)) ** 0.5

0.415313704408


## Maximum likelihood fit
### equation Chave I

In [68]:
import pymc3

In [93]:
mymodel = pymc3.Model()
with mymodel:
    #a + b * np.log(dap) + c * np.log(dap)**2 + d * np.log(dap)**3 + np.log(den)
    a = pymc3.Uniform('a')
    b = pymc3.Uniform('b')
    c = pymc3.Uniform('c')
    d = pymc3.Uniform('d')
    #sigma = pymc3.Normal('sigma', mu=0, sd=0.4)
    
    y_exp = a + b * np.log(har.DBH_cm) + c * np.log(har.DBH_cm)**2 + d * np.log(har.DBH_cm)**3 + \
        np.log(har.Gravity)
        
    Y_obs = pymc3.Normal('Y_obs', mu=y_exp, sd=0.4, observed=np.log(har.AGB_kg))
    
    trace = pymc3.sample(5000, njobs=2)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 5500/5500 [00:30<00:00, 183.29it/s]
  % (self._chain_id, mean_accept, target_accept))


In [94]:
pymc3.summary(trace)


a:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.163            0.013            0.000            [0.138, 0.189]

  Posterior quantiles:
  2.5            25             50             75             97.5
  
  0.138          0.155          0.163          0.172          0.189


b:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.998            0.001            0.000            [0.996, 1.000]

  Posterior quantiles:
  2.5            25             50             75             97.5
  
  0.994          0.998          0.999          1.000          1.000


c:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.263            0.001            0.000            [0.260, 0.266]

  Posterior quantiles:
  

In [95]:
map_estimate = pymc3.find_MAP(model=mymodel)

logp = -4,137.8, ||grad|| = 0.73633: 100%|██████████| 61/61 [00:00<00:00, 1139.89it/s]  


In [96]:
map_estimate
# 0.1606, 1, 0.262888, 0 

{'a': array(0.1605989897231933),
 'a_interval__': array(-1.6537780436162095),
 'b': array(0.9999999482293163),
 'b_interval__': array(16.77644174726633),
 'c': array(0.2628893377449346),
 'c_interval__': array(-1.0310048586321585),
 'd': array(8.784848639089234e-10),
 'd_interval__': array(-20.852822437043255)}

## Maximum likelihood fit
### equation Chave II

In [85]:
mymodel = pymc3.Model()
with mymodel:
    #a + b * E + c * np.log(den) + d * np.log(dap) + e * np.log(dap)**2
    a = pymc3.Uniform('a')
    b = pymc3.Uniform('b')
    c = pymc3.Uniform('c')
    d = pymc3.Uniform('d')
    e = pymc3.Uniform('e')
    sigma = pymc3.HalfNormal('sigma', sd=1)
    
    y_exp = a + b * har.E + c * np.log(har.Gravity) + d * np.log(har.DBH_cm) + e * np.log(har.DBH_cm)**2   
        
    Y_obs = pymc3.Normal('Y_obs', mu=y_exp, sd=sigma, observed=np.log(har.AGB_kg))
    
    trace = pymc3.sample(5000, njobs=2)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
  % (self._chain_id, n_diverging))
100%|██████████| 5500/5500 [00:56<00:00, 97.45it/s]
  % (self._chain_id, n_diverging))


In [89]:
pymc3.diagnostics.effective_n(trace)

{'a': 4195.0,
 'b': 8137.0,
 'c': 4545.0,
 'd': 3922.0,
 'e': 4742.0,
 'sigma': 7953.0}

In [86]:
pymc3.summary(trace)


a:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.025            0.017            0.000            [0.000, 0.055]

  Posterior quantiles:
  2.5            25             50             75             97.5
  
  0.001          0.011          0.022          0.035          0.063


b:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.001            0.001            0.000            [0.000, 0.002]

  Posterior quantiles:
  2.5            25             50             75             97.5
  
  0.000          0.000          0.000          0.001          0.003


c:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.642            0.029            0.000            [0.587, 0.698]

  Posterior quantiles:
  

In [87]:
map_estimate = pymc3.find_MAP(model=mymodel)

logp = -3,391.7, ||grad|| = 0.027703: 100%|██████████| 53/53 [00:00<00:00, 831.85it/s]  


In [88]:
map_estimate

{'a': array(0.011405196889127344),
 'a_interval__': array(-4.462215425050448),
 'b': array(3.558043845319378e-21),
 'b_interval__': array(-47.08507604068325),
 'c': array(0.6353570071067556),
 'c_interval__': array(0.5552682835581086),
 'd': array(0.9999991373411675),
 'd_interval__': array(13.963245688658171),
 'e': array(0.2592399273119747),
 'e_interval__': array(-1.0499227911495335),
 'sigma': array(0.5644005444368382),
 'sigma_log__': array(-0.5719910943745348)}