# Allometric equation uncertainty

In [1]:
import allometry
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

In [2]:
# Load the direct-harvest tree table and the site (locality) table from
# Chave et al 2014. Paths are relative to the notebook's working directory.
har = pd.read_csv("../Chave_harvest_db/Chave_GCB_Direct_Harvest_Data.csv")
loc = pd.read_csv("../Chave_harvest_db/Localities.csv")
# Strip the ' forest' substring so Forest_type holds only the type name.
loc['Forest_type'] = loc.Forest_type.str.replace(' forest','')
# print(x) with a single argument behaves identically on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(har.columns)
print(loc.columns)

Index([u'Site', u'DBH_cm', u'Total_height_m', u'AGB_kg', u'Gravity'], dtype='object')
Index([u'Abbreviation', u'Tree_count', u'2005_avail', u'Latitude',
       u'Longitude', u'Locality', u'Forest_type', u'Successional_type',
       u'Mean_annual_temperature', u'Temp_Seasonality', u'Mean_Annual_Precip',
       u'Precip_Seasonality_perc', u'Altitude', u'Evapotranspiration_mm_yr',
       u'Dry_months', u'CWD_mm_yr', u'Ref'],
      dtype='object')


In [3]:
# allometric functions
def est_chaveI(row):
    """Apply ``allometry.chaveI`` to a single harvest row.

    The row's ``Site`` is matched against ``loc.Abbreviation`` to find the
    site's ``Forest_type``. ``.item()`` raises unless exactly one site matches.
    """
    site_mask = loc.Abbreviation == row.Site
    forest = loc.loc[site_mask, 'Forest_type'].item()
    return allometry.chaveI(row.DBH_cm, row.Gravity, forest)

def est_chaveI_or(row):
    """Apply ``allometry.chaveI_original`` to a single harvest row.

    The forest type is looked up in ``loc`` by matching ``row.Site`` against
    ``loc.Abbreviation``. ``.item()`` raises unless exactly one site matches.
    """
    site_mask = loc.Abbreviation == row.Site
    forest = loc.loc[site_mask, 'Forest_type'].item()
    return allometry.chaveI_original(row.DBH_cm, row.Gravity, forest)

def est_E(row):
    """Compute the environmental index E for the site of one harvest row.

    E = (0.178 * TS - 0.938 * CWD - 6.61 * PS) * 1e-3

    where TS, CWD and PS are read from the ``Temp_Seasonality``, ``CWD_mm_yr``
    and ``Precip_Seasonality_perc`` columns of the ``loc`` row whose
    ``Abbreviation`` equals ``row.Site``. ``.item()`` raises unless exactly one
    site matches.
    """
    site = loc[loc.Abbreviation == row.Site]
    temp_seasonality = site['Temp_Seasonality'].item()
    water_deficit = site['CWD_mm_yr'].item()
    precip_seasonality = site['Precip_Seasonality_perc'].item()
    return (0.178 * temp_seasonality - 0.938 * water_deficit - 6.61 * precip_seasonality) * 1e-3

def est_chaveII(row):
    """Apply ``allometry.chaveII`` to one harvest row, passing ``row.E`` as ``e_value``."""
    dbh = row.DBH_cm
    gravity = row.Gravity
    return allometry.chaveII(dbh, gravity, e_value=row.E)

In [4]:
# Row-wise estimates. 'E' must be filled first because est_chaveII reads row.E.
for column, estimator in [('E', est_E),
                          ('ChaveI', est_chaveI),
                          ('ChaveI_or', est_chaveI_or),
                          ('ChaveII', est_chaveII)]:
    har[column] = har.apply(estimator, axis=1)

In [5]:
# Squared residuals on the log scale: (ln(estimate) - ln(observed AGB))**2
log_agb = np.log(har.AGB_kg)
for model_name in ('ChaveI', 'ChaveI_or', 'ChaveII'):
    har[model_name + '_error'] = (np.log(har[model_name]) - log_agb)**2

In [6]:
# SEEs: standard error of the estimate on the log scale, sqrt(SSE / (n - p)).
# The subtracted 4 and 5 presumably are the numbers of fitted coefficients
# (they match the free parameters of chave2005 and chave2014 defined below).
# The whole expression is wrapped in print(...): written as
# `print (expr)**0.5` it only works as a Python 2 statement; on Python 3 it
# parses as `print(expr) ** 0.5` and raises TypeError.
print((har.ChaveI_error.sum() / (har.shape[0]-4))**0.5)
print((har.ChaveI_or_error.sum() / (har.shape[0]-4))**0.5)
print((har.ChaveII_error.sum() / (har.shape[0]-5))**0.5)

0.597036288929
0.57808567889
0.420779217764


In [7]:
# Fitting function coefficients from scratch

def chave2005(X, a, b, c, d):
    """Cubic log-log model, evaluated on the log scale.

    ``X`` is a pair ``(dap, den)``; the return value is
    ``a + b*ln(dap) + c*ln(dap)**2 + d*ln(dap)**3 + ln(den)``.
    The ``ln(den)`` term carries a fixed coefficient of 1, so only ``a``..``d``
    are free. The (data, *params) signature is the one ``curve_fit`` expects.
    """
    diameter, density = X
    ln_d = np.log(diameter)
    return a + b * ln_d + c * ln_d**2 + d * ln_d**3 + np.log(density)

def chave2014(X, a, b, c, d, e):
    """Quadratic log-log model with an environmental term, on the log scale.

    ``X`` is a triple ``(E, den, dap)`` in that order; the return value is
    ``a + b*E + c*ln(den) + d*ln(dap) + e*ln(dap)**2``.
    The (data, *params) signature is the one ``curve_fit`` expects.
    """
    env, density, diameter = X
    ln_d = np.log(diameter)
    return a + b * env + c * np.log(density) + d * ln_d + e * ln_d**2

In [8]:
# Least-squares fit of chave2005 to ln(AGB); the displayed tuple is
# (optimal coefficients a..d, their estimated covariance matrix).
curve_fit(chave2005, (har.DBH_cm, har.Gravity), np.log(har.AGB_kg))

(array([-3.35030644,  3.68832942, -0.29566994,  0.02574888]),
 array([[ 0.12942566, -0.13359906,  0.04347523, -0.00448497],
        [-0.13359906,  0.13954395, -0.04588297,  0.00477556],
        [ 0.04347523, -0.04588297,  0.01523812, -0.00160049],
        [-0.00448497,  0.00477556, -0.00160049,  0.00016959]]))

In [9]:
# Predict AGB with the refitted chave2005 coefficients (the curve_fit result
# rounded to three decimals), back-transform with exp, and recompute the
# log-scale SEE.
har['ChaveI_new'] = np.exp(chave2005((har.DBH_cm, har.Gravity), -3.350, 3.688, -0.296, 0.026))
har['ChaveI_new_error'] = (np.log(har.ChaveI_new) - np.log(har.AGB_kg))**2
# Wrapped in print(...) so the square root is applied before printing on both
# Python 2 and Python 3.
print((har.ChaveI_new_error.sum() / (har.shape[0]-4)) ** 0.5)

0.526537644398


In [10]:
# Least-squares fit of chave2014 to ln(AGB); the data tuple must follow the
# (E, den, dap) order that chave2014 unpacks. The displayed tuple is
# (optimal coefficients a..e, their estimated covariance matrix).
curve_fit(chave2014, (har.E, har.Gravity, har.DBH_cm), np.log(har.AGB_kg))

(array([-2.10942259, -0.89647401,  0.92284733,  2.79430146, -0.04585078]),
 array([[  6.98923097e-03,  -2.73651475e-04,   2.46197208e-04,
          -4.58184091e-03,   7.17341406e-04],
        [ -2.73651475e-04,   3.65234170e-04,  -1.09504322e-04,
           6.04247529e-05,  -2.83776447e-06],
        [  2.46197208e-04,  -1.09504322e-04,   5.41010476e-04,
           1.22203572e-05,   1.06427576e-06],
        [ -4.58184091e-03,   6.04247529e-05,   1.22203572e-05,
           3.16582330e-03,  -5.07408790e-04],
        [  7.17341406e-04,  -2.83776447e-06,   1.06427576e-06,
          -5.07408790e-04,   8.33124928e-05]]))

In [11]:
# Predict AGB with the refitted chave2014 coefficients (the curve_fit result
# rounded to three decimals), back-transform with exp, and recompute the
# log-scale SEE.
har['ChaveII_new'] = np.exp(chave2014((har.E, har.Gravity, har.DBH_cm), -2.109, -0.896,  0.923,  2.794, -0.046))
har['ChaveII_new_error'] = (np.log(har.ChaveII_new) - np.log(har.AGB_kg))**2
# Wrapped in print(...) so the square root is applied before printing on both
# Python 2 and Python 3.
print((har.ChaveII_new_error.sum() / (har.shape[0]-5)) ** 0.5)

0.415318493142


## Maximum likelihood fit - SciPy

In [72]:
from scipy.optimize import minimize

In [79]:
def logLike(true, guess):
    """Gaussian log-likelihood of the residuals ``true - guess``.

    The variance is not a free parameter: it is estimated from the residuals
    with ``np.std`` (NumPy's default ``ddof=0``, i.e. spread about the residual
    mean), while the quadratic term uses the raw sum of squared residuals.

    Returns ``-(n/2)*ln(2*pi) - (n/2)*ln(sigma**2) - SS / (2*sigma**2)``.
    """
    half_n = len(true) / 2.0
    residual = true - guess
    variance = np.std(residual)**2
    sum_sq = np.dot(residual.T, residual)
    return -half_n * np.log(2*np.pi) - half_n * np.log(variance) - \
            (1.0/(2*variance) * sum_sq)

def opt_chave2005(pars):
    """Negative log-likelihood of the cubic log-log model, for ``minimize``.

    ``pars[0..3]`` are the coefficients a..d of
    ``a + b*ln(DBH) + c*ln(DBH)**2 + d*ln(DBH)**3 + ln(Gravity)``, evaluated on
    the global ``har`` table and scored against ``ln(har.AGB_kg)`` by ``logLike``.
    """
    ln_dbh = np.log(har.DBH_cm)
    predicted = pars[0] + pars[1] * ln_dbh + pars[2] * ln_dbh**2 + \
            pars[3] * ln_dbh**3 + np.log(har.Gravity)
    # minimize() looks for a minimum, so flip the sign of the log-likelihood.
    return -1 * logLike(np.log(har.AGB_kg), predicted)

def opt_chave2014(pars):
    """Negative log-likelihood of the model with the E term, for ``minimize``.

    ``pars[0..4]`` are the coefficients a..e of
    ``a + b*E + c*ln(Gravity) + d*ln(DBH) + e*ln(DBH)**2``, evaluated on the
    global ``har`` table and scored against ``ln(har.AGB_kg)`` by ``logLike``.
    """
    ln_dbh = np.log(har.DBH_cm)
    predicted = pars[0] + pars[1] * har.E + pars[2] * np.log(har.Gravity) + pars[3] * ln_dbh \
        + pars[4] * ln_dbh**2
    # minimize() looks for a minimum, so flip the sign of the log-likelihood.
    return -1 * logLike(np.log(har.AGB_kg), predicted)

### Chave 2005

In [77]:
# Rounded curve_fit solution for chave2005, kept for reference:
#coeffs = np.array([-3.350, 3.688, -0.296, 0.026])
# Starting point actually used; it differs from the solution above,
# presumably to check that the optimizer converges back to it.
coeffs = np.array([-2.350, 2.688, -1.296, 0.026])
#opt_chave2005(coeffs)
# Minimise the negative log-likelihood with BFGS (gradient estimated numerically,
# since no jac is supplied).
res = minimize(opt_chave2005, coeffs, method='BFGS')

In [78]:
# Display the optimizer result; check `success` and `message` before relying on `x`.
res

      fun: 3110.9083213173753
 hess_inv: array([[ 0.13983882, -0.14478274,  0.04725636, -0.00488957],
       [-0.14478274,  0.15184922, -0.05013124,  0.00523845],
       [ 0.04725636, -0.05013124,  0.01673074, -0.00176555],
       [-0.00488957,  0.00523845, -0.00176555,  0.00018807]])
      jac: array([-0.00506592, -0.0043335 , -0.00579834, -0.01083374])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 425
      nit: 18
     njev: 69
   status: 2
  success: False
        x: array([-3.35027032,  3.68827668, -0.29564789,  0.02574613])

### Chave 2014

In [80]:
# Rounded curve_fit solution for chave2014, kept for reference:
#coeffs = np.array([-2.109, -0.896,  0.923,  2.794, -0.046])
# Starting point actually used; it differs from the solution above,
# presumably to check that the optimizer converges back to it.
coeffs = np.array([-3.109, -1.896,  1.923,  1.794, 1.046])
# Minimise the negative log-likelihood with BFGS (gradient estimated numerically,
# since no jac is supplied).
res = minimize(opt_chave2014, coeffs, method='BFGS')

In [81]:
# Display the optimizer result; check `success` and `message` before relying on `x`.
res

      fun: 2160.528919892148
 hess_inv: array([[  3.85710046e-03,   1.09729450e-03,   1.07812261e-03,
         -3.09214706e-03,   5.84385688e-04],
       [  1.09729450e-03,   4.23187321e-04,   7.68583429e-05,
         -9.49080921e-04,   1.74061446e-04],
       [  1.07812261e-03,   7.68583429e-05,   7.87634297e-04,
         -6.84315569e-04,   1.36771250e-04],
       [ -3.09214706e-03,  -9.49080921e-04,  -6.84315569e-04,
          2.65421608e-03,  -5.09667353e-04],
       [  5.84385688e-04,   1.74061446e-04,   1.36771250e-04,
         -5.09667353e-04,   9.94911498e-05]])
      jac: array([ -6.10351562e-05,   6.10351562e-05,   6.10351562e-05,
        -6.10351562e-05,   3.05175781e-05])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 288
      nit: 21
     njev: 41
   status: 2
  success: False
        x: array([-2.10942915, -0.896474  ,  0.92284721,  2.79430614, -0.04585156])

## Maximum likelihood fit - PyMC3 (MCMC sampling and MAP estimate)
### Equation Chave I

In [None]:
import pymc3

In [None]:
mymodel = pymc3.Model()
with mymodel:
    # Model: a + b * np.log(dap) + c * np.log(dap)**2 + d * np.log(dap)**3 + np.log(den)
    #
    # pymc3.Uniform defaults to lower=0, upper=1. The least-squares fit of the
    # same model gives coefficients of about -3.35, 3.69, -0.30 and 0.026, so
    # the default support excludes the solution and pins estimates to the
    # bounds. Use an explicit, wide flat prior instead.
    a = pymc3.Uniform('a', lower=-10, upper=10)
    b = pymc3.Uniform('b', lower=-10, upper=10)
    c = pymc3.Uniform('c', lower=-10, upper=10)
    d = pymc3.Uniform('d', lower=-10, upper=10)
    # Residual standard deviation is estimated rather than fixed at 0.4,
    # using the same prior as the Chave II model in this notebook.
    sigma = pymc3.HalfNormal('sigma', sd=1)

    log_dbh = np.log(har.DBH_cm)
    y_exp = a + b * log_dbh + c * log_dbh**2 + d * log_dbh**3 + \
        np.log(har.Gravity)

    # Likelihood of the observed ln(AGB) around the model prediction.
    Y_obs = pymc3.Normal('Y_obs', mu=y_exp, sd=sigma, observed=np.log(har.AGB_kg))

    trace = pymc3.sample(5000, njobs=2)

In [None]:
# Summarise the sampled trace of the Chave I model.
pymc3.summary(trace)

In [None]:
# Point estimate: maximum a posteriori values for the model defined above.
map_estimate = pymc3.find_MAP(model=mymodel)

In [None]:
map_estimate
# Presumably the values noted from an earlier run: 0.1606, 1, 0.262888, 0
# NOTE(review): two of these sit exactly on 0 and 1, which are the default
# bounds of pymc3.Uniform -- check the prior support if this recurs.

## Maximum likelihood fit - PyMC3 (MCMC sampling and MAP estimate)
### Equation Chave II

In [None]:
mymodel = pymc3.Model()
with mymodel:
    # Model: a + b * E + c * np.log(den) + d * np.log(dap) + e * np.log(dap)**2
    #
    # pymc3.Uniform defaults to lower=0, upper=1. The least-squares fit of the
    # same model gives coefficients of about -2.11, -0.90, 0.92, 2.79 and
    # -0.046, so the default support excludes the solution. Use an explicit,
    # wide flat prior instead.
    a = pymc3.Uniform('a', lower=-10, upper=10)
    b = pymc3.Uniform('b', lower=-10, upper=10)
    c = pymc3.Uniform('c', lower=-10, upper=10)
    d = pymc3.Uniform('d', lower=-10, upper=10)
    e = pymc3.Uniform('e', lower=-10, upper=10)
    # Residual standard deviation, constrained to be positive.
    sigma = pymc3.HalfNormal('sigma', sd=1)

    log_dbh = np.log(har.DBH_cm)
    y_exp = a + b * har.E + c * np.log(har.Gravity) + d * log_dbh + e * log_dbh**2

    # Likelihood of the observed ln(AGB) around the model prediction.
    Y_obs = pymc3.Normal('Y_obs', mu=y_exp, sd=sigma, observed=np.log(har.AGB_kg))

    trace = pymc3.sample(5000, njobs=2)

In [None]:
# Convergence diagnostic: effective sample size computed from the sampled trace.
pymc3.diagnostics.effective_n(trace)

In [None]:
# Summarise the sampled trace of the Chave II model.
pymc3.summary(trace)

In [None]:
# Point estimate: maximum a posteriori values for the model defined above.
map_estimate = pymc3.find_MAP(model=mymodel)

In [None]:
# Display the MAP estimate computed in the previous cell.
map_estimate