# Model-Assisted (MA) Sampling

In [8]:
PLOTS_SHP = 'data/plots.shp'
NDVI_TIF = 'data/ndvi.tif'

In [64]:
# set up offline graphing for plotly
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

from plotly.offline import iplot
import plotly.graph_objs as go

In [62]:
import numpy as np
import scipy.stats as st

import fiona
import rasterio
import pandas as pd
from shapely.geometry import shape

#### First, a traditional cruise workup

In [35]:
# load plot data (locations and BA)
plots = []
with fiona.open(PLOTS_SHP) as src:
    for feat in src:
        geom = shape(feat['geometry'])
        plots.append({
            'id': feat['properties']['id'],
            'basal_area': feat['properties']['basal_area'],
            'x': geom.x,
            'y': geom.y
        })

plots_df = pd.DataFrame(plots)
print len(plots_df), 'plots'

9 plots


In [36]:
plots_df[['id', 'basal_area']]

Unnamed: 0,id,basal_area
0,1,150
1,2,125
2,3,200
3,4,145
4,5,167
5,6,114
6,7,0
7,8,155
8,9,88


In [25]:
# using traditional statistics, 
# calculate 90% confidence interval for mean BA

bas = [p['basal_area'] for p in plots]
trad_mean = np.mean(bas)
trad_std_err = st.sem(bas)
trad_conf_interval = st.t.interval(0.90, len(bas)-1, loc=trad_mean, scale=trad_std_err)

print 'Traditional Statistics'
print 'Mean: %.2f' % trad_mean
print 'Standard Error: %.2f' % trad_std_err
print '90%% Confidence Interval: %.2f - %.2f' % trad_conf_interval

Traditional Statistics
Mean: 127.11
Standard Error: 19.14
90% Confidence Interval: 91.51 - 162.71


#### Now, time to use MA sampling theory
We will use a NDVI layer computed from a Landsat image
as our auxiliary data

In [59]:
# sample NDVI raster by plot locations

with rasterio.open(NDVI_TIF) as src:
    ndvi = [
        x[0] for x in  # have to parse out of returned array
        list(src.sample(plots_df[['x', 'y']].values))
    ]

plots_df['ndvi'] = ndvi

In [60]:
# NDVI vs. BA table
plots_df[['id', 'basal_area', 'ndvi']]

Unnamed: 0,id,basal_area,ndvi
0,1,150,0.105824
1,2,125,0.110767
2,3,200,0.057252
3,4,145,0.114119
4,5,167,0.105423
5,6,114,0.107453
6,7,0,0.235615
7,8,155,0.089554
8,9,88,0.11905


In [108]:
# define a model to predict BA from NDVI

slope, intercept, r_val, p_val, std_err = st.linregress(plots_df['ndvi'], plots_df['basal_area'])

def ndvi_to_ba(ndvi):
    # NOTE: added the 1.1 here so that residuals wouldn't sum to zero
    return (slope * 1.1 * ndvi) + intercept

In [109]:
# graph NDVI vs. BA
raw_data = go.Scatter(
    name='Raw Data',
    x=plots_df['ndvi'],
    y=plots_df['basal_area'],
    mode='markers',
    marker={'size': 10}
)

ticks = np.linspace(0, max(plots_df['ndvi']))
model_trace = go.Scatter(
    name='Model',
    x=ticks,
    y=map(ndvi_to_ba, ticks)
)

data = [model_trace, raw_data]

layout = go.Layout(
    xaxis={'title': 'NDVI'},
    yaxis={'title': 'Basal Area'}
)

figure = {
    'data': data,
    'layout': layout
}

iplot(figure)

In [110]:
# calculate model residuals
plots_df['predicted_ba'] = map(ndvi_to_ba, plots_df['ndvi'])
plots_df['residual'] = plots_df['predicted_ba'] - plots_df['basal_area']

plots_df[['id', 'basal_area', 'predicted_ba', 'residual']]

Unnamed: 0,id,basal_area,predicted_ba,residual
0,1,150,126.789587,-23.210413
1,2,125,120.738617,-4.261383
2,3,200,186.252588,-13.747412
3,4,145,116.634477,-28.365523
4,5,167,127.280089,-39.719911
5,6,114,124.79499,10.79499
6,7,0,-32.103667,-32.103667
7,8,155,146.70806,-8.29194
8,9,88,110.597335,22.597335


In [111]:
# predict BA for each pixel
with rasterio.open(NDVI_TIF) as src:
    ndvi_vals = np.ndarray.flatten(src.read(1))
    predicted_bas = map(ndvi_to_ba, ndvi_vals)

In [112]:
# calculate model-assisted mean BA
raw_mean_pix_ba = np.mean(predicted_bas)
mean_residual = np.mean(plots_df['residual'])
ma_mean = raw_mean_pix_ba - mean_residual

print 'Raw mean pixel BA: %.2f' % raw_mean_pix_ba
print 'Mean residual: %.2f' % mean_residual
print 'Model-assisted mean: %.2f' % ma_mean

Raw mean pixel BA: 118.11
Mean residual: -12.92
Model-assisted mean: 131.03


In [120]:
# calculate variance in our model-assisted estimate of the mean
n = len(plots_df)
ma_var = (1.0 / (n * (n-1))) * sum((plots_df['residual'] - mean_residual)**2)
print 'Model-assisted variance: %.2f' % ma_var

Model-assisted variance: 46.55


In [122]:
# calculate standard error of model-assisted estimate
ma_std_err = np.sqrt(ma_var) / np.sqrt(n)
print 'Model-assisted standard error: %.2f' % ma_std_err

Model-assisted standard error: 2.27


In [124]:
# compare to traditional cruise
comparison_df = pd.DataFrame([
    {'type': 'Traditional', 'mean': trad_mean, 'std_err': trad_std_err},
    {'type': 'Model-Assisted', 'mean': ma_mean, 'std_err': ma_std_err},
])

comparison_df[['type', 'mean', 'std_err']]

Unnamed: 0,type,mean,std_err
0,Traditional,127.111111,19.144512
1,Model-Assisted,131.029093,2.274273
