In [85]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<img width=50px  src = 'https://apps.fs.usda.gov/lcms-viewer/images/lcms-icon.png'>

# LCMS Map Validation

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/redcastle-resources/lcms-training/blob/main/7-Map_Validation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/redcastle-resources/lcms-training/blob/main/7-Map_Validation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://github.com/redcastle-resources/lcms-training/blob/main/7-Map_Validation.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>
<br/><br/><br/>


## Overview


This notebook teaches how to assess map accuracy of LCMS outputs

### Objective

In this tutorial, you learn how to assess the map accuracy of LCMS map outputs

This tutorial uses the following Google Cloud services:

- `Google Earth Engine`

The steps performed include:

- Understanding the difference between model and map accuracy
- Simulating map accuracy with k fold cross validation

## Before you begin

### If you are working in Workbench: Set your current URL under `workbench_url`
This gives the Map Viewer a url in which to host the viewer we will be generating. 
* This will be in your URL/search bar at the top of the browser window you are currently in
* It will look something like `https://1234567890122-dot-us-west3.notebooks.googleusercontent.com/` (See the image below)

![workspace url](img/workspace-url.png)

### Set a folder to use for all exports under `export_path_root` 
* This folder should be an assets folder in an existing GEE project.
* By default, this folder is the same as the pre-baked folder (where outputs have already been created). 
* If you would like to create your own outputs, specify a different path for `export_path_root`, but leave the `pre_baked_path_root` as it was. This way, the pre-baked outputs can be shown at the end, instead of waiting for all exports to finish.
* It will be something like `projects/projectID/assets/newFolder`
* This folder does not have to already exist. If it does not exist, it will be created

**If you are working in Qwiklabs and wish to export:** Copy the project ID from the 'Start Lab' screen into the `projectID` field in `export_path_root`.

In [1]:
workbench_url = 'https://559cdf0b5fe9790f-dot-us-central1.notebooks.googleusercontent.com'
pre_baked_path_root  = 'projects/rcr-gee/assets/lcms-training'
export_path_root = pre_baked_path_root

print('Done')

Done


# Installation
First, install necessary Python packages. Uncomment the first line to upgrade geeViz if necessary.

Note that for this module, we're also importing many data science packages such as pandas. 

In [None]:
#Module imports
#!python -m pip install geeViz --upgrade
try:
    import geeViz.getImagesLib as getImagesLib
except:
    !python -m pip install geeViz
    import geeViz.getImagesLib as getImagesLib

import geeViz.changeDetectionLib as changeDetectionLib
import geeViz.assetManagerLib as aml
import geeViz.taskManagerLib as tml
import geeViz.gee2Pandas as g2p
import inspect,operator,os
import matplotlib.pyplot as plt
import pandas as pd  
import numpy as np


try:
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GroupKFold
    from sklearn.metrics import accuracy_score,classification_report,balanced_accuracy_score,cohen_kappa_score
    from sklearn import metrics 
except:
    !pip install -U scikit-learn
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GroupKFold
    from sklearn.metrics import accuracy_score,classification_report,balanced_accuracy_score,cohen_kappa_score
    from sklearn import metrics 
# from IPython.display import IFrame,display, HTML
ee = getImagesLib.ee
Map = getImagesLib.Map

# Can set the port used for viewing map outputs
Map.port = 1235
print('Done')


Collecting geeViz
  Obtaining dependency information for geeViz from https://files.pythonhosted.org/packages/e3/8a/56eed58bf36bcf0c5fbd10b7bc355e5b77fddfec8071c9f807431624ee65/geeViz-2023.8.7-py3-none-any.whl.metadata
  Downloading geeViz-2023.8.7-py3-none-any.whl.metadata (4.1 kB)
Collecting earthengine-api (from geeViz)
  Downloading earthengine-api-0.1.369.tar.gz (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.7/249.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting folium (from geeViz)
  Downloading folium-0.14.0-py2.py3-none-any.whl (102 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.3/102.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting simpledbf (from geeViz)
  Downloading simpledbf-0.2.6.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting google-api-python-client>=1.12.1 (from earthengine-api->geeViz)
  Obtaining dependen

## Set up your work environment

Create a folder in your export path where you will export the composites. In addition, create a blank image collection where your composites will live.

Currently, when running within Colab or Workbench, geeView uses a different project to authenticate through, so you may need to make your asset public to view from within Colab.

In [15]:
# Bring in all folders/collections that are needed
# These must already exist as they are created in previous notebooks

export_timeSync_folder = f'{export_path_root}/lcms-training_module-4_timeSync'

export_assembledLCMSOutputs_collection = f'{export_path_root}/lcms-training_module-6_assembledLCMSOutputs'

# This is the pre-made TimeSync data
# Creating this dataset is not covered in this set of notebooks
timeSync_featureCollection = 'projects/lcms-292214/assets/R8/PR_USVI/TimeSync/18_PRVI_AllPlots_TimeSync_Annualized_Table_secLC'

# The model options table (created in module 5.1, but stored in the lcms-training repository)
model_options_csv_filename = './tables/LCMS_model_options_table.csv'

print('Done')

Done


In [7]:
# set up map
Map.clearMap()

# reset port if necessary
Map.port = 1235
Map.proxy_url = workbench_url

print('Done')

Done


In [9]:

# First, we'll need to repeat steps from Module 5 and download our reference data to a local location
# Bring in raw TS data
timeSyncData = ee.FeatureCollection(timeSync_featureCollection)
timeSync_fields = timeSyncData.first().toDictionary().keys().getInfo()
# Now lets bring in all training data and prep it for modeling
assets = ee.data.listAssets({'parent': export_timeSync_folder})['assets']

# You may need to change the permissions for viewing model outputs in geeViz
# Uncomment this if needed
# for asset in assets:aml.updateACL(asset['name'],writers = [],all_users_can_read = True,readers = [])

# Read in each year of extracted TimsSync data
training_data = ee.FeatureCollection([ee.FeatureCollection(asset['name']) for asset in assets]).flatten()

# Bring in existing LCMS data for the class names, numbers, and colors
lcms_viz_dict = ee.ImageCollection("USFS/GTAC/LCMS/v2020-6").first().toDictionary().getInfo()
                                             
print('LCMS class code, names, and colors:',lcms_viz_dict)


# Get the field names for prediction
# Find any field that was not in the original TimeSync data and assume that is a predictor variable
all_fields = training_data.first().toDictionary().keys().getInfo()
predictor_field_names = [field for field in all_fields if field not in timeSync_fields]

# Filter out any non null values (any training plot with missing predictor data will cause the model to fail entirely)
training_data = training_data.filter(ee.Filter.notNull(predictor_field_names))

print('Done')

LCMS class code, names, and colors: {'Change_class_names': ['Stable', 'Slow Loss', 'Fast Loss', 'Gain', 'Non-Processing Area Mask'], 'Change_class_palette': ['3d4551', 'f39268', 'd54309', '00a398', '1b1716'], 'Change_class_values': [1, 2, 3, 4, 5], 'Land_Cover_class_names': ['Trees', 'Tall Shrubs & Trees Mix (SEAK Only)', 'Shrubs & Trees Mix', 'Grass/Forb/Herb & Trees Mix', 'Barren & Trees Mix', 'Tall Shrubs (SEAK Only)', 'Shrubs', 'Grass/Forb/Herb & Shrubs Mix', 'Barren & Shrubs Mix', 'Grass/Forb/Herb', 'Barren & Grass/Forb/Herb Mix', 'Barren or Impervious', 'Snow or Ice', 'Water', 'Non-Processing Area Mask'], 'Land_Cover_class_palette': ['005e00', '008000', '00cc00', 'b3ff1a', '99ff99', 'b30088', 'e68a00', 'ffad33', 'ffe0b3', 'ffff00', 'aa7700', 'd3bf9b', 'ffffff', '4780f3', '1b1716'], 'Land_Cover_class_values': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 'Land_Use_class_names': ['Agriculture', 'Developed', 'Forest', 'Non-Forest Wetland', 'Other', 'Rangeland or Pasture', 'No

In [10]:
# Now, we'll crosswalk the training fields to numeric codes
# The TimeSync fields are a string by default
# They must be a number for modeling
# Set up lookup dictionaries to convert the names to numeric codes
land_cover_name_code_dict = ee.Dictionary({'TREES':1,
                             'TSHRUBS-TRE':2,
                             'SHRUBS-TRE':3,
                             'GRASS-TREE':4,
                             'BARREN-TRE':5,
                             'TSHRUBS':6,
                             'SHRUBS':7,
                             'GRASS-SHRU':8,
                             'BARREN-SHR':9,
                             'GRASS':10,
                             'BARREN-GRA':11,
                             'BARREN-IMP':12,
                             'BARREN-IMP':12,
                             'WATER':14
                            })
land_use_name_code_dict = ee.Dictionary({'Agriculture':1,
                           'Developed':2,
                           'Forest':3,
                           'Non-forest Wetland':4,
                           'Other':5,
                           'Rangeland':6
                          })

change_code_dict = ee.Dictionary({'Debris': 3, 
                                  'Fire': 3, 
                                  'Growth/Recovery': 4, 
                                  'Harvest': 3, 'Hydrology': 3, 
                                  'Mechanical': 3, 
                                  'Other': 3, 
                                  'Spectral Decline': 2, 
                                  'Stable': 1, 
                                  'Structural Decline': 2, 
                                  'Wind/Ice': 3})

reference_field_dict = {'Land_Cover':{'field':'DOM_SEC_LC','name_code_dict':land_cover_name_code_dict},
                        'Land_Use':{'field':'DOM_LU','name_code_dict':land_use_name_code_dict},
                        'Change':{'field':'CP','name_code_dict':change_code_dict,
                                  'fields':['Slow Loss', 'Fast Loss', 'Gain']}
                       }
# Make a function that will get the code for a given name and set it
# We could also use the remap function to accomplish this
def set_class_code(plot,product):
    name_fieldName = reference_field_dict[product]['field']
    code_fieldName = ee.String(name_fieldName).cat('_Code')
    name = ee.String(plot.get(name_fieldName))
    code = reference_field_dict[product]['name_code_dict'].get(name)
    plot = plot.set(code_fieldName,code)
    return plot
                    
                    
    # print(name_fieldName,code_fieldName.getInfo(),name.getInfo(),code.getInfo())
            
# set_class_code(training_data.first(),'Land_Cover')
for product in list(reference_field_dict.keys()):
    print('Crosswalking:',product)
    training_data = training_data.map(lambda f:set_class_code(f,product))

# Now will download the training table to a local location

local_model_data_folder = '/tmp/lcms-training/local_modeling'
local_training_csv = os.path.join(local_model_data_folder,'timeSync_training_table.csv')


if not os.path.exists(local_model_data_folder):os.makedirs(local_model_data_folder)

# Download the training data from a featureCollection to a local CSV
# This function will automatically break the featureCollection into 5000 feature featureCollections
# if it is larger than the 5000 feature limit set by GEE
g2p.featureCollection_to_csv(training_data,local_training_csv,overwrite = False)

# Once the table is store locally, read it in
training_df = pd.read_csv(local_training_csv)

training_df.describe()
print('Done')

Crosswalking: Land_Cover
Crosswalking: Land_Use
Crosswalking: Change
/tmp/lcms-training/local_modeling/timeSync_training_table.csv  already exists
Done


In [16]:
model_options = pd.read_csv(model_options_csv_filename)

# Filter out to only have rows from the non correlated top 30 predictors
# Any subset of predictors could be used here, but this one should work well
model_options = model_options[model_options['Model Name'] == 'Non-correlated Predictors Top 30']

display(model_options)

print('Done')

Unnamed: 0,Product Name,Model Name,OOB Acc,Overall Acc,Balanced Acc,Kappa,Var Imp
3,Change,Non-correlated Predictors Top 30,0.892001,0.833756,0.286863,0.099097,"['swir2_LT_slope', 'swir2_LT_fitted', 'NDVI_LT..."
7,Land_Cover,Non-correlated Predictors Top 30,0.973405,0.707922,0.290145,0.512691,"['red_LT_fitted', 'green_CCDC_fitted', 'slope'..."
11,Land_Use,Non-correlated Predictors Top 30,0.994287,0.815904,0.627273,0.697383,"['red_LT_fitted', 'red_CCDC_fitted', 'NDVI_CCD..."


Done


In [104]:
# LCMS does not have enough training samples to simply ommit 20% or so from training our final models
# Since our assemblage process introduces differences between the model predicted class, and our sample
# is based on a stratified random sample design, we cannot simply use the out-of-bag samples from the random forest model
# We have to use a method that will simulate the map accuracy that can account for the likelihood of each samples inclusion
# (strata weights), as well as also allow us to introduce any assemblage rules that are not typically part of the underlying 
# random forest model
# 

products = ['Change','Land_Cover','Land_Use']
KFoldInfo = {}
# kfoldinfo_pickle_filename = pickleName+'.p'
# KFoldInfo['TrainingData'] = training_df.copy()

# strata = allTrainingData[stratColumn].squeeze()
groups = training_df['PLOTID'].squeeze()
k = 5
n_jobs = 4
gkf = GroupKFold(n_splits=k)
foldNum = 1
seed = 999
nTrees = 50
# Fit and Train model
# Set up a random forest model
rf = RandomForestClassifier(n_estimators = nTrees, random_state=seed,oob_score=False,n_jobs = n_jobs)
KFoldInfo['STRATUM'] = []
KFoldInfo['STRATUM_PIXEL_COUNT'] = []
KFoldInfo['STRATUM_PIXEL_PCT'] = []
for train_index, test_index in gkf.split(training_df, training_df, groups):
    KFoldInfo[foldNum] = {}
    print()
    print('Fold Number: '+str(foldNum))
    print()
    print(len(train_index),len(test_index))
    # Indices of training and test samples
    KFoldInfo[foldNum]['Indices'] = {\
        'Train': train_index,
        'Test': test_index}

    # Strata of training and test samples
    # gk_strata_train, gk_strata_test = strata.iloc[train_index], strata.iloc[test_index]
#     KFoldInfo[str(foldNum)]['Strata'] = {\
#         'Train': gk_strata_train,
#         'Test': gk_strata_test}

    # Run model and predict probabilities
    KFoldInfo[foldNum]['Probabilities'] = {}
    KFoldInfo[foldNum]['Predictions'] = {}
    KFoldInfo[foldNum]['Model'] = {}
    KFoldInfo[foldNum]['Ref'] = {}
    
    
    k_train,k_test = training_df.iloc[train_index], training_df.iloc[test_index]
    
    # Get the strata info
    KFoldInfo['STRATUM'].extend(k_test['STRATUM'])
    KFoldInfo['STRATUM_PIXEL_COUNT'].extend(k_test['STRATUM_PIXEL_COUNT'])
    KFoldInfo['STRATUM_PIXEL_PCT'].extend(k_test['STRATUM_PIXEL_PCT'])
    for product_name in products:
        print(foldNum,product_name)
        
        # Pull predictors from table from 5.1
        # Some parsing is needed to read it in properly
        predictor_variable_names = model_options[model_options['Product Name'] == product_name]['Var Imp'].values[0]
        predictor_variable_names = predictor_variable_names[1:-1]
        predictor_variable_names=predictor_variable_names.replace("'","").split(', ')
        
        # Get X and Y points for each group  
        kx_train = k_train[predictor_variable_names]
        ky_train = k_train[reference_field_dict[product_name]['field']+'_Code']
        
        kx_test = k_test[predictor_variable_names]
        ky_test = k_test[reference_field_dict[product_name]['field']+'_Code']
        
        rf.fit(kx_train,ky_train)
        
        # Get Predicted Probabilities for each Test Point
        if product_name in ['Land_Cover','Land_Use']:
            ky_pred = rf.predict(kx_test)
            
        else:
            ky_pred = rf.predict_proba(kx_test)
        KFoldInfo[foldNum]['Predictions'][product_name] = ky_pred
        KFoldInfo[foldNum]['Ref'][product_name] = ky_test
        print(len(ky_pred))
    foldNum+=1

print('Done')


Fold Number: 1

15826 3952
1 Change
3952
1 Land_Cover
3952
1 Land_Use
3952

Fold Number: 2

15819 3959
2 Change
3959
2 Land_Cover
3959
2 Land_Use
3959

Fold Number: 3

15820 3958
3 Change
3958
3 Land_Cover
3958
3 Land_Use
3958

Fold Number: 4

15820 3958
4 Change
3958
4 Land_Cover
3958
4 Land_Use
3958

Fold Number: 5

15827 3951
5 Change
3951
5 Land_Cover
3951
5 Land_Use
3951
Done


In [108]:
stratum = [str(i) for i in KFoldInfo['STRATUM']]

stratum_counts = KFoldInfo['STRATUM_PIXEL_COUNT']
stratum_pct = KFoldInfo['STRATUM_PIXEL_PCT']
for product_name in products[1:2]:
    preds = []
    refs = []
    for foldNum in range(1,k+1):
        predsFold = KFoldInfo[foldNum]['Predictions'][product_name]
        refsFold = KFoldInfo[foldNum]['Ref'][product_name]
        preds.extend(predsFold)
        refs.extend(refsFold)
        
    refs = pd.Series(refs)
    preds = pd.Series(preds)
strata_dict = dict(set(zip(stratum,stratum_counts)))
print(strata_dict)
print('Done')

{'3': 110101, '6': 92958, '7': 3519261, '11': 102288, '4': 344785, '2': 113212, '5': 71185, '8': 3325898, '10': 258676, '1': 1450331, '9': 865643}
Done


In [110]:
print(refs.unique())
print(preds)

0        10
1        10
2         1
3         1
4         1
         ..
19773     1
19774     1
19775     3
19776     1
19777     1
Length: 19778, dtype: int64
0         1
1        10
2         1
3         1
4         1
         ..
19773     1
19774    12
19775     1
19776     1
19777     1
Length: 19778, dtype: int64


In [109]:
import lcms_scripts.accuracy_and_sampling_lib2 as asl
print(refs.nunique())
asl.get_write_stratified_accuracies(\
    refs,            # The correct classifications
    preds,      # The predicted classifications
    stratum,       # The strata of the same plots as above
    strata_dict,         # Dictionary of the number of pixels in each stratum - defined in LCMSVariables - used for weighting
    lcms_viz_dict[f'{product_name}_class_values'], # Class names - used for looping through classes for users/producers accuracies and areas
    method = 'test',        # This is just a run name, used for printing out accuracies in file. Not really used anymore
    accFile = None)

11
overall_accuracy
Stratum 1 Weight: 0.010737016860571593
Stratum 2 Weight: 0.009065236585725963
Stratum 3 Weight: 0.3431972887962148
Stratum 4 Weight: 0.009975095418153761
Stratum 5 Weight: 0.03362333092589692
Stratum 6 Weight: 0.011040400657750896
Stratum 7 Weight: 0.006941940084284329
Stratum 8 Weight: 0.32434058639377794
Stratum 9 Weight: 0.025226006788541592
Stratum 10 Weight: 0.14143584890609223
Stratum 11 Weight: 0.08441724858298995
Numerator Values:  [110101.0, 92958.0, 3519261.0, 102288.0, 344785.0, 113212.0, 71185.0, 3325898.0, 258676.0, 1450331.0, 865643.0]
Weights:  [0.010737016860571593, 0.009065236585725963, 0.3431972887962148, 0.009975095418153761, 0.03362333092589692, 0.011040400657750896, 0.006941940084284329, 0.32434058639377794, 0.025226006788541592, 0.14143584890609223, 0.08441724858298995]
Weights sum: 1.0
Out sum 1.0
Overall Accuracy:  1.0 +/- 0.0
balanced_accuracy
Stratum 1 Weight: 0.010737016860571593
Stratum 2 Weight: 0.009065236585725963
Stratum 3 Weight: 0.3

  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 2:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 2:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  3


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 3:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 3:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  4
Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)
  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 4:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 4:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  5
Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 5:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 5:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  6
Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 6:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 6:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  7
Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 7:  nan +/- <function standard_error

  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 8:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 8:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  9


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 9:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 9:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  10


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 10:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 10:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  11


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 11:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 11:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  12


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 12:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 12:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  13


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 13:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 13:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  14


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 14:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 14:  nan +/- <function standard_error at 0x7f0537d04280>

Class:  15


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Users Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Producers Accuracy
Stratum 3: R= nan
Stratum 6: R= nan
Stratum 7: R= nan
Stratum 11: R= nan
Stratum 4: R= nan
Stratum 2: R= nan
Stratum 5: R= nan
Stratum 8: R= nan
Stratum 10: R= nan
Stratum 1: R= nan
Stratum 9: R= nan
Users Accuracy for Class 15:  nan +/- <function standard_error at 0x7f0537d04280>
Producers Accuracy for Class 15:  nan +/- <function standard_error at 0x7f0537d04280>

Area
Stratum 1 Weight: 0.0
Stratum 2 Weight: 0.0
Stratum 3 Weight: 0.0
Stratum 4 Weight: 0.0
Stratum 5 Weight: 0.0
Stratum 6 Weight: 0.0
Stratum 7 Weight: 0.0
Stratum 8 Weight: 0.0
Stratum 9 Weight: 0.0
Stratum 10 Weight: 0.0
Stratum 11 Weight: 0.0
Numerator Values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Weights:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Weights sum: 0.0
Out sum 0.0


  sample_mean_yu = thisStrat['yu'].sum() / n_h_star # yh bar
  sample_mean_xu = thisStrat['xu'].sum() / n_h_star # xh bar
  out = np.sum(numerator_values) / np.sum(denominator_values)


Area
Stratum 1 Weight: 0.0
Stratum 2 Weight: 0.0
Stratum 3 Weight: 0.0
Stratum 4 Weight: 0.0
Stratum 5 Weight: 0.0
Stratum 6 Weight: 0.0
Stratum 7 Weight: 0.0
Stratum 8 Weight: 0.0
Stratum 9 Weight: 0.0
Stratum 10 Weight: 0.0
Stratum 11 Weight: 0.0
Numerator Values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Weights:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Weights sum: 0.0
Out sum 0.0
Estimated Area for Class 3:  0.0
Area
Stratum 1 Weight: 0.0
Stratum 2 Weight: 0.0
Stratum 3 Weight: 0.0
Stratum 4 Weight: 0.0
Stratum 5 Weight: 0.0
Stratum 6 Weight: 0.0
Stratum 7 Weight: 0.0
Stratum 8 Weight: 0.0
Stratum 9 Weight: 0.0
Stratum 10 Weight: 0.0
Stratum 11 Weight: 0.0
Numerator Values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Weights:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Weights sum: 0.0
Out sum 0.0
Estimated Area for Class 4:  0.0
Area
Stratum 1 Weight: 0.0
Stratum 2 Weight: 0.0
Stratum 3 Weight: 0.0
Stratum 4 Weight: 0.0
Stratum 5 Weight: 0.0
Stratum 6 Weight: 0.0
Str

(1.0,
 1.0,
 {1: nan,
  2: nan,
  3: nan,
  4: nan,
  5: nan,
  6: nan,
  7: nan,
  8: nan,
  9: nan,
  10: nan,
  11: nan,
  12: nan,
  13: nan,
  14: nan,
  15: nan},
 {1: nan,
  2: nan,
  3: nan,
  4: nan,
  5: nan,
  6: nan,
  7: nan,
  8: nan,
  9: nan,
  10: nan,
  11: nan,
  12: nan,
  13: nan,
  14: nan,
  15: nan},
 1.0,
 1.0,
 {1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0},
 0.0,
 {1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0},
 {1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0},
 {1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0})

In [None]:
# Another method for computing model accuracy is with cross validation
# This method partitions the data into k parts and leaves one out for each of k iterations
# The held out training points are then used to assess the model accuracy. All held out samples are combined to 
# get the simulated model accuracy
from sklearn.model_selection import cross_val_score
scores = cross_val_score(rf, X, y, cv=10,scoring = 'balanced_accuracy')
print("%0.2f balanced accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))