In [None]:
import json
import os
import sys
from pathlib import Path

import pandas as pd

%matplotlib inline

In [None]:
# Add the helper-module directory (path relative to this notebook's working directory) to the
# import search path so the two modules below can be imported.
sys.path.append(r"../LUCinSA_helpers")
# NOTE(review): star imports hide where names come from; the helper functions called later in this
# notebook presumably come from these two modules -- confirm, and consider explicit imports.
from var_dataframe import *
from rf import *

In [None]:
'''
PARAMETERS: modify in notebook_params notebook, then run that notebook and this cell to update here
DO NOT modify this cell
'''

%store -r basic_config
print(" modelling year is (filter_year): {} (this is first year if season spans two years)".format(basic_config['filter_yr']))
print("sample point file is: {}".format(basic_config['ptfile']))
%store -r timeseries_params
if timeseries_params['load_samp'] == False:
    print('using polygon file to make new point file: {}'.format(basic_config['polyfile']) )
    print('will sample {} pts per polygon'.format(timeseries_params['npts']))
print('first year of calendar sequence is: {} \n'.format(timeseries_params['start_mo']))
%store -r classification_params
print("Classification_Params: \n" 
      " output files are saved to (model_dir): {} \n" 
      " shared input files are in (main_model_dir): {} \n"
      " sample_model = {} \n feature_model = {} \n model_name = {} \n"
      " the full sample pt file: {} \n"
      " the full sample dataframe with the feature model applied: {} \n"
      " the subset pt file based on the sample model: {} \n"
      " the feature model dictionary: {}"
      .format(classification_params['local_model_dir'],classification_params['main_model_dir'],
              classification_params['sample_model'],classification_params['feature_model'],classification_params['model_name'],
              basic_config['ptfile'],classification_params['samp_pix_vars'],classification_params['samp_pts'],
              classification_params['feature_mod_dict'] 
              ))

### Check feature model settings

## See existing models

In [None]:
# Read the feature-model dictionary (a JSON file) and display its first entries as a table,
# one row per top-level key.
# The file is opened read-only: nothing is written here, so the previous 'r+' mode only added
# an unnecessary write-permission requirement on the dictionary file.
with open(classification_params['feature_mod_dict'], 'r') as feature_model_dict:
    dic = json.load(feature_model_dict)
models = pd.DataFrame.from_dict(dic, orient='index')
models.head(n=10)

## Load feature model info (if existing) or save feature model info (if new)

##### IF new feature model: Make sure spec_indices, si_vars, singleton_vars and poly_vars are set correctly in parameters (if not, set and rerun parameters cell above)

In [None]:
# Path to the feature-model dictionary, passed as the first argument below.
mod_dict = classification_params['feature_mod_dict']

# Call getset_feature_model with the dictionary path, the feature-model name and the six
# variable-list parameters (in the order listed), then unpack its eight return values.
(spec_indices, si_vars, spec_indices_pheno, pheno_vars,
 singleton_vars, poly_vars, combo_bands, band_names) = getset_feature_model(
    mod_dict,
    classification_params['feature_model'],
    *(classification_params[key] for key in (
        'spec_indices',
        'si_vars',
        'spec_indices_pheno',
        'pheno_vars',
        'singleton_vars',
        'poly_vars',
    )),
)
print('Band names: {}'.format(band_names))

## Steps to make variable dataframe -- These steps have already been run - skip to load existing variable dataframe
#### Make variable stack for cells with sample data (Note: This is pretty heavy and should be run from SLURM with bash script (rf0_raster_var_stack))

In [None]:
# Disabled call kept as a string literal so it does not execute when the cell runs; remove the
# triple quotes to run it. The template now has the comma that was missing before the final
# argument, so it parses once un-quoted. The trailing ';' suppresses the echo of the string.
'''
make_variable_stack(basic_config['smooth_dir'],
                    basic_config['grid_cell'],
                    classification_params['feature_model'],
                    basic_config['yr_range'][0],
                    timeseries_params['start_mo'],
                    classification_params['spec_indices'],
                    classification_params['si_vars'],
                    classification_params['spec_indices_pheno'],
                    classification_params['pheno_vars'],
                    classification_params['feature_mod_dict'],
                    classification_params['singleton_vars'] ,
                    classification_params['singleton_var_dict'],
                    classification_params['poly_vars'], 
                    classification_params['poly_var_path'],
                    classification_params['combo_bands'],
                    None)
''';

### Make variable dataframe (use all sample points initially -- can then reduce in Notebook 6b) (Note: best to run through SLURM with bash script (rf1_var_data_frame.sh))

In [None]:
# Disabled call kept as a string literal so it does not execute when the cell runs;
# remove the surrounding triple quotes to run make_var_dataframe from the notebook.
'''
make_var_dataframe(basic_config['smooth_dir'],
                  classification_params['local_model_dir'],
                  basic_config['grid_file'],
                  basic_config['grid_cells'],
                  classification_params['feature_model'],
                  classification_params['feature_mod_dict'],
                  basic_config['yr_range'][0],
                  basic_config['polyfile'],
                  oldest=timeseries_params['oldest_samp'],
                  newest=timeseries_params['newest_samp'],
                  npts=timeseries_params['npts'], 
                  seed=timeseries_params['seed1'],
                  load_samp=timeseries_params['load_samp'], 
                  ptfile=basic_config['ptfile'])
'''

# Alternatively, can append new variables to an existing dataframe without stacking them (much less storage required, but
# will need to stack final set of variables for surface-level classification)
#
# The call below is disabled (kept as a string literal; the trailing ';' suppresses its echo).
# Two problems in the template were fixed so that it parses once un-quoted: the closing
# parenthesis was missing, and a keyword argument (ptfile=) preceded positional arguments,
# which Python rejects.
# NOTE(review): ptfile is now passed positionally in the second slot -- confirm against the
# signature of append_feature_dataframe before running.

'''
append_feature_dataframe(basic_config['smooth_dir'],
                         basic_config['ptfile'],
                         classification_params['samp_pix_vars'],
                         basic_config['grid_cells'],
                         basic_config['grid_file'],
                         classification_params['local_model_dir'],
                         basic_config['yr_range'][0],
                         timeseries_params['start_mo'],
                         classification_params['spec_indices'],
                         classification_params['si_vars'],
                         classification_params['spec_indices_pheno'],
                         classification_params['pheno_vars'],
                         classification_params['singleton_vars'] ,
                         classification_params['singleton_var_dict'],
                         classification_params['poly_vars'], 
                         classification_params['poly_var_path'],
                         classification_params['combo_bands'])
''';

## Load existing variable dataframe

In [None]:
# Read the stored variable dataframe (CSV at the 'samp_pix_vars' path) and preview its first rows.
pix_vars = pd.read_csv(
    filepath_or_buffer=classification_params['samp_pix_vars'],
)
pix_vars.head(n=5)

## Remove features from model

In [None]:
# Settings for deriving a reduced feature model from the existing variable dataframe:
#   new_feature_mod : name given to the reduced feature model
#   drop_indices    : drop all variables for an index (e.g. ['nbr'])
#   drop_vars       : drop all instances of a given variable (e.g. ['cv_yr', 'Jun_20'])
#   drop_combo      : drop specific index_variable combinations (e.g. ['var_wi_Jan_20', 'var_gcvi_maxv_wet'])

# --- Earlier preset, kept for reference only ---
# It used to be assigned here and was then silently overwritten by the active settings below,
# so it is now commented out to make clear which values are actually used.
# new_feature_mod = 'ptsgdb_Max_nopp_nnweg5-9'
# drop_indices = ["nbr", "ndmi", "wi", "evi2", "gcvi"]
# drop_vars = ["May_20", "Jun_20", "Jul_20", "Aug_20", "Sept_20"]
# drop_combo = ['var_poly_pred_ext', 'var_poly_pred_dst', 'var_poly_pred_area', 'var_poly_pred_APR', 'var_poly_NovDecGCVI_Std']
# NOTE(review): "Sept_20" is the only four-letter month label in this preset -- confirm it matches the column names.

#classification_params['samp_pix_vars'] = '/home/downspout-cel/paraguay_lc/vector/ptsgdb_{}.csv'.format(new_feature_mod)

# --- Active settings ---
new_feature_mod = 'Max_dropTest2'
drop_indices = []
drop_vars = ['Jun_20', 'Jul_20', 'Aug_20']
drop_combo = []

In [None]:
# Apply the drop settings defined in the previous cell: pass the variable dataframe path, the three
# drop lists, the local model directory, the new model name and the feature-model dictionary path.
reduce_variable_dataframe(
    classification_params['samp_pix_vars'],
    drop_indices, drop_vars, drop_combo,
    classification_params['local_model_dir'],
    new_feature_mod,
    classification_params['feature_mod_dict'],
)

## To augment sample with polygon data (KW TODO)

* join sample points to polygon data to get subset of polygons with points (join class info to those polygons)
* run make_var_dataframe with load_samp = False and 
    polygon file set to subset above-- will get {npts} random points from each polygon

## To save an html copy of this notebook with all outputs:
(these two cells should be last in notebook)

In [None]:
%%script echo skipping
### comment out above line and run this cell to print output as html

out_name = str(basic_config['country']+'6a_RandomForest_FeathreModel_to_dataframe'+'_model'+str(classification_params['feature_model'])+str(basic_config['filter_yr']))
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --ExecutePreprocessor.store_widget_state=True --output=$out_name 6b_RandomFoest_ModelComparisons.ipynb