In [None]:
import os
import numpy as np
from pathlib import Path

In [None]:
from datetime import datetime
# Current local date as a 'YYYY-MM-DD' string; it is passed along in basic_config['today'].
today = datetime.today().date().isoformat()

## Set parameters in cell below, then run all:

In [None]:
## Predefined list of grid cells; use it by un-commenting `grid_cells = priority1` below.
priority1 = [
    3737, 3773, 3774, 3806, 3807, 3808, 3809, 3810,
    3841, 3842, 3843, 3844, 3845, 3877, 3878, 3880,
    3911, 3912, 3913, 3914, 3944, 3945, 3946, 3947, 3948,
    3976, 3978, 3979, 3980, 4007, 4008, 4009, 4010, 4036,
]
## home_dir: saves to the local folder inside this notebook's folder, which is ignored by git.
home_dir = Path("Outputs/local")
## scratch: has zero backup. Good for temp files, but might not be reachable from all clusters.
scratch = Path("/home/scratch-cel")

### 1. BASIC CONFIG ###########################################################################################################
## home_dir, scratch, or another path for temporary outputs that you do not want to save to the project directory
local_dir = home_dir
## ('Chile' | 'Paraguay')
country = 'Paraguay'
## ('stac' | 'GEE')
data_source = 'stac'
## ('actual' | 'test')
data_set = 'actual'
## Only needed if data_set == 'test'
test_grid = Path('/home/sandbox-cel/paraguay_lc/testing/ts_test_area_004057.gpkg')
## crs refers to the grid file being used and is set automatically based on data_source:
#crs = 'EqualEarth' ##('EqualEarth' | 'AlbersEqualArea' | 'UTM')
## grid_cell: for single-cell processes
grid_cell = 4004
## grid_cells: for multi-cell processes. [xxxx,xxxx,xxxx] or path to .csv file with cell list in column, no header
grid_cells = [4004]
#grid_cells = priority1
spec_index = 'gcvi'
print_list = True
## filter_yr: for single-year processes
filter_yr = 2021
## yr_range: [start,end] for processes where a range is accepted.
## If only one year in range, use same year twice (e.g. [2017,2017])
yr_range = [2023, 2025]
## ('Sentinel2'|'Landsat'|'Landsat5'|'Landsat7'|'Landsat8'|'Landsat9'|'AllRaw'|'Smooth'|'Smooth_old'|'Comp')
image_type = 'Sentinel2'
## troubleshoot | summary  -- if purpose == summary, many cells will not be run to avoid clutter in printout
purpose = 'troubleshoot'
##############################################################################################################################
### 1b. summary datasets #####################################################################################################
## 'master' is swapped for the project default path in the derived-settings cell further down
## (only done there when data_source == 'stac'). Use a different path if a subset is desired.
dl_db_path = 'master'
status_db_path = 'master'
image_summary_path = 'master'
##############################################################################################################################
### 1c. data inputs for plotting / timeseries ########################################
## ptfile: path to the sample-point CSV, or the string 'interactive' if selecting coordinates from a map
#ptfile = 'interactive'
ptfile = Path('/home/downspout-cel/paraguay_lc/vector/sampleData/SamplePts_Mar2024_ALL.csv')
## polyfile: absolute path to the polygon data CSV. The leading "/" is required; without it
## the path is relative and gets resolved against whatever the current working directory is.
polyfile = Path("/home/sandbox-cel/paraguay_lc/vector/ClassificationModels/RF/pts_polyData.csv")
##############################################################################################################################
### 1d. PLOT PARAMS -- params for plotting/sampling
## These values are gathered into the plot_params dictionary further down.
## Set interactive to True to use the interactive options above.
interactive = True
viewband = 'kndvi'
plot_day = 100
gamma = 2
## shpfile options: ('point' | 'poly' | None)
shpfile = 'poly'
get_new_coords = True
## inputCRS options: esri:102033 = Albers SA Equal Area Conic, epsg:8858 = Equal Earth
inputCRS = "epsg:8858"
##############################################################################################################################
### 2. TIME SERIES ###########################################################################################################
## These values are gathered into the timeseries_params dictionary further down.
load_prerunTS = False
smooth_TSfile = "/home/downspout-cel/chile_lc/OutputData/TSdfs/TS_evi2_smooth_2014-2018_natFor_1157.csv"
raw_TSfile = "/home/downspout-cel/chile_lc/OutputData/TSdfs/TS_evi2_rawSentinel_2014-2018_natFor_1157.csv"
## start/end year come from yr_range -- set that in Basic config, not here
start_yr, end_yr = yr_range[0], yr_range[1]
start_mo = 11
## When True, grid_cells is replaced by [grid_cell] further down
single_grid_cell = False
## For polygon-based sample #################################################################################################
## oldest_samp / newest_samp: oldest / newest polygon to use (0 to ignore)
oldest_samp = 0
newest_samp = 0
## npts: pts sampled per polygon
npts = 1
seed1 = 88
## For point-based sample ####################################################################################################
## load_samp: if False, ptfile is ignored
load_samp = True
filter_class = 'Trees-Forest'
## 3. Params for single output plots
class_prefix = ''

##############################################################################################################################
## 4. MODELLING PARAMS (currently Random Forest) #############################################################################
## These values are gathered into the classification_params dictionary further down.
model_type = 'RF'
## model_mode = 'testing' | 'production'
model_mode = 'testing'
## main_model_dir should not be changed -- outputs will go in here if model_mode == 'production'
main_model_dir = Path(f'/home/downspout-cel/{country.lower()}_lc/classification/{model_type}')
## local_model_dir can be changed to any local directory -- outputs will go in here if model_mode == 'testing'
#local_model_dir = Path("/home/downspout-cel/paraguay_lc/vector/tests_KW2")
#local_model_dir = local_dir
local_model_dir = scratch
## ho_thresh: (0-100), for full dataset holdout
ho_thresh = 20
## --------feature specs ----------------------------------------------------------------------------------------------
feature_model = 'base4Poly'
#spec_indices = ["evi2","gcvi","wi","kndvi","nbr","ndmi"]
spec_indices = ["kndvi", "gcvi", "ndmi", "nbr"]
si_vars = [
    "maxv_yr", "minv_yr", "amp_yr", "avg_yr", "sd_yr",
    "Jan_20", "Feb_20", "Mar_20", "Apr_20", "May_20", "Jun_20",
    "Jul_20", "Aug_20", "Sep_20", "Oct_20", "Nov_20", "Dec_20",
    'maxv_wet', 'minv_wet', 'med_wet', 'cv_wet',
    'maxv_dry', 'minv_dry', 'med_dry', 'cv_dry',
]
spec_indices_pheno = ["kndvi"]
#pheno_vars=['slp_wet','numrot_wet','posd_wet','posv_wet','numlow_wet','tosd_wet','p1amp_wet','sosd_wet','sosv_wet','eosd_wet','eosv_wet','rog_wet','ros_wet','los_wet']
pheno_vars = ["posv_wet"]
singleton_vars = ["BH", "SH", "Chaco", "Cer"]
#singleton_vars = ["forest_strata"]
## NOTE(review): the paths below are hard-coded to paraguay_lc (and RF) and do not follow
## `country` / `model_type` the way main_model_dir does -- confirm this is intended.
singleton_var_dict = '/home/downspout-cel/paraguay_lc/singleton_var_dict.json'
feature_mod_dict = '/home/downspout-cel/paraguay_lc/Feature_Models.json'
sample_mod_dict = '/home/downspout-cel/paraguay_lc/Sample_Models.json'
poly_vars = ["pred_ext", "pred_dst", "pred_area", "pred_APR", "NovDecGCVI_Std"]
#poly_vars = []
poly_var_path = "/home/downspout-cel/paraguay_lc/Segmentations/cultionet22/feats_EO_8pt5"
#samp_poly = 'D:/NasaProject/Paraguay/ClassificationModels/RF/pts_polyData.csv'
## ---------sample specs------------------------------------------------------------------------------------------------
## sample_model options are currently  base1000 | bal400mix1 bal400mix2 ... bal400mix10
sample_model = 'bal100mix3'
## samp_pts are modified pt dataframes based on the sample model. The full sample point dataframe is the ptfile in 1c above.
samp_pts = f'/home/downspout-cel/paraguay_lc/classification/RF/sample_dfs/{sample_model}.csv'
## sample dataframe with variable data extracted to points. All points included
## (changes only if the feature model changes or new points are made)
samp_pix_vars = f'/home/downspout-cel/paraguay_lc/vector/ptsgdb_{feature_model}.csv'
## sample dataframe for current model (after sample adjustments are performed)
## NOTE(review): the year in this file name is fixed at 2021 and does not follow filter_yr.
pixdf = f'/home/downspout-cel/paraguay_lc/classification/RF/pixdf_{feature_model}_{sample_model}_2021.csv'
## ----------model parameters-------------------------------------------------------------------------------------------
## lc_mod options: 'all'(=LC25) | 'cropNoCrop' | 'crop_nocrop_medcrop' | 'crop_nocrop_medcrop_tree'
##                 | 'trans_cats' | 'veg'(=LC5) | 'cropType'(='LC_crops') | 'veg_with_crop' (='LC8') | single_{}'
lc_mod = 'veg_with_crop'
ranhold = 29
## impmeth options: 'Impurity' | 'Permutation' | 'None'
impmeth = 'Impurity'
## Array of 25 ones (presumably one weight per class of the LC25 scheme -- TODO confirm)
weights = np.ones(25)

### Run all cells. Nothing below this line needs to be modified unless you are adding a new parameter.
===============================================================================================================================

In [None]:
# Derive the smoothed/raw data directories, the grid file and the CRS label from
# `data_source`, `country` and `data_set`.
# Unsupported settings raise ValueError here. Otherwise the names below would be left
# undefined (NameError later on) or, worse, keep stale values from an earlier run of this cell.
_country_dir = country.lower()

if data_source == 'GEE':
    smooth_dir = Path(f"/home/downspout-cel/{_country_dir}_lc/raster/grids")
    raw_dir = Path(f"/home/sandbox-cel/{_country_dir}_lc/raster/grids")
    crs = 'Albers Equal Area'
    # NOTE(review): 'master' values of dl_db_path / status_db_path / image_summary_path are
    # only expanded in the 'stac' branch; under 'GEE' they stay the literal string 'master'.
    if country == 'Chile':
        grid_file = Path("/home/sandbox-cel/chile_lc/chl_grids.gpkg")
    elif country == 'Paraguay':
        grid_file = Path("/home/sandbox-cel/paraguay_lc/vector/pry_grids.gpkg")
    elif data_set != 'test':
        # With data_set == 'test' the grid file comes from test_grid below, so only the
        # non-test case has no grid file to fall back on.
        raise ValueError(
            "No GEE grid file is defined for country {!r}; expected 'Chile' or 'Paraguay'".format(country)
        )
elif data_source == 'stac':
    smooth_dir = Path(f"/home/downspout-cel/{_country_dir}_lc/stac/grids")
    raw_dir = Path(f"/home/sandbox-cel/{_country_dir}_lc/stac/grid")
    grid_file = Path("/home/sandbox-cel/LUCinLA_grid_8858.gpkg")
    # Expand the 'master' placeholders to the project-wide summary files.
    if dl_db_path == 'master':
        dl_db_path = Path(f"/home/downspout-cel/{_country_dir}_lc/cell_processing_dl.csv")
    if status_db_path == 'master':
        status_db_path = Path(f"/home/downspout-cel/{_country_dir}_lc/cell_processing_post.csv")
    if image_summary_path == 'master':
        image_summary_path = Path(f"/home/downspout-cel/{_country_dir}_lc/ALLFileList.csv")
    crs = 'Equal Earth'
else:
    raise ValueError(
        "Unsupported data_source {!r}; expected 'stac' or 'GEE'".format(data_source)
    )

# The test data set uses the same directories and grid file for both data sources.
if data_set == 'test':
    smooth_dir = Path(f"/home/downspout-cel/{_country_dir}_lc/testing/grids")
    raw_dir = Path(f"/home/sandbox-cel/{_country_dir}_lc/testing/grid")
    grid_file = test_grid

# (image_type name, sensor code) pairs.
sensor_codes = [
    ('Landsat', 'L'),
    ('Sentinel', 'S'),
    ('Landsat5', 'LT05'),
    ('Landsat7', 'LE07'),
    ('Landsat8', 'LC08'),
    ('Landsat9', 'LC09'),
]
# Pairs whose name matches image_type exactly; the first match supplies the code and
# 'na' is used when nothing matches.
# NOTE(review): the documented image_type option is 'Sentinel2', but the table only has
# 'Sentinel', so 'Sentinel2' currently yields 'na' -- confirm whether that is intended.
sc = [pair for pair in sensor_codes if pair[0] == image_type]
sensor_code = 'na' if not sc else sc[0][1]

# Zero-padded six-digit form of grid_cell, used as a directory name (e.g. 4004 -> '004004').
_cell_tag = f'{grid_cell:06d}'

# General settings shared with other notebooks (persisted with %store right after this).
# NOTE(review): 'ptfile' and 'grid_cells' are captured here before the later
# `ptfile == 'interactive'` and `single_grid_cell` overrides run, so those overrides are
# not reflected in this dictionary -- confirm that is intended.
basic_config = {
    'country': country,
    'grid_cell': grid_cell,
    'grid_cells': grid_cells,
    'data_source': data_source,
    'crs': crs,
    'smooth_dir': smooth_dir,
    'spec_index': spec_index,
    'spec_indices': spec_indices,
    # <smooth_dir>/<cell>/brdf_ts/ms/<spec_index>
    'index_dir': os.path.join(smooth_dir, _cell_tag, 'brdf_ts', 'ms', spec_index),
    'raw_dir': raw_dir,
    # <raw_dir>/<cell>/brdf
    'brdf_dir': os.path.join(raw_dir, _cell_tag, 'brdf'),
    'grid_file': grid_file,
    'dl_db_path': dl_db_path,
    'status_db_path': status_db_path,
    'image_summary_path': image_summary_path,
    'local_dir': local_dir,
    'filter_yr': filter_yr,
    'yr_range': yr_range,
    'image_type': image_type,
    'sensor_code': sensor_code,
    'print_list': print_list,
    'ptfile': ptfile,
    'polyfile': polyfile,
    'today': today,
    'purpose': purpose,
}

%store basic_config

# When ptfile is the placeholder 'interactive', point it at SelectedCoords.csv in local_dir.
# NOTE(review): basic_config was built (and stored) before this line, so it still holds
# the unresolved ptfile value -- confirm that is intended.
if ptfile == 'interactive':
    ptfile = os.path.join(local_dir, 'SelectedCoords.csv')

# Plotting / sampling settings (persisted with %store right after this).
plot_params = {
    'viewband': viewband,
    'plot_day': plot_day,
    'gamma': gamma,
    'shpfile': shpfile,
    'interactive': interactive,
    'inputCRS': inputCRS,
    'get_new_coords': get_new_coords,
}

%store plot_params

# Restrict the multi-cell list to the single configured cell when requested.
# (Plain truthiness test instead of comparing with `== True`.)
# NOTE(review): basic_config['grid_cells'] was captured before this override and is not
# updated by it -- confirm that is intended.
if single_grid_cell:
    grid_cells = [grid_cell]

# Time-series settings (persisted with %store right after this).
timeseries_params = {
    #'ptfile' : ptfile,
    'filter_class': filter_class,
    'start_yr': start_yr,
    'end_yr': end_yr,
    'start_mo': start_mo,
    'image_type': image_type,
    'oldest_samp': oldest_samp,
    'newest_samp': newest_samp,
    'npts': npts,
    'seed1': seed1,
    'load_samp': load_samp,
    'load_prerunTS': load_prerunTS,
    'smooth_TSfile': smooth_TSfile,
    'raw_TSfile': raw_TSfile,
}

%store timeseries_params


# Classification output directory of the single grid cell: <smooth_dir>/<six-digit cell>/cls
_cls_cell = f'{grid_cell:06d}'
_cls_dir = os.path.join(smooth_dir, _cls_cell, 'cls')

# Settings for single-output plots (persisted with %store right after this).
single_output_params = {
    'class_prefix': class_prefix,
    'class_dir': _cls_dir,
    # File name is <class_prefix><six-digit cell>.tif inside class_dir
    'class_file': os.path.join(_cls_dir, class_prefix + _cls_cell + '.tif'),
    'map_years': yr_range,
}

%store single_output_params

# Pick the model name and output directory from model_mode:
#   'testing'    -> <feature_model>_<sample_model>, written to local_model_dir
#   'production' -> <feature_model>_<sample_model>_<filter_yr>, written to main_model_dir
# Any other value raises ValueError. Otherwise model_name / model_dir would be undefined
# below, or silently keep stale values from an earlier run of this cell.
if model_mode == 'testing':
    model_name = '{}_{}'.format(feature_model, sample_model)
    model_dir = local_model_dir
elif model_mode == 'production':
    model_name = '{}_{}_{}'.format(feature_model, sample_model, filter_yr)
    model_dir = main_model_dir
else:
    raise ValueError(
        "Unsupported model_mode {!r}; expected 'testing' or 'production'".format(model_mode)
    )

# Modelling settings (persisted with %store right after this).
classification_params = {
    'model_type': model_type,
    'model_mode': model_mode,
    'model_dir': model_dir,
    'main_model_dir': main_model_dir,
    'local_model_dir': local_model_dir,
    'local_dir': local_dir,
    'samp_pts': samp_pts,
    'ho_thresh': ho_thresh,
    'samp_pix_vars': samp_pix_vars,
    'pixdf': pixdf,
    'feature_model': feature_model,
    'feature_mod_dict': feature_mod_dict,
    'spec_indices': spec_indices,
    'si_vars': si_vars,
    'pheno_vars': pheno_vars,
    'spec_indices_pheno': spec_indices_pheno,
    'singleton_vars': singleton_vars,
    'singleton_var_dict': singleton_var_dict,
    'poly_vars': poly_vars,
    'poly_var_path': poly_var_path,
    'sample_model': sample_model,
    'sample_mod_dict': sample_mod_dict,
    'model_name': model_name,
    'lc_mod': lc_mod,
    'ranhold': ranhold,
    'impmeth': impmeth,
    'weights': weights,
}

%store classification_params