In [1]:
import xarray as xr
import rioxarray
import numpy as np
import os
import re
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
from tqdm.notebook import tqdm
import time
import json
from shapely import wkt
from itertools import chain
from hlsstack.hls_funcs import fetch
from hlsstack.hls_funcs.masks import mask_hls, shp2mask, bolton_mask, atsa_mask
from hlsstack.hls_funcs.indices import ndvi_func, dfi_func, ndti_func, satvi_func, ndii7_func
from hlsstack.hls_funcs.indices import bai_126_func, bai_136_func, bai_146_func, bai_236_func, bai_246_func, bai_346_func
from hlsstack.hls_funcs.smooth import despike_ts, double_savgol
import gc

In [2]:
# Input: cleaned ground (VOR) observations
inDIR = '../data/ground_cln/'
inFILE = 'vor_2013_2022_cln_2023_04_26.csv'

inPATH = os.path.join(inDIR, inFILE)

# Output: the input file name with '_hls_idxs' inserted before the extension.
# The pattern is escaped and anchored so only a trailing '.csv' is replaced
# (a bare '.csv' would match any character followed by 'csv' anywhere in the name).
outDIR = '../data/training/'
outPATH = os.path.join(outDIR, re.sub(r'\.csv$', '_hls_idxs.csv', inFILE))

# which cluster setup branch to run: 'local' or 'hpc'
cluster_loc = 'hpc'

# masking options used inside the yearly processing loop
try_atsa = True
mask_bolton = False

# Output column name -> function that derives that index from the HLS dataset.
# Insertion order is preserved, so the output columns follow this order.
veg_dict = dict(
    NDVI=ndvi_func,
    DFI=dfi_func,
    NDTI=ndti_func,
    SATVI=satvi_func,
    NDII7=ndii7_func,
    BAI_126=bai_126_func,
    BAI_136=bai_136_func,
    BAI_146=bai_146_func,
    BAI_236=bai_236_func,
    BAI_246=bai_246_func,
    BAI_346=bai_346_func,
)

# Raw bands that are extracted alongside the indices
band_list = [
    'NIR1',
    'SWIR1',
    'SWIR2',
]

In [3]:
# Start the Dask cluster/client selected by `cluster_loc` ('local' or 'hpc').
# The resulting `client` is used (and restarted) by the yearly processing loop.
if cluster_loc == 'local':
    print('   setting up Local cluster...')
    # imports are kept inside the branch so only the selected backend is required
    from dask.distributed import LocalCluster, Client
    import dask
    aws=False
    fetch.setup_env(aws=aws)
    cluster = LocalCluster(n_workers=8, threads_per_worker=2)
    client = Client(cluster)
    display(client)
elif cluster_loc == 'hpc':
    from dask.distributed import LocalCluster, Client
    import dask_jobqueue as jq
    import dask
    from jupyter_server import serverapp
    # get the server address for porting
    try:
        jupServer = [x for x in serverapp.list_running_servers()][0]
    except IndexError:
        # no running server was listed: manually copy/paste the server address
        jupServer = {'base_url': '/node/ceres19-compute-98-eth.scinet.local/17710/'}
    print('   setting up cluster on HPC...')
    aws=False
    fetch.setup_env(aws=aws)
    # route the dashboard link through the Jupyter server proxy
    dask.config.set({'distributed.dashboard.link': jupServer['base_url'] + 'proxy/{port}/status'})
    # SLURM partition name; must be a plain string (no trailing comma, which would make it a tuple)
    partition = 'short'  # 'short','debug', 'mem', 'mem-low'
    num_processes = 4
    num_threads_per_processes = 2
    # 2.5 GB per thread, summed over all threads in one job
    mem = 2.5*num_processes*num_threads_per_processes
    n_cores_per_job = num_processes*num_threads_per_processes
    clust = jq.SLURMCluster(queue=partition,
                            processes=num_processes,
                            cores=n_cores_per_job,
                            memory=str(mem)+'GB',
                            #interface='ib0',
                            interface='enp24s0f0',
                            local_directory='$TMPDIR',
                            death_timeout=30,
                            walltime='02:00:00',
                            job_extra=["--output=/dev/null","--error=/dev/null"])
    client=Client(clust)
    #Scale Cluster 
    num_jobs=16
    clust.scale(jobs=num_jobs)
    try:
        # wait up to 60 s for every requested worker (jobs x processes per job)
        client.wait_for_workers(n_workers=num_jobs*num_processes, timeout=60)
    except dask.distributed.TimeoutError:
        # best effort: carry on with whatever workers have started
        print(str(num_jobs*num_processes) + ' workers not available. Continuing with available workers.')
    display(client)

  from distributed.utils import tmpfile


   setting up cluster on HPC...


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/8787/status,

0,1
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/8787/status,Workers: 64
Total threads: 128,Total memory: 298.24 GiB

0,1
Comm: tcp://10.1.4.118:34523,Workers: 64
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/8787/status,Total threads: 128
Started: Just now,Total memory: 298.24 GiB

0,1
Comm: tcp://10.1.4.118:40441,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35961/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:41891,
Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-qgfpqtq0,Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-qgfpqtq0

0,1
Comm: tcp://10.1.4.118:40209,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/38265/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:36707,
Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-bmr677b3,Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-bmr677b3

0,1
Comm: tcp://10.1.4.118:32865,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43669/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:46137,
Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-1ztzadwn,Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-1ztzadwn

0,1
Comm: tcp://10.1.4.118:35573,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34949/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:43379,
Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-cblxmi43,Local directory: /local/bgfs/sean.kearney/300234/dask-worker-space/worker-cblxmi43

0,1
Comm: tcp://10.1.4.119:40993,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/41317/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:35161,
Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-z22u6dxf,Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-z22u6dxf

0,1
Comm: tcp://10.1.4.119:38159,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/32815/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:38403,
Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-tj5j3ok0,Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-tj5j3ok0

0,1
Comm: tcp://10.1.4.119:36345,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34815/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:38917,
Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-62mk0l6n,Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-62mk0l6n

0,1
Comm: tcp://10.1.4.119:37143,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/44805/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:45527,
Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-882blh2o,Local directory: /local/bgfs/sean.kearney/300243/dask-worker-space/worker-882blh2o

0,1
Comm: tcp://10.1.4.119:39451,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34993/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:37567,
Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-mxbtw0t1,Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-mxbtw0t1

0,1
Comm: tcp://10.1.4.119:39763,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/38175/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:39735,
Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-u9nqlawp,Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-u9nqlawp

0,1
Comm: tcp://10.1.4.119:38171,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34625/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:37353,
Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-kr5glhuo,Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-kr5glhuo

0,1
Comm: tcp://10.1.4.119:40991,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/40495/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:45225,
Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-jub5nakz,Local directory: /local/bgfs/sean.kearney/300239/dask-worker-space/worker-jub5nakz

0,1
Comm: tcp://10.1.4.118:36071,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39587/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:34225,
Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-bbf9jh1p,Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-bbf9jh1p

0,1
Comm: tcp://10.1.4.118:42359,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43301/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:39063,
Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-wo9_5dm_,Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-wo9_5dm_

0,1
Comm: tcp://10.1.4.118:39715,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35831/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:43491,
Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-9dhdnqyv,Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-9dhdnqyv

0,1
Comm: tcp://10.1.4.118:42691,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39019/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:39759,
Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-ymfe1xkx,Local directory: /local/bgfs/sean.kearney/300228/dask-worker-space/worker-ymfe1xkx

0,1
Comm: tcp://10.1.4.119:45525,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/40521/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:36511,
Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-3z9289ey,Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-3z9289ey

0,1
Comm: tcp://10.1.4.119:46669,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/42655/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:41145,
Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-rrrb246j,Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-rrrb246j

0,1
Comm: tcp://10.1.4.119:43829,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/38549/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:35873,
Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-kj6xjje5,Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-kj6xjje5

0,1
Comm: tcp://10.1.4.119:44057,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45811/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:39431,
Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-41mgjfol,Local directory: /local/bgfs/sean.kearney/300237/dask-worker-space/worker-41mgjfol

0,1
Comm: tcp://10.1.4.119:35339,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/40613/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:43341,
Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-__3d8kdu,Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-__3d8kdu

0,1
Comm: tcp://10.1.4.119:34079,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/46303/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:35115,
Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-7jpw6262,Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-7jpw6262

0,1
Comm: tcp://10.1.4.119:44111,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43139/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:46587,
Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-1trcj7ha,Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-1trcj7ha

0,1
Comm: tcp://10.1.4.119:35615,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39093/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:35709,
Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-m7queai4,Local directory: /local/bgfs/sean.kearney/300242/dask-worker-space/worker-m7queai4

0,1
Comm: tcp://10.1.4.118:45889,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45091/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:38349,
Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-azf5c1ce,Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-azf5c1ce

0,1
Comm: tcp://10.1.4.118:40959,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45041/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:46513,
Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-j_xg99qv,Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-j_xg99qv

0,1
Comm: tcp://10.1.4.118:45437,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/44429/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:33439,
Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-wz5lv2u7,Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-wz5lv2u7

0,1
Comm: tcp://10.1.4.118:32879,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39239/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:43633,
Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-wuqc5elb,Local directory: /local/bgfs/sean.kearney/300235/dask-worker-space/worker-wuqc5elb

0,1
Comm: tcp://10.1.4.118:40945,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43431/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:37793,
Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-d0_ie4vg,Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-d0_ie4vg

0,1
Comm: tcp://10.1.4.118:44485,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/36483/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:40153,
Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-fxxeqodq,Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-fxxeqodq

0,1
Comm: tcp://10.1.4.118:34971,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34831/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:42239,
Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-r6k5ggq2,Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-r6k5ggq2

0,1
Comm: tcp://10.1.4.118:33751,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/36915/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:32975,
Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-juyra83k,Local directory: /local/bgfs/sean.kearney/300232/dask-worker-space/worker-juyra83k

0,1
Comm: tcp://10.1.4.119:35253,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45739/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:44261,
Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-gpo_nq8l,Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-gpo_nq8l

0,1
Comm: tcp://10.1.4.119:37391,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/32963/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:42411,
Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-_6bamc6v,Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-_6bamc6v

0,1
Comm: tcp://10.1.4.119:33707,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35669/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:43099,
Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-23jdiagj,Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-23jdiagj

0,1
Comm: tcp://10.1.4.119:37533,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/41989/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:34319,
Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-0wsiym1f,Local directory: /local/bgfs/sean.kearney/300240/dask-worker-space/worker-0wsiym1f

0,1
Comm: tcp://10.1.4.118:43969,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/36153/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:34107,
Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-26q6hurc,Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-26q6hurc

0,1
Comm: tcp://10.1.4.118:33231,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43515/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:44787,
Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-dkyjmcq6,Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-dkyjmcq6

0,1
Comm: tcp://10.1.4.118:33057,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43423/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:38225,
Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-0lcofbpt,Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-0lcofbpt

0,1
Comm: tcp://10.1.4.118:45761,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39871/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:37435,
Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-nu7w_hg7,Local directory: /local/bgfs/sean.kearney/300231/dask-worker-space/worker-nu7w_hg7

0,1
Comm: tcp://10.1.4.118:33141,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35955/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:40579,
Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-uck8z01f,Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-uck8z01f

0,1
Comm: tcp://10.1.4.118:34837,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/37321/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:37255,
Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-3wmuuw6c,Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-3wmuuw6c

0,1
Comm: tcp://10.1.4.118:43395,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45545/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:37679,
Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-rsin3fh9,Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-rsin3fh9

0,1
Comm: tcp://10.1.4.118:46567,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39519/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:39205,
Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-qf5lmh22,Local directory: /local/bgfs/sean.kearney/300230/dask-worker-space/worker-qf5lmh22

0,1
Comm: tcp://10.1.4.119:46235,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/38455/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:38983,
Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-rs7pj6hv,Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-rs7pj6hv

0,1
Comm: tcp://10.1.4.119:46335,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35653/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:34395,
Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-vktgjs2x,Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-vktgjs2x

0,1
Comm: tcp://10.1.4.119:32951,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/41845/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:39945,
Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-hdpravb5,Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-hdpravb5

0,1
Comm: tcp://10.1.4.119:43085,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/32901/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:37889,
Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-pn_8ben8,Local directory: /local/bgfs/sean.kearney/300238/dask-worker-space/worker-pn_8ben8

0,1
Comm: tcp://10.1.4.118:35447,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/33197/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:46343,
Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-4djcztde,Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-4djcztde

0,1
Comm: tcp://10.1.4.118:37105,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43599/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:39081,
Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-wwd5wq5c,Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-wwd5wq5c

0,1
Comm: tcp://10.1.4.118:42581,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/46275/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:33627,
Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-vagb2tsj,Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-vagb2tsj

0,1
Comm: tcp://10.1.4.118:32775,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43081/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:41345,
Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-6uuvkzxv,Local directory: /local/bgfs/sean.kearney/300229/dask-worker-space/worker-6uuvkzxv

0,1
Comm: tcp://10.1.4.119:37827,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43321/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:39049,
Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-0zjqgwgy,Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-0zjqgwgy

0,1
Comm: tcp://10.1.4.119:36233,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/46087/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:44917,
Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-z_3qv81a,Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-z_3qv81a

0,1
Comm: tcp://10.1.4.119:35099,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39911/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:46739,
Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-70guwasn,Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-70guwasn

0,1
Comm: tcp://10.1.4.119:41415,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/37851/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:44527,
Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-k3lbe_ai,Local directory: /local/bgfs/sean.kearney/300236/dask-worker-space/worker-k3lbe_ai

0,1
Comm: tcp://10.1.4.118:34009,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/41393/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:35607,
Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-m8s8gt_q,Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-m8s8gt_q

0,1
Comm: tcp://10.1.4.118:42865,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/34253/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:43331,
Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-j14zxfs0,Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-j14zxfs0

0,1
Comm: tcp://10.1.4.118:40385,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/45321/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:34933,
Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-6vuo8mcl,Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-6vuo8mcl

0,1
Comm: tcp://10.1.4.118:35663,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/33927/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.118:38321,
Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-kx5t39ca,Local directory: /local/bgfs/sean.kearney/300233/dask-worker-space/worker-kx5t39ca

0,1
Comm: tcp://10.1.4.119:45889,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/35265/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:43927,
Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-6f8cdwht,Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-6f8cdwht

0,1
Comm: tcp://10.1.4.119:43643,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/39867/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:37189,
Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-1zgvwmdy,Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-1zgvwmdy

0,1
Comm: tcp://10.1.4.119:37585,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/43117/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:37759,
Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-wfn0_brt,Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-wfn0_brt

0,1
Comm: tcp://10.1.4.119:37981,Total threads: 2
Dashboard: /node/ceres18-compute-8-eth.dev.scinet.usda.gov/63830/proxy/42495/status,Memory: 4.66 GiB
Nanny: tcp://10.1.4.119:35005,
Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-zvo7y_bg,Local directory: /local/bgfs/sean.kearney/300241/dask-worker-space/worker-zvo7y_bg


In [4]:
# Create the output directory (and any missing parents); no error if it already exists.
os.makedirs(outDIR, exist_ok=True)

In [5]:
# Read the ground-data csv (columns at positions 2 and 3 parsed as dates)
df_vor = pd.read_csv(inPATH, parse_dates=[2, 3])

# keep only the records that have a geometry
has_geometry = df_vor['geometry'].notnull()
df_vor = df_vor[has_geometry].copy()

# decode the WKT strings into shapely geometries
geometry_wkt = df_vor['geometry'].astype('str')
df_vor['geometry'] = geometry_wkt.apply(wkt.loads)

# build the GeoDataFrame (EPSG:32613) ordered by observation date
gdf_vor = gpd.GeoDataFrame(df_vor, geometry='geometry', crs=32613).sort_values('Date')

In [6]:
# Resume support: reuse a previously written output table when one exists,
# otherwise start with no output (None).
df_out = pd.read_csv(outPATH, parse_dates=[2, 3]) if os.path.exists(outPATH) else None

In [7]:
# Build the training table one year at a time: fetch HLS imagery covering that
# year's ground plots, mask clouds/shadows, average the vegetation indices and
# bands per plot, gap-fill each plot's daily time series, and join the values to
# the ground records. `df_out` is extended and written to `outPATH` after every
# year, so years already present in the output are skipped on a re-run.
for yr in tqdm(gdf_vor['Year'].unique()):
    print(yr)
    # skip if year already in output data
    if df_out is not None and yr in df_out['Year'].unique():
        print('Skipping year - already in output dataset.')
        continue
    else:
        # get subset of vor data for year
        gdf_yr = gdf_vor[gdf_vor['Year'] == yr]

        # get the date range for the fetch as a dictionary
        # (fixed window: 1 Nov of the previous year through 1 Mar of the next year)
        #start_date = gdf_yr['Date'].min().date() - timedelta(days=30)
        #end_date = gdf_yr['Date'].max().date() + timedelta(days=30)
        start_date = str(yr - 1) + "-11-01"
        end_date = str(yr + 1) + "-03-01"
        data_dict = {'date_range': [str(start_date), 
                                    str(end_date)]}

        # set the bounding box for fetching data
        bbox_yr = np.array(gdf_yr.total_bounds) # the entire subset dataset 

        # fetch the data for the entire year's dataset
        hls_ds = fetch.get_hls(hls_data=data_dict,
                               bbox=bbox_yr, 
                               stack_chunks=(400, 400),
                               proj_epsg=gdf_yr.crs.to_epsg(),
                               lim=1000).load()

        # create a tile ID coordinate (third '.'-separated field of each image id)
        hls_ds = hls_ds.assign_coords(tile_id = ('time', [x.split('.')[2] for x in hls_ds['id'].values]))

        # pick best image for any dates with duplicate images for the same tile
        # (keep=False drops every duplicated time, so a shorter result means duplicates exist)
        if len(hls_ds['time'].groupby('tile_id').apply(lambda x: x.drop_duplicates('time', keep=False))) < len(hls_ds['time']):
            hls_mask = mask_hls(hls_ds['FMASK'], mask_types=['all'])
            # percent of pixels flagged by the mask in each image
            hls_ds['maskcov_pct'] = ((hls_mask != 0).sum(['y', 'x']) / hls_ds['FMASK'].isel(time=0).size * 100)#
            # within each tile, keep the duplicate with the lowest masked percentage
            hls_ds = hls_ds.groupby('tile_id').apply(lambda x: x.sortby('maskcov_pct').drop_duplicates('time', keep='first')).sortby('time').compute()

        # compute ATSA mask if possible
        # reset the flag every year so a value from a previous iteration is never reused
        mask_atsa = False
        if try_atsa:
            print('masking out clouds and shadows detected by ATSA')
            if len(np.unique(hls_ds.tile_id)) > 1:
                # more than one tile: run ATSA separately per tile on pixels with BLUE data
                hls_atsa = hls_ds.groupby('tile_id').apply(lambda x: atsa_mask(x.where(
                    x['BLUE'].notnull(), drop=True))).compute()
                hls_atsa = hls_atsa.transpose('time', 'y', 'x')
                # NOTE(review): assumes the per-tile result merges as an 'ATSA' variable
                # exactly like the single-tile result below - confirm
                mask_atsa = True
            else:
                hls_ds = hls_ds.reset_coords(drop=True)
                try:
                    hls_atsa = atsa_mask(hls_ds).compute()
                    mask_atsa = True
                except (ValueError, IndexError):
                    # best effort: continue with the native HLS mask only
                    print('WARNING: Could not compute ATSA cloud/shadow mask')

        if mask_bolton:
            # compute the bolton mask
            hls_bolton_mask = bolton_mask(hls_ds).compute()
            # merge the Bolton mask (not the ATSA mask) and keep the masked result
            hls_ds = xr.merge([hls_ds, hls_bolton_mask], join='inner')
            hls_ds = hls_ds.where(hls_ds['BOLTON'] == 0, drop=True)

        # compute native HLS mask
        hls_mask = mask_hls(hls_ds['FMASK'], mask_types=['cirrus',
                                                        'cloud',
                                                        'cloud_adj',
                                                        'shadow', 
                                                        'snow',
                                                        'water',
                                                        'high_aerosol'])
        # mask using native HLS mask
        hls_ds = hls_ds.where(hls_mask == 0)
        # mask using ATSA mask, if available
        if mask_atsa:
            # merge ATSA mask with HLS data
            hls_ds = xr.merge([hls_ds, hls_atsa], join='inner')
            hls_ds = hls_ds.where(hls_ds['ATSA'] == 1)

        # in case multiple tile_id's still exist, take the mean by pixel
        if 'tile_id' in hls_ds.coords and len(np.unique(hls_ds.tile_id.values)) > 1:
            hls_ds = hls_ds.groupby('time').mean()

        # create an xarray mask from the ground data
        # (each unique Id/Date geometry gets an integer label starting at 1; 0 is left for background)
        mask_info = gdf_vor.drop_duplicates(subset=['Id', 'Date'])[['Id', 'geometry']].reset_index(drop=True).reset_index().rename(columns={'index': 'id'})
        mask_shp = [(row.geometry, row.id+1) for _, row in mask_info.iterrows()]
        plot_mask = shp2mask(shp=mask_shp, 
                             transform=hls_ds.rio.transform(), 
                             outshape=hls_ds['BLUE'].shape[1:], 
                             xr_object=hls_ds['BLUE'])
        # translate the integer labels back to plot Ids; label 0 becomes 'UNK'
        mask_dict = {row.id+1: row.Id for _, row in mask_info.iterrows()}
        mask_dict[0] = 'UNK'
        plot_mask = np.array([mask_dict[i] for i in plot_mask.values.flatten()]).reshape(plot_mask.shape)

        # assign the plot id's to the xarray dataset
        hls_ds = hls_ds.assign(Plot=(['y', 'x'], plot_mask)).chunk({'y': 50, 'x': 50})
        hls_ds = hls_ds.set_coords('Plot')

        # mask out areas outside ground plots
        hls_ds = hls_ds.where(hls_ds['Plot'] != 'UNK')

        # lazy compute all vegetation indices
        for vegidx in veg_dict:
            hls_ds[vegidx] = veg_dict[vegidx](hls_ds)

        # convert to dataframe at plot scale
        df_yr = hls_ds[list(veg_dict.keys()) + band_list].groupby('Plot').mean('stacked_y_x').to_dataframe().reset_index()

        # remove all non-plot data
        df_yr = df_yr[df_yr['Plot'] != 'UNK']

        # rename columns to match VOR data
        df_yr = df_yr.rename(columns={'time': 'Date',
                                        'Plot': 'Id'})

        # get missing dates for gap-filling
        dates_missing = [x for x in pd.date_range(start_date, end_date).date if x not in df_yr['Date'].unique()]

        # convert missing dates to a dataframe for combining with veg index data
        # (one row per plot Id and missing date)
        df_missing = pd.DataFrame({
            'Id': list(chain.from_iterable([list(np.repeat(x, len(dates_missing))) for x in df_yr['Id'].unique()])),
            'Date': list(chain.from_iterable(dates_missing for x in df_yr['Id'].unique()))})

        # combine into one dataframe for gapfilling
        df_yr_ts = pd.concat([df_yr, df_missing]).sort_values(['Id', 'Date'])

        # smooth all vegetation indices to gapfill
        for vegidx in veg_dict:
            df_yr_ts[vegidx + '_smooth'] = df_yr_ts.groupby('Id')[vegidx].transform(lambda x: double_savgol(x.values))
        for band in band_list:
            df_yr_ts[band + '_smooth'] = df_yr_ts.groupby('Id')[band].transform(lambda x: double_savgol(x.values))

        # convert date to datetime
        df_yr_ts['Date'] = pd.to_datetime(df_yr_ts['Date'])

        # rename smoothed columns and drop originals
        df_yr_ts = df_yr_ts.drop(columns=list(veg_dict.keys()) + band_list)
        col_rename_dict = {c: re.sub('_smooth', '', c) for c in df_yr_ts.columns if '_smooth' in c}
        df_yr_ts = df_yr_ts.rename(columns=col_rename_dict)

        # attach the smoothed values to this year's ground records by plot Id and date
        df_out_yr = pd.merge(gdf_yr, 
                             df_yr_ts[['Id', 'Date'] + list(veg_dict.keys()) + band_list], 
                             on=['Id', 'Date'],
                             how='left')

        if df_out is not None:
            # merge with existing output dataset
            df_out = pd.concat([df_out, df_out_yr])
        else:
            # create output dataset
            df_out = df_out_yr.copy()
        # write after every year so completed years survive an interrupted run
        df_out.to_csv(outPATH, index=False)
        time.sleep(2)
        # drop this year's large objects and restart the dask client before the next year
        del df_yr_ts, df_out_yr, gdf_yr, hls_ds, hls_mask
        if mask_atsa:
            del hls_atsa
        if mask_bolton:
            del hls_bolton_mask
        gc.collect()
        client.restart()

  0%|          | 0/10 [00:00<?, ?it/s]

2013
Skipping year - already in output dataset.
2014
Skipping year - already in output dataset.
2015
Skipping year - already in output dataset.
2016
Skipping year - already in output dataset.
2017
Skipping year - already in output dataset.
2018
Skipping year - already in output dataset.
2019
Skipping year - already in output dataset.
2020
Skipping year - already in output dataset.
2021
masking out clouds and shadows detected by ATSA
HOT centers of clear, thin and thick cloudy pixels on land : [ 280.10517169 1702.76026828 3040.92698107]
Shadow index centers of shadow, nonshadow dark and nonshadow bright on land : [ 454.18708278 1720.62390532 6500.93693694]
2022
masking out clouds and shadows detected by ATSA
HOT centers of clear, thin and thick cloudy pixels on land : [ 347.81272618 1714.87889544 2981.78250026]
Shadow index centers of shadow, nonshadow dark and nonshadow bright on land : [ 352.74862128 1329.11980544 4037.02234637]
