In [1]:
import dask, concurrent.futures, time, warnings, os, re, pickle
from osgeo import gdal
import os
import requests as r
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio as rio
import rioxarray as riox
import time
import xarray as xr
from urllib.request import urlopen
from xml.etree.ElementTree import parse,fromstring
from pandas import to_datetime
from rasterio.crs import CRS
from datetime import datetime, timedelta
from netrc import netrc
from pyproj import Proj
from src.hls_funcs import fetch
from src.hls_funcs.masks import mask_hls, shp2mask, bolton_mask
from src.hls_funcs.indices import ndvi_func
from src.hls_funcs.smooth import smooth_xr, despike_ts_xr
import cartopy.crs as ccrs
from rasterio.plot import show
from src.hls_funcs.predict import pred_bm, pred_bm_se, pred_cov
import dask.diagnostics
import matplotlib.pyplot as plt

ERROR 1: PROJ: proj_create_from_database: Open of /project/cper_neon_aop/conda_envs/hls_nrt_env/share/proj failed


In [2]:
#wkDIR = os.getcwd()
#outDIR_base = 'data/hls_nrt/'

wkDIR = '/project/cper_neon_aop/hls_nrt/'
outDIR_base = '/90daydata/cper_neon_aop/hls_nrt/'

cluster_loc = 'hpc'

In [3]:
yr = 2021
#prefix = 'redtop' 
#prefix = 'cper'
prefix = 'tbng'

if prefix == 'cper': 
    aoi_f = os.path.join('data/ground/cper_pastures_2017_clip.shp')
    df_aoi = gpd.read_file(aoi_f)
    subunit_name_old = 'Past_Name_'
    subunit_name = 'Pasture'
elif prefix == 'redtop':
    from src.utils.convert import kmz_to_shp
    df_aoi = kmz_to_shp('data/ground/RedTop_Boundary.kmz', 'data/ground/')
    df_aoi = df_aoi.to_crs(epsg=32613)
    subunit_name_old = None
    subunit_name = None
elif prefix == 'tbng':
    df_aoi_txt = pd.read_csv('/project/cper_neon_aop/tbng_veg/data/bm_extract/TB_all_bm.csv')
    df_aoi = gpd.GeoDataFrame(
        df_aoi_txt, geometry=gpd.points_from_xy(df_aoi_txt['gps_E'], df_aoi_txt['gps_N']))
    df_aoi = df_aoi.set_crs(epsg=32613)
    df_aoi.geometry = df_aoi.buffer(500)
    subunit_name_old = None
    subunit_name = None

outDIR = os.path.join(outDIR_base, prefix)

In [4]:
os.chdir(wkDIR)
print(yr)
t00 = time.time()
t0 = time.time()
t1 = time.time()
if cluster_loc == 'local':
    print('   setting up Local cluster...')
    from dask.distributed import LocalCluster, Client
    import dask
    aws=False
    fetch.setup_env(aws=aws)
    cluster = LocalCluster(n_workers=8, threads_per_worker=2)
    client = Client(cluster)
    display(client)
elif cluster_loc == 'coiled':
    import coiled
    aws=True
    fetch.setup_env(aws=aws)
    s3_cred = fetch.setup_netrc(creds=['spkearney', '1mrChamu'], aws=aws)
    coiled.create_software_environment(
    name="hls_cog_coiled",
    conda="hls_cog_coiled_env.yaml")
    cluster = coiled.Cluster(
        name="hls_cog_coiled",
        software="kearney-sp/hls_cog_coiled",
        n_workers=5,
        worker_cpu=2,
        scheduler_cpu=2,
        backend_options={"region": "us-west-2"},
        environ=dict(GDAL_DISABLE_READDIR_ON_OPEN='FALSE', 
                   #AWS_NO_SIGN_REQUEST='YES',
                   GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
                   GDAL_SWATH_SIZE='200000000',
                   VSI_CURL_CACHE_SIZE='200000000',
                   CPL_VSIL_CURL_ALLOWED_EXTENSIONS='TIF',
                   GDAL_HTTP_UNSAFESSL='YES',
                   GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
                   GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'),
                   AWS_REGION='us-west-2',
                   AWS_SECRET_ACCESS_KEY=s3_cred['secretAccessKey'],
                   AWS_ACCESS_KEY_ID=s3_cred['accessKeyId'],
                   AWS_SESSION_TOKEN=s3_cred['sessionToken'])
    )
    client = Client(cluster)
    display(client)
elif cluster_loc == 'hpc':
    from dask.distributed import LocalCluster, Client
    import dask_jobqueue as jq
    import dask
    from jupyter_server import serverapp
    
    # get the server address for porting
    try:
        jupServer = [x for x in serverapp.list_running_servers()][0]
    except IndexError:
        # manually copy/paste the server address
        jupServer = {'base_url': '/node/ceres19-compute-98-eth.scinet.local/17710/'}
    print('   setting up cluster on HPC...')
    aws=False
    fetch.setup_env(aws=aws)
    dask.config.set({'distributed.dashboard.link': jupServer['base_url'] + 'proxy/{port}/status'})
    partition='short',#'short','debug', 'mem', 'mem-low',
    num_processes = 4
    num_threads_per_processes = 2
    mem = 2.5*num_processes*num_threads_per_processes
    n_cores_per_job = num_processes*num_threads_per_processes
    clust = jq.SLURMCluster(queue=partition,
                            processes=1,
                            #n_workers=8,
                            cores=8,
                            #cores=n_cores_per_job,
                            memory=str(mem)+'6GB',
                            interface='ib0',
                            #interface='enp24s0f0',
                            local_directory='$TMPDIR',
                            death_timeout=30,
                            walltime='02:00:00',
                            job_extra=["--output=/dev/null","--error=/dev/null"])
    client=Client(clust)
    #Scale Cluster 
    num_jobs=64
    clust.scale(jobs=num_jobs)
    try:
        client.wait_for_workers(n_workers=num_jobs*num_processes, timeout=60)
    except dask.distributed.TimeoutError as e:
        print(str(num_jobs*num_processes) + ' workers not available. Continuing with available workers.')
        #print(e)
        pass
    display(client)
print('...completed in ' + str(round(time.time() - t1, 0)) + ' secs')
print('total elasped time: ' + str(round((time.time() - t0)/60, 2)) + ' mins\n')

if not os.path.exists(outDIR):
    os.mkdir(outDIR)

if subunit_name_old is not None:
    df_aoi = df_aoi.rename(columns={subunit_name_old: subunit_name})
if subunit_name is not None:
    df_aoi = df_aoi.dissolve(by=subunit_name).reset_index()

start_date = str(yr - 1) + "-11-01"
end_date = str(yr + 1) + "-03-01"

# set the date range for analysis
date_rng = pd.date_range(start=start_date, end=end_date)
date_rng = date_rng[date_rng <= datetime.today()]

2021


  from distributed.utils import tmpfile


   setting up cluster on HPC...
256 workers not available. Continuing with available workers.


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/8787/status,

0,1
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/8787/status,Workers: 64
Total threads: 512,Total memory: 1.17 TiB

0,1
Comm: tcp://10.1.10.70:43959,Workers: 64
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/8787/status,Total threads: 512
Started: 1 minute ago,Total memory: 1.17 TiB

0,1
Comm: tcp://10.1.10.90:32955,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43917/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.90:45685,
Local directory: /local/bgfs/sean.kearney/7826879/dask-worker-space/worker-e3ar7aj8,Local directory: /local/bgfs/sean.kearney/7826879/dask-worker-space/worker-e3ar7aj8

0,1
Comm: tcp://10.1.10.77:36557,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/46679/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:44093,
Local directory: /local/bgfs/sean.kearney/7826901/dask-worker-space/worker-jo71w59q,Local directory: /local/bgfs/sean.kearney/7826901/dask-worker-space/worker-jo71w59q

0,1
Comm: tcp://10.1.10.78:44616,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/34519/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:41996,
Local directory: /local/bgfs/sean.kearney/7826902/dask-worker-space/worker-0z8eae1p,Local directory: /local/bgfs/sean.kearney/7826902/dask-worker-space/worker-0z8eae1p

0,1
Comm: tcp://10.1.10.103:44989,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36554/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.103:44598,
Local directory: /local/bgfs/sean.kearney/7826869/dask-worker-space/worker-b8262ems,Local directory: /local/bgfs/sean.kearney/7826869/dask-worker-space/worker-b8262ems

0,1
Comm: tcp://10.1.10.81:37326,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36691/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:40461,
Local directory: /local/bgfs/sean.kearney/7826916/dask-worker-space/worker-bb4j4ryu,Local directory: /local/bgfs/sean.kearney/7826916/dask-worker-space/worker-bb4j4ryu

0,1
Comm: tcp://10.1.10.77:38028,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39055/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:38579,
Local directory: /local/bgfs/sean.kearney/7826894/dask-worker-space/worker-m06nyvuf,Local directory: /local/bgfs/sean.kearney/7826894/dask-worker-space/worker-m06nyvuf

0,1
Comm: tcp://10.1.10.53:32825,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/32975/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:42660,
Local directory: /local/bgfs/sean.kearney/7826886/dask-worker-space/worker-plhj9phh,Local directory: /local/bgfs/sean.kearney/7826886/dask-worker-space/worker-plhj9phh

0,1
Comm: tcp://10.1.10.77:37930,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/41931/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:43232,
Local directory: /local/bgfs/sean.kearney/7826900/dask-worker-space/worker-y27g5o7j,Local directory: /local/bgfs/sean.kearney/7826900/dask-worker-space/worker-y27g5o7j

0,1
Comm: tcp://10.1.10.101:46856,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/44261/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.101:39832,
Local directory: /local/bgfs/sean.kearney/7826874/dask-worker-space/worker-pw0x6zj3,Local directory: /local/bgfs/sean.kearney/7826874/dask-worker-space/worker-pw0x6zj3

0,1
Comm: tcp://10.1.10.90:34511,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/44834/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.90:33657,
Local directory: /local/bgfs/sean.kearney/7826882/dask-worker-space/worker-r_vlzzyt,Local directory: /local/bgfs/sean.kearney/7826882/dask-worker-space/worker-r_vlzzyt

0,1
Comm: tcp://10.1.10.80:36937,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/41856/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:45975,
Local directory: /local/bgfs/sean.kearney/7826921/dask-worker-space/worker-s8nznne9,Local directory: /local/bgfs/sean.kearney/7826921/dask-worker-space/worker-s8nznne9

0,1
Comm: tcp://10.1.10.89:40578,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/41359/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.89:44458,
Local directory: /local/bgfs/sean.kearney/7826878/dask-worker-space/worker-__ky_6uv,Local directory: /local/bgfs/sean.kearney/7826878/dask-worker-space/worker-__ky_6uv

0,1
Comm: tcp://10.1.10.80:40751,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36563/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:38384,
Local directory: /local/bgfs/sean.kearney/7826920/dask-worker-space/worker-onue4yd9,Local directory: /local/bgfs/sean.kearney/7826920/dask-worker-space/worker-onue4yd9

0,1
Comm: tcp://10.1.10.78:34053,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/40216/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:41835,
Local directory: /local/bgfs/sean.kearney/7826906/dask-worker-space/worker-ntrhy4c5,Local directory: /local/bgfs/sean.kearney/7826906/dask-worker-space/worker-ntrhy4c5

0,1
Comm: tcp://10.1.10.78:39680,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/38479/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:38010,
Local directory: /local/bgfs/sean.kearney/7826905/dask-worker-space/worker-70209rff,Local directory: /local/bgfs/sean.kearney/7826905/dask-worker-space/worker-70209rff

0,1
Comm: tcp://10.1.10.80:39339,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37425/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:37105,
Local directory: /local/bgfs/sean.kearney/7826918/dask-worker-space/worker-w0bdqa9o,Local directory: /local/bgfs/sean.kearney/7826918/dask-worker-space/worker-w0bdqa9o

0,1
Comm: tcp://10.1.10.53:40410,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/38563/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:36107,
Local directory: /local/bgfs/sean.kearney/7826884/dask-worker-space/worker-3_p2qyb_,Local directory: /local/bgfs/sean.kearney/7826884/dask-worker-space/worker-3_p2qyb_

0,1
Comm: tcp://10.1.10.80:40098,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37924/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:39821,
Local directory: /local/bgfs/sean.kearney/7826922/dask-worker-space/worker-yx5hbb0e,Local directory: /local/bgfs/sean.kearney/7826922/dask-worker-space/worker-yx5hbb0e

0,1
Comm: tcp://10.1.10.80:33342,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36464/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:39373,
Local directory: /local/bgfs/sean.kearney/7826917/dask-worker-space/worker-hp95bv2a,Local directory: /local/bgfs/sean.kearney/7826917/dask-worker-space/worker-hp95bv2a

0,1
Comm: tcp://10.1.10.78:41811,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36238/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:34979,
Local directory: /local/bgfs/sean.kearney/7826907/dask-worker-space/worker-5atd9uaq,Local directory: /local/bgfs/sean.kearney/7826907/dask-worker-space/worker-5atd9uaq

0,1
Comm: tcp://10.1.10.77:34445,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35522/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:37731,
Local directory: /local/bgfs/sean.kearney/7826899/dask-worker-space/worker-vs0gzdce,Local directory: /local/bgfs/sean.kearney/7826899/dask-worker-space/worker-vs0gzdce

0,1
Comm: tcp://10.1.10.80:45977,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/45175/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:35892,
Local directory: /local/bgfs/sean.kearney/7826925/dask-worker-space/worker-glmv4o3j,Local directory: /local/bgfs/sean.kearney/7826925/dask-worker-space/worker-glmv4o3j

0,1
Comm: tcp://10.1.10.53:44885,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39590/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:41816,
Local directory: /local/bgfs/sean.kearney/7826890/dask-worker-space/worker-qdxc8gup,Local directory: /local/bgfs/sean.kearney/7826890/dask-worker-space/worker-qdxc8gup

0,1
Comm: tcp://10.1.10.90:38144,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/40671/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.90:38882,
Local directory: /local/bgfs/sean.kearney/7826880/dask-worker-space/worker-iqsr8zor,Local directory: /local/bgfs/sean.kearney/7826880/dask-worker-space/worker-iqsr8zor

0,1
Comm: tcp://10.1.10.55:36414,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35618/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.55:40465,
Local directory: /local/bgfs/sean.kearney/7826865/dask-worker-space/worker-4zv2cai5,Local directory: /local/bgfs/sean.kearney/7826865/dask-worker-space/worker-4zv2cai5

0,1
Comm: tcp://10.1.10.89:43775,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35528/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.89:35648,
Local directory: /local/bgfs/sean.kearney/7826876/dask-worker-space/worker-l37eaj1o,Local directory: /local/bgfs/sean.kearney/7826876/dask-worker-space/worker-l37eaj1o

0,1
Comm: tcp://10.1.10.84:40011,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/42626/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.84:39212,
Local directory: /local/bgfs/sean.kearney/7826927/dask-worker-space/worker-5ev9bgeg,Local directory: /local/bgfs/sean.kearney/7826927/dask-worker-space/worker-5ev9bgeg

0,1
Comm: tcp://10.1.10.81:34065,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/42354/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:33254,
Local directory: /local/bgfs/sean.kearney/7826910/dask-worker-space/worker-c3fu5apk,Local directory: /local/bgfs/sean.kearney/7826910/dask-worker-space/worker-c3fu5apk

0,1
Comm: tcp://10.1.10.53:36015,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/33894/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:34940,
Local directory: /local/bgfs/sean.kearney/7826893/dask-worker-space/worker-kqkyw_fb,Local directory: /local/bgfs/sean.kearney/7826893/dask-worker-space/worker-kqkyw_fb

0,1
Comm: tcp://10.1.10.77:38002,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39626/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:34264,
Local directory: /local/bgfs/sean.kearney/7826897/dask-worker-space/worker-nwymw947,Local directory: /local/bgfs/sean.kearney/7826897/dask-worker-space/worker-nwymw947

0,1
Comm: tcp://10.1.10.53:44308,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/41682/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:39771,
Local directory: /local/bgfs/sean.kearney/7826891/dask-worker-space/worker-y3kp0950,Local directory: /local/bgfs/sean.kearney/7826891/dask-worker-space/worker-y3kp0950

0,1
Comm: tcp://10.1.10.84:46229,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35091/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.84:35823,
Local directory: /local/bgfs/sean.kearney/7826926/dask-worker-space/worker-g42n5ldg,Local directory: /local/bgfs/sean.kearney/7826926/dask-worker-space/worker-g42n5ldg

0,1
Comm: tcp://10.1.10.80:41181,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/34819/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:38019,
Local directory: /local/bgfs/sean.kearney/7826923/dask-worker-space/worker-6f6_dbqu,Local directory: /local/bgfs/sean.kearney/7826923/dask-worker-space/worker-6f6_dbqu

0,1
Comm: tcp://10.1.10.78:42251,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/45313/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:46389,
Local directory: /local/bgfs/sean.kearney/7826909/dask-worker-space/worker-8mcy0h8v,Local directory: /local/bgfs/sean.kearney/7826909/dask-worker-space/worker-8mcy0h8v

0,1
Comm: tcp://10.1.10.78:42240,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37201/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:38023,
Local directory: /local/bgfs/sean.kearney/7826908/dask-worker-space/worker-u0bznuxp,Local directory: /local/bgfs/sean.kearney/7826908/dask-worker-space/worker-u0bznuxp

0,1
Comm: tcp://10.1.10.101:39894,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/34197/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.101:43229,
Local directory: /local/bgfs/sean.kearney/7826872/dask-worker-space/worker-f_1ypiak,Local directory: /local/bgfs/sean.kearney/7826872/dask-worker-space/worker-f_1ypiak

0,1
Comm: tcp://10.1.10.53:41143,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/45623/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:35844,
Local directory: /local/bgfs/sean.kearney/7826892/dask-worker-space/worker-xvbmdfen,Local directory: /local/bgfs/sean.kearney/7826892/dask-worker-space/worker-xvbmdfen

0,1
Comm: tcp://10.1.10.77:40614,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/40065/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:39561,
Local directory: /local/bgfs/sean.kearney/7826896/dask-worker-space/worker-7e6kstx9,Local directory: /local/bgfs/sean.kearney/7826896/dask-worker-space/worker-7e6kstx9

0,1
Comm: tcp://10.1.10.90:43859,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/41136/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.90:41113,
Local directory: /local/bgfs/sean.kearney/7826881/dask-worker-space/worker-52hpypqq,Local directory: /local/bgfs/sean.kearney/7826881/dask-worker-space/worker-52hpypqq

0,1
Comm: tcp://10.1.10.53:40318,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43701/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:32822,
Local directory: /local/bgfs/sean.kearney/7826887/dask-worker-space/worker-9wkwa6x9,Local directory: /local/bgfs/sean.kearney/7826887/dask-worker-space/worker-9wkwa6x9

0,1
Comm: tcp://10.1.10.53:42857,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37913/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:43643,
Local directory: /local/bgfs/sean.kearney/7826885/dask-worker-space/worker-enfd7fmp,Local directory: /local/bgfs/sean.kearney/7826885/dask-worker-space/worker-enfd7fmp

0,1
Comm: tcp://10.1.10.101:35476,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/45439/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.101:46807,
Local directory: /local/bgfs/sean.kearney/7826871/dask-worker-space/worker-9dg7_4ds,Local directory: /local/bgfs/sean.kearney/7826871/dask-worker-space/worker-9dg7_4ds

0,1
Comm: tcp://10.1.10.81:43225,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39746/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:35006,
Local directory: /local/bgfs/sean.kearney/7826913/dask-worker-space/worker-fl38i29u,Local directory: /local/bgfs/sean.kearney/7826913/dask-worker-space/worker-fl38i29u

0,1
Comm: tcp://10.1.10.80:42559,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/42383/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:35976,
Local directory: /local/bgfs/sean.kearney/7826919/dask-worker-space/worker-td4lv7df,Local directory: /local/bgfs/sean.kearney/7826919/dask-worker-space/worker-td4lv7df

0,1
Comm: tcp://10.1.10.81:40147,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43275/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:42880,
Local directory: /local/bgfs/sean.kearney/7826911/dask-worker-space/worker-4f706xmh,Local directory: /local/bgfs/sean.kearney/7826911/dask-worker-space/worker-4f706xmh

0,1
Comm: tcp://10.1.10.80:44316,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39390/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.80:34342,
Local directory: /local/bgfs/sean.kearney/7826924/dask-worker-space/worker-90urmm5w,Local directory: /local/bgfs/sean.kearney/7826924/dask-worker-space/worker-90urmm5w

0,1
Comm: tcp://10.1.10.89:46500,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39421/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.89:39822,
Local directory: /local/bgfs/sean.kearney/7826877/dask-worker-space/worker-9i1pw2a5,Local directory: /local/bgfs/sean.kearney/7826877/dask-worker-space/worker-9i1pw2a5

0,1
Comm: tcp://10.1.10.55:38024,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/45042/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.55:38723,
Local directory: /local/bgfs/sean.kearney/7826866/dask-worker-space/worker-drjx51qz,Local directory: /local/bgfs/sean.kearney/7826866/dask-worker-space/worker-drjx51qz

0,1
Comm: tcp://10.1.10.101:45575,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37685/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.101:33663,
Local directory: /local/bgfs/sean.kearney/7826870/dask-worker-space/worker-i58em8fl,Local directory: /local/bgfs/sean.kearney/7826870/dask-worker-space/worker-i58em8fl

0,1
Comm: tcp://10.1.10.55:36478,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/46270/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.55:34140,
Local directory: /local/bgfs/sean.kearney/7826868/dask-worker-space/worker-yckvht3e,Local directory: /local/bgfs/sean.kearney/7826868/dask-worker-space/worker-yckvht3e

0,1
Comm: tcp://10.1.10.53:41569,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35572/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:33756,
Local directory: /local/bgfs/sean.kearney/7826883/dask-worker-space/worker-phob1om6,Local directory: /local/bgfs/sean.kearney/7826883/dask-worker-space/worker-phob1om6

0,1
Comm: tcp://10.1.10.81:45384,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/40424/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:46777,
Local directory: /local/bgfs/sean.kearney/7826915/dask-worker-space/worker-ia00gfqw,Local directory: /local/bgfs/sean.kearney/7826915/dask-worker-space/worker-ia00gfqw

0,1
Comm: tcp://10.1.10.101:46008,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43722/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.101:39375,
Local directory: /local/bgfs/sean.kearney/7826873/dask-worker-space/worker-jtigxfgc,Local directory: /local/bgfs/sean.kearney/7826873/dask-worker-space/worker-jtigxfgc

0,1
Comm: tcp://10.1.10.89:46370,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/36050/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.89:38212,
Local directory: /local/bgfs/sean.kearney/7826875/dask-worker-space/worker-pod4lj77,Local directory: /local/bgfs/sean.kearney/7826875/dask-worker-space/worker-pod4lj77

0,1
Comm: tcp://10.1.10.81:38807,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37991/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:45229,
Local directory: /local/bgfs/sean.kearney/7826914/dask-worker-space/worker-qse2bn0v,Local directory: /local/bgfs/sean.kearney/7826914/dask-worker-space/worker-qse2bn0v

0,1
Comm: tcp://10.1.10.81:43945,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37089/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.81:44410,
Local directory: /local/bgfs/sean.kearney/7826912/dask-worker-space/worker-atr8478c,Local directory: /local/bgfs/sean.kearney/7826912/dask-worker-space/worker-atr8478c

0,1
Comm: tcp://10.1.10.53:44244,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/35314/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:41659,
Local directory: /local/bgfs/sean.kearney/7826888/dask-worker-space/worker-5hw0si2y,Local directory: /local/bgfs/sean.kearney/7826888/dask-worker-space/worker-5hw0si2y

0,1
Comm: tcp://10.1.10.55:33734,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/37663/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.55:41630,
Local directory: /local/bgfs/sean.kearney/7826864/dask-worker-space/worker-poxom987,Local directory: /local/bgfs/sean.kearney/7826864/dask-worker-space/worker-poxom987

0,1
Comm: tcp://10.1.10.78:41612,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/44706/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:39702,
Local directory: /local/bgfs/sean.kearney/7826903/dask-worker-space/worker-8bamkopb,Local directory: /local/bgfs/sean.kearney/7826903/dask-worker-space/worker-8bamkopb

0,1
Comm: tcp://10.1.10.78:37073,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43050/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.78:33537,
Local directory: /local/bgfs/sean.kearney/7826904/dask-worker-space/worker-w78g8i9d,Local directory: /local/bgfs/sean.kearney/7826904/dask-worker-space/worker-w78g8i9d

0,1
Comm: tcp://10.1.10.53:39139,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/42891/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.53:40111,
Local directory: /local/bgfs/sean.kearney/7826889/dask-worker-space/worker-j_ddw4io,Local directory: /local/bgfs/sean.kearney/7826889/dask-worker-space/worker-j_ddw4io

0,1
Comm: tcp://10.1.10.77:37735,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39030/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:42745,
Local directory: /local/bgfs/sean.kearney/7826895/dask-worker-space/worker-mtacxqp4,Local directory: /local/bgfs/sean.kearney/7826895/dask-worker-space/worker-mtacxqp4

0,1
Comm: tcp://10.1.10.77:45554,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/43353/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.77:42503,
Local directory: /local/bgfs/sean.kearney/7826898/dask-worker-space/worker-vybampeh,Local directory: /local/bgfs/sean.kearney/7826898/dask-worker-space/worker-vybampeh

0,1
Comm: tcp://10.1.10.55:35880,Total threads: 8
Dashboard: /node/ceres19-compute-64-eth.scinet.local/54140/proxy/39408/status,Memory: 18.68 GiB
Nanny: tcp://10.1.10.55:33874,
Local directory: /local/bgfs/sean.kearney/7826867/dask-worker-space/worker-8qcqoi15,Local directory: /local/bgfs/sean.kearney/7826867/dask-worker-space/worker-8qcqoi15


...completed in 69.0 secs
total elasped time: 1.16 mins



In [5]:
# fetch the COG data from the AWS bucket
data_dict = {'date_range': [str(date_rng.min().date()), str(date_rng.max().date())]}
hls_ds = fetch.get_hls(hls_data=data_dict,
                       bbox=df_aoi.total_bounds, 
                       stack_chunks=(4000, 4000),
                       proj_epsg=df_aoi.crs.to_epsg(),
                       lim=1000,
                       aws=aws)
hls_ds = hls_ds.where(hls_ds['eo:cloud_cover'] != None, drop=True)
hls_ds = hls_ds.where(hls_ds['eo:cloud_cover'] < 90, drop=True)
hls_ds = hls_ds.sortby('time').reset_coords(drop=True).chunk({'time': 1, 'y': -1, 'x': -1})

In [6]:
hls_ds

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray


In [7]:
# convert time to datetime
hls_ds['time'] = pd.to_datetime(hls_ds.time)
hls_ds = hls_ds.rename({'time': 'date'})
# drop spec attribute for writing to NETCDF
hls_ds.attrs = {k: hls_ds.attrs[k] for k in hls_ds.attrs if k != 'spec'}

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 18.40 GiB 42.34 MiB Shape (445, 2388, 2324) (1, 2388, 2324) Count 25785 Tasks 445 Chunks Type float64 numpy.ndarray",2324  2388  445,

Unnamed: 0,Array,Chunk
Bytes,18.40 GiB,42.34 MiB
Shape,"(445, 2388, 2324)","(1, 2388, 2324)"
Count,25785 Tasks,445 Chunks
Type,float64,numpy.ndarray


In [None]:
import shutil
zarr_path = os.path.join(outDIR,
                              prefix + 
                              '_hls_tmp/hls_ds_' + 
                              str(yr) +'_' + 
                              datetime.now().strftime('%Y%m%d') + '.zarr')
if not os.path.exists(os.path.join(outDIR, prefix + '_hls_tmp/')):
    os.mkdir(os.path.join(outDIR, prefix + '_hls_tmp/'))
if os.path.exists(zarr_path):
    shutil.rmtree(zarr_path, ignore_errors=True)
hls_ds.to_zarr(os.path.join(outDIR,
                              prefix + 
                              '_hls_tmp/hls_ds_' + 
                              str(yr) +'_' + 
                              datetime.now().strftime('%Y%m%d') + '.zarr'),
              mode = 'w',
               consolidated=True,)

In [None]:
hls_out = xr.open_dataset(os.path.join(outDIR,
                              prefix + 
                              '_hls_tmp/hls_ds_' + 
                              str(yr) +'_' + 
                              datetime.now().strftime('%Y%m%d') + '.zarr'))
hls_out

In [None]:
from rasterio.plot import show
show(hls_out.isel(time=100, y=slice(500, 1000), x=slice(500, 1000)))

In [7]:
def drop_dup_time(ds):
    hls_mask = mask_hls(ds['FMASK'], mask_types=['all'])
    ds['maskcov_pct'] = ((hls_mask != 0).sum(['y', 'x']) / ds['FMASK'].isel(time=0).size * 100)#
    ds_out = ds.groupby('maskcov_pct').apply(lambda x: x.sortby('maskcov_pct')).drop_duplicates('time', keep='first').sortby('time')
    return ds_out

In [8]:
#hls_ds = hls_ds.loc[dict(x=slice(517587.0, 527283.0), y=slice(4524402.0, 4514699.0))].chunk({'y': -1,
#                                                                                             'x': -1,
#                                                                                             'time':1})
#hls_mask = mask_hls(hls_ds['FMASK'])
print('   fetching data...')
t1 = time.time()
idx_load_try = 0
max_load_try = 5
while idx_load_try < max_load_try:
    try:
        # pick best image for any dates with duplicate images
        if len(np.unique(hls_ds.values)) < len(hls_ds.time.values):
            print('    reducing along id dimension to single observation for each date, keeping least-masked image')
            hls_ds = hls_ds.map_blocks(drop_dup_time, template=hls_ds.sortby('time').drop_duplicates('time', keep='first'))
            
        idx_load_try = max_load_try
    except RuntimeError as e:            
        if e.args[0] == 'Set changed size during iteration':
            print('Warning: error with cluster set size. Restarting cluster and retrying ' + \
              str(idx_load_try+1) + ' of ' + str(max_load_try))
            clust.close()
            client.close()
            clust = jq.SLURMCluster(queue=partition,
                            processes=num_processes,
                            cores=n_cores_per_job,
                            memory=str(mem)+'GB',
                            interface='ib0',
                            #interface='enp24s0f0',
                            local_directory='$TMPDIR',
                            death_timeout=30,
                            walltime='02:00:00',
                            job_extra=["--output=/dev/null","--error=/dev/null"])
            client=Client(clust)
            #Scale Cluster 
            clust.scale(jobs=num_jobs)
            try:
                client.wait_for_workers(n_workers=num_jobs*num_processes, timeout=60)
            except dask.distributed.TimeoutError as e:
                print(str(num_jobs*num_processes) + ' workers not available. Continuing with available workers.')
                #print(e)
                pass
            display(client)
        else:
            print('Warning: error connecting to lpdaac. Retrying ' + str(idx_load_try+1) + ' of ' + str(max_load_try))
            client.restart()
        idx_load_try += 1
    except rio.errors.RasterioIOError as e:
        print('Warning: error loading data. Retrying ' + str(idx_load_try+1) + ' of ' + str(max_load_try))
        client.restart()
        idx_load_try += 1

#hls_ds['time'] = hls_ds['time'].dt.date
print('...completed in ' + str(round(time.time() - t1, 0)) + ' secs')
print('total elasped time: ' + str(round((time.time() - t0)/60, 2)) + ' mins\n')

   fetching data...
    reducing along id dimension to single observation for each date, keeping least-masked image
...completed in 0.0 secs
total elasped time: 0.93 mins



    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the

In [9]:
# convert time to datetime
hls_ds['time'] = pd.to_datetime(hls_ds.time)

In [10]:
# drop spec attribute for writing to NETCDF
hls_ds.attrs = {k: hls_ds.attrs[k] for k in hls_ds.attrs if k != 'spec'}

In [None]:
if not os.path.exists(os.path.join(outDIR, prefix + '_hls_tmp/')):
        os.mkdir(os.path.join(outDIR, prefix + '_hls_tmp/'))
hls_ds.to_zarr(os.path.join(outDIR,
                              prefix + 
                              '_hls_tmp/hls_ds_' + 
                              str(yr) +'_' + 
                              datetime.now().strftime('%Y%m%d') + '.zarr'))

In [11]:
hls_ds.to_netcdf(os.path.join(outDIR,
                              prefix + 
                              '_hls_tmp/hls_ds_' + 
                              str(yr) +'_' + 
                              datetime.now().strftime('%Y%m%d') + '.nc'))

In [12]:
hls_ds