In [1]:
import os
import xarray as xr
import rioxarray as riox
import re
from tqdm.notebook import tqdm

In [2]:
# Input root: listed later with os.listdir, one entry per year to convert.
inDIR = "/mnt/e/LMF_STARFM_unzip/"
# Output root: one NetCDF file per input entry is written here.
outDIR = "/mnt/e/LMF_STARFM_netcdf/"
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also creates
# missing parent directories and does not fail if the directory appears
# between the check and the creation (e.g. on a re-run).
os.makedirs(outDIR, exist_ok=True)

# Years that are always removed from the work list, whether or not an output
# file already exists for them.
# NOTE(review): the name suggests these years are covered by HLS - confirm.
ignore_hls_yrs = [2018, 2019, 2020, 2021, 2022]

In [3]:
import dask
from dask.distributed import Client, LocalCluster

# Start a local Dask cluster (8 workers, 2 threads each) and attach a client
# to it so that subsequent dask-backed work is scheduled on these workers.
cluster = LocalCluster(
    n_workers=8,
    threads_per_worker=2,
)
client = Client(cluster)

# Render the client summary (dashboard link, worker list) in the notebook.
display(client)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45535 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:45535/status,

0,1
Dashboard: http://127.0.0.1:45535/status,Workers: 8
Total threads: 16,Total memory: 11.85 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33381,Workers: 8
Dashboard: http://127.0.0.1:45535/status,Total threads: 16
Started: Just now,Total memory: 11.85 GiB

0,1
Comm: tcp://127.0.0.1:36705,Total threads: 2
Dashboard: http://127.0.0.1:41607/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:34993,
Local directory: /tmp/dask-worker-space/worker-7332woeh,Local directory: /tmp/dask-worker-space/worker-7332woeh

0,1
Comm: tcp://127.0.0.1:44161,Total threads: 2
Dashboard: http://127.0.0.1:39437/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:42437,
Local directory: /tmp/dask-worker-space/worker-3_mimotw,Local directory: /tmp/dask-worker-space/worker-3_mimotw

0,1
Comm: tcp://127.0.0.1:34097,Total threads: 2
Dashboard: http://127.0.0.1:34577/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:43493,
Local directory: /tmp/dask-worker-space/worker-6cwoaqsp,Local directory: /tmp/dask-worker-space/worker-6cwoaqsp

0,1
Comm: tcp://127.0.0.1:41927,Total threads: 2
Dashboard: http://127.0.0.1:33939/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:36083,
Local directory: /tmp/dask-worker-space/worker-6l2sqj7f,Local directory: /tmp/dask-worker-space/worker-6l2sqj7f

0,1
Comm: tcp://127.0.0.1:37547,Total threads: 2
Dashboard: http://127.0.0.1:33553/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:37387,
Local directory: /tmp/dask-worker-space/worker-q2sj4est,Local directory: /tmp/dask-worker-space/worker-q2sj4est

0,1
Comm: tcp://127.0.0.1:42803,Total threads: 2
Dashboard: http://127.0.0.1:37381/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:39721,
Local directory: /tmp/dask-worker-space/worker-ya75dr29,Local directory: /tmp/dask-worker-space/worker-ya75dr29

0,1
Comm: tcp://127.0.0.1:37995,Total threads: 2
Dashboard: http://127.0.0.1:37409/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:46497,
Local directory: /tmp/dask-worker-space/worker-7eo15n_3,Local directory: /tmp/dask-worker-space/worker-7eo15n_3

0,1
Comm: tcp://127.0.0.1:39525,Total threads: 2
Dashboard: http://127.0.0.1:42405/status,Memory: 1.48 GiB
Nanny: tcp://127.0.0.1:32955,
Local directory: /tmp/dask-worker-space/worker-kw18cevj,Local directory: /tmp/dask-worker-space/worker-kw18cevj


In [4]:
def _doy_from_fname(f, band_str_in, yr_str):
    """Extract the integer day-of-year encoded in a raster file name.

    The file name (directory part removed) is split on '.', and the token
    immediately before the first ``band_str_in`` token is taken as the date
    token. Every occurrence of ``yr_str`` is removed from that token and the
    remainder is converted to ``int``.

    NOTE(review): this presumes names shaped like ``<prefix>.<YYYYDDD>.<band>.bin``
    - confirm against the actual STARFM output naming.

    Raises:
        ValueError: if no token equal to ``band_str_in`` follows a date token,
            or if the remaining text is not an integer.
    """
    # Only look at the file name: dots in directory names must not shift tokens.
    tokens = os.path.basename(f).split('.')
    # Start the search at index 1 so there is always a preceding token
    # (index 0 would silently wrap around to the last token).
    if band_str_in not in tokens[1:]:
        raise ValueError(
            "cannot locate band token {!r} preceded by a date token in file name {!r}".format(
                band_str_in, f))
    date_token = tokens[tokens.index(band_str_in, 1) - 1]
    return int(re.sub(yr_str, '', date_token))


def flist_to_xr(flist, band_str_in, yr_str, band_str_out, chunks={'x': 250, 'y': 250, 'band': -1}):
    """Stack single-date rasters of one band into a DataArray along 'DOY'.

    Each file is opened with rioxarray, named ``band_str_out``, has its
    'band' dimension renamed to 'DOY' and labelled with the day-of-year parsed
    from the file name, and gets a scalar 'YEAR' coordinate. The per-file
    arrays are then concatenated along 'DOY' in ascending day-of-year order.

    Args:
        flist: iterable of raster file paths for a single band and year.
        band_str_in: band token as it appears in the file names (e.g. 'nir').
        yr_str: year as a string; stripped from the date token and stored
            as the integer 'YEAR' coordinate.
        band_str_out: name given to the resulting DataArray.
        chunks: chunk specification forwarded to ``riox.open_rasterio`` so
            the rasters are opened as dask arrays. Pass ``None`` to open them
            without dask. The default dict is only read, never mutated.

    Returns:
        The object returned by ``xr.concat`` over the per-file arrays.

    Raises:
        ValueError: if ``flist`` is empty or a file name cannot be parsed.
    """
    flist = list(flist)
    if not flist:
        # Fail early with a message that names the band instead of letting
        # xr.concat complain about an empty sequence.
        raise ValueError(
            "no input files for band {!r} in year {}".format(band_str_in, yr_str))

    da_by_doy = []
    for f in tqdm(flist):
        doy = _doy_from_fname(f, band_str_in, yr_str)
        # Forward `chunks`; previously the argument was accepted but ignored.
        da_tmp = riox.open_rasterio(f, chunks=chunks)
        da_tmp.name = band_str_out
        da_tmp = da_tmp.rename({'band': 'DOY'})
        # A one-element label list: each file is expected to hold one band.
        da_tmp['DOY'] = [doy]
        da_tmp = da_tmp.assign_coords(YEAR=int(yr_str))
        da_by_doy.append((doy, da_tmp))

    # Directory listings come back in arbitrary order; sort so the time axis
    # of the result is monotonically increasing.
    da_by_doy.sort(key=lambda pair: pair[0])
    da_out = xr.concat([da for _, da in da_by_doy], dim='DOY')
    return da_out

In [7]:
# Index every entry found in inDIR by the integer left after taking the text
# before the first '.' and removing 'CPER_' from it; each entry records its
# directory name and an (initially empty) per-band file table.
all_dirs = os.listdir(inDIR)
dirs_dict = {
    int(re.sub('CPER_', '', d.split('.')[0])): {'dir': d, 'bands': {}}
    for d in all_dirs
}

# Chunk specification handed to flist_to_xr when the rasters are loaded.
chunks = dict(x=250, y=250, band=-1)

In [8]:
# Remove from the work list every year that is explicitly ignored or whose
# NetCDF output file is already present in outDIR.
finished_yrs = [
    yr for yr in dirs_dict
    if os.path.exists(os.path.join(outDIR, dirs_dict[yr]['dir'] + '.nc'))
]
yr_drop_list = ignore_hls_yrs + finished_yrs
for yr in yr_drop_list:
    # pop with a default: years missing from dirs_dict are skipped silently.
    dirs_dict.pop(yr, None)

In [9]:
# Sanity check: display the keys (years) still present in dirs_dict, i.e. the
# ones the conversion loop will process.
dirs_dict.keys()

dict_keys([2017, 2016])

In [10]:
# Output variable name -> band token that terminates the raster file names
# (files are matched on '<token>.bin').
band_tokens = {
    'BLUE': 'blue',
    'GREEN': 'green',
    'RED': 'red',
    'NIR1': 'nir',
    'SWIR1': 'swir1',
    'SWIR2': 'swir2',
}

# Pass 1: for every remaining year, collect the raster files of each band.
for yr in tqdm(dirs_dict):
    yr_dir = os.path.join(inDIR, dirs_dict[yr]['dir'])
    # Sorted so the file order does not depend on the file system.
    all_files = sorted(os.listdir(yr_dir))

    files_dict = {}
    for b, band_str in band_tokens.items():
        files_dict[b] = {
            'band_str': band_str,
            'files': [os.path.join(yr_dir, f)
                      for f in all_files if f.endswith(band_str + '.bin')],
        }
    dirs_dict[yr]['bands'] = files_dict

# Pass 2: build one dataset per year (one variable per band) and write it.
for yr in dirs_dict:
    print(yr)
    ds_list = []
    for b in tqdm(dirs_dict[yr]['bands']):
        ds_tmp = flist_to_xr(flist=dirs_dict[yr]['bands'][b]['files'],
                             band_str_in=dirs_dict[yr]['bands'][b]['band_str'],
                             yr_str=str(yr),
                             band_str_out=b, chunks=chunks)
        ds_list.append(ds_tmp)

    dat = xr.merge(ds_list)

    # Write to a temporary name and move it into place only once the write
    # has finished. The "already converted" check keys on the final file
    # existing, so a partially written file left by an interrupted run would
    # otherwise cause that year to be skipped forever.
    out_path = os.path.join(outDIR, dirs_dict[yr]['dir'] + '.nc')
    tmp_path = out_path + '.tmp'
    try:
        dat.to_netcdf(tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # Release the file handles held by this year's data before moving on,
        # and never leave a half-written temporary file behind.
        dat.close()
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

  0%|          | 0/2 [00:00<?, ?it/s]

2017


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

2016


  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

  0%|          | 0/365 [00:00<?, ?it/s]

2023-05-05 15:13:36,584 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-76kz3pv0', purging
2023-05-05 15:13:36,585 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-isauf5a3', purging
2023-05-05 15:13:36,585 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-pfu8wvjm', purging
2023-05-05 15:13:36,586 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-bh7vk57t', purging
2023-05-05 15:13:36,586 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-k7ugzj3p', purging
2023-05-05 15:13:36,586 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-nenkiafy', purging
2023-05-05 15:13:37,534 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-g3szj99a', purging