In [2]:
from datetime import datetime
import json
import logging
from pathlib import Path
import re
import time
from typing import List, Tuple
from tempfile import NamedTemporaryFile

import dask.array as da
from dask.diagnostics import ProgressBar
from dask.distributed import Client, as_completed
import dask_geopandas as dgd 
import hydra
import geopandas as gpd
import numpy as np
from omegaconf import DictConfig, OmegaConf
import pandas as pd
from pyproj import CRS
from tqdm.notebook import tqdm
import xarray as xr
import zarr

# Logger for this notebook's messages, named after the current module.
log = logging.getLogger(__name__)

In [3]:
# Start a local Dask cluster and connect a client to it, requesting the
# dashboard on port 8989.
# NOTE(review): the recorded output shows the dashboard was hosted on a
# different port ("Perhaps you already have a cluster running?"), which
# suggests 8989 was already taken -- confirm no stale cluster is left over
# from an earlier run of this cell.
client = Client(dashboard_address=':8989')
# Bare expression so the notebook renders the client/cluster summary.
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41441 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:41441/status,

0,1
Dashboard: http://127.0.0.1:41441/status,Workers: 12
Total threads: 144,Total memory: 503.74 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39711,Workers: 12
Dashboard: http://127.0.0.1:41441/status,Total threads: 144
Started: Just now,Total memory: 503.74 GiB

0,1
Comm: tcp://127.0.0.1:32945,Total threads: 12
Dashboard: http://127.0.0.1:43171/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:45927,
Local directory: /tmp/dask-scratch-space/worker-hvoo3dlw,Local directory: /tmp/dask-scratch-space/worker-hvoo3dlw

0,1
Comm: tcp://127.0.0.1:35857,Total threads: 12
Dashboard: http://127.0.0.1:37581/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:35313,
Local directory: /tmp/dask-scratch-space/worker-nntzrrs5,Local directory: /tmp/dask-scratch-space/worker-nntzrrs5

0,1
Comm: tcp://127.0.0.1:42943,Total threads: 12
Dashboard: http://127.0.0.1:42999/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:46323,
Local directory: /tmp/dask-scratch-space/worker-azi7i7at,Local directory: /tmp/dask-scratch-space/worker-azi7i7at

0,1
Comm: tcp://127.0.0.1:43491,Total threads: 12
Dashboard: http://127.0.0.1:34633/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:39707,
Local directory: /tmp/dask-scratch-space/worker-ar55pkqe,Local directory: /tmp/dask-scratch-space/worker-ar55pkqe

0,1
Comm: tcp://127.0.0.1:40757,Total threads: 12
Dashboard: http://127.0.0.1:44451/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:40433,
Local directory: /tmp/dask-scratch-space/worker-8az8oywq,Local directory: /tmp/dask-scratch-space/worker-8az8oywq

0,1
Comm: tcp://127.0.0.1:40351,Total threads: 12
Dashboard: http://127.0.0.1:46163/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:45475,
Local directory: /tmp/dask-scratch-space/worker-yerjdw0f,Local directory: /tmp/dask-scratch-space/worker-yerjdw0f

0,1
Comm: tcp://127.0.0.1:45079,Total threads: 12
Dashboard: http://127.0.0.1:40231/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:41745,
Local directory: /tmp/dask-scratch-space/worker-5v3tmp37,Local directory: /tmp/dask-scratch-space/worker-5v3tmp37

0,1
Comm: tcp://127.0.0.1:33809,Total threads: 12
Dashboard: http://127.0.0.1:37241/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:36235,
Local directory: /tmp/dask-scratch-space/worker-q9unfmtb,Local directory: /tmp/dask-scratch-space/worker-q9unfmtb

0,1
Comm: tcp://127.0.0.1:37157,Total threads: 12
Dashboard: http://127.0.0.1:39895/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:42439,
Local directory: /tmp/dask-scratch-space/worker-_q223s6p,Local directory: /tmp/dask-scratch-space/worker-_q223s6p

0,1
Comm: tcp://127.0.0.1:45397,Total threads: 12
Dashboard: http://127.0.0.1:41535/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:35707,
Local directory: /tmp/dask-scratch-space/worker-r9r5dakr,Local directory: /tmp/dask-scratch-space/worker-r9r5dakr

0,1
Comm: tcp://127.0.0.1:35017,Total threads: 12
Dashboard: http://127.0.0.1:38681/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:42767,
Local directory: /tmp/dask-scratch-space/worker-sibimioa,Local directory: /tmp/dask-scratch-space/worker-sibimioa

0,1
Comm: tcp://127.0.0.1:35543,Total threads: 12
Dashboard: http://127.0.0.1:37739/status,Memory: 41.98 GiB
Nanny: tcp://127.0.0.1:35315,
Local directory: /tmp/dask-scratch-space/worker-robk1ia_,Local directory: /tmp/dask-scratch-space/worker-robk1ia_


In [6]:
# Inline project configuration, written as JSON and turned into an OmegaConf
# object (`cfg`) below.
# The `${...}` placeholders are OmegaConf interpolations that point at other
# keys of this same config (e.g. `zone` is built from `continent` and `area`).
# NOTE(review): `data_path` ends with "/" and every interpolated path adds
# another "/", so the resolved strings contain "//". Wrapping them in
# `pathlib.Path` collapses the duplicate separator; plain string use does not.
# NOTE(review): `zarr.streamflow` omits `${name}` while the other `zarr`
# entries include it -- confirm this is intentional.
json_data = '''
{
  "name": "MERIT",
  "data_path": "/data/tkb5476/projects/marquette/data/",
  "dx": 2000,
  "buffer": 0.3334,
  "units": "mm/day",
  "date_codes": "${data_path}/date_codes.json",
  "crs": {
    "wgs": "epsg:4326",
    "utm18": "epsg:32618"
  },
  "is_streamflow_split": true,
  "start_date": "01-01-1980",
  "end_date": "12-31-2019",
  "num_cores": 20,
  "continent": 7,
  "area": 3,
  "zone": "${continent}${area}",
  "save_paths": {
    "attributes": "${data_path}/${name}/streamflow/attributes.csv",
    "flow_lines": "${data_path}/${name}/raw/flowlines",
    "basins": "${data_path}/${name}/raw/basins/cat_pfaf_${zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp",
    "huc10": "${data_path}/HUC/huc_10_CONUS.shp",
    "streamflow_files": "${data_path}/${name}/streamflow/dpl_v2/dHBV"
  },
  "zarr": {
    "edges": "${data_path}/${name}/zarr/dpl_v2/${zone}_edges/",
    "sorted_edges_keys": "${data_path}/${name}/zarr/dpl_v2/${zone}_edge_keys/",
    "HUC_TM": "${data_path}/${name}/zarr/TMs/PFAF_${continent}${area}",
    "MERIT_TM": "${data_path}/${name}/zarr/TMs/MERIT_FLOWLINES_${continent}${area}",
    "streamflow": "${data_path}/streamflow/zarr/dpl_v2/${zone}"
    }
}'''

# Parse the JSON text into a plain dict, then wrap it in an OmegaConf config so
# values can be read with attribute access (cfg.zarr.HUC_TM, ...).
data_dict = json.loads(json_data)
cfg = OmegaConf.create(data_dict)

In [16]:
# Resolve the on-disk locations of the stores first so the open calls below
# read uniformly. The two TM stores come from the notebook config.
huc_tm_store = Path(cfg.zarr.HUC_TM)
merit_tm_store = Path(cfg.zarr.MERIT_TM)
# NOTE(review): this path is hard-coded rather than taken from
# `cfg.zarr.streamflow`, and the recorded output of this cell is a
# FileNotFoundError for ".../dpl_v2/73/location" -- confirm which directory is
# the intended store root.
streamflow_store = Path("/data/tkb5476/projects/marquette/data/streamflow/zarr/dpl_v2/")

# Open the two TM stores read-only with zarr, and the streamflow predictions
# as an xarray Dataset.
huc_to_merit_TM = zarr.open(huc_tm_store, mode='r')
merit_to_edge_TM = zarr.open(merit_tm_store, mode='r')
streamflow_prediction = xr.open_zarr(streamflow_store)

FileNotFoundError: No such file or directory: '/data/tkb5476/projects/marquette/data/streamflow/zarr/dpl_v2/73/location'

In [11]:
# Print the hierarchy of the HUC TM store: member names with shape and dtype.
print(huc_to_merit_TM.tree())

/
 ├── COMID (23294,) int64
 ├── HUC10 (2437,) object
 └── TM (2437, 23294) float64


In [12]:
# Print the hierarchy of the MERIT TM store: member names with shape and dtype.
print(merit_to_edge_TM.tree())

/
 ├── COMIDs (23294,) int64
 ├── EDGEIDs (96353,) <U11
 └── TM (23294, 96353) float64
