In [1]:
import math
import os
import time

# pip/conda installed
import dask.array as da
import fiona.transform
import fsspec
import pandas as pd
import xarray as xr
from netCDF4 import Dataset
from dask.distributed import as_completed
from dask.distributed import Client
from dask.distributed import performance_report
from dask_gateway import GatewayCluster

from utils import get_logger
from utils.dask import create_cluster
from utils.dask import upload_source

In [2]:
def reproject(lons, lats):
    """Project geographic coordinates onto a Lambert Conformal Conic grid.

    Parameters
    ----------
    lons, lats
        Longitudes and latitudes in EPSG:4326, passed straight through to
        ``fiona.transform.transform`` (the notebook supplies plain lists).

    Returns
    -------
    tuple
        ``(x, y)`` exactly as returned by ``fiona.transform.transform``,
        expressed in the target CRS (metres, per ``+units=m``).
    """
    # Target projection: LCC with standard parallels 25/60, origin 42.5N 100W.
    # NOTE(review): presumably chosen to match the Daymet grid -- confirm.
    target_crs = "+proj=lcc +lat_1=25 +lat_2=60 +lat_0=42.5 +lon_0=-100 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs"
    source_crs = "EPSG:4326"
    # Unpacking into exactly two names keeps the implicit "two sequences" check.
    xs, ys = fiona.transform.transform(source_crs, target_crs, lons, lats)
    return xs, ys

In [3]:
# Azure storage account key used by the fsspec mappers below.
# Prefer exporting AZURE_ACCOUNT_KEY before starting Jupyter so the secret never
# lands in the notebook. A non-empty value already in the environment is kept;
# only when it is missing or empty does the placeholder below apply
# (fill it in locally if you must, and do not commit it).
if not os.environ.get('AZURE_ACCOUNT_KEY'):
    os.environ['AZURE_ACCOUNT_KEY'] = ""

In [4]:
# Attach to an already-running Dask scheduler (`Client` is imported in the
# first cell, so it is not re-imported here).
# The port of a local scheduler changes between sessions, so the address can be
# overridden with DASK_SCHEDULER_ADDRESS; the original address stays the default.
client = Client(os.environ.get("DASK_SCHEDULER_ADDRESS", "tcp://127.0.0.1:33247"))
client

0,1
Client  Scheduler: tcp://127.0.0.1:33247  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 16  Memory: 33.68 GB


In [5]:
# Key-value mappers for the two gridded source Zarr stores (monthly means and
# monthly standard deviations) in the "usfs" Azure storage account.
# Both are opened with the same credentials, read from the environment.
mean_src, std_src = (
    fsspec.get_mapper(
        store_url,
        account_name="usfs",
        account_key=os.environ['AZURE_ACCOUNT_KEY'],
    )
    for store_url in (
        "az://fia/daymet/na_40yr_monthly_means.zarr",
        "az://fia/daymet/na_40yr_monthly_std_devs.zarr",
    )
)

In [11]:
# Key-value mappers for the two destination Zarr stores that receive the
# per-point samples (means and standard deviations), in the same "usfs"
# Azure storage account and with the same credentials as the sources.
pt_means_dest, pt_std_dest = (
    fsspec.get_mapper(
        store_url,
        account_name="usfs",
        account_key=os.environ['AZURE_ACCOUNT_KEY'],
    )
    for store_url in (
        "az://fia/daymet/fia_40yr_monthly_means.zarr",
        "az://fia/daymet/fia_40yr_monthly_std_devs.zarr",
    )
)

In [7]:
# Load the point table and keep the rows inventoried in 2015 or later.
df = pd.read_csv('./fia_no_pltcn.csv')
# where() blanks every row failing the INVYR test to NaN; dropna() then removes
# those rows together with any row that has a missing value in any column.
# NOTE(review): where() may upcast integer columns (e.g. INDEX) to float, which
# would carry through to the 'idx' coordinate written later -- confirm intended.
train_pts = df.where(df['INVYR'].ge(2015)).dropna()
# Project the point lon/lat pairs into the grid's x/y coordinates.
x, y = reproject(train_pts['LON'].to_list(), train_pts['LAT'].to_list())

In [8]:
# Open the gridded monthly-means store as dask-backed arrays, chunked as one
# month per chunk and 1250 x 1250 tiles in x/y.
mn = xr.open_zarr(store=mean_src, chunks=dict(month=1, x=1250, y=1250))
mn

  store.get_chunk(k, v, chunks),
  store.get_chunk(k, v, chunks),


Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 252.39 MB 6.25 MB Shape (8075, 7814) (1250, 1250) Count 50 Tasks 49 Chunks Type float32 numpy.ndarray",7814  8075,

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 252.39 MB 6.25 MB Shape (8075, 7814) (1250, 1250) Count 50 Tasks 49 Chunks Type float32 numpy.ndarray",7814  8075,

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray


In [9]:
# Sample the monthly-means grid at the cell nearest to each training point.
# Both indexers share the 'idx' dimension (labelled with the INDEX column), so
# the selection is pointwise: one value per point rather than an x/y grid.
# The grid coordinates are dropped before the result is written out.
# NOTE(review): to_zarr is called without `mode`; re-running this cell
# presumably fails if the destination store already exists -- confirm.
mn.sel(
    {
        axis: xr.DataArray(
            projected, dims='idx', coords={'idx': train_pts['INDEX'].tolist()}
        )
        for axis, projected in (('x', x), ('y', y))
    },
    method="nearest",
).drop_vars(['lat', 'lon', 'x', 'y']).to_zarr(pt_means_dest)

<xarray.backends.zarr.ZarrStore at 0x7fdd1807a460>

In [10]:
# Open the gridded monthly-standard-deviation store as dask-backed arrays,
# chunked as one month per chunk and 1250 x 1250 tiles in x/y.
std = xr.open_zarr(store=std_src, chunks=dict(month=1, x=1250, y=1250))
std

  store.get_chunk(k, v, chunks),
  store.get_chunk(k, v, chunks),


Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 252.39 MB 6.25 MB Shape (8075, 7814) (1250, 1250) Count 50 Tasks 49 Chunks Type float32 numpy.ndarray",7814  8075,

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 252.39 MB 6.25 MB Shape (8075, 7814) (1250, 1250) Count 50 Tasks 49 Chunks Type float32 numpy.ndarray",7814  8075,

Unnamed: 0,Array,Chunk
Bytes,252.39 MB,6.25 MB
Shape,"(8075, 7814)","(1250, 1250)"
Count,50 Tasks,49 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.03 GB 6.25 MB Shape (12, 8075, 7814) (1, 1250, 1250) Count 589 Tasks 588 Chunks Type float32 numpy.ndarray",7814  8075  12,

Unnamed: 0,Array,Chunk
Bytes,3.03 GB,6.25 MB
Shape,"(12, 8075, 7814)","(1, 1250, 1250)"
Count,589 Tasks,588 Chunks
Type,float32,numpy.ndarray


In [13]:
# Sample the monthly-standard-deviation grid at the cell nearest to each
# training point. Both indexers share the 'idx' dimension (labelled with the
# INDEX column), so the selection is pointwise: one value per point.
# The grid coordinates are dropped before the result is written out.
# NOTE(review): to_zarr is called without `mode`; re-running this cell
# presumably fails if the destination store already exists -- confirm.
std.sel(
    {
        axis: xr.DataArray(
            projected, dims='idx', coords={'idx': train_pts['INDEX'].tolist()}
        )
        for axis, projected in (('x', x), ('y', y))
    },
    method="nearest",
).drop_vars(['lat', 'lon', 'x', 'y']).to_zarr(pt_std_dest)

<xarray.backends.zarr.ZarrStore at 0x7fdd0b435e20>

In [14]:
# Re-open the per-point standard-deviation store that was just written, as a
# visual sanity check of its variables and (month, idx) shape.
check = xr.open_zarr(store=pt_std_dest)
check

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.51 MB 792.90 kB Shape (12, 198225) (1, 198225) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",198225  12,

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.51 MB 792.90 kB Shape (12, 198225) (1, 198225) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",198225  12,

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.51 MB 792.90 kB Shape (12, 198225) (1, 198225) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",198225  12,

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.51 MB 792.90 kB Shape (12, 198225) (1, 198225) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",198225  12,

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 9.51 MB 792.90 kB Shape (12, 198225) (1, 198225) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",198225  12,

Unnamed: 0,Array,Chunk
Bytes,9.51 MB,792.90 kB
Shape,"(12, 198225)","(1, 198225)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
