In [3]:
import argparse
import json
import logging

import dask
import numpy as np
import xarray as xr

from dask.distributed import Client
import dask.config
import dask.array as da

In [4]:
import sys

In [5]:
sys.path.append('../src')

In [6]:
import helper_modules

In [7]:
%load_ext autoreload
%autoreload 2

In [8]:
with open("../src/conf/domain_config.json", "r") as j:
    domain_config = json.loads(j.read())

In [9]:
with open("../src/conf/attribute_config.json", "r") as j:
    attribute_config = json.loads(j.read())

In [10]:
with open("../src/conf/variable_config.json", "r") as j:
    variable_config = json.loads(j.read())

In [11]:
domain_config = domain_config['west_africa']

In [12]:
variable_config = {
    key: value
    for key, value in variable_config.items()
    if key in domain_config["variables"]
}

In [13]:
reg_dir_dict, glob_dir_dict = helper_modules.set_and_make_dirs(domain_config)

In [14]:
syr_calib = domain_config["syr_calib"]
eyr_calib = domain_config["eyr_calib"]

In [15]:
client, cluster = helper_modules.getCluster('fat', 1, 60)
        
client.get_versions(check=True)
client.amm.start()
         
print(f"Dask dashboard available at {client.dashboard_link}")

Dask dashboard available at http://172.27.80.110:34792/status


Perhaps you already have a cluster running?
Hosting the HTTP server on port 34792 instead


In [16]:
raw_full, pp_full, refrcst_full, ref_full = helper_modules.set_input_files(domain_config, reg_dir_dict, 4, 2016, 'tp')

In [17]:
coords = helper_modules.get_coords_from_frcst(raw_full)

In [18]:
global_attributes = helper_modules.update_global_attributes(
    attribute_config, domain_config["bc_params"], coords, 'west_africa'
)

In [19]:
encoding = helper_modules.set_encoding(variable_config, coords)

In [20]:
ds_obs = xr.open_zarr(ref_full, consolidated=False)
ds_obs = xr.open_zarr(
    ref_full,
    chunks={"time": len(ds_obs.time), "lat": 1, "lon": 1},
    consolidated=False
    )
da_obs = ds_obs['tp']
#da_obs = da_obs.isel(lat=np.arange(100, 130), lon=np.arange(100, 130))

In [21]:
ds_mdl = xr.open_zarr(refrcst_full, consolidated=False)
ds_mdl = xr.open_zarr(
    refrcst_full,
    chunks={
       "time": len(ds_mdl.time),
       "ens": len(ds_mdl.ens),
       "lat": 1,
       "lon": 1
    },
    consolidated=False
    )
da_mdl = ds_mdl['tp']
#da_mdl = da_mdl.isel(lat=np.arange(100, 130), lon=np.arange(100, 130))

In [22]:
ds_pred = xr.open_dataset(raw_full)
ds_pred = xr.open_mfdataset(
    raw_full,
    chunks={
        "time": len(ds_pred.time),
        "ens": len(ds_pred.ens),
        "lat": 1,
        "lon": 1
     },
     parallel=False,
     engine="netcdf4",
)
da_pred = ds_pred['tp']

#da_pred = da_pred.isel(lat=np.arange(100, 130), lon=np.arange(100, 130))

In [24]:
pred_out = da.zeros(shape=(len(da_pred.time), len(da_pred.ens), len(da_pred.lat), len(da_pred.lon)), chunks=(len(da_pred.time), len(da_pred.ens), 1, 1))

In [None]:
import importlib

In [25]:
import bc_module_v2

In [26]:
client.upload_file("bc_module_v2.py")

{'tcp://172.27.80.111:32873': {'status': 'OK'},
 'tcp://172.27.80.111:33157': {'status': 'OK'},
 'tcp://172.27.80.111:33197': {'status': 'OK'},
 'tcp://172.27.80.111:33215': {'status': 'OK'},
 'tcp://172.27.80.111:33512': {'status': 'OK'},
 'tcp://172.27.80.111:33600': {'status': 'OK'},
 'tcp://172.27.80.111:33747': {'status': 'OK'},
 'tcp://172.27.80.111:33841': {'status': 'OK'},
 'tcp://172.27.80.111:34068': {'status': 'OK'},
 'tcp://172.27.80.111:34186': {'status': 'OK'},
 'tcp://172.27.80.111:34237': {'status': 'OK'},
 'tcp://172.27.80.111:35074': {'status': 'OK'},
 'tcp://172.27.80.111:35193': {'status': 'OK'},
 'tcp://172.27.80.111:35527': {'status': 'OK'},
 'tcp://172.27.80.111:35565': {'status': 'OK'},
 'tcp://172.27.80.111:35970': {'status': 'OK'},
 'tcp://172.27.80.111:36141': {'status': 'OK'},
 'tcp://172.27.80.111:36323': {'status': 'OK'},
 'tcp://172.27.80.111:36767': {'status': 'OK'},
 'tcp://172.27.80.111:37056': {'status': 'OK'},
 'tcp://172.27.80.111:37095': {'status':

In [None]:
da_obs.isel(lon=0, lat=0)

In [28]:
for i in range(0, len(da_pred.lat)):
    for j in range(0, len(da_pred.lon)):
        out = bc_module_v2.bc_module(da_pred, da_obs, da_mdl, i, j, 'fluff', domain_config, True)
        pred_out[:, :, i, j] = da.from_delayed(out, shape=[len(da_pred.time), len(da_pred.ens)], dtype=float)
        print(i, j)

0 0
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 9
0 10
0 11
0 12
0 13
0 14
0 15
0 16
0 17
0 18
0 19


KeyboardInterrupt: 

In [None]:
pred_out.persist()

In [None]:
da_out = xr.Dataset(
            data_vars=dict(tp=(["time", "ens", "lat", "lon"], pred_out)),       
            coords=dict(
                time=da_pred.time,
                ens=da_pred.ens,
                lon=da_pred.lon,
                lat=da_pred.lat
            ),
            attrs=dict(
            description="This is a small stupid test...",
            nits="And were going to kick some ass..."),
    )

In [None]:
da_out.to_zarr('/bg/data/NCZarr/bcsd_test.zarr')