# Test Atlas v2

See atlas v1 issues:
https://github.com/roocs/clisops/issues/317

In [1]:
import xarray as xr
from clisops.core import subset
from clisops.ops import subset as subset_op

import time
import os

In [2]:
# Root directory of the C3S Atlas v2 test dataset.
basedir_atlas_v2 = "/mnt/lustre/work/ik1017/C3SATLAS_v2_test/c3s-atlas-dataset"

# Path of the CMIP6 historical file examined in this notebook.
cmip6_nc = basedir_atlas_v2 + "/CMIP6/historical/t_CMIP6_historical_mon_185001-201412_v02.nc"
cmip6_nc

'/mnt/lustre/work/ik1017/C3SATLAS_v2_test/c3s-atlas-dataset/CMIP6/historical/t_CMIP6_historical_mon_185001-201412_v02.nc'

## ncdump - CMIP6

In [3]:
! ncdump -h /mnt/lustre/work/ik1017/C3SATLAS_v2_test/c3s-atlas-dataset/CMIP6/historical/t_CMIP6_historical_mon_185001-201412_v02.nc

netcdf t_CMIP6_historical_mon_185001-201412_v02 {
dimensions:
	bnds = 2 ;
	lon = 360 ;
	lat = 180 ;
	time = 1980 ;
	member = 30 ;
variables:
	double lat(lat) ;
		lat:standard_name = "latitude" ;
		lat:units = "degrees_north" ;
		lat:axis = "Y" ;
		lat:long_name = "latitude" ;
		lat:bounds = "lat_bnds" ;
	double lat_bnds(lat, bnds) ;
	double lon(lon) ;
		lon:standard_name = "longitude" ;
		lon:units = "degrees_east" ;
		lon:axis = "X" ;
		lon:long_name = "longitude" ;
		lon:bounds = "lon_bnds" ;
	double lon_bnds(lon, bnds) ;
	double time(time) ;
		time:standard_name = "time" ;
		time:calendar = "standard" ;
		time:units = "days since 1850-01-01 00:00:00" ;
		time:axis = "T" ;
		time:long_name = "time" ;
		time:bounds = "time_bnds" ;
	double time_bnds(time, bnds) ;
	string member_id(member) ;
		member_id:standard_name = "realization" ;
		member_id:long_name = "Member ID" ;
		member_id:comment = "Values uniquely identify each member of the ensemble (<gcm_institution>_<gcm_model>_<gcm_vari

## xarray - CMIP6

TODO: The file still defines two conflicting fill values (`_FillValue` and `missing_value`) for variable `t`. Without a fix, the dataset cannot be written to a NetCDF file.

Error Message: 
Variable 't' has conflicting _FillValue (-1.7014118346046923e+38) and missing_value (1.0384593717069655e+34). Cannot encode data.

In [4]:
def ds_info(ds):
    """Print a short encoding report for a dataset.

    Three sections are written to stdout:

    1. the ``complevel`` encoding of every data variable (0 when unset),
    2. the ``_FillValue`` encoding of every coordinate and data variable
       (``None`` when unset),
    3. the ``zlib``/``shuffle``/``complevel`` encoding of a fixed list of
       member/GCM/RCM variables; a variable or encoding key that cannot be
       looked up (``KeyError``) is skipped silently.

    ``ds`` must expose ``data_vars``, ``coords`` and item access returning
    objects that carry an ``encoding`` mapping.
    """
    string_vars = (
        "member_id",
        "gcm_variant",
        "gcm_model",
        "gcm_institution",
        "rcm_variant",
        "rcm_model",
        "rcm_institution",
    )
    compression_keys = ("zlib", "shuffle", "complevel")

    # Section 1: compression level of each data variable.
    print("data vars with compression:")
    for name in ds.data_vars:
        print(name, "compression level =", ds[name].encoding.get("complevel", 0))

    # Section 2: fill value of each coordinate, then of each data variable.
    print("\nfill values:")
    for name in [*ds.coords, *ds.data_vars]:
        print(name, "fill value =", ds[name].encoding.get("_FillValue"))

    # Section 3: compression settings of the known string variables.
    print("\nstring attributes with compression:")
    for name in string_vars:
        for key in compression_keys:
            try:
                setting = ds[name].encoding[key]
            except KeyError:
                # Either the variable or the encoding key is absent.
                continue
            print(name, key, setting)

In [5]:
# Open the CMIP6 file with xarray and display the resulting dataset.
ds_cmip6 = xr.open_dataset(cmip6_nc)
ds_cmip6

  new_vars[k] = decode_cf_variable(


In [6]:
# Report compression levels and fill values as read from the original file.
ds_info(ds_cmip6)

data vars with compression:
lat_bnds compression level = 1
lon_bnds compression level = 1
time_bnds compression level = 1
t compression level = 1
crs compression level = 0

fill values:
lat fill value = None
lon fill value = None
time fill value = None
member_id fill value = None
gcm_institution fill value = None
gcm_model fill value = None
gcm_variant fill value = None
height2m fill value = None
lat_bnds fill value = None
lon_bnds fill value = None
time_bnds fill value = None
t fill value = -1.7014118e+38
crs fill value = None

string attributes with compression:
member_id zlib True
member_id shuffle True
member_id complevel 1
gcm_variant zlib True
gcm_variant shuffle True
gcm_variant complevel 1
gcm_model zlib True
gcm_model shuffle True
gcm_model complevel 1
gcm_institution zlib True
gcm_institution shuffle True
gcm_institution complevel 1


In [7]:
# Keep only the first time step (index 0) of the original dataset.
ds = ds_cmip6.isel(time=0)
ds

In [8]:
# Try to write the single-time-step selection with xarray. A ValueError is
# caught and printed so the failure is shown without aborting the notebook.
try:
    ds.to_netcdf("/tmp/atlas_v2_cmip6.nc")
except ValueError as e:
    print("Fails to write as netcdf!", e)

Fails to write as netcdf! Variable 't' has conflicting _FillValue (-1.7014118346046923e+38) and missing_value (1.0384593717069655e+34). Cannot encode data.


## clisops-core - cmip6

TODO: clisops-core has the same issue as xarray. It does not apply any fixes.

In [9]:
# Subset with clisops.core: bounding box lat 45..50 / lon -60..-55,
# restricted in time to 2013-01 through 2013-12.
ds = subset.subset_bbox(
    ds_cmip6, lat_bnds=[45, 50], lon_bnds=[-60, -55],
    start_date='2013-01', end_date='2013-12')
ds

In [10]:
# Try to write the clisops-core subset. A ValueError is caught and printed
# so the failure is shown without aborting the notebook.
try:
    ds.to_netcdf("/tmp/atlas_v2_cmip6.nc")
except ValueError as e:
    print("Fails to write as netcdf!", e)

Fails to write as netcdf! Variable 't' has conflicting _FillValue (-1.7014118346046923e+38) and missing_value (1.0384593717069655e+34). Cannot encode data.


## clisops-ops - cmip6 - subset by time

TODO: The subset operation takes *very* long: about 11 seconds. It should take much less than a second!

In [11]:
# clean up outputs

! rm /tmp/output_*

In [12]:
# Time a time-only subset (2013-01) of the original CMIP6 dataset, written as
# NetCDF into /tmp.
# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()

outputs = subset_op(
    ds=ds_cmip6,
    time="2013-01/2013-01",
    # area=(0.0, 49.0, 10.0, 65.0),
    output_type="nc",
    # output_type="xarray",
    output_dir="/tmp",
    split_method="time:auto",
    file_namer="simple"
)

duration = time.perf_counter() - start
print(f"duration: {duration} secs")

print("Number of output files: ", len(outputs))
outputs[0]



duration: 11.224136352539062 secs
Number of output files:  1


'/tmp/output_001.nc'

In [13]:
# Size on disk of the first output file in bytes, printed after dividing by
# 1024*1024 (labelled "MB" in the output).
file_size = os.path.getsize(outputs[0])
print("File Size is :", file_size/(1024*1024), "MB")

File Size is : 71.30526638031006 MB


In [14]:
# Re-open the first file written by the subset operation and display it.
ds = xr.open_dataset(outputs[0])
ds

In [15]:
# Report compression levels and fill values of the re-opened subset output.
ds_info(ds)

data vars with compression:
lat_bnds compression level = 1
lon_bnds compression level = 1
time_bnds compression level = 1
t compression level = 1
crs compression level = 0

fill values:
lat fill value = None
lon fill value = None
time fill value = None
member_id fill value = None
gcm_institution fill value = None
gcm_model fill value = None
gcm_variant fill value = None
height2m fill value = None
lat_bnds fill value = None
lon_bnds fill value = None
time_bnds fill value = None
t fill value = 1.0384594e+34
crs fill value = None

string attributes with compression:
member_id zlib False
member_id shuffle False
member_id complevel 0
gcm_variant zlib False
gcm_variant shuffle False
gcm_variant complevel 0
gcm_model zlib False
gcm_model shuffle False
gcm_model complevel 0
gcm_institution zlib False
gcm_institution shuffle False
gcm_institution complevel 0


In [16]:
# Write the re-opened subset output with xarray. Unlike the earlier write
# attempts, this call is not wrapped in try/except. The file is opened again
# further below as input for the bbox subset.
ds.to_netcdf("/tmp/atlas_v2_cmip6.nc")

## clisops-ops - cmip6 - subset by bbox

TODO: Subsetting by bbox is not possible on the original dataset! It uses so much memory that even my 32 GB VM was not enough for a successful run.

In [17]:
# Deliberately disabled with `if False`: per the note above this cell, the
# bbox subset of the original dataset did not complete even on a 32GB VM.
# The code is kept to document the call that triggers the problem.
if False:
    start = time.time()

    outputs = subset_op(
        ds=ds_cmip6,
        time="2013-01/2013-01",
        area=(0.0, 49.0, 10.0, 65.0),
        output_type="xarray",
    )

    duration = time.time() - start
    print(f"duration: {duration} secs")

    outputs[0]

## clisops-ops - cmip6 - subset by bbox ... with fix

In [18]:
# Open the file that was written with to_netcdf() earlier in this notebook;
# it serves as the input of the bbox subset below.
ds = xr.open_dataset("/tmp/atlas_v2_cmip6.nc")
ds

In [19]:
# clean up outputs

! rm /tmp/output_*

In [20]:
# Time a combined time (2013-01) and bbox subset of `ds`, written as NetCDF
# into /tmp.
# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()

outputs = subset_op(
    ds=ds,
    time="2013-01/2013-01",
    area=(0.0, 49.0, 10.0, 65.0),
    output_type="nc",
    # output_type="xarray",
    output_dir="/tmp",
    split_method="time:auto",
    file_namer="simple"
)

duration = time.perf_counter() - start
print(f"duration: {duration} secs")

outputs[0]

duration: 1.412027359008789 secs




'/tmp/output_001.nc'

In [21]:
# NOTE(review): `ds` is still the dataset opened from /tmp/atlas_v2_cmip6.nc
# (the input of the subset above), not the newly written outputs[0].
# Confirm whether the report was meant for the subset output instead.
ds_info(ds)

data vars with compression:
lat_bnds compression level = 1
lon_bnds compression level = 1
time_bnds compression level = 1
t compression level = 1
crs compression level = 0

fill values:
lat fill value = nan
lon fill value = nan
time fill value = nan
member_id fill value = None
gcm_institution fill value = None
gcm_model fill value = None
gcm_variant fill value = None
height2m fill value = nan
lat_bnds fill value = nan
lon_bnds fill value = nan
time_bnds fill value = nan
t fill value = 1.0384594e+34
crs fill value = None

string attributes with compression:
member_id zlib False
member_id shuffle False
member_id complevel 0
gcm_variant zlib False
gcm_variant shuffle False
gcm_variant complevel 0
gcm_model zlib False
gcm_model shuffle False
gcm_model complevel 0
gcm_institution zlib False
gcm_institution shuffle False
gcm_institution complevel 0
