### This notebook shows how to use the CompChecker class to determine the optimal compression level

In [1]:
##on Cheyenne
##Make sure you are using the cmip6-2019.10 kernel

# Add ldcpy root to system path (MODIFY FOR YOUR LDCPY CODE LOCATION).
# The path is inserted at position 0 so that it is searched first when
# `import ldcpy` runs below.
import sys

sys.path.insert(0, '/glade/u/home/apinard/newldcpy/ldcpy')
import ldcpy

# Display output of plots directly in Notebook
%matplotlib inline
# Automatically reload module if it is edited
%reload_ext autoreload
%autoreload 2

# silence warnings
import warnings

warnings.filterwarnings("ignore")

# Optional: if you want to use the Dask dashboard on Cheyenne, edit the link
# below (it embeds a JupyterHub user name) and run this cell.
import dask

dask.config.set(
    {'distributed.dashboard.link': 'https://jupyterhub.ucar.edu/ch/user/abaker/proxy/{port}/status'}
)

<dask.config.set at 0x2b4ada128d10>

In [2]:
from dask_jobqueue import PBSCluster

# PBS resource requests that differ between machines.  Keeping both in one
# table (instead of a commented-out alternative) means switching machines is
# a one-word change to `machine` below.
cluster_settings = {
    "casper": dict(
        queue="casper",
        memory="40GB",
        resource_spec="select=1:ncpus=4:mem=40GB",
        cores=4,
        processes=1,
    ),
    "cheyenne": dict(
        queue="regular",
        memory="109GB",
        resource_spec="select=1:ncpus=9:mem=109GB",
        cores=36,
        processes=9,
    ),
}

# Which machine to request workers on: "casper" or "cheyenne"
machine = "casper"

# walltime and project are the same on both machines
cluster = PBSCluster(
    walltime="02:00:00",
    project="NIOW0001",
    **cluster_settings[machine],
)

# scale as needed
cluster.adapt(minimum_jobs=1, maximum_jobs=30)
cluster

VBox(children=(HTML(value='<h2>PBSCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    .d…

In [3]:
from dask.distributed import Client

# Connect client to the remote dask workers
# (the `cluster` object must already exist from the cluster-setup cell)
client = Client(cluster)
# bare last expression so the notebook displays the client summary
client

0,1
Client  Scheduler: tcp://10.12.206.54:41833  Dashboard: https://jupyterhub.ucar.edu/ch/user/abaker/proxy/46246/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [4]:
# Directory holding the sample data and the file-name tail shared by every
# TS file (original and compressed).
sample_root = "/glade/p/cisl/asap/abaker/compression_samples/cam-lens"
ts_suffix = "TS.daily.20060101-20801231.nc"

# Collection labels for the zfp-compressed copies: p10, p12, ..., p24
zfp_labels = ["p" + str(prec) for prec in range(10, 26, 2)]

# Original file first, then one compressed file per label, in the same order
# as the labels passed to open_datasets.
ts_files = [sample_root + "/orig/" + ts_suffix]
ts_files += [sample_root + "/TS/zfp." + label + "." + ts_suffix for label in zfp_labels]

ts_col = ldcpy.open_datasets(
    "cam-fv",
    ["TS"],
    ts_files,
    ["orig"] + zfp_labels,
    chunks={"time": 500},
)

dataset size in GB 54.50



In [5]:
# First time step (time index 0).
# Slice of the original, uncompressed data:
ts_orig_t0 = ts_col["TS"].sel(collection="orig").isel(time=0)

# Mimic doing compression: look up the already-compressed slice for each
# precision level and store it under that level's string key ("10" ... "24").
ts_zfp_data_t0 = {
    key: ts_col["TS"].sel(collection="p" + key).isel(time=0)
    for key in ("10", "12", "14", "16", "18", "20", "22", "24")
}

In [6]:
# Second time step (time index 1).
# Slice of the original, uncompressed data:
ts_orig_t1 = ts_col["TS"].sel(collection="orig").isel(time=1)

# Mimic doing compression: look up the already-compressed slice of the
# second time step for each precision level, keyed by the level as a string.
ts_zfp_data_t1 = {
    key: ts_col["TS"].sel(collection="p" + key).isel(time=1)
    for key in ("10", "12", "14", "16", "18", "20", "22", "24")
}

In [7]:
# set up checker for this time slice (all these choices are the default)
# so could also do:  cc = ldcpy.CompChecker()
# The arguments are spelled out here only to show what can be configured.
cc = ldcpy.CompChecker(
    compressor="zfp",
    comp_mode="p",
    calc_type="ssim_fp",  # name of the column reported by cc.show_results()
    calc_tol=0.9995,  # presumably the pass/fail threshold for calc_type -- confirm in ldcpy docs
    tol_greater_than=True,  # presumably "pass when value > calc_tol"; matches the results tables below
    accept_first=False,  # NOTE(review): semantics not visible in this notebook -- see ldcpy docs
)

In [8]:
# First do time step 0
# init starting guess: the first level handed to the checker
level = 18
# the dictionary of compressed slices is keyed by the level as a string
comp_data = ts_zfp_data_t0[str(level)]
# loop flag: the search loop in the next cell runs while this is True
try_again = True

In [9]:
# Search loop for time step 0: evaluate the current level; the checker
# returns True if another level should be tried, False once it has settled.
while try_again:
    print("checking level = ", level, "...")
    try_again = cc.eval_comp_level(ts_orig_t0, comp_data, level)
    if not try_again:
        # done: ask the checker which level it selected
        opt_level = cc.get_opt_level()
        print("Optimal level  = ", opt_level)
        break
    # fetch the next level proposed by the checker and its compressed slice
    level = cc.get_new_level()
    comp_data = ts_zfp_data_t0[str(level)]
    print("Trying again...")

checking level =  18 ...
Trying again...
checking level =  20 ...
Optimal level  =  20


In [10]:
# Show the checker's record of the levels tried for time step 0
cc.show_results()

Unnamed: 0,ssim_fp,Passed?
18,0.997967,False
20,0.999613,True


In [11]:
# Now go on to time step 1
# reset the checker between steps
cc.reset_checker()
# init starting guess (a different starting level than was used for time step 0)
level = 22
# the dictionary of compressed slices is keyed by the level as a string
comp_data = ts_zfp_data_t1[str(level)]
# loop flag: the search loop in the next cell runs while this is True
try_again = True

In [12]:
# Search loop for time step 1: evaluate the current level; the checker
# returns True if another level should be tried, False once it has settled.
while try_again:
    print("checking level = ", level, "...")
    try_again = cc.eval_comp_level(ts_orig_t1, comp_data, level)
    # NOTE: this prints the flag on every pass, so the final pass reads
    # "Trying again.... False" -- i.e. the search is finished.
    print("Trying again....", try_again)
    if not try_again:
        # done: ask the checker which level it selected
        opt_level = cc.get_opt_level()
        print("Optimal level  = ", opt_level)
        break
    # fetch the next level proposed by the checker and its compressed slice
    level = cc.get_new_level()
    comp_data = ts_zfp_data_t1[str(level)]

checking level =  22 ...
Trying again.... True
checking level =  20 ...
Trying again.... True
checking level =  18 ...
Trying again.... False
Optimal level  =  20


In [13]:
# Show the checker's record of the levels tried for time step 1
cc.show_results()

Unnamed: 0,ssim_fp,Passed?
22,0.999851,True
20,0.99957,True
18,0.998038,False
