# DSSIM

This notebook opens original and compressed NetCDF files at a given data path, computes the DSSIM on the compressed files for specified time steps, and stores the values in a CSV file in the lcr/data/ directory.

In [1]:
# Make sure you are using the cmip6-2019.10 kernel

# Add ldcpy root to system path (MODIFY FOR YOUR LDCPY CODE LOCATION)
import sys

sys.path.insert(0, '/glade/u/home/abaker/repos/ldcpy')
import ldcpy

# Display output of plots directly in Notebook
%matplotlib inline
# Automatically reload module if it is edited
%reload_ext autoreload
%autoreload 2

# silence warnings
import warnings

warnings.filterwarnings("ignore")
import os
# Location of the HDF5 filter plugins. It is re-exported inside every dask
# worker job through `env_list`, which is handed to the cluster as env_extra.
hdf_pp = os.environ.get("HDF5_PLUGIN_PATH")
if not hdf_pp:
    # Fail here with an actionable message instead of a bare KeyError (unset)
    # or a silently empty export line (set but empty).
    raise RuntimeError(
        "HDF5_PLUGIN_PATH is not set; export it before starting the notebook "
        "kernel so it can be passed on to the dask workers."
    )
env_list = [f"export HDF5_PLUGIN_PATH={hdf_pp}"]

In [3]:
# from dask.distributed import Client

# from ncar_jobqueue import NCARCluster
# cluster = NCARCluster(project='NTDD0004', env_extra=env_list)

# # scale as needed
# cluster.adapt(minimum_jobs=1, maximum_jobs=30)
# cluster

In [2]:
# start the dask scheduler

# Note: This notebook should run on Cheyenne for now, running on casper will work for
# the most part but trying to plot the LHFLX variable will result in a timeout,
# presumably due to the large file sizes for the variable.

# for Cheyenne

from dask_jobqueue import PBSCluster



# Settings for each PBS job that hosts dask workers.
# NOTE(review): resource_spec asks PBS for ncpus=9 while cores=36 is given to
# dask -- confirm this difference is intended.
pbs_job_options = {
    "queue": "regular",
    "walltime": "02:00:00",
    "project": "NIOW0001",
    "memory": "109GB",
    "resource_spec": "select=1:ncpus=9:mem=109GB",
    "cores": 36,
    "processes": 9,
    # Shell lines run in each job; carries the HDF5_PLUGIN_PATH export.
    "env_extra": env_list,
}
cluster = PBSCluster(**pbs_job_options)

# scale as needed: between 1 and 30 jobs
cluster.adapt(minimum_jobs=1, maximum_jobs=30)
# Bare expression so the notebook renders the cluster widget.
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [3]:
from dask.distributed import Client

# Connect client to the remote dask workers
# (`cluster` must already exist from the PBSCluster cell).
client = Client(cluster)
# Bare expression so the notebook renders the client summary.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abaker/proxy/{port}/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abaker/proxy/{port}/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.49:44149,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abaker/proxy/{port}/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [6]:
import time

# Variables to open from the monthly (cam.h0) and daily (cam.h1) files.
monthly_variables = ["CCN3", "CLOUD", "FLNS", "FLNT", "FSNS", "FSNT", "LHFLX",
            "PRECC", "PRECL", "PS", "QFLX", "SHFLX", "TMQ", "TS", "U"]
daily_variables = ["FLUT", "LHFLX", "PRECT", "TAUX", "TS", "Z500"]

# cols_*: variable name -> collection returned by ldcpy.open_datasets.
cols_monthly = {}
cols_daily = {}
# sets / levels: variable name -> file paths / compressed-member labels.
# LHFLX and TS appear in both variable lists, so their entries here end up
# holding the monthly values (the monthly pass runs last).
sets = {}
levels = {}
data_path = "/glade/p/cisl/asap/CAM_lossy_test_data_31/"

# Sub-directories of research/bg/; each name is also the label prefix, so the
# labels and the paths are derived from this one list and cannot drift apart.
bg_levels = ["bg_2", "bg_3", "bg_4", "bg_5", "bg_6", "bg_7"]

# (variables, file-name template, destination dict) -- daily first, then monthly.
for variables, filename_template, collections in [
    (daily_variables,
     "b.e11.BRCP85C5CNBDRD.f09_g16.031.cam.h1.{variable}.20060101-20071231.nc",
     cols_daily),
    (monthly_variables,
     "b.e11.BRCP85C5CNBDRD.f09_g16.031.cam.h0.{variable}.200601-201012.nc",
     cols_monthly),
]:
    for variable in variables:
        print(variable)
        filename = filename_template.format(variable=variable)
        levels[variable] = [f"{level}_{variable}" for level in bg_levels]
        # Original file first, then one compressed file per level, matching the
        # label order passed to open_datasets below.
        sets[variable] = [f"{data_path}/orig/{filename}"] + [
            f"{data_path}/research/bg/{level}/{filename}" for level in bg_levels
        ]
        collections[variable] = ldcpy.open_datasets(
            "cam-fv",
            [f"{variable}"],
            sets[variable],
            [f"orig_{variable}"] + levels[variable],
            chunks={"time": 700},
        )



FLUT
dataset size in GB 1.13

LHFLX
dataset size in GB 1.13

PRECT
dataset size in GB 1.13

TAUX
dataset size in GB 1.13

TS
dataset size in GB 1.13

Z500
dataset size in GB 1.13

CCN3
dataset size in GB 2.79

CLOUD
dataset size in GB 2.79

FLNS
dataset size in GB 0.10

FLNT
dataset size in GB 0.10

FSNS
dataset size in GB 0.10

FSNT
dataset size in GB 0.10

LHFLX
dataset size in GB 0.10

PRECC
dataset size in GB 0.10

PRECL
dataset size in GB 0.10

PS
dataset size in GB 0.10

QFLX
dataset size in GB 0.10

SHFLX
dataset size in GB 0.10

TMQ
dataset size in GB 0.10

TS
dataset size in GB 0.10

U
dataset size in GB 2.79



In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
#for variable in ["FLNS", "FLNT", "FSNS", "FSNT", "LHFLX",
#            "PRECC", "PRECL", "PS", "QFLX", "SHFLX", "TMQ", "TS"]:
#    for time in range(0,cols_monthly[variable].dims["time"]):
#        for i in ["bg_2", "bg_3", "bg_4", "bg_5", "bg_6", "bg_7"]:
#            ldcpy.save_metrics(cols_monthly[variable], variable, f"orig_{variable}", f"{i}_{variable}", time=time, location="../data/dssims.csv")

# Daily variables to process in this run (overrides the list set when the
# collections were opened).
daily_variables = ["TS", "Z500"]
bg_labels = ["bg_2", "bg_3", "bg_4", "bg_5", "bg_6", "bg_7"]
# First time index to process for a variable; anything not listed starts at 0.
first_time_index = {"TAUX": 160}

for variable in daily_variables:
    n_times = cols_daily[variable].dims["time"]
    # `time_index` rather than `time`, so the imported `time` module is not clobbered.
    for time_index in range(first_time_index.get(variable, 0), n_times):
        for label in bg_labels:
            ldcpy.save_metrics(
                cols_daily[variable],
                variable,
                f"orig_{variable}",
                f"{label}_{variable}",
                time=time_index,
                location="../data/dssims.csv",
            )



In [7]:
import time

# Variables to open from the monthly (cam.h0) and daily (cam.h1) files.
monthly_variables = ["CCN3", "CLOUD", "FLNS", "FLNT", "FSNS", "FSNT", "LHFLX",
            "PRECC", "PRECL", "PS", "QFLX", "SHFLX", "TMQ", "TS", "U"]
daily_variables = ["FLUT", "LHFLX", "PRECT", "TAUX", "TS", "Z500"]

# cols_*: variable name -> collection returned by ldcpy.open_datasets.
cols_monthly = {}
cols_daily = {}
# sets / levels: variable name -> file paths / compressed-member labels.
# LHFLX and TS appear in both variable lists, so their entries here end up
# holding the monthly values (the monthly pass runs last).
sets = {}
levels = {}
data_path = "/glade/p/cisl/asap/CAM_lossy_test_data_31/research/"

# The numeric suffix shared by the zfp_hdf5/zfp_p_<N> directories and the
# zfp5_p_<N> labels: 6, 8, ..., 26.
zfp_precisions = list(range(6, 28, 2))

# (variables, file-name template, destination dict) -- daily first, then monthly.
for variables, filename_template, collections in [
    (daily_variables,
     "b.e11.BRCP85C5CNBDRD.f09_g16.031.cam.h1.{variable}.20060101-20071231.nc",
     cols_daily),
    (monthly_variables,
     "b.e11.BRCP85C5CNBDRD.f09_g16.031.cam.h0.{variable}.200601-201012.nc",
     cols_monthly),
]:
    for variable in variables:
        print(variable)
        filename = filename_template.format(variable=variable)
        levels[variable] = [
            f"zfp5_p_{precision}_{variable}" for precision in zfp_precisions
        ]
        # Original file first (one directory above data_path), then one
        # compressed file per precision, matching the label order below.
        sets[variable] = [f"{data_path}/../orig/{filename}"] + [
            f"{data_path}/zfp_hdf5/zfp_p_{precision}/{filename}"
            for precision in zfp_precisions
        ]
        collections[variable] = ldcpy.open_datasets(
            "cam-fv",
            [f"{variable}"],
            sets[variable],
            [f"orig_{variable}"] + levels[variable],
            chunks={"time": 700},
        )



FLUT
dataset size in GB 1.94

LHFLX
dataset size in GB 1.94

PRECT
dataset size in GB 1.94

TAUX
dataset size in GB 1.94

TS
dataset size in GB 1.94

Z500
dataset size in GB 1.94

CCN3
dataset size in GB 4.78

CLOUD
dataset size in GB 4.78

FLNS
dataset size in GB 0.16

FLNT
dataset size in GB 0.16

FSNS
dataset size in GB 0.16

FSNT
dataset size in GB 0.16

LHFLX
dataset size in GB 0.16

PRECC
dataset size in GB 0.16

PRECL
dataset size in GB 0.16

PS
dataset size in GB 0.16

QFLX
dataset size in GB 0.16

SHFLX
dataset size in GB 0.16

TMQ
dataset size in GB 0.16

TS
dataset size in GB 0.16

U
dataset size in GB 4.78



In [6]:
# This one is for zfp hdf5


import matplotlib as mpl
import matplotlib.pyplot as plt

# Label prefixes of the zfp members: zfp5_p_6, zfp5_p_8, ..., zfp5_p_26.
zfp_labels = [f"zfp5_p_{precision}" for precision in range(6, 28, 2)]

for variable in ["TAUX", "TS", "Z500"]:
    # Only the first 60 time indices are processed; use
    # range(0, cols_daily[variable].dims["time"]) to cover the whole record.
    # `time_index` rather than `time`, so the imported `time` module is not clobbered.
    for time_index in range(0, 60):
        for label in zfp_labels:
            ldcpy.save_metrics(
                cols_daily[variable],
                variable,
                f"orig_{variable}",
                f"{label}_{variable}",
                time=time_index,
                location="../data/dssims_zfp5_daily.csv",
            )



Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_6_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_8_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_10_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_12_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_14_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_16_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_18_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_20_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_22_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_24_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX data (set1) and zfp5_p_26_TAUX data (set2), time 0 :
Evaluating 4 metrics for orig_TAUX

In [None]:
# Display the collection opened for the daily TS variable.
cols_daily["TS"]

In [None]:
# Display the daily TS collection restricted to the first time index (time=0).
cols_daily["TS"].isel(time=0)

In [None]:

# Pass TS at time index 0 to ldcpy.compare_stats for the "orig_TS" and
# "zfp5_p_6_TS" members. The "zfp5_p_6_TS" label only exists when cols_daily
# was built by the cell that opens the zfp_hdf5 files.
ldcpy.compare_stats(cols_daily["TS"].isel(time=0), "TS", ["orig_TS", "zfp5_p_6_TS"])

In [8]:
import time

# Extended list of daily (cam.h1) variables to open.
daily_variables = ["bc_a1_SRF", "dst_a1_SRF", "dst_a3_SRF", "FLNS", "FLNSC",
                   "FLUT", "FSNS", "FSNSC", "FSNTOA", "ICEFRAC", "LHFLX", "pom_a1_SRF", "PRECL", "PRECSC",
                   "PRECSL", "PRECT", "PRECTMX", "PSL", "Q200", "Q500", "Q850", "QBOT", "SHFLX", "so4_a1_SRF",
                   "so4_a2_SRF", "so4_a3_SRF", "soa_a1_SRF", "soa_a2_SRF", "T010", "T200", "T500", "T850",
                   "TAUX", "TAUY", "TMQ", "TREFHT", "TREFHTMN", "TREFHTMX", "TS", "U010", "U200", "U500", "U850", "VBOT",
                   "WSPDSRFAV", "Z050", "Z500"]

# cols_monthly is reset but not filled here; only daily files are opened.
cols_monthly = {}
# cols_daily: variable name -> collection returned by ldcpy.open_datasets.
cols_daily = {}
# sets / levels: variable name -> file paths / compressed-member labels.
sets = {}
levels = {}
data_path = "/glade/p/cisl/asap/CAM_lossy_test_data_31/research"

# Sub-directories of daily_bg/; each name is also the label prefix, so the
# labels and the paths are derived from this one list and cannot drift apart.
bg_levels = ["bg_2", "bg_3", "bg_4", "bg_5", "bg_6", "bg_7"]

for variable in daily_variables:
    print(variable)
    filename = f"b.e11.BRCP85C5CNBDRD.f09_g16.031.cam.h1.{variable}.20060101-20071231.nc"
    levels[variable] = [f"{level}_{variable}" for level in bg_levels]
    # Original file first, then one compressed file per level, matching the
    # label order passed to open_datasets below.
    sets[variable] = [f"{data_path}/daily_orig/{filename}"] + [
        f"{data_path}/daily_bg/{level}/{filename}" for level in bg_levels
    ]
    cols_daily[variable] = ldcpy.open_datasets(
        "cam-fv",
        [f"{variable}"],
        sets[variable],
        [f"orig_{variable}"] + levels[variable],
        chunks={"time": 700},
    )


bc_a1_SRF
dataset size in GB 1.13

dst_a1_SRF
dataset size in GB 1.13

dst_a3_SRF
dataset size in GB 1.13

FLNS
dataset size in GB 1.13

FLNSC
dataset size in GB 1.13

FLUT
dataset size in GB 1.13

FSNS
dataset size in GB 1.13

FSNSC
dataset size in GB 1.13

FSNTOA
dataset size in GB 1.13

ICEFRAC
dataset size in GB 1.13

LHFLX
dataset size in GB 1.13

pom_a1_SRF
dataset size in GB 1.13

PRECL
dataset size in GB 1.13

PRECSC
dataset size in GB 1.13

PRECSL
dataset size in GB 1.13

PRECT
dataset size in GB 1.13

PRECTMX
dataset size in GB 1.13

PSL
dataset size in GB 1.13

Q200
dataset size in GB 1.13

Q500
dataset size in GB 1.13

Q850
dataset size in GB 1.13

QBOT
dataset size in GB 1.13

SHFLX
dataset size in GB 1.13

so4_a1_SRF
dataset size in GB 1.13

so4_a2_SRF
dataset size in GB 1.13

so4_a3_SRF
dataset size in GB 1.13

soa_a1_SRF
dataset size in GB 1.13

soa_a2_SRF
dataset size in GB 1.13

T010
dataset size in GB 1.13

T200
dataset size in GB 1.13

T500
dataset size in GB 1.13

