# test_netcdf_compression

# last edited 2 October 2018

In [None]:
%%javascript
// Replace the notebook OutputArea's scroll check with a function that always
// answers "no", whatever line count it is given.
// NOTE(review): presumably this stops long cell output from being collapsed
// into a scrolling box - confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [3]:
print('BEGIN')

%matplotlib inline

__file__='jupyter_notebook' #this can be deleted when written to a python script and loaded as module.

import numpy as np
import numpy.ma as ma
import netCDF4
import inspect
# import pickle
# import bz2
# import glob
import socket
import re
import os
import timeit
# import scipy.stats as st
# import matplotlib.pyplot as plt
# import math
# from scipy import stats
# import pandas as pd
# from matplotlib.ticker import ScalarFormatter, FormatStrFormatter, FixedLocator

# ANSI escape sequences (bright-red foreground / reset all attributes).
# They are not referenced in the cells visible here.
CRED = '\033[91m'
CEND = '\033[0m'

# Select the working directory (rundir) and data root (topdir) from the name
# of the machine the notebook is running on.
hostname=socket.gethostname()

print('hostname=',hostname)

if(re.match('raijin',hostname)):
  print('this is rajin')
  rundir='/short/v14/mac599/cafepp/rundir'
  topdir=''
elif(re.match('oa-3.-cdc',hostname)):
  print('this is vm32')
  rundir='/OSM/CBR/OA_DCFP/work/col414/cafepp'
  topdir='/OSM/CBR/OA_DCFP/data/CAFEPP/CMIP6'
else:
  # Without this branch rundir/topdir would be undefined and the chdir below
  # would fail with an uninformative NameError.
  raise SystemExit('Unrecognised host "'+hostname+'": no rundir/topdir configured for it.')

# Change to the host-specific run directory only. A second, hard-coded chdir
# used to follow here; it overrode the selection above and pointed at a path
# configured for one host only.
os.chdir(rundir)

print('Current Working Directory=',os.getcwd())

from n_data_funcs import n_data_funcs

from decadal_diag import \
  convert_bytes, \
  file_size, \
  compress_nc
  
#   box_indices, \
#   diag_isothetaoNc, \
#   calc_isoN, \
#   shade_2d_latlon, \
#   shade_2d_simple, \
#   smooth, \
#   plot_xy, \
#   plot_box_indices, \
#   nino_indices, \
#   plot_2d_scatter, \
#   lagcorr, \
#   file_sort_ripf, \
#   file_spec_summary, \
#   basic_stats

print('END')

BEGIN
hostname= oa-32-cdc
this is vm32
Current Working Directory= /OSM/CBR/OA_DCFP/work/col414/cafepp
END


In [None]:
def convert_bytes(num):
    """
    Convert a size in bytes to a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then formatted with one decimal place, e.g.
    ``"28.0 MB"``.

    Parameters
    ----------
    num : int or float
        Size in bytes.

    Returns
    -------
    str
        ``"<value> <unit>"`` with unit one of bytes, KB, MB, GB, TB. Sizes of
        1024 TB or more are still reported in TB (e.g. ``"2048.0 TB"``)
        rather than falling off the end of the unit list and returning None.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    for unit in units[:-1]:
        if num < 1024.0:
            return "%3.1f %s" % (num, unit)
        num /= 1024.0
    # Largest unit: always return a string, however large the value is.
    return "%3.1f %s" % (num, units[-1])


def file_size(file_path):
    """
    Report the size of a file as a human-readable string.

    Returns the result of convert_bytes() applied to the file's size in
    bytes, or None when file_path does not name an existing regular file.
    """
    if not os.path.isfile(file_path):
        # Nothing to measure: missing path, directory, etc.
        return None
    size_in_bytes = os.stat(file_path).st_size
    return convert_bytes(size_in_bytes)
      
def compress_nc(input_file, output_file, **kwargs):
  '''
  Copy the entire contents of a netCDF file and optionally apply compression
  to the output variables.

  Global attributes, dimensions and every variable (data and attributes) are
  copied from input_file into a newly created output_file.
  Not all options & combinations have been tested.

  Keyword options (unrecognised keys are ignored):
    Diag        -- truthy to print diagnostics. It may appear anywhere in the
                   argument list.
    nc_model    -- format string handed to netCDF4.Dataset for the output
                   file (default 'NETCDF4_CLASSIC').
    compression -- integer zlib level; supplying it switches compression on.
                   Without it the variables are written uncompressed.
    history     -- truthy (default) to append a note about this copy to the
                   global 'history' attribute, creating it if absent.
    Clobber     -- truthy to delete an existing output_file before writing.

  Returns 0 on success.

  Raises FileNotFoundError if input_file is not an existing file and
  ValueError if output_file refers to the same file as input_file.
  '''

  import netCDF4
  import os
  import datetime

  # Resolve Diag before anything else so the diagnostic messages no longer
  # depend on the order in which the caller wrote the keyword arguments.
  Diag=bool(kwargs.get('Diag', False))
  if(Diag): print('Turning on diagnostics.')

  zlib=Clobber=False
  history=True
  nc_model='NETCDF4_CLASSIC'
  # Always bound so createVariable() can be called without 'compression';
  # netCDF4 ignores complevel while zlib is off (4 is the library default).
  complevel=4

  for key, value in kwargs.items():
    if(key=='nc_model'):
      nc_model=value
      if(Diag): print('Using nc model=',nc_model)
    elif(key=='compression'):
      zlib=True
      complevel=int(value)
      if(Diag): print('Compressing with level (b/w 1-9)=',complevel)
    elif(key=='history'):
      history=bool(value)
      if(Diag and history): print('global history attribute being appended to or created.')
    elif(key=='Clobber'):
      Clobber=bool(value)
      if(Diag and Clobber): print('Overwriting output file if it exists.')

  if(not os.path.isfile(input_file)):
    raise FileNotFoundError('compress_nc: input file does not exist: '+str(input_file))

  # Writing onto the input (or removing it via Clobber) would destroy the data
  # before it has been read.
  if(os.path.exists(output_file) and os.path.samefile(input_file, output_file)):
    raise ValueError('compress_nc: input and output refer to the same file: '+str(input_file))

  # Raw byte count for the ratio; the formatted string is for display only.
  input_bytes=os.path.getsize(input_file)
  input_file_size=file_size(input_file)

  print('Input file: '+input_file+', File size='+input_file_size)
  print('Output file: '+output_file)

  if(Clobber and os.path.exists(output_file)):
    os.remove(output_file)

  # Context managers close both files even if the copy fails part-way.
  with netCDF4.Dataset(input_file, 'r') as ifh:

    global_dictionary = {attr: getattr(ifh, attr) for attr in ifh.ncattrs()}

    with netCDF4.Dataset(output_file, 'w', format=nc_model) as ofh:

      # NOTE(review): each dimension is recreated with its current size, so an
      # unlimited input dimension presumably becomes a fixed one - confirm
      # this is intended.
      for dim in ifh.dimensions.values():
        ofh.createDimension(dim.name, dim.size)

      for cnt, (var, input_variable) in enumerate(ifh.variables.items()):
        if(Diag): print('cnt,var=',cnt,var)

        attribute_names=input_variable.ncattrs()

        # _FillValue has to be given at variable definition time, so it is
        # passed to createVariable() and left out of the attribute copy below.
        if('_FillValue' in attribute_names):
          fill_value=getattr(input_variable, '_FillValue')
        else:
          fill_value=None

        output_variable = ofh.createVariable(var, \
                                             input_variable.datatype, \
                                             input_variable.dimensions, \
                                             zlib=zlib, \
                                             complevel=complevel, fill_value=fill_value)

        # Copy the stored values untouched. With automatic masking/scaling
        # left on, packed data would be unpacked on read and then written
        # into a variable that has no scale_factor/add_offset yet.
        input_variable.set_auto_maskandscale(False)
        output_variable.set_auto_maskandscale(False)

        output_variable.setncatts({attr: getattr(input_variable, attr) \
                                   for attr in attribute_names if attr!='_FillValue'})

        output_variable[:] = input_variable[:]

      # Flush so the size read from disk reflects the variable data. The
      # global attributes are still to come, hence "approx." below.
      ofh.sync()

      output_bytes=os.path.getsize(output_file)
      output_file_size=file_size(output_file)

      # Percentage saved, from byte counts (the display strings may be in
      # different units, e.g. MB versus KB).
      if(input_bytes>0):
        compression=100* (1 - float(output_bytes)/float(input_bytes))
      else:
        compression=0.0

      if(history):
        history_to_append = ' compress_nc.py: input_file='+input_file+ \
        ', output file='+output_file+ \
        ', compressions='+str(compression)+ \
        ', creation time='+str(datetime.datetime.now())+'.'

        global_dictionary['history'] = global_dictionary.get('history', '')+history_to_append

      ofh.setncatts(global_dictionary)

      print('Output file: '+output_file+', File size='+output_file_size+', Compression (approx.)='+str(compression)+'%.')

  return(0) #end of compress_nc

In [4]:
print('BEGIN')

# File to be copied and compressed.
idir='/OSM/CBR/OA_DCFP/data/CAFEPP/g/data1/v14/coupled_model/v1/OUTPUT'
ifil='land_month_0500_01.nc'

# Alternative input kept for reference:
#idir='/OSM/CBR/OA_DCFP/data/CAFEPP'
#ifil='coastal-stns-Vol-monthly.updated-oct2007.nc'

# Options are listed in the same order as before; Diag stays first.
input_path='/'.join((idir, ifil))
compress_options=dict(Diag=True, nc_model='NETCDF4_CLASSIC', compression=1, Clobber=True, history=True)

status = compress_nc(input_path, './test.nc', **compress_options)

# compress_nc signals success with a zero return value.
if(status!=0):
  raise SystemExit('compress_nc non-zero return status:'+__file__+' line number: '+str(inspect.stack()[0][2]))

print('END')

BEGIN
kwargs.items()= dict_items([('Diag', True), ('nc_model', 'NETCDF4_CLASSIC'), ('compression', 1), ('Clobber', True), ('history', True)])
Turning on diagnostics.
Using nc model= NETCDF4_CLASSIC
Compressing with level (b/w 1-9)= 1
Overwriting output file if it exists.
global history attribute being appended to or created.
Input file: /OSM/CBR/OA_DCFP/data/CAFEPP/g/data1/v14/coupled_model/v1/OUTPUT/land_month_0500_01.nc, File size=28.0 MB
Output file: ./test.nc
cnt,var= 0 lon
cnt,var= 1 lonb
cnt,var= 2 lat
cnt,var= 3 latb
cnt,var= 4 time
cnt,var= 5 nv
cnt,var= 6 scalar_axis
cnt,var= 7 zfull
cnt,var= 8 zhalf
cnt,var= 9 disch_w
cnt,var= 10 disch_s
cnt,var= 11 area
cnt,var= 12 ground_type
cnt,var= 13 hlf
cnt,var= 14 hlv
cnt,var= 15 lfrac
cnt,var= 16 albedo
cnt,var= 17 evap
cnt,var= 18 flw
cnt,var= 19 fsw
cnt,var= 20 frozen
cnt,var= 21 groundwater
cnt,var= 22 latent
cnt,var= 23 precip
cnt,var= 24 sens
cnt,var= 25 temp
cnt,var= 26 water
cnt,var= 27 average_T1
cnt,var= 28 average_T2
cnt,va

In [None]:
print('BEGIN')

#===============================================================================
# Copy one input file to netCDF4-classic once per entry of the parallel
# configuration lists below and report how long each copy takes.

idir='/OSM/CBR/OA_DCFP/data/CAFEPP/g/data1/v14/coupled_model/v1/OUTPUT'
# Alternative input kept for reference: ifil='ocean_month_0500_01.nc'
ifil='atmos_month_0500_01.nc'

print('Input file: '+idir+'/'+ifil)

odir='.'

# Parallel lists: entry i describes output file i.
nc_models = ['nc4_classic', 'nc4_classic', 'nc4_classic']
zlibs = [False, True, True]
complevel = [1, 1, 9]

for nc_cnt,nc_model in enumerate(nc_models):

  # default_timer() reads a clock. timeit.timeit() with no arguments instead
  # benchmarks an empty statement and returns that duration, so it cannot be
  # used as a start/stop timestamp.
  time_start = timeit.default_timer()

  # Output name encodes the format and, when compressing, the zlib level.
  name_tokens=['nccomp']

  if(nc_model=='nc4_classic'):
    name_tokens.append('nc4c')

  if(zlibs[nc_cnt]):
    name_tokens.append('zlib'+str(complevel[nc_cnt]))

  ofil=('_'.join(name_tokens))+'.nc'

  print('Output file: '+odir+'/'+ofil)

  if(os.path.exists(odir+'/'+ofil)):
    os.remove(odir+'/'+ofil)

  # Context managers close both files even if the copy fails part-way.
  with netCDF4.Dataset(idir+'/'+ifil, 'r') as ifh:

    global_dictionary = {}
    for attr in ifh.ncattrs():
      global_dictionary[attr] = getattr(ifh,attr)

    print('global_dictionary=',global_dictionary)

    dims_dictionary = {}
    for dims in ifh.dimensions.keys():
      dims_dictionary[ifh.dimensions[dims].name] = ifh.dimensions[dims].size

    # Other formats: NETCDF3_64BIT_OFFSET, NETCDF3_CLASSIC, NETCDF4
    with netCDF4.Dataset(odir+'/'+ofil, 'w', format='NETCDF4_CLASSIC') as ofh:

      for dims in dims_dictionary.keys():
        ofh.createDimension(dims, dims_dictionary[dims])

      for cnt, var in enumerate(ifh.variables.keys()):
        print('cnt,var=',cnt,var)

        input_variable = ifh.variables[var]
        attribute_names = input_variable.ncattrs()

        # _FillValue has to be given at variable definition time, so it is
        # passed to createVariable() and left out of the attribute copy below.
        if('_FillValue' in attribute_names):
          fill_value = getattr(input_variable, '_FillValue')
        else:
          fill_value = None

        # complevel is ignored by netCDF4 while zlib is off.
        output_variable = ofh.createVariable(var, \
                                             input_variable.datatype, \
                                             input_variable.dimensions, \
                                             zlib=zlibs[nc_cnt], \
                                             complevel=complevel[nc_cnt], \
                                             fill_value=fill_value)

        # Copy the stored values untouched. With automatic masking/scaling
        # left on, packed data would be unpacked on read and then written
        # into a variable that has no scale_factor/add_offset yet.
        input_variable.set_auto_maskandscale(False)
        output_variable.set_auto_maskandscale(False)

        var_dictionary = {}
        for attr in attribute_names:
          if(attr!='_FillValue'):
            var_dictionary[attr] = getattr(input_variable, attr)

        output_variable.setncatts(var_dictionary)

        output_variable[:] = input_variable[:]

      ofh.setncatts(global_dictionary)

  # Measured after both files are closed, so the final write-out is included.
  print('Total time =',timeit.default_timer() - time_start)

#===============================================================================

print('END')