In [1]:
import xarray as xr
import numpy as np

In [2]:
# Open the source netCDF file; the bare `ds` below renders its summary as the
# cell output.
ds = xr.open_dataset(filename_or_obj="20210614/cdp/nc_output/to332_cdp_r1.nc")
ds

In [3]:
def _fix_flag_var(ds, var_name):
    
    flag_values = ds[var_name].flag_values
    if isinstance(flag_values, str):
        dtype = ds[var_name].dtype
        vals = [np.array(s.replace("b", ""), dtype=dtype) for s in ds[var_name].flag_values.split(",")]
        flag_values = np.array(vals)
    ds[var_name].attrs['flag_values'] = flag_values
    assert ds[var_name].attrs['flag_values'].dtype == ds[var_name].dtype
    
    ds[var_name].attrs['flag_meanings'] = format(" ".join(ds[var_name].flag_meanings.split()))
    
    return ds[var_name]

# Fix the flag attributes of the per-channel QC variable; the returned
# variable is shown as the cell output.
_fix_flag_var(ds, "qc_flag_ambient_particle_number_per_channel")

In [4]:
# Rebind `ds` to a shallow copy (deep=False is the default of Dataset.copy).
ds = ds.copy(deep=False)

In [5]:
# Remove coordinates that don't have valid values anyway (they are all NaNs).
# drop_dims only accepts dimension names, so fall back to drop_vars when the
# coordinate is present but is not a dimension of its own.
for v in ("longitude", "latitude"):
    if v in ds:
        ds = ds.drop_dims(v) if v in ds.sizes else ds.drop_vars(v)

# Drop variables we don't know the value for (also all NaNs).
# Dataset.drop is deprecated; drop_vars is its replacement for variables.
for v in ("altitude", "platform_speed_wrt_air", "platform_pitch_angle", "platform_yaw_angle"):
    if v in ds:
        ds = ds.drop_vars(v)

# Ensure the "coordinates" entry is removed from each variable's encoding so
# it isn't saved with the stored file.
for v in ds.data_vars:
    ds[v].encoding.pop("coordinates", None)

# CF-conventions stipulate that the "Conventions" attribute is spelled with a
# capital C. Rename only when the lower-case key is present, so re-running
# this cell (or starting from a file that is already correct) does not raise
# KeyError.
if "conventions" in ds.attrs:
    ds.attrs["Conventions"] = ds.attrs.pop("conventions")

In [6]:
# Write the cleaned dataset to disk so it can be validated and re-opened.
ds.to_netcdf(path="test.nc")

In [7]:
from cfchecker.cfchecks import getargs, CFChecker, CFVersion
import warnings
import textwrap

class ValidationError(Exception):
    """Custom exception type that adds nothing to ``Exception``.

    NOTE(review): it is not raised anywhere in the visible code.
    """

class MyCFChecker(CFChecker):
    """CFChecker subclass that collects reported messages in a list.

    ``_add_message`` is overridden so that every message passed to it is
    appended to ``self._logged_messages`` as a dict with the keys
    ``category``, ``msg``, ``var`` and ``code``.

    The list is created per instance. A class-level list would be shared by
    every checker, so messages from an earlier run would be reported again by
    each later one.
    """

    def __init__(self, *args, **kwargs):
        # Create the store before the base initialiser runs, in case it
        # already reports messages through _add_message.
        self._logged_messages = []
        super().__init__(*args, **kwargs)

    def _add_message(self, category, msg, var=None, code=None):
        """Record one message; ``var`` and ``code`` stay ``None`` when not given."""
        # Build the record explicitly instead of using locals(), which would
        # also store `self` and tie the instance into a reference cycle.
        self._logged_messages.append(
            {"category": category, "msg": msg, "var": var, "code": code}
        )

def _check_file(filename, show_warnings=True):
    """Run the CF checker on ``filename`` and print its findings per variable.

    The checker options come from ``getargs`` called with only the filename,
    except that a fresh ``CFVersion()`` is passed as the version and
    ``debug=False`` / ``silent=True`` are forced.

    Messages of category ``ERROR`` and ``WARN`` are grouped by variable name
    (``"__global__"`` when the message has no variable), wrapped with
    ``textwrap.fill`` and indented by four spaces. ``VERSION`` and ``INFO``
    messages are ignored.

    Parameters
    ----------
    filename : str
        Path of the netCDF file to check.
    show_warnings : bool, default True
        When true, warnings are printed after the error report.

    Returns
    -------
    None
        The report is printed, not returned.

    Raises
    ------
    NotImplementedError
        If a logged message has a category other than the four listed above.
    """

    def _print_grouped(grouped):
        # One header line per variable (sorted by name), then its messages.
        for name in sorted(grouped):
            print(f"  {name}:")
            for text in grouped[name]:
                print(text)

    # getargs returns a 13-tuple; the debug flag, version and file list it
    # yields are not used below.
    (badc, coards, _debug, uploader, use_file_name, region_names, standard_names,
     area_types, cache_dir, cache_tables, cache_time, _version, _files) = getargs(["", filename])

    checker = MyCFChecker(
        uploader=uploader,
        useFileName=use_file_name,
        badc=badc,
        coards=coards,
        cfRegionNamesXML=region_names,
        cfStandardNamesXML=standard_names,
        cfAreaTypesXML=area_types,
        cacheDir=cache_dir,
        cacheTables=cache_tables,
        cacheTime=cache_time,
        # this will attempt auto-finding of version
        version=CFVersion(),
        debug=False,
        silent=True,
    )
    checker.checker(filename)

    errors_by_var = {}
    warnings_by_var = {}
    # Category -> destination; None marks categories that are silently skipped.
    buckets = {
        "WARN": warnings_by_var,
        "ERROR": errors_by_var,
        "VERSION": None,
        "INFO": None,
    }

    for record in checker._logged_messages:
        kind = record["category"]
        text = textwrap.indent(textwrap.fill(record["msg"]), prefix="    ")
        owner = record["var"] or "__global__"
        if kind not in buckets:
            raise NotImplementedError(kind)
        bucket = buckets[kind]
        if bucket is not None:
            bucket.setdefault(owner, []).append(text)

    if errors_by_var:
        print("The following errors were detected:")
        _print_grouped(errors_by_var)
    else:
        print("no errors!")

    if show_warnings and warnings_by_var:
        print()
        print("The following warnings were raised:")
        _print_grouped(warnings_by_var)
    
# Validate the file written to disk; the report is printed below the cell.
_check_file(filename="test.nc")

no errors!

  __global__:
    A variable with the attribute cf_role should be included in a Discrete
    Geometry CF File
    CF Files containing timeSeries featureType should only include a
    single occurrence of a cf_role attribute
  ambient_particle_number_per_channel:
    Coordinate variable time should have bounds or climatology attribute


In [8]:
# Re-open the saved file to inspect what was actually stored; the bare
# `ds_new` renders its summary as the cell output.
ds_new = xr.open_dataset(filename_or_obj="test.nc")
ds_new