In [20]:
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 14 08:22:35 2022

@author: kras
"""

# NOTE: this notebook served as the basis for CF_compliancy_checker.py in coclicodata/etl and is outdated as of 19/7/2022

# Optional; code formatter, installed as jupyter lab extension
#%load_ext lab_black
# Optional; code formatter, installed as jupyter notebook extension
%load_ext nb_black

# imports
import mock
import requests
from bs4 import BeautifulSoup
import re
import os
import pathlib
from ctypes import CDLL
import platform

# OS configurations
# root of the project drive: "P:/" on Windows, "/p/" on linux or any other OS
p_drive = pathlib.Path({"Windows": "P:/"}.get(platform.system(), "/p/"))

# # another CF tester (https://github.com/ioos/compliance-checker)
# from compliance_checker.runner import ComplianceChecker, CheckSuite

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [21]:
# toggles
# Path components are passed separately so that pathlib inserts the correct
# separator on every OS; a backslash inside a single string is only treated as
# a separator on Windows.
main_folder = p_drive.joinpath("11205479-coclico", "data")  # local data folder
testfile = main_folder.joinpath(
    "18_AR5_SLP_IPCC", "total-ens-slr-26-5.nc"
)  # file to be tested in this script
working_folder = main_folder.joinpath("CF")  # directory to save output check files

<IPython.core.display.Javascript object>

In [22]:
# workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
home = pathlib.Path.home()
if platform.system() == "Windows":
    # The workaround is Windows-specific: on other systems the environment
    # variable is left untouched so an existing, valid setting is not overridden.
    udunits_xml = home.joinpath(  # change to the udunits2.xml file dir in your Python installation
        "Anaconda3",
        "pkgs",
        "udunits2-2.2.28-h892ecd3_0",
        "Library",
        "share",
        "udunits",
        "udunits2.xml",
    )
    if not udunits_xml.is_file():
        # the variable is still set (as before), but point the user at the cause
        print(
            "WARNING: udunits2.xml not found at {0}; adjust the path to your Python installation".format(
                udunits_xml
            )
        )
    os.environ["UDUNITS2_XML_PATH"] = str(udunits_xml)

<IPython.core.display.Javascript object>

In [29]:
# CF check initialization
# search for most recent CF tables if True, if False use default numbers
update_versions = True
# download and save tables if True, else only use them from source
download_tables = False

# per CF table: the default version number and the web page of the current table
table_dict = {
    tablename: {"version": default_version, "page": page}
    for tablename, default_version, page in (
        (
            "cf-standard-name-table",
            76,  # default number
            "http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html",
        ),
        (
            "area-type-table",
            9,  # default number
            "http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html",
        ),
        (
            "standardized-region-list",
            4,  # default number
            "http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html",
        ),
    )
}

# function to retrieve the most recent version number of a CF table from its page on the CF conventions website (used if update_versions == True)
def get_recent_versions(page):
    """Return the version number advertised on a CF table web page.

    The page is downloaded and its visible text is searched for the first
    occurrence of the word "Version" that is followed by an integer.

    Args:
        page: URL (str) of the HTML page of a CF table.

    Returns:
        The version number found on the page, as an int.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if no "Version <number>" pattern is present on the page.
    """
    response = requests.get(page, timeout=30)  # do not hang forever on a dead server
    response.raise_for_status()  # fail loudly instead of parsing an error page

    # The parser is named explicitly so the result does not depend on which
    # optional parsers are installed (and no "guessed parser" warning is shown).
    page_text = BeautifulSoup(response.content, "html.parser").get_text(" ")

    # Requiring digits after "Version" skips earlier mentions of the word that
    # are not followed by a number.
    match = re.search(r"Version\s*(\d+)", page_text)
    if match is None:
        raise ValueError("no version number found on page: {0}".format(page))
    return int(match.group(1))

# update table_dict if update_versions == True
if update_versions:
    # replace each default version number with the one currently shown on the table's page
    for table_info in table_dict.values():
        table_info["version"] = get_recent_versions(table_info["page"])

<IPython.core.display.Javascript object>

In [30]:
# extend table_dict with CF tables URL from CF conventions website
# one URL template per table; {0} is filled with that table's version number
url_templates = {
    "cf-standard-name-table": "http://cfconventions.org/Data/cf-standard-names/{0}/src/cf-standard-name-table.xml",
    "area-type-table": "http://cfconventions.org/Data/area-type-table/{0}/src/area-type-table.xml",
    "standardized-region-list": "http://cfconventions.org/Data/standardized-region-list/standardized-region-list.{0}.xml",
}
for tablename, url_template in url_templates.items():
    table_version = table_dict[tablename]["version"]
    table_dict[tablename]["url"] = url_template.format(table_version)

# extend table_dict with local path to save downloaded CF tables, if enabled
if download_tables:  # save CF tables to working folder if download_tables == True
    # the working folder is not created in any of the cells above, so make sure it exists
    working_folder.mkdir(parents=True, exist_ok=True)

    for tablename, table_info in table_dict.items():
        # pathlib supplies the OS-specific separator (the former "\{" in the format
        # string was Windows-only and an invalid escape sequence)
        local_path = working_folder.joinpath(
            "{0}-{1}.xml".format(tablename, table_info["version"])
        )
        table_info["local_path"] = str(local_path)  # kept as a str, as before

        response = requests.get(table_info["url"], timeout=60)
        response.raise_for_status()  # do not save an HTTP error page as a table
        local_path.write_bytes(response.content)

<IPython.core.display.Javascript object>

In [32]:
%%capture cap --no-stderr

# check CF compliancy within the testfile
# The %%capture cell magic (which has to stay on the first line of the cell) stores
# what this cell writes to stdout in `cap`; with --no-stderr, stderr is not captured.
# The next cell writes cap.stdout to a .check file and summarises it.
# mock.patch.object swaps the default argument values of CDLL.__init__ for the tuple
# below while the with-block runs and restores the original defaults afterwards.
# NOTE(review): the tuple presumably maps to (mode, handle, use_errno, use_last_error,
# winmode) of ctypes.CDLL - confirm against the Python version in use.
with mock.patch.object(
    CDLL.__init__, "__defaults__", (0, None, False, False, 0)
):  # monkeypatch workaround for the Windows OS (10) ctypes.dll error: https://stackoverflow.com/questions/59330863/cant-import-dll-module-in-python
    # cfchecker is imported here, inside the patch, instead of in the imports cell,
    # so that the import itself runs with the patched CDLL defaults
    from cfchecker.cfchecks import (
        CFChecker,
    )  # import the cfchecker package i.s.o. subprocess application as in https://cmip-data-pool.dkrz.de/quality-assurance-cfchecker-ceda.html

    # the three CF tables are passed as the versioned URLs stored in table_dict
    inst = CFChecker(
        useFileName="yes",
        cfStandardNamesXML=table_dict["cf-standard-name-table"]["url"],
        cfAreaTypesXML=table_dict["area-type-table"]["url"],
        cfRegionNamesXML=table_dict["standardized-region-list"]["url"],
        debug=False,
        silent=False,
    )
    # run the check on the test file; the path is handed over as a plain string
    inst.checker(str(testfile))

<IPython.core.display.Javascript object>

In [37]:
# create directory and save captured cell output to a .check file
# The folder and file names are taken from pathlib attributes instead of splitting
# str(testfile) on "\\", which only works for Windows-style paths.
check_dir = working_folder.joinpath(testfile.parent.name)
check_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): replacing ".nc" by "n.check" yields "<name>n.check"; the naming is
# kept exactly as it was - confirm that the extra "n" is intended.
check_path = check_dir.joinpath(testfile.name.replace(".nc", "n.check"))

with open(check_path, "w") as check_file:
    check_file.write(cap.stdout)

# open the created file
with open(check_path) as check_file:
    file = check_file.read()


def _values_after(marker, lines):
    """Return the text after the first ": " of every line that contains `marker`."""
    return [line.split(": ", 1)[1] for line in lines if marker in line]


# print an in-line summary of the CF checker
check_lines = file.split("\n")
files = _values_after("CHECKING NetCDF FILE", check_lines)
warnings = _values_after("WARNINGS given", check_lines)
errors = _values_after("ERRORS detected", check_lines)

# one entry per checked file; indexing (rather than zip) keeps a mismatch between
# the number of files and the number of warning/error lines visible as an error
result_dict = {}
for idx, checked_file in enumerate(files):
    result_dict[checked_file] = {"warnings": warnings[idx], "errors": errors[idx]}
print(result_dict)



<IPython.core.display.Javascript object>