# Pull CV entries files; strip out extraneous entries
<div style="text-align: right">
<p>
    <img src="https://pcmdi.github.io/assets/PCMDI/199x65px-PCMDI-Logo-Text-rectangle.png"
         width="91"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Program for Climate Model Diagnosis and Intercomparison"
         alt="Program for Climate Model Diagnosis and Intercomparison"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/LLNL/212px-LLNLiconPMS286-WHITEBACKGROUND.png"
         width="30"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Lawrence Livermore National Laboratory"
         alt="Lawrence Livermore National Laboratory"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/DOE/459x127px-DOE-Logo_Color_TextOnly.png"
         width="108"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="United States Department of Energy"
         alt="United States Department of Energy"
    >
</p>
</div>

**Summary**

This file pulls a CMIP6Plus_CVs/CMOR3.9.0-era CV files, strips out
extraneous entries and saves the files for local use

**Authors**

Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))

**Notes**

PJD 15 Feb 2025 - initiated<br>
PJD 15 Feb 2025 - first prototype functioning<br>
TODO:

**Links**

### imports

In [1]:
%%time
import datetime
import json
import os
import requests

CPU times: user 36.8 ms, sys: 14 ms, total: 50.8 ms
Wall time: 60.1 ms


### set table, coordinate, formula_terms and grids URLs and pull files

In [2]:
%%time
urls = {
    "frequency": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_frequency.json",
    "grid_label": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_grid_label.json",
    "license": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_license.json",
    "nominal_resolution": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_nominal_resolution.json",
    "realm": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/MIP_realm.json",
    "source_type": "https://raw.githubusercontent.com/PCMDI/obs4MIPs-cmor-tables/refs/heads/master/obs4MIPs_source_type.json",
}
# create: institution_id, product, required_global_attributes, source_id, table_id, tracking_id

# loop through urls
keys = urls.keys()
for count, key in enumerate(keys):
    print(count, key)
    url = urls[key]
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4/5xx)
        vars()[key] = json.loads(response.text)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON decode failed: {e}")
    except Exception as e:
        print(f"Unexpected error occurred: {e}")

0 frequency
1 grid_label
2 license
3 nominal_resolution
4 realm
5 source_type
CPU times: user 41.7 ms, sys: 13.8 ms, total: 55.5 ms
Wall time: 888 ms


### process frequency

In [3]:
frequency.pop("version_metadata")
keyList = list(frequency["frequency"].keys())
keepKey = ["day"]
for count, key in enumerate(keyList):
    if key not in keepKey:
        frequency["frequency"].pop(key)
frequency

{'frequency': {'day': 'daily mean samples'}}

### process grid_label

In [4]:
grid_label.pop("version_metadata")
keyList = list(grid_label["grid_label"].keys())
keepKey = ["gn"]
for count, key in enumerate(keyList):
    if key not in keepKey:
        grid_label["grid_label"].pop(key)
grid_label["grid_label"]["gn"] = grid_label["grid_label"]["gn"].replace("a model's ", "the analysis-")
grid_label

{'grid_label': {'gn': 'data reported on the analysis-native grid'}}

### create institution_id

In [5]:
institution_id = {}
institution_id["institution_id"] = {}
institution_id["institution_id"]["UCM-SNRI"] = {}
institution_id["institution_id"]["UCM-SNRI"]["name"] = "Sierra Nevada Research Institute, University of California, Merced, 5200 N. Lake Road, Merced, CA 95343, USA"
institution_id["institution_id"]["UCM-SNRI"]["ror"] = "00d9ah105"
institution_id["institution_id"]["UCM-SNRI"]["url"] = "https://www.climatologylab.org"
institution_id["institution_id"]["UCSD-SIO"] = {}
institution_id["institution_id"]["UCSD-SIO"]["name"] = "Scripps Institution of Oceanography, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA"
institution_id["institution_id"]["UCSD-SIO"]["ror"] = "04v7hvq31"
institution_id["institution_id"]["UCSD-SIO"]["url"] = "https://scripps.ucsd.edu"
institution_id["institution_id"]["TTU"] = {}
institution_id["institution_id"]["TTU"]["name"] = "Texas Tech University, 2520 Broadway Avenue, Lubbock, TX 79409, USA"
institution_id["institution_id"]["TTU"]["ror"] = "0405mnx93"
institution_id["institution_id"]["TTU"]["url"] = "https://www.ttu.edu/"
institution_id

{'institution_id': {'UCM-SNRI': {'name': 'Sierra Nevada Research Institute, University of California, Merced, 5200 N. Lake Road, Merced, CA 95343, USA',
   'ror': '00d9ah105',
   'url': 'https://www.climatologylab.org'},
  'UCSD-SIO': {'name': 'Scripps Institution of Oceanography, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA',
   'ror': '04v7hvq31',
   'url': 'https://scripps.ucsd.edu'},
  'TTU': {'name': 'Texas Tech University, 2520 Broadway Avenue, Lubbock, TX 79409, USA',
   'ror': '0405mnx93',
   'url': 'https://www.ttu.edu/'}}}

### process license

In [6]:
license.pop("version_metadata")
license["license"]["license"] = license["license"]["license"].replace("CMIP model data", "DRDCP data")
license["license"]["license"] = license["license"]["license"].replace("CMIP_institution_id.", "DRDCP_institution_id.")
license["license"]["license"] = license["license"]["license"].replace(" CMIP6 ", " DRDCP ")
license

{'license': {'license': 'DRDCP data produced by <Your Institution; see DRDCP_institution_id.json> is licensed under a <Creative Commons; select and insert a license_id; see below> License (<insert the matching license_url; see below>). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing DRDCP output, including citation requirements and proper acknowledgment. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.',
  'license_options': {'CC BY 4.0': {'license_id': 'Creative Commons Attribution 4.0 International',
    'license_url': 'https://creativecommons.org/licenses/by/4.0/'},
   'CC0 1.0': {'license_id': 'Creative Commons CC0 1.0 Universal Public Domain Dedication',
    'license

In [7]:
### process nominal_resolution

In [8]:
nominal_resolution.pop("version_metadata")
nominal_resolution

{'nominal_resolution': {'0.5 km': 'Resolution of 0.5 km',
  '10000 km': 'Resolution of 10000 km',
  '1000 km': 'Resolution of 1000 km',
  '100 km': 'Resolution of 100 km',
  '10 km': 'Resolution of 10 km',
  '1 km': 'Resolution of 1 km',
  '1x1 degree': 'Resolution of 1x1 degree',
  '2.5 km': 'Resolution of 2.5 km',
  '2500 km': 'Resolution of 2500 km',
  '250 km': 'Resolution of 250 km',
  '25 km': 'Resolution of 25 km',
  '5000 km': 'Resolution of 5000 km',
  '500 km': 'Resolution of 500 km',
  '50 km': 'Resolution of 50 km',
  '5 km': 'Resolution of 5 km'}}

### create product

In [9]:
product = {}
product["product"] = {}
product["product"] = ["downscaled-dynamical", "downscaled-statistical"]
product

{'product': ['downscaled-dynamical', 'downscaled-statistical']}

### process realm

In [10]:
realm.pop("version_metadata")
keyList = list(realm["realm"].keys())
keepKeys = ["atmos", "land"]
for count, key in enumerate(keyList):
    if key not in keepKeys:
        realm["realm"].pop(key)
realm

{'realm': {'atmos': 'Atmosphere', 'land': 'Land Surface and Subsurface'}}

### create region

In [11]:
region = {}
region["region"] = {}
region["region"]["NAM"] = "north_america"
region

{'region': {'NAM': 'north_america'}}

### create required_global_attributes

In [12]:
required_global_attributes = {}
required_global_attributes["required_global_attributes"] = [
    "Conventions",
    "activity_id",
    "calendar",
    "contact",
    "creation_date",
    "data_specs_version",
    "downscaling_institution_id",
    "downscaling_source_id",
    "driving_activity_id",
    "driving_experiment_id",
    "driving_mip_era",
    "driving_source_id",
    "driving_variant_label",
    "frequency",
    "grid",
    "grid_label",
    "license",
    "nominal_resolution",
    "product",
    "realm",
    "region",
    "region_id",
    "table_id",
    "tracking_id",
    "variable_id",
    "variant_info",
    "variant_label",
]
required_global_attributes

{'required_global_attributes': ['Conventions',
  'activity_id',
  'calendar',
  'contact',
  'creation_date',
  'data_specs_version',
  'downscaling_institution_id',
  'downscaling_source_id',
  'driving_activity_id',
  'driving_experiment_id',
  'driving_mip_era',
  'driving_source_id',
  'driving_variant_label',
  'frequency',
  'grid',
  'grid_label',
  'license',
  'nominal_resolution',
  'product',
  'realm',
  'region',
  'region_id',
  'table_id',
  'tracking_id',
  'variable_id',
  'variant_info',
  'variant_label']}

### create source_id

In [13]:
source_id = {}
source_id["source_id"] = {}
source_id["source_id"]["LOCA2-0"] = {}
source_id["source_id"]["LOCA2-0"]["institution_id"] = "UCSD-SIO"
source_id["source_id"]["LOCA2-0"]["license"] = "CC BY 4.0"
source_id["source_id"]["LOCA2-0"]["region"] = "NAM"
source_id["source_id"]["LOCA2-0"]["source_name"] = "LOCA"
source_id["source_id"]["LOCA2-0"]["source_version_number"] = "2.0"
source_id["source_id"]["LOCA2-1"] = {}


source_id["source_id"]["LOCA2-1"]["downscaling_institution_id"] = "UCSD-SIO"  # fix
source_id["source_id"]["LOCA2-1"]["downscaling_source_id"] = "LOCA2-1"
source_id["source_id"]["LOCA2-1"]["driving_activity_id"] = "CMIP"  # if more than one move to user_input.json
source_id["source_id"]["LOCA2-1"]["driving_experiment_id"] = "historical"  # if more than one move to user_input.json
source_id["source_id"]["LOCA2-1"]["driving_mip_era"] = "CMIP6"  # if more than one move to user_input.json
source_id["source_id"]["LOCA2-1"]["driving_source_id"] = "ACCESS-CM2"  # if more than one move to user_input.json
source_id["source_id"]["LOCA2-1"]["driving_variant_label"] = "r1i1p1f1"  # if more than one move to user_input.json
source_id["source_id"]["LOCA2-1"]["grid"] = "5 x 5 km latitude x longitude"
source_id["source_id"]["LOCA2-1"]["grid_label"] = "gn"
source_id["source_id"]["LOCA2-1"]["nominal_resolution"] = "5 km"
source_id["source_id"]["LOCA2-1"]["product"] = "downscaled-statistical"
source_id["source_id"]["LOCA2-1"]["region_id"] = "NAM"
source_id["source_id"]["LOCA2-1"]["source"] = "placeholder"  # fix



source_id["source_id"]["LOCA2-1"]["institution_id"] = "UCSD-SIO"
source_id["source_id"]["LOCA2-1"]["license"] = "CC BY 4.0"
source_id["source_id"]["LOCA2-1"]["region"] = "NAM"
source_id["source_id"]["LOCA2-1"]["source_name"] = "LOCA"
source_id["source_id"]["LOCA2-1"]["source_version_number"] = "2.1"
source_id["source_id"]["MACA3-0"] = {}
source_id["source_id"]["MACA3-0"]["institution_id"] = "UCM-SNRI"
source_id["source_id"]["MACA3-0"]["license"] = "CC0 1.0"
source_id["source_id"]["MACA3-0"]["region"] = "NAM"
source_id["source_id"]["MACA3-0"]["source_name"] = "MACA"
source_id["source_id"]["MACA3-0"]["source_version_number"] = "3.0"
source_id["source_id"]["STAR-ESDM1-0"] = {}
source_id["source_id"]["STAR-ESDM1-0"]["institution_id"] = "TTU"
source_id["source_id"]["STAR-ESDM1-0"]["license"] = "CC BY 4.0"
source_id["source_id"]["STAR-ESDM1-0"]["region"] = "NAM"
source_id["source_id"]["STAR-ESDM1-0"]["source_name"] = "STAR-ESDM"
source_id["source_id"]["STAR-ESDM1-0"]["source_version_number"] = "1.0"
source_id

{'source_id': {'LOCA2-0': {'institution_id': 'UCSD-SIO',
   'license': 'CC BY 4.0',
   'region': 'NAM',
   'source_name': 'LOCA',
   'source_version_number': '2.0'},
  'LOCA2-1': {'downscaling_institution_id': 'UCSD-SIO',
   'downscaling_source_id': 'UCSD-SIO',
   'driving_activity_id': 'CMIP',
   'driving_experiment_id': 'historical',
   'driving_mip_era': 'CMIP6',
   'driving_source_id': 'ACCESS-CM2',
   'driving_variant_label': 'r1i1p1f1',
   'grid': '5 x 5 km latitude x longitude',
   'grid_label': 'gn',
   'nominal_resolution': '5 km',
   'product': 'downscaled-statistical',
   'region_id': 'NAM',
   'source': 'placeholder',
   'institution_id': 'UCSD-SIO',
   'license': 'CC BY 4.0',
   'region': 'NAM',
   'source_name': 'LOCA',
   'source_version_number': '2.1'},
  'MACA3-0': {'institution_id': 'UCM-SNRI',
   'license': 'CC0 1.0',
   'region': 'NAM',
   'source_name': 'MACA',
   'source_version_number': '3.0'},
  'STAR-ESDM1-0': {'institution_id': 'TTU',
   'license': 'CC BY 4.0'

### create table_id

In [14]:
table_id = {}
table_id["table_id"] = ["DRCDP_APday"]


table_id["table_id"] = ["APday"]  # fix


table_id

{'table_id': ['APday']}

### create tracking_id

In [15]:
tracking_id = {}
tracking_id["tracking_id"] = ["hdl:21.14100/.*"]
tracking_id

{'tracking_id': ['hdl:21.14100/.*']}

### write all files out to repo root dir

In [16]:
files = ["frequency", "grid_label", "institution_id", "license", "nominal_resolution", "product", "realm", "region", "required_global_attributes", "source_id", "table_id", "tracking_id"]
for count, name in enumerate(files):
    print(count, name)
    dic = eval(name)
    # write file
    outFile = "".join(["../DRCDP_", name, ".json"])
    with open(outFile, "w") as f:
        json.dump(
            dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":")
        )

0 frequency
1 grid_label
2 institution_id
3 license
4 nominal_resolution
5 product
6 realm
7 region
8 required_global_attributes
9 source_id
10 table_id
11 tracking_id


### create composite DRCDP_CV.json for CMOR

In [17]:
CV = {}
CV["CV"] = {}
CV["CV"]["activity_id"] = "DRCDP"
CV["CV"]["frequency"] = frequency["frequency"]
CV["CV"]["grid_label"] = grid_label["grid_label"]
CV["CV"]["institution_id"] = institution_id["institution_id"]
CV["CV"]["license"] = license["license"]
CV["CV"]["nominal_resolution"] = nominal_resolution["nominal_resolution"]
CV["CV"]["product"] = product["product"]
CV["CV"]["realm"] = realm["realm"]
CV["CV"]["region"] = region["region"]
CV["CV"]["required_global_attributes"] = required_global_attributes["required_global_attributes"]
CV["CV"]["source_id"] = source_id["source_id"]
CV["CV"]["table_id"] = table_id["table_id"]
CV["CV"]["tracking_id"] = tracking_id["tracking_id"]
#CV
# write file
outFile = "".join(["../Tables/DRCDP_CV.json"])
with open(outFile, "w") as f:
    json.dump(
        CV, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":")
    )