# Assessing parameter importance in CAM6 with machine learning and a single-column perturbed-physics ensemble

Nikki Lydeen

Graduate Research Assistant, Climate Dynamics

Dept. of Atmospheric, Oceanic, and Earth Sciences

George Mason University, Fairfax, VA 22030

### Specify the intensive observation periods (IOPs) and the parameter space.

In [None]:
# The IOP cases to simulate, annotated with the field campaign and the cloud
# regime each one covers. Reference:
# A. Gettelman, et al. The Single Column Atmosphere Model Version 6 (SCAM6)
# https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2018MS001578
iops = [
    "arm97",      # ARM Southern Great Plains: land convection
    "cgilsS6",    # CFMIP-GASS SCM/LES Intercomparison: shallow cumulus
    "cgilsS11",   # CFMIP-GASS SCM/LES Intercomparison: stratocumulus
    "cgilsS12",   # CFMIP-GASS SCM/LES Intercomparison: stratus
    "mpace",      # Mixed Phase Arctic Clouds Experiment: Arctic
    "sparticus",  # Small Particles in Cirrus: cirrus, convection
    "twp06",      # Tropical convection (campaign presumably TWP-ICE; confirm)
]

# Sampling range of every perturbed CLUBB parameter, as a (low, high) pair.
# Entries that are commented out are excluded from the ensemble. Reference:
# H. Zhang, et al. Low-cloud feedback in CAM5-CLUBB
# https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2018MS001423
param_space = {
    "clubb_C1": (1.25, 5.0),
    "clubb_C2rt": (0.5, 2.0),
    # "clubb_C6rt": (3.0, 8.0),
    # "clubb_C6rtb": (3.0, 8.0),
    "clubb_C7": (0.25, 1.0),
    "clubb_C7b": (0.25, 1.0),
    "clubb_C8": (1.5, 6.0),
    "clubb_C11": (0.0, 1.0),
    "clubb_C11b": (0.0, 1.0),
    "clubb_C14": (1.0, 2.0),
    # "clubb_nu": (10.0, 40.0),
    "clubb_beta": (0.0, 3.0),
    "clubb_gamma_coef": (0.1, 0.6),
    # "clubb_mu": (0.0005, 0.002),
}

# Number of points to draw from the parameter space.
n_samples = 512

### Integrate a perturbed-physics ensemble.

In [None]:
!pip install SobolSequence==0.2 > /dev/null

from itertools import product
from multiprocessing import Pool
from os import environ
from os.path import exists
from shutil import rmtree
from subprocess import call
from subprocess import check_call

import pandas as pd
import sobol
from tqdm.notebook import tqdm


# Location of the CESM2 installation and of the pieces of it used below.
CESM_ROOT = "/opt/ncar/cesm2"
SCRIPT_DIR = CESM_ROOT + "/cime/scripts"
MODS_DIR = CESM_ROOT + "/components/cam/cime_config/usermods_dirs"

# Case directories are created under CASE_ROOT; IOP_CASE_DIR is the base case
# that is built once and then cloned for every ensemble member.
CASE_ROOT = "/tmp/cases"
IOP_CASE_DIR = CASE_ROOT + "/scm_ppe.base"

# A case is treated as finished once a directory with its name exists here.
ARCHIVE_ROOT = environ["HOME"] + "/archive"



def quasirandom_sample(iops, space, n):
    """Draw ``n`` Sobol points and rescale them to the bounds in ``space``.

    Args:
        iops: Unused; kept so the signature stays as callers expect.
        space: Mapping of parameter name to a ``(low, high)`` pair.
        n: Number of points requested from ``sobol.sample``.

    Returns:
        A list with one ``{parameter name: value}`` dict per point.

    NOTE(review): assumes ``sobol.sample(dim, n)`` returns an ``(n, dim)``
    array of points in the unit hypercube -- confirm against the
    SobolSequence package.
    """
    names = list(space)
    points = sobol.sample(len(names), n)

    # Each row of the transpose is a view onto one parameter's column, so
    # assigning through it rescales ``points`` in place.
    for axis, (lower, upper) in zip(points.T, space.values()):
        axis[:] = axis * (upper - lower) + lower

    return [dict(zip(names, point)) for point in points]


def plan_cases(iops, param_space, n_cases):
    """Build the table of cases: every parameter sample paired with every IOP.

    Args:
        iops: IOP names; each sample is run once per IOP.
        param_space: Mapping of parameter name to a ``(low, high)`` pair.
        n_cases: Number of parameter samples to draw (the table has
            ``n_cases * len(iops)`` rows).

    Returns:
        A DataFrame with the columns ``name``, ``iop`` and one column per
        parameter. Rows are ordered sample by sample, with the IOPs of one
        sample adjacent. ``name`` is ``scm_ppe.`` followed by the row number,
        zero-padded so that all names have the same length. An empty plan
        yields an empty frame with the same columns.
    """
    samples = quasirandom_sample(iops, param_space, n_cases)
    rows = [{"iop": iop, **params} for params, iop in product(samples, iops)]

    # Naming the columns explicitly keeps the schema intact when there are no
    # rows; for a non-empty plan it matches the order of the row dicts.
    df = pd.DataFrame(rows, columns=["iop", *param_space])

    # Width of the largest row number; max() guards the empty plan, where
    # indexing the last row label would raise IndexError.
    n_digits = len(str(max(len(df) - 1, 0)))
    df.insert(0, "name", [f"scm_ppe.{str(x).zfill(n_digits)}"
                          for x in df.index])

    return df


def run_case(config):
    """Clone, configure and run one ensemble member unless it is archived.

    Args:
        config: Mapping with the keys ``"name"`` (case name) and ``"iop"``
            (selects the ``scam_<iop>`` user-mods directory); every other
            entry is written to the case's ``user_nl_cam`` as ``key = value``.
            The mapping is not modified.

    Raises:
        KeyError: If ``"name"`` or ``"iop"`` is missing from ``config``.
        subprocess.CalledProcessError: If a CIME command exits with a nonzero
            status. The case directory is then left in place.
    """
    name, iop = config["name"], config["iop"]

    # Nothing to do for a case whose archive directory already exists.
    if exists(f"{ARCHIVE_ROOT}/{name}"):
        return

    # Copy instead of deleting keys so the caller's mapping stays intact.
    user_nl_cam = {k: v for k, v in config.items() if k not in ("name", "iop")}
    # NOTE(review): presumably requests hourly history output -- confirm
    # against the CAM namelist documentation.
    user_nl_cam["nhtfrq"] = -1

    case_dir = f"{CASE_ROOT}/{name}"

    # Start from a clean slate in case an earlier attempt left files behind.
    rmtree(case_dir, ignore_errors=True)

    # check_call (rather than ``assert call(...) == 0``) still runs the
    # command under ``python -O`` and reports the command and exit status.
    check_call([f"{SCRIPT_DIR}/create_clone",
                "--clone", IOP_CASE_DIR,
                "--user-mods-dir", f"{MODS_DIR}/scam_{iop}",
                "--keepexe",
                "--cime-output-root", case_dir,
                "--case", case_dir])

    # Append, so settings already present in user_nl_cam are kept.
    with open(f"{case_dir}/user_nl_cam", "a") as f:
        for k, v in user_nl_cam.items():
            print(f"{k} = {v}", file=f)

    check_call(["./xmlchange", "STOP_N=14,STOP_OPTION=ndays"], cwd=case_dir)

    check_call("./case.submit", cwd=case_dir)

    # Reached only when every step succeeded.
    rmtree(case_dir, ignore_errors=True)


def run_cases(df):
    """Build the shared base case if necessary, then run all cases in parallel.

    Args:
        df: DataFrame with one row per case; each row is converted to a dict
            and passed to ``run_case``.

    Raises:
        subprocess.CalledProcessError: If creating, setting up or building
            the base case fails. Exceptions raised by ``run_case`` in a
            worker also surface here while the results are consumed.
    """
    configs = [dict(row) for _, row in df.iterrows()]

    # The base case is rebuilt from scratch unless its executable exists.
    if not exists(f"{IOP_CASE_DIR}/bld/cesm.exe"):
        rmtree(IOP_CASE_DIR, ignore_errors=True)

        # check_call (rather than ``assert call(...) == 0``) still runs the
        # command under ``python -O`` and reports the command and exit status.
        check_call([f"{SCRIPT_DIR}/create_newcase",
                    "--compset", "FSCAM",
                    "--res", "T42_T42",
                    "--user-mods-dir", f"{MODS_DIR}/scam_mandatory",
                    "--case", IOP_CASE_DIR])

        check_call("./case.setup", cwd=IOP_CASE_DIR)
        check_call("./case.build", cwd=IOP_CASE_DIR)

    with Pool() as p:
        # Results are discarded; iterating only drives the progress bar and
        # waits for every case to finish.
        for _ in tqdm(p.imap_unordered(run_case, configs), total=len(configs),
                      mininterval=0., miniters=1):
            pass

In [None]:
# Plan the ensemble: every parameter sample paired with every IOP.
cases = plan_cases(iops, param_space, n_samples)
# Write the plan to disk (without the row index) before any case is run.
cases.to_csv("cases.csv", index=False)
display(cases)

# Run every planned case.
run_cases(cases)