In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from glob import glob
from subprocess import Popen, PIPE, check_output

import yaml
import dask

import util

# Login name of the user running the notebook; raises KeyError if USER is unset.
USER = os.environ['USER']

# Fail fast when the campaign file system is not visible from this host.
assert os.path.exists('/glade/campaign'), (
    'campaign is not accessible; run on Casper'
)

# Top-level statement: it must start at column 0, otherwise the whole cell
# fails with "IndentationError: unexpected indent".
from distributed.utils import tmpfile


## Get info on cases to process

In [3]:
# Staging destinations (where archives get unpacked), defined in util.
cplhist_stage_root, restart_stage_root = (
    util.cplhist_stage_root,
    util.restart_stage_root,
)

# Source locations on campaign storage that are searched for archives.
cplhist_campaign = '/glade/campaign/collections/cmip/CMIP6/cpl_hist'
restart_campaign = '/glade/campaign/collections/cmip/CMIP6/restarts'


def find_restart_tar(refcase, refdate):
    """Locate the restart tarball of a reference case under `restart_campaign`.

    Runs `find` recursively below the module-level `restart_campaign`
    directory, looking for a file named ``{refcase}.rest.{refdate}.tar``.

    Parameters
    ----------
    refcase : str
        Name of the reference case.
    refdate : str
        Reference date string as it appears in the tarball name.

    Returns
    -------
    str or None
        Path of the tarball. ``None`` (after printing a warning) when nothing
        matches. When several files match, a warning is printed and the first
        path in sorted order is returned, so the result is always a single
        usable path.
    """
    from subprocess import CalledProcessError

    tarname = f'{refcase}.rest.{refdate}.tar'
    try:
        output = check_output(['find', restart_campaign, '-name', tarname])
    except CalledProcessError as err:
        # `find` exits non-zero when it hits an error (e.g. an unreadable
        # directory) but still prints the matches it reached; keep those
        # instead of discarding the whole search.
        output = err.output or b''

    # One path per line; sort so the choice among duplicates is deterministic.
    matches = sorted(line for line in output.decode("UTF-8").splitlines() if line)

    if not matches:
        print(f'[WARNING]: restart package not found: {tarname}')
        return None

    if len(matches) > 1:
        print(
            f'[WARNING]: found {len(matches)} restart packages named {tarname}; '
            f'using {matches[0]}'
        )
    return matches[0]


# Experiment name -> case metadata, read from the YAML file next to the notebook.
with open('cplhist-cases.yml') as fid:
    cplhist_cases = yaml.safe_load(fid)

experiments, cplhist_case_list, restart_cases = [], [], []
for exp, case_info in cplhist_cases.items():
    experiments.append(exp)
    cplhist_case_list.append(case_info['case'])

    # Only the historical experiment contributes a restart package.
    if exp != 'historical':
        continue

    refcase = case_info['parent_experiment']
    branch_year = case_info['parent_branch_year']
    refdate = f"{branch_year:04d}-01-01-00000"

    # 'tarfile' is None when the package could not be located.
    restart_cases.append(
        dict(
            refcase=refcase,
            refdate=refdate,
            tarfile=find_restart_tar(refcase, refdate),
        )
    )

## Spin up dask cluster

In [4]:
# Start the cluster with a 24 h walltime, then scale it to the requested size.
n_workers = 12
cluster, client = util.get_ClusterClient(walltime='24:00:00')
cluster.scale(n_workers)

# Last expression of the cell: renders the client summary / dashboard link.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mclong/asm/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mclong/asm/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.20:37618,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/mclong/asm/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Unpack `CPLHIST` forcing

In [5]:
# Coupler-history streams to stage; a stream with no archives for a case is skipped.
streams = ['ha2x', 'ha2x1hi', 'ha2x1h', 'ha2x3h', 'ha2x1d', 'hr2x']

# Inclusive range of years to unpack.
yr_lo = 1850
yr_hi = 2014

for case in cplhist_case_list:

    dirout = f"{cplhist_stage_root}/cpl_hist/{case}/orig"
    os.makedirs(dirout, exist_ok=True)

    for stream in streams:

        tarfiles = sorted(glob(f'{cplhist_campaign}/{case}/*.{stream}.*'))
        if not tarfiles:
            continue

        # The year is parsed from the second-to-last dot-separated field of each path.
        years = [int(f.split('.')[-2]) for f in tarfiles]
        tarfiles = [f for y, f in zip(years, tarfiles) if yr_lo <= y <= yr_hi]
        print(f'{case} {stream}:')

        # Queue an extraction only for archives with at least one member
        # (among those whose name contains the case) missing from dirout.
        delayed_objs = []
        for tarfile in tarfiles:
            files_in_tar = [f for f in util.list_files_in_tar(tarfile) if case in f]
            already_staged = all(os.path.exists(f"{dirout}/{f}") for f in files_in_tar)
            if already_staged:
                continue
            delayed_objs.append(dask.delayed(util.extract_tar)(tarfile, dirout))

        if not delayed_objs:
            continue

        print(f'extracting {len(delayed_objs)} files')
        computed_objs = dask.compute(*delayed_objs)

b.e21.BHIST.f09_g17.CMIP6-historical.011 ha2x1hi:
extracting 114 files
b.e21.BHIST.f09_g17.CMIP6-historical.011 ha2x1h:
extracting 114 files
b.e21.BHIST.f09_g17.CMIP6-historical.011 ha2x3h:
extracting 114 files
b.e21.BHIST.f09_g17.CMIP6-historical.011 ha2x1d:
extracting 114 files
b.e21.BHIST.f09_g17.CMIP6-historical.011 hr2x:
extracting 114 files


## Concatenate daily `CPLHIST` files into monthly files 

In [None]:
# Run the daily-to-monthly concatenation helper once per case, passing the
# yr_lo / yr_hi bounds defined in the unpacking cell above.
for case in cplhist_case_list:
    util.concat_cplhist_mon(case, yr_lo, yr_hi)

setenv CASE b.e21.BHIST.f09_g17.CMIP6-historical.011
setenv DIR_DAILY /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/orig
setenv YEAR0 1850
setenv YEAR1 2014
setenv DIR_MONTHLY /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly
unset echo


ha2x files not found, skipping
b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-01-01.nc has 24 samples per day
creating /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly/b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-01.nc
creating /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly/b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-02.nc
creating /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly/b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-03.nc
creating /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly/b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-04.nc
creating /glade/scratch/mclong/cplhist_data/cpl_hist/b.e21.BHIST.f09_g17.CMIP6-historical.011/monthly/b.e21.BHIST.f09_g17.CMIP6-historical.011.cpl.ha2x1hi.1850-05.nc
creating /glade/scratch/mclong/cplhist_data/cpl_h

## Copy restarts

In [None]:
# Unpack each restart package into restart_stage_root/<refcase>, skipping
# entries with no tarball and packages that are already unpacked.
delayed_objs = []  # NOTE(review): reset here but never used below; extraction runs serially
for rest_info in restart_cases:
    refcase, refdate, tarfile = (
        rest_info[key] for key in ('refcase', 'refdate', 'tarfile')
    )

    # No tarball was located for this reference case.
    if tarfile is None:
        continue

    dirout = f"{restart_stage_root}/{refcase}"
    os.makedirs(dirout, exist_ok=True)

    # A path named after the reference date marks the package as already staged.
    if os.path.exists(f"{dirout}/{refdate}"):
        print(f"exists: {dirout}/{refdate}")
        continue

    print(f"unpacking to: {dirout}/{refdate}")
    util.extract_tar(tarfile, dirout)
