# By-Band _g_-Point Reduction

# Dependencies

`numpy` is installed in the Python environment at NERSC (`module load python`), but `xarray` is not, so the user must install the package on their own. `PIPPATH` is the assumed location. This notebook depends heavily on `xarray`.

In [None]:
import os, sys, shutil, glob

# "standard" install
import numpy as np

# per-user site-packages directory where locally installed libraries
# (e.g., `xarray`) are assumed to live
PIPPATH = (
    '{}/.local/'.format(os.path.expanduser('~'))
    + 'cori/3.7-anaconda-2019.10/lib/python3.7/site-packages')

# append the local `common` directory and PIPPATH to the module
# search path so the imports that follow can be resolved
PATHS = ['common', PIPPATH]
for path in PATHS:
    sys.path.append(path)

# user must do `pip install xarray` on cori (or other NERSC machines)
import xarray as xa

# local module
import by_band_lib as BYBAND

# Static Inputs

In [None]:
# exactly one spectral domain is processed per run: longwave (LW) when
# `DOLW` is True, shortwave (SW) otherwise
DOLW = True
DOSW = not DOLW

# domain label and number of bands that go with the selected domain
if DOLW:
    DOMAIN, NBANDS = 'LW', 16
else:
    DOMAIN, NBANDS = 'SW', 14
# endif DOLW

# index of the forcing scenario (0 means no forcing; a more
# comprehensive list of scenarios is still needed)
IFORCING = 0

# True: split the full k-distribution into per-band files
# False: reuse per-band files that already exist
BANDSPLIT = False

# True: remove the netCDFs generated for all of the combinations and
# iterations of combinations in bandOptimize()
CLEANUP = True

# Paths

In [None]:
# root of the project space; note the trailing slash, which means every
# path built from it below contains a (harmless) double slash
PROJECT = '/global/project/projectdirs/e3sm/pernak18/'

# netCDF with the full k-distribution
KFULLNC = (
    '{}/reference_netCDF/g-point-reduce/'.format(PROJECT)
    + 'rrtmgp-data-lw-g256-2018-12-04.nc')

# RRTMGP executable
EXE = '{}/g-point-reduction/garand_atmos/rrtmgp_garand_atmos'.format(
    PROJECT)

# single-band Garand template netCDF
GARAND = (
    '{}/reference_netCDF/g-point-reduce/'.format(PROJECT)
    + 'multi_garand_template_single_band.nc')

# paths handed to BYBAND.pathCheck() (GARAND is not in this list)
PATHS = [KFULLNC, EXE]

# directories for the per-band k-distributions and full-band fluxes
BANDSPLITDIR = 'band_k_dist'
FULLBANDFLUXDIR = 'full_band_flux'

# run the library's path check on every entry of `PATHS`
for PATH in PATHS:
    BYBAND.pathCheck(PATH)
# end PATH loop

# directory from which the notebook is being run
CWD = os.getcwd()

# Band Splitting

Break up full _k_-distribution file into separate distributions for each band, then calculate the corresponding fluxes. This should only need to be run once.

After some clarifications from Robert (30-Nov-2020), I believe the plan of action is:

1. Perform _g_-point combination for 1 band at a time
2. Combine condensed _k_-distribution for a given band with the full distributions from the rest of the bands
3. Run RRTMGP on output from item 2
4. Compute cost function for broadband fluxes, not by-band

Original Plan of Action:

- [ ] We would create Nbands k-distribution files
- [ ] We’d use Python to drive the Fortran executable Nbands times to produce Nbands flux results
- [ ] The trial g-point combinations then loop over bands and the possible g-point combinations within each band, creating k-distribution and band-wise flux files for each possible combination
- [ ] The Python code assembles broadband fluxes from the band-wise flux files in order to compute the cost functions

In [None]:
# Populate `kFiles` (per-band k-distribution netCDFs) and
# `fullBandFluxes` (per-band flux netCDFs), either by locating files
# from an earlier run or by generating them from the full
# k-distribution
if not BANDSPLIT:
    # reuse whatever files are already on disk, in sorted order
    kFiles = sorted(glob.glob(
        '{}/coefficients_{}_band??.nc'.format(BANDSPLITDIR, DOMAIN)))
    fullBandFluxes = sorted(glob.glob(
        '{}/flux_{}_band??.nc'.format(FULLBANDFLUXDIR, DOMAIN)))

    # either list being empty means the splitting has not been run yet
    if not (kFiles and fullBandFluxes):
        print('WARNING: set `BANDSPLIT` to `True` and run this cell again')
else:
    print('Band splitting commenced')

    # `mkdir=True` is forwarded to the library's path check for both
    # output directories
    BYBAND.pathCheck(BANDSPLITDIR, mkdir=True)
    BYBAND.pathCheck(FULLBANDFLUXDIR, mkdir=True)

    kFiles = []
    fullBandFluxes = []
    for iBand in range(NBANDS):
        # band object built from the full k-distribution (iteration 1)
        kObj = BYBAND.kDistOptBand(
            KFULLNC, iBand, DOLW, IFORCING, 1,
            fullBandKDir=BANDSPLITDIR,
            fullBandFluxDir=FULLBANDFLUXDIR,
            cleanup=CLEANUP)

        # record the band's k-distribution file name, then split the
        # band out of the full k-distribution
        kFiles.append(kObj.kBandNC)
        kObj.kDistBand()

        # fluxes are computed serially here (the executable is run in
        # one directory), then the resulting flux file is recorded
        kObj.fluxCompute(kObj.kBandNC)
        fullBandFluxes.append(kObj.fluxBandNC)
    # end band loop

    print('Band splitting completed')
# endif BANDSPLIT


# _g_-Point Combining

Loop over bands and combine their respective _g_-points. For each combination in a band, compute fluxes, then concatenate these fluxes with the full-band fluxes from other bands, then compute broadband fluxes and corresponding cost function.

In [None]:
from multiprocessing import Pool

def bandOptimize(kBandFile, iBand, doLW, iForce, fluxFiles, cleanup=False):
    """
    Single-band g-point optimization (WORK IN PROGRESS)

    For one band: combine g-points, compute the fluxes for every trial 
    combination in parallel, and combine the resulting fluxes. The 
    steps that pick the optimal combination and iterate on it are 
    written below but are not reachable yet (see the `break`), so 
    at most one iteration is performed.

    Input
        kBandFile -- string, netCDF passed to BYBAND.kDistOptBand() as 
            the k-distribution for this band
        iBand -- int, zero-offset band number
        doLW -- boolean, forwarded to BYBAND.kDistOptBand()
        iForce -- int, forwarded to BYBAND.kDistOptBand()
        fluxFiles -- list of flux files; currently NOT used by this 
            function (kept for interface compatibility)

    Keywords
        cleanup -- boolean, remove the working directory of the last 
            kDistOptBand object (`kObj.workDir`) after the loop

    Relies on the notebook globals BANDSPLITDIR, FULLBANDFLUXDIR, and 
    CLEANUP.

    NOTE(review): the kDistOptBand constructor is given the global 
    `CLEANUP`, not the `cleanup` argument -- confirm this is intended.
    """

    band = iBand + 1
    iterBand = 1
    while True:
        print('Starting Band {}, iteration {}'.format(band, iterBand))
        # start with `kFile` with no g-point combinations for a given band
        kObj = BYBAND.kDistOptBand(kBandFile, iBand, doLW, iForce, iterBand, 
            fullBandKDir=BANDSPLITDIR, fullBandFluxDir=FULLBANDFLUXDIR, 
            cleanup=CLEANUP)

        # combine g-points in band and generate corresponding netCDF
        kObj.gPointCombine()

        # if there are not enough g-points to combine, stop iterating
        if kObj.nGpt == 1: break

        # run RRTMGP on all files self.trialNC (each g-point combination)
        # generate input dictionaries for fluxComputePool()
        kObj.configParallel()

        # placeholder: use the first trial netCDF as the next `kFile`
        # (the cost-function-based selection is in the unreachable 
        # section below, so this assignment has no effect for now)
        kBandFile = kObj.trialNC[0]

        # calculate fluxes corresponding to every g-point combination
        # map() blocks until all fluxes are computed; the context 
        # manager then shuts the workers down so that processes do not 
        # accumulate from one iteration to the next
        with Pool(kObj.nGpt) as fluxPool:
            fluxPool.map(kObj.fluxComputePool, kObj.fluxInputs)

        kObj.fluxCombine()

        # TODO: development stop -- only one iteration is performed. 
        # Remove this `break` once the steps below are ready
        break

        # ------------------------------------------------------------
        # NOTE: everything from here to the end of the loop is 
        # currently UNREACHABLE and has not been exercised
        # ------------------------------------------------------------

        # determine optimal combination
        kObj.findOptimal(kObj.iCombine)

        # keep a copy of the optimal netCDF
        shutil.copy2(kObj.optNC, '{}/{}'.format(
            kObj.optDir, os.path.basename(kObj.optNC)))
        
        # replace `kFile` with netCDF that corresponds to g-point combination
        # that minimizes the cost function
        kBandFile = kObj.optNC
        
        # REASSIGN everything in constructor that is expected to change!

        # next iteration
        iterBand += 1
    # end while

    # cleanup
    if cleanup: shutil.rmtree(kObj.workDir)
# end bandOptimize

# 1-based band numbers to process; an empty list selects every band
bands = [1]
if not bands:
    bands = range(1, NBANDS+1)

# walk through the per-band k-distribution files and optimize only the 
# bands that were selected above
for iBand, kFile in enumerate(kFiles):
    band = iBand + 1
    if band in bands:
        bandOptimize(
            kFile, iBand, DOLW, IFORCING, fullBandFluxes, cleanup=False)

        print('Band {} complete'.format(band))
    # endif band
# end kFile loop
