# By-Band _g_-Point Reduction

# Dependencies

`numpy` is installed in the Python environment at NERSC (`module load python`), but `xarray` is not, so users must install it themselves (e.g., `pip install xarray`). `PIPPATH` is the assumed install location and is appended to `sys.path` in the cell below. This notebook depends heavily on `xarray`.

In [None]:
import os, sys, shutil, glob

# "standard" install
import numpy as np

from multiprocessing import Pool

# Assumed location of user-installed packages (e.g., `xarray`). It is 
# appended to the module search path, together with the relative 
# `common` path, so the imports that follow can be resolved.
PIPPATH = os.path.expanduser('~') + \
    '/.local/cori/3.7-anaconda-2019.10/lib/python3.7/site-packages'
PATHS = ['common', PIPPATH]
for path in PATHS:
    sys.path.append(path)

# needed at AER unless i update `pandas`
# The active filter ignores every category derived from `Warning`; the 
# commented-out alternative would ignore only `FutureWarning`:
#   warnings.simplefilter(action='ignore', category=FutureWarning)
import warnings
warnings.simplefilter('ignore', Warning)

# user must do `pip install xarray` on cori (or other NERSC machines)
import xarray as xa

# local module
import by_band_lib as BYBAND

# Paths

In [None]:
# Project root (the trailing slash is kept so every derived path string 
# is unchanged) and the directory holding the reference netCDF files
PROJECT = '/global/project/projectdirs/e3sm/pernak18/'
REFDIR = '/'.join([PROJECT, 'reference_netCDF/g-point-reduce'])

# RRTMGP executable that is handed to the cost-function object
EXE = '/'.join(
    [PROJECT, 'g-point-reduction/garand_atmos/rrtmgp_garand_atmos'])

# full k-distribution and the profiles template
KFULLNC = '/'.join([REFDIR, 'rrtmgp-data-lw-g256-2018-12-04.nc'])
GARAND = '/'.join([REFDIR, 'multi_garand_template_single_band.nc'])

# test (RRTMGP) and reference (LBL) flux netCDF files
TESTNC = '/'.join(
    [REFDIR, 'rrtmgp-lw-flux-inputs-outputs-garandANDpreind.nc'])
REFNC = '/'.join(
    [REFDIR, 'lblrtm-lw-flux-inputs-outputs-garandANDpreind.nc'])

# output directories (relative paths) for the per-band k-distributions 
# and the per-band fluxes
BANDSPLITDIR = 'band_k_dist'
FULLBANDFLUXDIR = 'full_band_flux'

# run the library's path check on every required input
PATHS = [KFULLNC, EXE, TESTNC, REFNC, GARAND]
for PATH in PATHS:
    BYBAND.pathCheck(PATH)

CWD = os.getcwd()

# Static Inputs

In [None]:
# Exactly one spectral domain is processed per run, so the shortwave 
# switch is always the complement of the longwave switch; the domain 
# label and band count follow from that choice
DOLW = True
DOSW = not DOLW
DOMAIN, NBANDS = ('LW', 16) if DOLW else ('SW', 14)

# forcing scenario (0 is no forcing...need a more comprehensive list)
IFORCING = 0

# True:  split the full k-distribution into per-band files in this run
# False: reuse existing files that already divide up the full 
#        k-distribution
BANDSPLIT = False

# whether to remove the netCDFs that are generated for all of the 
# combinations and iterations of combinations in bandOptimize()
CLEANUP = False

# number of iterations for the optimization
NITER = 1

# cost function variables: components, pressure levels of interest 
# (in Pa), and weights
CFCOMPS = ['band_flux_dn', 'band_flux_up']
CFLEVS = [0, 10000, 102000]
CFWGT = [0.5, 0.5]

# Band Splitting

Break up full _k_-distribution file into separate distributions for each band, then calculate the corresponding fluxes. This should only need to be run once.

After some clarifications from Robert (30-Nov-2020), I believe the plan of action is:

1. create Nbands k-distribution files
2. drive the Fortran executable Nbands times to produce Nbands flux results
3. the trial g-point combinations then loop over bands and the possible g-point combinations within each band, creating k-distribution and band-wise flux files for each possible combination
4. the Python code assembles broadband fluxes from the band-wise flux files in order to compute the cost functions

In [None]:
if not BANDSPLIT:
    # reuse the per-band coefficient and flux files already on disk
    kPattern = '{}/coefficients_{}_band??.nc'.format(BANDSPLITDIR, DOMAIN)
    fluxPattern = '{}/flux_{}_band??.nc'.format(FULLBANDFLUXDIR, DOMAIN)
    kFiles = sorted(glob.glob(kPattern))
    fullBandFluxes = sorted(glob.glob(fluxPattern))

    # both lists have to be populated for the following cells
    if not (kFiles and fullBandFluxes):
        print('WARNING: set `BANDSPLIT` to `True` and run this cell again')
else:
    print('Band splitting commenced')
    for outDir in (BANDSPLITDIR, FULLBANDFLUXDIR):
        BYBAND.pathCheck(outDir, mkdir=True)

    kFiles, fullBandFluxes = [], []
    for iBand in range(NBANDS):
        # divide full k-distribution into subsets for each band
        kObj = BYBAND.gCombine_kDist(
            KFULLNC, iBand, DOLW,
            fullBandKDir=BANDSPLITDIR,
            fullBandFluxDir=FULLBANDFLUXDIR,
            cleanup=CLEANUP)
        kFiles.append(kObj.kBandNC)
        kObj.kDistBand()

        # quick, non-parallelized flux calculations (because the 
        # executable is run in one directory)
        # TO DO: HAVEN'T TESTED THIS SINCE IT HAS BEEN MOVED OUT OF THE CLASS
        BYBAND.fluxCompute(
            kObj.kBandNC, kObj.profiles, kObj.exe,
            kObj.fullBandFluxDir, kObj.fluxBandNC)
        fullBandFluxes.append(kObj.fluxBandNC)
    # end band loop
    print('Band splitting completed')
# endif BANDSPLIT


# _g_-Point Combining

Combine the _g_-point-reduced results for each band with the full-band fluxes from the other bands, find the optimal _g_-point combination for the given iteration, and proceed to the next iteration.

First, find all _g_-point combinations for each band. Store the band object in a dictionary for use in flux computation. This cell only needs to be run once, and to save time in development, the dictionary is saved in a `pickle` file and can be loaded in the next cell.

In [None]:
# Preprocessing: this should be parallelized, and it should not have to 
# be run multiple times. Each band object is stored under a key built 
# from its 1-based, zero-padded band number.
kBandDict = {}
for iBand, kFile in enumerate(kFiles):
    #if iBand != 0: continue
    band = iBand + 1
    bandKey = 'band{:02d}'.format(band)

    kObj = BYBAND.gCombine_kDist(
        kFile, iBand, DOLW, 1,
        fullBandKDir=BANDSPLITDIR,
        fullBandFluxDir=FULLBANDFLUXDIR,
        cleanup=CLEANUP)
    kObj.gPointCombine()
    kBandDict[bandKey] = kObj

    print('Band {} complete'.format(band))
# end kFile loop

# save the dictionary to disk so it can be reloaded without rerunning 
# the loop above
import pickle
with open('temp.pickle', 'wb') as fp:
    pickle.dump(kBandDict, fp)

Now compute fluxes in parallel for every _g_-point combination -- merging occurs within each band, and the combinations in a given band are used with the broadband fluxes from the other bands. Each of these concatenations has an associated `xarray` dataset. Cost function components are then calculated for each dataset, and the dataset that minimizes the error in the cost function will have its associated netCDF saved to disk.

The pickling block at the top of the cell restores the dictionary from the previous cell; comment it out if `kBandDict` is already in memory.

TODO: this cell still needs to be made to work for subsequent iterations (see the `setupNextIter` method and the disabled code block in this cell).

In [None]:
# Development shortcut: reload the pickled band dictionary so that it 
# does not need to be regenerated for every code change.
import pickle
with open('temp.pickle', 'rb') as fp:
    kBandDict = pickle.load(fp)

# cost function components used from here on
CFCOMPS = ['heating_rate', 'band_heating_rate']

coObj = BYBAND.gCombine_Cost(
    kBandDict,
    fullBandFluxes,
    REFNC,
    TESTNC,
    IFORCING,
    0,
    profilesNC=GARAND,
    exeRRTMGP=EXE,
    cleanup=CLEANUP,
    costFuncComp=CFCOMPS,
    costFuncLevs=CFLEVS,
    costWeights=CFWGT)

import copy
NITER = 1
for i in range(NITER):
    # one optimization pass; the steps run in this fixed order
    coObj.kMap()
    coObj.fluxComputePool()
    coObj.fluxCombine()
    coObj.costFuncComp()
    coObj.findOptimal()
    #coObj.setupNextIter()

    # Disabled draft of the setup for the next iteration:
    #
    # bandKey = list(coObj.distBands.keys())[coObj.optBand]
    # bandObj = coObj.distBands[bandKey]
    #
    # # modify attributes of object to reflect next iteration
    # newObj = copy.deepcopy(coObj.distBands[bandKey])
    # newObj.kInNC = str(coObj.optNC)
    # newObj.iCombine = coObj.iCombine + 1
    #
    # # clean up the optimal band's working directory
    # # shutil.rmtree(bandObj.workDir)
    #
    # # combine g-points for next iteration
    # print('Recombining')
    # newObj.gPointCombine()
# end iteration loop