## Compute HCF variables at 18z, 21z, and 00z, in addition to the 12z values already computed in HCF.ipynb. Having these later times enables us to use the retrospective method for determining whether or not CI was triggered locally. 


In [4]:
# Import libraries 
import comet as cm 
import numpy as np 
import xarray as xr 
import pickle
import pandas as pd
import datetime 
import datetime 
import time 
from ComputeHCF import HCF 
from joblib import Parallel, delayed 
import multiprocess as mp
import itertools

# Plotting utils 
import matplotlib.pyplot as plt 
import matplotlib.colors as colors
import cartopy
import cartopy.feature as cfeature
import cartopy.crs as ccrs
import cartopy.util


## 1. Read in data 

In [2]:
# Directory with all the data saved in it...
dataDir = '/glade/work/mdfowler/data/HighOutput_IslasSims/'


### 1.1 Read in data for CAM6 + CLM5

In [5]:
# Set up strings used to define/access each file 
fileStart  = 'f.e21.FHIST.f09_f09.cesm2_cam6_clm5.001.cam.h4.'
profileEnd = '_conusAllTimes-VertProfiles.nc'
pressEnd   = '_conus-Pressure-UTCtimes.nc'

years = np.arange(1980,1983).astype(str)

# Collect one dataset per year and concatenate ONCE after the loop. Concatenating
#   inside the loop re-copies the ever-growing dataset on every iteration.
yearlyProfiles = []
yearlyPressure = []

# Read in yearly files 
for yr in years: 
    # Set up file names 
    fileName_profile = dataDir+fileStart+yr+profileEnd
    fileName_press   = dataDir+fileStart+yr+pressEnd
    
    # Open each file, swap its time index for the result of to_datetimeindex(), and pull
    #   the data into memory before the 'with' block closes the file handle.
    with xr.open_dataset(fileName_profile, decode_times=True) as profileDS:
        profileDS['time'] = profileDS.indexes['time'].to_datetimeindex()
        yearlyProfiles.append(profileDS.load())
            
    with xr.open_dataset(fileName_press, decode_times=True) as pressDS:
        pressDS['time'] = pressDS.indexes['time'].to_datetimeindex()
        yearlyPressure.append(pressDS.load())
    
    print('Done with reading in files for year %s' % yr)

# Stitch the individual years together along the time dimension 
fullDS_profiles = xr.concat(yearlyProfiles, dim='time')
fullDS_pressure = xr.concat(yearlyPressure, dim='time')
    

  app.launch_new_instance()


Done with reading in files for year 1980
Done with reading in files for year 1981
Done with reading in files for year 1982


In [6]:
# Attach the pressure field and the UTC time components (read from the pressure files)
#   to the CLM5 profile dataset. 
# Note: ds_CLM5 is the same object as fullDS_profiles here -- no copy is made. 
ds_CLM5 = fullDS_profiles
ds_CLM5['PRESSURE'] = (('time','lev','lat','lon'), fullDS_pressure['PRESSURE'])
for utcName in ['UTC_hr', 'UTC_day', 'UTC_mon', 'UTC_yr']:
    ds_CLM5[utcName] = ('time', fullDS_pressure[utcName])


### 1.2 Read in data for CAM6 + CLM4.5

In [7]:
# Set up strings used to define/access each file 
fileStart  = 'f.e21.FHIST.f09_f09.cesm2_cam6_clm4p5.001.cam.h4.'
profileEnd = '_conusAllTimes-VertProfiles.nc'
pressEnd   = '_conus-Pressure-UTCtimes.nc'

years = np.arange(1980,1983).astype(str)

# Collect one dataset per year and concatenate ONCE after the loop. Concatenating
#   inside the loop re-copies the ever-growing dataset on every iteration.
yearlyProfiles = []
yearlyPressure = []

# Read in yearly files 
for yr in years: 
    # Set up file names 
    fileName_profile = dataDir+fileStart+yr+profileEnd
    fileName_press   = dataDir+fileStart+yr+pressEnd
    
    # Open each file, swap its time index for the result of to_datetimeindex(), and pull
    #   the data into memory before the 'with' block closes the file handle.
    with xr.open_dataset(fileName_profile, decode_times=True) as profileDS:
        profileDS['time'] = profileDS.indexes['time'].to_datetimeindex()
        yearlyProfiles.append(profileDS.load())
            
    with xr.open_dataset(fileName_press, decode_times=True) as pressDS:
        pressDS['time'] = pressDS.indexes['time'].to_datetimeindex()
        yearlyPressure.append(pressDS.load())
    
    print('Done with reading in files for year %s' % yr)

# Stitch the individual years together along the time dimension 
fullDS_profiles = xr.concat(yearlyProfiles, dim='time')
fullDS_pressure = xr.concat(yearlyPressure, dim='time')




Done with reading in files for year 1980
Done with reading in files for year 1981
Done with reading in files for year 1982


In [8]:
# Attach the pressure field and the UTC time components (read from the pressure files)
#   to the CLM4.5 profile dataset. 
# Note: ds_CLM45 is the same object as fullDS_profiles here -- no copy is made. 
ds_CLM45 = fullDS_profiles
ds_CLM45['PRESSURE'] = (('time','lev','lat','lon'), fullDS_pressure['PRESSURE'])
for utcName in ['UTC_hr', 'UTC_day', 'UTC_mon', 'UTC_yr']:
    ds_CLM45[utcName] = ('time', fullDS_pressure[utcName])


### 1.3 Isolate 18z, 21z, and 00z into their own datasets

In [9]:
# Keep only the time steps whose UTC hour matches, for each model configuration.
# All six selections are enabled: later cells reference ds_utc18_CLM5 and ds_utc21_CLM5,
#   which raise a NameError on a fresh kernel if these lines are left commented out.
# NOTE(review): each selection is a new dataset held in memory alongside the full
#   record; comment individual hours back out if memory becomes a problem.

# Pick out 18Z 
ds_utc18_CLM45 = ds_CLM45.where( ds_CLM45.UTC_hr==18.0, drop=True )
ds_utc18_CLM5  = ds_CLM5.where(  ds_CLM5.UTC_hr==18.0,  drop=True )
print('...18Z isolated...')

# Pick out 21Z 
ds_utc21_CLM45 = ds_CLM45.where( ds_CLM45.UTC_hr==21.0, drop=True )
ds_utc21_CLM5  = ds_CLM5.where(  ds_CLM5.UTC_hr==21.0,  drop=True )
print('...21Z isolated...')

# Pick out 00Z 
ds_utc00_CLM45 = ds_CLM45.where( ds_CLM45.UTC_hr==0.0, drop=True )
ds_utc00_CLM5  = ds_CLM5.where(  ds_CLM5.UTC_hr==0.0,  drop=True )
print('...00Z isolated...')


...18Z isolated...


In [10]:
# Grid coordinates, taken from the 18Z CLM4.5 subset 
lat = ds_utc18_CLM45['lat'].values
lon = ds_utc18_CLM45['lon'].values 

# Length of the time dimension in the 18Z subset (same for all...)
nTime = ds_utc18_CLM45.sizes['time']
print('Numer of days: ', nTime)


Numer of days:  1095


## 2. Compute HCF and save variables 

In [20]:
def my_HCF_parallel(DS, iT):
    """Run HCF on the vertical profile at every grid point for one time step.

    Parameters
    ----------
    DS : xarray.Dataset
        Dataset with 'time', 'lev', 'lat' and 'lon' dimensions that holds the
        variables named 'T', 'Q', 'Z3' and 'PRESSURE', plus 'time_bnds', 'hyai',
        'hybi' and 'ilev' (all four are dropped before profiles are extracted).
    iT : int
        Positional index along the time dimension of DS.

    Returns
    -------
    tuple of 10 numpy.ndarray, each of shape (nLat, nLon)
        The ten HCF outputs, in the order HCF returns them:
        TBM, BCLH, BCLP, TDEF, TRAN_H, TRAN_P, TRAN_T, SHDEF_M, LHDEF_M, EADV_M.
        Arrays start out filled with NaN.

    Raises
    ------
    ValueError
        If HCF does not return exactly ten values for a grid point.
    """
    
    # Define variable names 
    Tname = 'T'
    Qname = 'Q'
    Zname = 'Z3'
    Pname = 'PRESSURE'

    nFields = 10   # number of values returned by HCF for each profile 

    # Get rid of the nbnd dimension (time_bnds) and the ilev dimension (hyai, hybi, ilev)
    DS = DS.drop_vars(['time_bnds', 'hyai', 'hybi', 'ilev'])
    
    # Number of levels to worry about in actual "sounding"
    nLev  = len(DS.lev)

    # The time selection is the same for every grid point, so do it once up front 
    DS_t = DS.isel(time=iT)

    # Size the outputs from the dataset itself rather than from notebook-level globals 
    nLat = DS_t.sizes['lat']
    nLon = DS_t.sizes['lon']
    fieldGrids = [np.full([nLat, nLon], np.nan) for _ in range(nFields)]
    
    for iLat in range(nLat):
        for iLon in range(nLon): 
            # Pick out specific point 
            DF = DS_t.isel(lat=iLat, lon=iLon).to_dataframe()

            # Flip order of levels so that surface comes first (required for function)
            DF = DF.reindex(index=DF.index[::-1])

            pointValues = tuple(HCF(DF, Tname, Qname, Zname, Pname, nLev))
            if len(pointValues) != nFields:
                raise ValueError('Expected %i values from HCF, got %i' % (nFields, len(pointValues)))

            # Store each returned value in its own (lat, lon) grid 
            for grid, value in zip(fieldGrids, pointValues):
                grid[iLat, iLon] = value
    
    return tuple(fieldGrids)



### Test out timing of using parallel nJobs

In [10]:
# Serial baseline (the "control"): time a plain for loop over the first 20 time steps 
start      = time.time()
testResult = np.full([20,10,len(lat),len(lon)], np.nan)
for iT in range(testResult.shape[0]):
    # The ten returned (lat, lon) grids fill the second axis for this time step 
    testResult[iT] = my_HCF_parallel(ds_utc18_CLM45, iT)
end     = time.time()
elapsed = end-start
print('For loop took %.4f sec' %(elapsed))


  iMask         = np.where((~np.isnan(xaxis1)) & (xaxis1>pthresh))[0]
  iMask         = np.where((~np.isnan(xaxis1)) & (xaxis1>pthresh))[0]
  if ( (np.all(xaxis1<=pthresh)) & (np.all(xaxis>=pbl_p)) & (np.all(np.isnan(xaxis1))) ):
  iMask       = np.where( (xaxis1>pthresh) & (xaxis<pbl_p) & (~np.isnan(xaxis1)))[0]
  if ( (np.all(np.isnan(eadv))) | (np.all(eadv<45)) | (np.all(np.isnan(eadv))) | (np.all(eadv>45)) ):
  if ( (np.all(np.isnan(eadv))) | (np.all(eadv<45)) | (np.all(np.isnan(eadv))) | (np.all(eadv>45)) ):
  iMask    = np.where((eadv_0<=0) & (~np.isnan(eadv_0)))[0]
  iMask    = np.where((eadv_0>0) & (~np.isnan(eadv_0)))[0]


For loop took 304.3860 sec


NameError: name 'ds_utc18_CLM5' is not defined

In [23]:
# Time the first 20 time steps with n_jobs = 2 
startParr   = time.time()
hcfTasks    = [delayed(my_HCF_parallel)(ds_utc18_CLM45, iT) for iT in range(20)]
testResult2 = Parallel(n_jobs=2)(hcfTasks)
start2      = time.time()

# Regroup the per-time-step tuples into one sequence per HCF variable 
(TBM, BCLH, BCLP, TDEF, TRANH,
 TRANP, TRANT, SHDEFM, LHDEFM, EADVM) = zip(*testResult2)
endParr     = time.time()

computeTime = start2-startParr
unzipTime   = endParr-start2
totalTime   = endParr-startParr
print('With 2 threads, time to compute results: %.4f' %(computeTime))
print('Time to unzip: %.4f' %(unzipTime))
print('Total time for parralelized: %.4f' %(totalTime))


With 2 threads, time to compute results: 296.1083
Time to unzip: 0.0002
Total time for parralelized: 296.1086


In [22]:
# Time the first 20 time steps with n_jobs = 5 
startParr   = time.time()
hcfTasks    = [delayed(my_HCF_parallel)(ds_utc18_CLM45, iT) for iT in range(20)]
testResult2 = Parallel(n_jobs=5)(hcfTasks)
start2      = time.time()

# Regroup the per-time-step tuples into one sequence per HCF variable 
(TBM, BCLH, BCLP, TDEF, TRANH,
 TRANP, TRANT, SHDEFM, LHDEFM, EADVM) = zip(*testResult2)
endParr     = time.time()

computeTime = start2-startParr
unzipTime   = endParr-start2
totalTime   = endParr-startParr
print('With 5 threads, time to compute results: %.4f' %(computeTime))
print('Time to unzip: %.4f' %(unzipTime))
print('Total time for parralelized: %.4f' %(totalTime))


With 5 threads, time to compute results: 313.8495
Time to unzip: 0.0008
Total time for parralelized: 313.8503


In [21]:
# Time the first 20 time steps with n_jobs = -2 
#   (joblib convention: negative values count back from the number of CPUs, so -2 = all but one)
startParr   = time.time()
hcfTasks    = [delayed(my_HCF_parallel)(ds_utc18_CLM45, iT) for iT in range(20)]
testResult2 = Parallel(n_jobs=-2)(hcfTasks)
start2      = time.time()

# Regroup the per-time-step tuples into one sequence per HCF variable 
(TBM, BCLH, BCLP, TDEF, TRANH,
 TRANP, TRANT, SHDEFM, LHDEFM, EADVM) = zip(*testResult2)
endParr     = time.time()

computeTime = start2-startParr
unzipTime   = endParr-start2
totalTime   = endParr-startParr
print('With -2 threads, time to compute results: %.4f' %(computeTime))
print('Time to unzip: %.4f' %(unzipTime))
print('Total time for parralelized: %.4f' %(totalTime))


  iMask         = np.where((~np.isnan(xaxis1)) & (xaxis1>pthresh))[0]
  iMask         = np.where((~np.isnan(xaxis1)) & (xaxis1>pthresh))[0]
  if ( (np.all(xaxis1<=pthresh)) & (np.all(xaxis>=pbl_p)) & (np.all(np.isnan(xaxis1))) ):
  iMask       = np.where( (xaxis1>pthresh) & (xaxis<pbl_p) & (~np.isnan(xaxis1)))[0]
  if ( (np.all(np.isnan(eadv))) | (np.all(eadv<45)) | (np.all(np.isnan(eadv))) | (np.all(eadv>45)) ):
  if ( (np.all(np.isnan(eadv))) | (np.all(eadv<45)) | (np.all(np.isnan(eadv))) | (np.all(eadv>45)) ):
  iMask    = np.where((eadv_0<=0) & (~np.isnan(eadv_0)))[0]
  iMask    = np.where((eadv_0>0) & (~np.isnan(eadv_0)))[0]


With -2 threads, time to compute results: 339.7693
Time to unzip: 0.0010
Total time for parralelized: 339.7703


In [19]:
# Time the first 20 time steps with n_jobs = 10 
startParr   = time.time()
hcfTasks    = [delayed(my_HCF_parallel)(ds_utc18_CLM45, iT) for iT in range(20)]
testResult2 = Parallel(n_jobs=10)(hcfTasks)
start2      = time.time()

# Regroup the per-time-step tuples into one sequence per HCF variable 
(TBM, BCLH, BCLP, TDEF, TRANH,
 TRANP, TRANT, SHDEFM, LHDEFM, EADVM) = zip(*testResult2)
endParr     = time.time()

computeTime = start2-startParr
unzipTime   = endParr-start2
totalTime   = endParr-startParr
print('With 10 threads, time to compute results: %.4f' %(computeTime))
print('Time to unzip: %.4f' %(unzipTime))
print('Total time for parralelized: %.4f' %(totalTime))


With 10 threads, time to compute results: 346.0705
Time to unzip: 0.0147
Total time for parralelized: 346.0853


In [12]:
# n_jobs = 20
#   The printed label is built from nJobs so it cannot disagree with the setting
#   actually passed to Parallel (it used to say 10 while running with 20).
nJobs = 20
startParr = time.time()
testResult2 = Parallel(n_jobs=nJobs)(delayed(my_HCF_parallel)(ds_utc18_CLM45,iT) for iT in range(20))
start2 = time.time()

# Regroup the per-time-step tuples into one sequence per HCF variable 
TBM,BCLH,BCLP,TDEF,TRANH,TRANP,TRANT,SHDEFM,LHDEFM,EADVM = zip(*testResult2)
endParr = time.time()
print('With %i threads, time to compute results: %.4f' %(nJobs, start2-startParr))
print('Time to unzip: %.4f' %(endParr-start2))
print('Total time for parralelized: %.4f' %(endParr-startParr))


With 10 threads, time to compute results: 333.0381
Time to unzip: 0.0002
Total time for parralelized: 333.0383


<br>What if I only have it return one variable of interest at a time? 

In [35]:
def my_HCF_parallelTest(DS, iT, iLat=None, iLon=None):
    """Test variant of the HCF driver that returns only TDEF.

    Supports both ways this notebook calls it:
      * my_HCF_parallelTest(DS, iT, iLat, iLon) -> TDEF for that single grid point
      * my_HCF_parallelTest(DS, iT)             -> TDEF at every grid point

    Parameters
    ----------
    DS : xarray.Dataset
        Dataset with 'time', 'lev', 'lat' and 'lon' dimensions that holds the
        variables named 'T', 'Q', 'Z3' and 'PRESSURE', plus 'time_bnds', 'hyai',
        'hybi' and 'ilev' (all four are dropped before profiles are extracted).
    iT : int
        Positional index along the time dimension of DS.
    iLat, iLon : int, optional
        Positional indices of one grid point. Give both, or neither.

    Returns
    -------
    The fourth value returned by HCF (TDEF) for the requested point, or a
    numpy.ndarray of shape (nLat, nLon) of those values when iLat and iLon are omitted.

    Raises
    ------
    ValueError
        If only one of iLat / iLon is supplied.
    """
    
    if (iLat is None) != (iLon is None):
        raise ValueError('iLat and iLon must be given together (or both omitted)')

    # Define variable names 
    Tname = 'T'
    Qname = 'Q'
    Zname = 'Z3'
    Pname = 'PRESSURE'

    # Get rid of the nbnd dimension (time_bnds) and the ilev dimension (hyai, hybi, ilev)
    DS = DS.drop_vars(['time_bnds', 'hyai', 'hybi', 'ilev'])
    
    # Number of levels to worry about in actual "sounding"
    nLev  = len(DS.lev)

    # The time selection is the same for every grid point, so do it once up front 
    DS_t = DS.isel(time=iT)

    def _tdef_at(jLat, jLon):
        # Extract one profile, run HCF on it, and keep only TDEF 
        DF = DS_t.isel(lat=jLat, lon=jLon).to_dataframe()

        # Flip order of levels so that surface comes first (required for function)
        DF = DF.reindex(index=DF.index[::-1])

        _TBM, _BCLH, _BCLP, TDEF, _TRAN_H, _TRAN_P, _TRAN_T, _SHDEF_M, _LHDEF_M, _EADV_M = HCF(
            DF, Tname, Qname, Zname, Pname, nLev)
        return TDEF

    # Single-point mode 
    if iLat is not None:
        return _tdef_at(iLat, iLon)

    # Whole-grid mode: do all lat/lon in one go 
    nLat = DS_t.sizes['lat']
    nLon = DS_t.sizes['lon']
    TDEF_all = np.full([nLat, nLon], np.nan)
    for jLat in range(nLat):
        for jLon in range(nLon):
            TDEF_all[jLat, jLon] = _tdef_at(jLat, jLon)
    
    return TDEF_all


In [28]:
# n_jobs = 5 -- had modified function to return just TDEF_ALL, but do all lat/lon in one go
#   But that made no difference in timing...
# NOTE(review): this call passes only (DS, iT), so it needs a version of
#   my_HCF_parallelTest in which iLat/iLon are not required.
startParr   = time.time()
tdefTasks   = [delayed(my_HCF_parallelTest)(ds_utc18_CLM45, iT) for iT in range(20)]
testResult2 = Parallel(n_jobs=5)(tdefTasks)
start2      = time.time()
endParr     = time.time()   # nothing to unzip here, so this interval is ~0

computeTime = start2-startParr
unzipTime   = endParr-start2
totalTime   = endParr-startParr
print('With 5 threads, time to compute results: %.4f' %(computeTime))
print('Time to unzip: %.4f' %(unzipTime))
print('Total time for parralelized: %.4f' %(totalTime))


With 5 threads, time to compute results: 303.2731
Time to unzip: 0.0001
Total time for parralelized: 303.2731


In [32]:
# Spot-check a single (time, lat, lon) entry of the whole-grid parallel result 
tdefBefore = np.asarray(testResult2)
print('Before: [0,20,40] = ', tdefBefore[0,20,40])

Before: [0,20,40] =  47.0075215786992


In [36]:
# This uses a test version of script that took in all the indices rather than relying on for loops, 
#   and only returns one variable to make life a bit easier. 
# But this has also had no real impact on timing... if anything it's the worst! 
# 
# n_jobs = 5
nTest = 20   # number of time steps tested; also fixes the leading axis of the reshape below 

startParr      = time.time()
testResultLots = Parallel(n_jobs=5)(delayed(my_HCF_parallelTest)(ds_utc18_CLM45,iT,iLat,iLon)
                                    for iT in range(nTest)
                                    for iLat in range(len(lat))
                                    for iLon in range(len(lon)))
endParr        = time.time()
print('Total time for parralelized, 5 threads: %.4f' %(endParr-startParr))

# Tasks were generated in (time, lat, lon) order, so a plain reshape restores the grid 
testReshape = np.asarray(testResultLots).reshape([nTest,len(lat),len(lon)])

# Error message no longer relevant; worked out in below cell. 

Total time for parralelized, 5 threads: 457.8225


NameError: name 'testResults2' is not defined

In [39]:
# Compare the same (time, lat, lon) sample from the two parallelization strategies. 
#   Labels match the indices that are actually printed (time index 10, not 0).
print('Before: [10,20,40] = ', np.asarray(testResult2)[10,20,40])
print('Now:    [10,20,40] = ', testReshape[10,20,40])


Before: [0,20,40] =  31.144030054987184
Now:    [0,20,40] =  [31.14403005]


Sanity check that parallel *does* speed things up in a very simple test, taken from online: https://medium.com/@measurespace/use-joblib-to-run-your-python-code-in-parallel-ad82abb26954 


In [24]:
import math 

def my_fun_2p(i, j):
    """Toy workload: pause for one second, then return sqrt(i**j)."""
    time.sleep(1)
    power = i**j
    return math.sqrt(power)

j_num = 3
num = 10

# Serial reference: one call per (i, j) pair 
start = time.time()
for i, j in itertools.product(range(num), range(j_num)):
    my_fun_2p(i, j)
end = time.time()
print('{:.4f} s'.format(end-start))

# Same set of calls, spread over joblib workers 
start = time.time()
# n_jobs is the number of parallel jobs
sanityTasks = [delayed(my_fun_2p)(iArg, jArg) for iArg in range(num) for jArg in range(j_num)]
Parallel(n_jobs=2)(sanityTasks)
end = time.time()
print('{:.4f} s'.format(end-start))


30.0305 s
15.0300 s


In [25]:
# Repeat the joblib sanity check with one more worker 
start = time.time()
# n_jobs is the number of parallel jobs
sanityTasks = [delayed(my_fun_2p)(iArg, jArg) for iArg in range(num) for jArg in range(j_num)]
Parallel(n_jobs=3)(sanityTasks)
end = time.time()
print('{:.4f} s'.format(end-start))

11.2906 s


**This part is known to work at least...**

In [15]:
# Compute for 18Z, CLM5: one task per time step, spread over 5 joblib workers 
hcfTasks_18z = [delayed(my_HCF_parallel)(ds_utc18_CLM5, iT) for iT in range(nTime)]
result_18z   = Parallel(n_jobs=5)(hcfTasks_18z)

# Regroup the per-time-step tuples into one sequence (over time) per HCF variable 
(TBM_18z, BCLH_18z, BCLP_18z, TDEF_18z, TRANH_18z,
 TRANP_18z, TRANT_18z, SHDEFM_18z, LHDEFM_18z, EADVM_18z) = zip(*result_18z)


In [19]:
# ---------------------------------------
# Create xr dataset from variables above 
# ---------------------------------------

# First set missing values to -9999
missingValue  = -9999

# Stack each per-time-step sequence into one array and swap NaN for the missing value.
#   (The *_write names are kept because the clean-up cell further down deletes them.)
(TBM_write, BCLH_write, BCLP_write, TDEF_write, TRAN_H_write,
 TRAN_P_write, TRAN_T_write, SHDEF_M_write, LHDEF_M_write, EADV_M_write) = [
    np.where(np.isnan(field), missingValue, field)
    for field in map(np.asarray, (TBM_18z, BCLH_18z, BCLP_18z, TDEF_18z, TRANH_18z,
                                  TRANP_18z, TRANT_18z, SHDEFM_18z, LHDEFM_18z, EADVM_18z))
]

# One row per output variable: (name, data, units, LongName)
hcfSpecs = [
    ('TBM',     TBM_write,     'K',    'Buoyant mixing potential temperature (convective threshold)'),
    ('BCLH',    BCLH_write,    'm',    'Height above ground of convective threshold'),
    ('BCLP',    BCLP_write,    'Pa',   'Pressure of convective threshold'),
    ('TDEF',    TDEF_write,    'K',    'Potential temperature deficit needed to initiate convection'),
    ('TRAN_H',  TRAN_H_write,  'm',    'Energy transition height'),
    ('TRAN_P',  TRAN_P_write,  'Pa',   'Energy transition pressure'),
    ('TRAN_T',  TRAN_T_write,  'K',    'Energy transition temperature'),
    ('SHDEF_M', SHDEF_M_write, 'J/m2', 'Sensible heat deficit of mixed layer'),
    ('LHDEF_M', LHDEF_M_write, 'J/m2', 'Latent heat deficit of mixed layer'),
    ('EADV_M',  EADV_M_write,  '-',    'Energy advantage of mixed layer'),
]

# Every variable shares the same (time, lat, lon) layout and coordinates 
timeValues = ds_utc18_CLM5.time.values

HCF_ds = xr.Dataset({
    varName: xr.DataArray(
                data   = varData,
                dims   = ['time','lat','lon'],
                coords = {'time': timeValues, 'lat':lat, 'lon': lon},
                attrs  = {
                    '_FillValue': missingValue,
                    'units'     : varUnits,
                    'LongName'  : varLongName
                    }
                )
    for varName, varData, varUnits, varLongName in hcfSpecs
    } )

# -------------------
# Save to netCDF 
# -------------------

savePath = '/glade/work/mdfowler/scripts/Coupling_CAM6CLM5/processed_data/HCFvariables-CAM6CLM5-18z_1980-1982.nc'
HCF_ds.to_netcdf(savePath, mode='w')
print('File saved!\n ', savePath)


File saved!
  /glade/work/mdfowler/scripts/Coupling_CAM6CLM5/processed_data/HCFvariables-CAM6CLM5-18z_1980-1982.nc


In [None]:
# ---------------------------------------
# Clear 18z, CLM5 variables from memory: 
#   A single del statement accepts a comma-separated list of names.
# ---------------------------------------

del (result_18z,
     TBM_18z, BCLH_18z, BCLP_18z, TDEF_18z,
     TRANH_18z, TRANP_18z, TRANT_18z,
     SHDEFM_18z, LHDEFM_18z, EADVM_18z,
     TBM_write, BCLH_write, BCLP_write, TDEF_write,
     TRAN_H_write, TRAN_P_write, TRAN_T_write,
     SHDEF_M_write, LHDEF_M_write, EADV_M_write,
     HCF_ds)


In [None]:
# Compute for 21Z, CLM5: one task per time step, spread over 5 joblib workers 
hcfTasks_21z = [delayed(my_HCF_parallel)(ds_utc21_CLM5, iT) for iT in range(nTime)]
result_21z   = Parallel(n_jobs=5)(hcfTasks_21z)

# Regroup the per-time-step tuples into one sequence (over time) per HCF variable 
(TBM_21z, BCLH_21z, BCLP_21z, TDEF_21z, TRANH_21z,
 TRANP_21z, TRANT_21z, SHDEFM_21z, LHDEFM_21z, EADVM_21z) = zip(*result_21z)

