## Now that the HCF script has been fully developed, implement it here and take a look at the outputs.
<b>Author:</b> Meg Fowler <br>
<b>Date:</b> 30 Oct 2020 

In [1]:
# Import libraries 
import comet as cm 
import numpy as np 
import xarray as xr 
import pickle
import pandas as pd
import datetime 
import datetime 
import time 
from ComputeHCF import HCF 

# Plotting utils 
import matplotlib.pyplot as plt 
import cartopy
import cartopy.feature as cfeature
import cartopy.crs as ccrs
import cartopy.util


## 1. Read in data

<b>What data do we need?</b> <br>
Vertical profiles of: temperature (T), specific humidity (Q), geopotential height (zg in CESM2, Z3 in other runs), and pressure (P). <br>
In addition, we need the lowest-level temperature, specific humidity, height, and pressure - so basically T2m, Q2m, PSfc, and 2m height. <br><br>
<b>Units:</b><br>
Temperature --> K <br>
Height      --> m <br>
Sp. Humidity -> kg/kg <br>
Pressure    --> Pa  <br><br>



### 1.1 Read in data from hourly simulation I ran with CESM2

In [2]:
# Directory holding the hourly CESM2 output used in this section
dataDir = '/Users/mdfowler/Documents/Analysis/Coupling_initial/data/hrSim_CONUS/'

# One file per profile variable. Going by the file names, T/Z3/Q hold 1980
# while the pressure file spans 1979-1981 (hence the subsetting further down).
Tpr_file = dataDir+'f.e21.FHIST_BGC.f09_f09_mg17.hourlyOutput.001.cam.h1.1980_hrT-UTCsel.nc'
Zpr_file = dataDir+'f.e21.FHIST_BGC.f09_f09_mg17.hourlyOutput.001.cam.h1.1980_hrZ3-UTCsel.nc'
Qpr_file = dataDir+'f.e21.FHIST_BGC.f09_f09_mg17.hourlyOutput.001.cam.h1.1980_hrQ-UTCsel.nc'
Ppr_file = dataDir+'f.e21.FHIST_BGC.f09_f09_mg17.hourlyOutput.001.cam.h1.1979-1981_hrP-levels-UTCsel.nc'

# ----------- Open files -------------
# Every file is opened the same way: decode the times, then swap the time
# index for the result of to_datetimeindex() so pandas-style timestamps are
# available downstream. The label is printed once the file is ready.
profile_files = [('T', Tpr_file), ('Z3', Zpr_file), ('Q', Qpr_file), ('P', Ppr_file)]
opened = []

print('Read in profile of...')
for label, path in profile_files:
    ds = xr.open_dataset(path, decode_times=True)
    ds['time'] = ds.indexes['time'].to_datetimeindex()
    print('.....' + label)
    opened.append(ds)

Tpr, Zpr, Qpr, Ppr = opened

# ----------- Isolate 1980 in Ppr -----
# Keep only the pressure time steps that fall between the first and last
# time stamps of the temperature file (both ends inclusive).
dates1980 = pd.DatetimeIndex(Tpr['time'].values)
datesPpr  = pd.DatetimeIndex(Ppr['time'].values)
inWindow  = (datesPpr >= dates1980[0]) & (datesPpr <= dates1980[-1])
iTimes    = np.flatnonzero(inWindow)

Ppr_sel = Ppr.isel(time=iTimes)


Read in profile of...


  Tpr['time'] = Tpr.indexes['time'].to_datetimeindex()


.....T


  Zpr['time'] = Zpr.indexes['time'].to_datetimeindex()


.....Z3


  Qpr['time'] = Qpr.indexes['time'].to_datetimeindex()


.....Q
.....P


  Ppr['time'] = Ppr.indexes['time'].to_datetimeindex()


This is all a bit easier to work with if everything's in its own xarray Dataset

In [3]:
# Gather T, Q, Z3 and pressure profiles in a single Dataset.
# Start from a shallow copy of Tpr: a plain `ds_Full = Tpr` only creates an
# alias, so the assignments below would also add Qpr/Zpr/Ppr to Tpr itself.
ds_Full = Tpr.copy()

# Variables are attached as (dims, data) tuples, so the dimension names are
# taken from the tuple rather than from the source arrays.
# NOTE(review): presumably all four files share the same grid, levels and
# time steps (Ppr_sel was subset to match Tpr) - verify against the files.
profile_dims = ('time','lev','lat','lon')
ds_Full['Qpr'] = (profile_dims, Qpr.Q)
ds_Full['Zpr'] = (profile_dims, Zpr.Z3)
ds_Full['Ppr'] = (profile_dims, Ppr_sel.PRESSURE)


In [4]:
# Keep only the 12 UTC time steps (early morning for most of the US);
# time steps that do not match the condition are dropped entirely.
is12UTC  = ds_Full.UTC_hr == 12.0
ds_utc12 = ds_Full.where(is12UTC, drop=True)


In [5]:
# Grid coordinates of the 12 UTC dataset as plain arrays
lat, lon = (ds_utc12[coord].values for coord in ('lat', 'lon'))


### 1.2 Read in 3-hrly data from Isla's run with CAM6+CLM4.5

In [12]:
# Set up strings used to define/access each file 
dataDir    = '/Users/mdfowler/Documents/Analysis/Coupling_initial/data/3hrSim_CAM6-CLM45/'

fileStart  = 'f.e21.FHIST.f09_f09.cesm2_cam6_clm4p5.001.cam.h4.'
profileEnd = '_conus-VertProfiles.nc'
pressEnd   = '_conus-Pressure-UTCtimes.nc'

years = np.arange(1980,1986).astype(str)

# Read in yearly files.
# Each year contributes one profile file and one pressure file. The yearly
# datasets are collected in lists and concatenated ONCE after the loop;
# concatenating onto a growing dataset inside the loop re-copies all earlier
# years on every iteration.
profile_parts  = []
pressure_parts = []

for year in years:
    for fileEnd, parts in ((profileEnd, profile_parts), (pressEnd, pressure_parts)):
        yearlyDS = xr.open_dataset(dataDir+fileStart+year+fileEnd, decode_times=True)
        # Same time-index conversion as used for the hourly CESM2 files
        yearlyDS['time'] = yearlyDS.indexes['time'].to_datetimeindex()
        parts.append(yearlyDS)

    print('Done with reading in files for year %s' % year)

# Stitch the years together along time
fullDS_profiles = xr.concat(profile_parts,  dim='time')
fullDS_pressure = xr.concat(pressure_parts, dim='time')

# Release the per-year file handles now that the combined datasets exist
# (the original code closed each file via a `with` block as well).
for yearlyDS in profile_parts + pressure_parts:
    yearlyDS.close()



  profileDS['time'] = profileDS.indexes['time'].to_datetimeindex()
  pressDS['time'] = pressDS.indexes['time'].to_datetimeindex()


Done with reading in files for year 1980
Done with reading in files for year 1981
Done with reading in files for year 1982
Done with reading in files for year 1983
Done with reading in files for year 1984
Done with reading in files for year 1985


In [7]:
# Combine the CAM6+CLM4.5 profiles with pressure and the UTC bookkeeping
# variables from the pressure files. A shallow copy is used so that
# fullDS_profiles itself is not modified by the assignments below.
ds_CLM45 = fullDS_profiles.copy()
ds_CLM45['PRESSURE'] = (('time','lev','lat','lon'), fullDS_pressure.PRESSURE)

# UTC hour/day/month/year are 1-D in time; copy each one across
for utcName in ('UTC_hr', 'UTC_day', 'UTC_mon', 'UTC_yr'):
    ds_CLM45[utcName] = (('time'), fullDS_pressure[utcName])

# ----------- Isolate 1980 for testing -----
# Select from ds_CLM45 using the indices computed on the line above.
# (Previously this line subset the CESM2 pressure dataset, Ppr, with iTimes -
# a leftover from the hourly-simulation cell - and i1980 was never used.)
i1980        = np.where(ds_CLM45.UTC_yr==1980)[0]
ds1980_CLM45 = ds_CLM45.isel(time=i1980)


In [8]:
# Display ds1980_CLM45 for a quick visual check of its contents
ds1980_CLM45

In [10]:
# Inspect the time coordinate of the concatenated profile dataset
fullDS_profiles.time

In [11]:
# Inspect the time coordinate of the concatenated pressure dataset
fullDS_pressure.time

## 2. Compute HCF for each grid point. 

Do note though that this is not the most *efficient* code, by any means. There may be ways to speed this up though, and parallel processing should certainly be do-able as well, since each profile is treated independently. 

In [13]:
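# # ------------------------------------------
# # Compute for each day and lat/lon point
# #   Takes 35-40 minutes to run for one year
# #
# #   NOTE: this cell is intentionally commented out. It only needs to be
# #   run once; the results are saved to netCDF further down and re-loaded.
# # ------------------------------------------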

# # Define variable names 
# Tname = 'T'
# Qname = 'Qpr'
# Zname = 'Zpr'
# Pname = 'Ppr'

# # Number of levels to worry about in actual "sounding"
# nLev  = len(ds_utc12.lev)

# # Define dimensions 
# nLat  = len(ds_utc12.lat)
# nLon  = len(ds_utc12.lon)
# nTime = len(ds_utc12.time)

# # Define empty arrays to save things into 
# TBM_all     = np.full([nTime,nLat,nLon], np.nan)
# BCLH_all    = np.full([nTime,nLat,nLon], np.nan)
# BCLP_all    = np.full([nTime,nLat,nLon], np.nan)
# TDEF_all    = np.full([nTime,nLat,nLon], np.nan)

# TRAN_H_all  = np.full([nTime,nLat,nLon], np.nan)
# TRAN_P_all  = np.full([nTime,nLat,nLon], np.nan)
# TRAN_T_all  = np.full([nTime,nLat,nLon], np.nan)
# SHDEF_M_all = np.full([nTime,nLat,nLon], np.nan)
# LHDEF_M_all = np.full([nTime,nLat,nLon], np.nan)
# EADV_M_all  = np.full([nTime,nLat,nLon], np.nan)

# # Time how long this takes... 
# t_start     = time.time()

# for iLat in range(nLat):
#     for iLon in range(nLon):
#         for iT in range(nTime):
            
#             # Pick out specific point and time period 
#             DF = ds_utc12.isel(lat=iLat,lon=iLon,time=iT).to_dataframe()
            
#             # Flip order of levels so that surface comes first (required for function)
#             DF = DF.reindex(index=DF.index[::-1])
            
#             # Compute HCF variables
#             TBM_all[iT,iLat,iLon],BCLH_all[iT,iLat,iLon],BCLP_all[iT,iLat,iLon],TDEF_all[iT,iLat,iLon],TRAN_H_all[iT,iLat,iLon],TRAN_P_all[iT,iLat,iLon],TRAN_T_all[iT,iLat,iLon],SHDEF_M_all[iT,iLat,iLon],LHDEF_M_all[iT,iLat,iLon], EADV_M_all[iT,iLat,iLon] = HCF(DF, 
#                                                                       Tname, 
#                                                                       Qname, 
#                                                                       Zname, 
#                                                                       Pname, 
#                                                                       nLev) 
                                                                      
#     print('Done with lat %i of %i ' % (iLat, nLat))
        

# print('Time elapsed for all points and times: %.3f sec' % (time.time() - t_start))




























Done with lat 0 of 43 
























Done with lat 1 of 43 
























Done with lat 2 of 43 


























Done with lat 3 of 43 






















Done with lat 4 of 43 






















Done with lat 5 of 43 




















Done with lat 6 of 43 






















Done with lat 7 of 43 






















Done with lat 8 of 43 
























Done with lat 9 of 43 
























Done with lat 10 of 43 




























Done with lat 11 of 43 


























Done with lat 12 of 43 


























Done with lat 13 of 43 


























Done with lat 14 of 43 


























Done with lat 15 of 43 


























Done with lat 16 of 43 


























Done with lat 17 of 43 


























Done with lat 18 of 43 
























Done with lat 19 of 43 




























Done with lat 20 of 43 




























Done with lat 21 of 43 
































Done with lat 22 of 43 


































Done with lat 23 of 43 






































Done with lat 24 of 43 






































Done with lat 25 of 43 








































Done with lat 26 of 43 






































Done with lat 27 of 43 






































Done with lat 28 of 43 










































Done with lat 29 of 43 










































Done with lat 30 of 43 














































Done with lat 31 of 43 


















































Done with lat 32 of 43 






















































Done with lat 33 of 43 






























































Done with lat 34 of 43 
































































Done with lat 35 of 43 






































































Done with lat 36 of 43 












































































Done with lat 37 of 43 


















































































Done with lat 38 of 43 


















































































Done with lat 39 of 43 


















































































Done with lat 40 of 43 




















































































Done with lat 41 of 43 




















































































Done with lat 42 of 43 
Time elapsed for all points and times: 2255.592 sec


Since computing HCF at each grid point and day can take so long, it's best to only run the above loops once. From there, you can save the output for each variable into a pickle file (a bit easier to work with in python than a full netCDF file or anything like that). <br> 
Alternatively, in the implementation used here, you can first place all the variables into a single xarray dataset again, and save that to a netCDF file. 

In [20]:
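# # ---------------------------------------
# # Create xr dataset from variables above 
# #   NOTE: intentionally commented out - only needed right after the
# #   (equally disabled) HCF computation cell has been run.
# # ---------------------------------------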

# # First set missing values to -9999
# missingValue  = -9999

# TBM_write     = np.copy(TBM_all)
# BCLH_write    = np.copy(BCLH_all)
# BCLP_write    = np.copy(BCLP_all)
# TDEF_write    = np.copy(TDEF_all)
# TRAN_H_write  = np.copy(TRAN_H_all)
# TRAN_P_write  = np.copy(TRAN_P_all)
# TRAN_T_write  = np.copy(TRAN_T_all)
# SHDEF_M_write = np.copy(SHDEF_M_all)
# LHDEF_M_write = np.copy(LHDEF_M_all)
# EADV_M_write  = np.copy(EADV_M_all)

# TBM_write    [np.isnan(TBM_all)    ==True] = missingValue
# BCLH_write   [np.isnan(BCLH_all)   ==True] = missingValue
# BCLP_write   [np.isnan(BCLP_all)   ==True] = missingValue
# TDEF_write   [np.isnan(TDEF_all)   ==True] = missingValue
# TRAN_H_write [np.isnan(TRAN_H_all) ==True] = missingValue
# TRAN_P_write [np.isnan(TRAN_P_all) ==True] = missingValue
# TRAN_T_write [np.isnan(TRAN_T_all) ==True] = missingValue
# SHDEF_M_write[np.isnan(SHDEF_M_all)==True] = missingValue
# LHDEF_M_write[np.isnan(LHDEF_M_all)==True] = missingValue
# EADV_M_write [np.isnan(EADV_M_all) ==True] = missingValue
 
# HCF_ds = xr.Dataset({
#     'TBM': xr.DataArray(
#                 data   = TBM_write,   # enter data here
#                 dims   = ['time','lat','lon'],
#                 coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#                 attrs  = {
#                     '_FillValue': missingValue,
#                     'units'     : 'K',
#                     'LongName'  : 'Buoyant mixing potential temperature (convective threshold)'
#                     }
#                 ),
#     'BCLH': xr.DataArray(
#                 data   = BCLH_write,   # enter data here
#                 dims   = ['time','lat','lon'],
#                 coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#                 attrs  = {
#                     '_FillValue': missingValue,
#                     'units'     : 'm',
#                     'LongName'  : 'Height above ground of convective threshold'
#                     }
#                 ),
#     'BCLP': xr.DataArray(
#             data   = BCLP_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'Pa',
#                 'LongName'  : 'Pressure of convective threshold'
#                 }
#             ),
#     'TDEF': xr.DataArray(
#             data   = TDEF_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'K',
#                 'LongName'  : 'Potential temperature deficit needed to initiate convection'
#                 }
#             ),
#     'TRAN_H': xr.DataArray(
#             data   = TRAN_H_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'm',
#                 'LongName'  : 'Energy transition height'
#                 }
#             ),
#     'TRAN_P': xr.DataArray(
#             data   = TRAN_P_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'Pa',
#                 'LongName'  : 'Energy transition pressure'
#                 }
#             ),
#     'TRAN_T': xr.DataArray(
#             data   = TRAN_T_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'K',
#                 'LongName'  : 'Energy transition temperature'
#                 }
#             ),
#     'SHDEF_M': xr.DataArray(
#             data   = SHDEF_M_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'J/m2',
#                 'LongName'  : 'Sensible heat deficit of mixed layer'
#                 }
#             ),
#     'LHDEF_M': xr.DataArray(
#             data   = LHDEF_M_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : 'J/m2',
#                 'LongName'  : 'Latent heat deficit of mixed layer'
#                 }
#             ),
#     'EADV_M': xr.DataArray(
#             data   = EADV_M_write,   # enter data here
#             dims   = ['time','lat','lon'],
#             coords = {'time': ds_utc12.time.values, 'lat':lat, 'lon': lon},
#             attrs  = {
#                 '_FillValue': missingValue,
#                 'units'     : '-',
#                 'LongName'  : 'Energy advantage of mixed layer'
#                 }
#             )
#     } )

# # -------------------
# # Save to netCDF 
# # -------------------

# savePath = '/Users/mdfowler/Documents/Analysis/Coupling_initial/Coupling_CAM6CLM5/processed_data/HCFvariables_1980.nc'
# HCF_ds.to_netcdf(savePath, mode='w')


Now we can just load in the netCDF file created above (or more files as well, as necessary):

In [22]:
# Load the previously computed HCF variables from their netCDF file instead
# of recomputing them.
filePath = '/Users/mdfowler/Documents/Analysis/Coupling_initial/Coupling_CAM6CLM5/processed_data/HCFvariables_1980.nc'

openOptions = dict(decode_times=True)
HCFvars     = xr.open_dataset(filePath, **openOptions)


### 2.1 Compute HCF for CAM6+CLM4.5 simulation

In [None]:
# ------------------------------------------
# Compute for each day and lat/lon point
#   Takes 35-40 minutes to run for one year
# ------------------------------------------
# NOTE(review): this section is titled "CAM6+CLM4.5", but the loop below reads
# ds_utc12 (the hourly CESM2 run) with the Qpr/Zpr/Ppr variable names.
# TODO confirm whether the CLM4.5 dataset (and its variable names) was intended.

# Define variable names 
Tname = 'T'
Qname = 'Qpr'
Zname = 'Zpr'
Pname = 'Ppr'

# Number of levels to worry about in actual "sounding"
nLev  = len(ds_utc12.lev)

# Define dimensions 
nLat  = len(ds_utc12.lat)
nLon  = len(ds_utc12.lon)
nTime = len(ds_utc12.time)

# Define empty (NaN-filled) arrays to save things into, one per HCF output
outShape    = [nTime,nLat,nLon]

TBM_all     = np.full(outShape, np.nan)
BCLH_all    = np.full(outShape, np.nan)
BCLP_all    = np.full(outShape, np.nan)
TDEF_all    = np.full(outShape, np.nan)

TRAN_H_all  = np.full(outShape, np.nan)
TRAN_P_all  = np.full(outShape, np.nan)
TRAN_T_all  = np.full(outShape, np.nan)
SHDEF_M_all = np.full(outShape, np.nan)
LHDEF_M_all = np.full(outShape, np.nan)
EADV_M_all  = np.full(outShape, np.nan)

# Time how long this takes...
# datetime is used rather than time.time(): a later cell rebinds the name
# `time` to an array of time stamps, which breaks time.time() if this cell
# is re-run afterwards.
t_start     = datetime.datetime.now()

for iLat in range(nLat):
    for iLon in range(nLon):

        # The lat/lon selection does not depend on the time step, so do it
        # once per grid point instead of once per (grid point, time) pair.
        columnDS = ds_utc12.isel(lat=iLat,lon=iLon)

        for iT in range(nTime):

            # Pick out specific time period for this point
            DF = columnDS.isel(time=iT).to_dataframe()

            # Flip order of levels so that surface comes first (required for function)
            DF = DF.reindex(index=DF.index[::-1])

            # Compute HCF variables; the ten return values are stored in the
            # same order as in the original single-line assignment.
            idx = (iT,iLat,iLon)
            (TBM_all[idx],
             BCLH_all[idx],
             BCLP_all[idx],
             TDEF_all[idx],
             TRAN_H_all[idx],
             TRAN_P_all[idx],
             TRAN_T_all[idx],
             SHDEF_M_all[idx],
             LHDEF_M_all[idx],
             EADV_M_all[idx]) = HCF(DF, Tname, Qname, Zname, Pname, nLev)

    # Report a 1-based count so the final message reads "nLat of nLat"
    print('Done with lat %i of %i ' % (iLat+1, nLat))


elapsed_sec = (datetime.datetime.now() - t_start).total_seconds()
print('Time elapsed for all points and times: %.3f sec' % elapsed_sec)



## Now plot some of these variables 

In [26]:
# Pull out dimensions of the HCF dataset as plain arrays.
# NOTE(review): `time` here rebinds the name of the `time` module imported at
# the top of the notebook, so any cell that calls time.time() will fail if it
# is (re-)run after this one.
lat, lon, time = (HCFvars[dimName].values for dimName in ('lat', 'lon', 'time'))
