<a name="top"></a>
<div style="width:1000 px">

<div style="float:right; width:98 px; height:98px;">
<img src="https://cdn.miami.edu/_assets-common/images/system/um-logo-gray-bg.png" alt="Miami Logo" style="height: 98px;">
</div>

<div style="float:right; width:98 px; height:98px;">
<img src="https://media.licdn.com/dms/image/C4E0BAQFlOZSAJABP4w/company-logo_200_200/0/1548285168598?e=2147483647&v=beta&t=g4jl8rEhB7HLJuNZhU6OkJWHW4cul_y9Kj_aoD7p0_Y" alt="STI Logo" style="height: 98px;">
</div>


<h1>Finalize Database Files</h1>
By: Kayla Besong, PhD
    <br>
Last Edited: 05/15/24
<br>
<br>    
<br>
This script takes all the downloaded, cleaned, and regridded renalaysis and forecast data in the database and generates the final form of each file. This includes changing variable names where needed, adding metadata, and unifying missing/nan values.
<br>
<br>
A second section for finalizing harmonic analysis output is provided.  
<br>
<br>
This code is not pretty but it got the job done in the time it needed to get done! Make appropriate edits you need for your version.
<br>
<br>
Note: if you did not regrid the NAM, NARR, HRRR, or CONUS404 this will not work on them to an extent. Some cleaning functions for them can be found in Regrid_Database.ipynb and Regrid_Check_n_Clean.ipynb.
<div style="clear:both"></div>
</div>

<hr style="height:2px;">

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import os
import glob

In [2]:
%run File_concat_mod_functions.ipynb

In [3]:
model_options = ['CONUS404', 'HRRR', 'NAM', 'NARR', 'ERA5', 'NCEP']

In [4]:
### manually input the attributes you'd like to store as metadata in each netcdf file

var_names_units = {

          'PBL': {'name': 'pbl', 'long': 'Planetary Boundary Layer Height', 'units': 'm'},                   
          'CAPE': {'name': 'cape', 'long': 'Convective Available Potential Enegery', 'units': 'J/kg'},
          'MLCAPE': {'name': 'mlcape',  'long': 'Mixed Layer Convective Available Potential Enegery', 'units': 'J/kg'},
          'SBCAPE': {'name': 'sbcape', 'long': 'Surface Based Convective Available Potential Enegery', 'units': 'J/kg'},
          'SOILM': {'name': 'sm', 'long': 'Soil Moisture', 'units': 'kg/m^3', 'other': 'The first layer below the surface, varies per sFWD to what depth'},
          'VSM': {'name': 'vsm', 'long': 'Volumetric Soil Moisture', 'units': 'm^3/m^3 (ratio)', 'other': 'The first layer below the surface, varies per sFWD to what depth'},
          'U10': {'name': 'u10', 'long': 'u-component of the 10-m wind', 'units': 'm/s'},
          'V10': {'name': 'v10', 'long': 'v-component of the 10-m wind', 'units': 'm/s'},
          'GUST':{'name':  'gust', 'long': 'Instantaneous 10-m Wind Gust', 'units': 'm/s'},
          'PRATE': {'name': 'prate', 'long': 'Precipitation Rate', 'units': 'kg/m^2s'},
          'TP': {'name': 'tp', 'long': 'Total Precipitation', 'units': 'kg/m^2'},
          'TCP': {'name': 'tcp', 'long': 'Total Convective Precipitation', 'units': 'm'},
          'LSPR': {'name': 'lspr', 'long': 'Large Scale Rain Rate', 'units': 'kg/m^2s'},
          'TEMP': {'name': 't2',  'long': '2-m Temperature', 'units': 'K'},
          'RH': {'name': 'rh', 'long': 'Relative Humidity', 'units': '%'},
          'DEWPOINT': {'name': 'd2', 'long': '2-m Dewpoint', 'units': 'K'},
          'WINDSPEED': {'name': 'ws',  'long': 'Wind Speed', 'units': 'm/s'},
          'WINDDIR': {'name': 'wd',  'long': 'Wind Direction', 'units': 'degrees'},
          'HDWI': {'name': 'hdw',  'long': 'Surface-Based Hot Dry Windy', 'units': 'hPa*m/s'},
          'VPD': {'name': 'vpd', 'long': 'Vapor Pressure Deficit', 'units': 'hPa'},
          'FOSBERG': {'name': 'ffwi', 'long': 'Fosberg Fire Weather Index', 'units': 'N/A [0-100 scale]'}}


In [5]:
### A manual list of variables that link via keys-to-keys to the metadata dict. It is seperate from the dictionaries stored in File_concat_mod_functions.ipynb for that reason. 

variables_by_model =  {

    'CONUS404': {'PBL': 'PBLH', 'SBCAPE': 'SBCAPE', 'MLCAPE': 'MLCAPE','VSM':'SMOIS', 'U10': 'U10', 'V10': 'V10','WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi','VPD': 'vpd', 'TP': 'PREC_ACC_NC', 'TEMP': 'T2', 'RH': 'rh', 'FOSBERG': 'ffwi', 'DEWPOINT': 'TD2'},
    'ERA5': {'PBL': 'blh', 'CAPE': 'cape', 'VSM':'swvl1','U10': 'u10', 'V10': 'v10', 'GUST': 'i10fg','TP': 'tp', 'TCP': 'cp', 'LSPR': 'lsrr','TEMP': 't2m', 'DEWPOINT': 'd2m','RH': 'rh', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi'},
    'HRRR': {'PBL': 'blh', 'CAPE': 'cape', 'SOILM':'mstav','U10': 'u10', 'V10': 'v10', 'GUST': 'gust','PRATE': 'prate', 'TP': 'tp', 'TEMP': 't2m', 'DEWPOINT': 'd2m', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi', 'RH': 'rh'},
    'NAM': {'PBL': 'hpbl', 'CAPE': 'cape', 'SOILM': 'sm', 'U10': 'u10', 'V10': 'v10', 'GUST': 'gust', 'TP': 'tp', 'TEMP': 't2m', 'RH': 'r', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi'},
    'NARR': {'PBL': 'Planetary_boundary_layer_height_surface', 'SOILM':'Soil_moisture_content_layer_between_two_depths_below_surface_layer', 'U10': 'u-component_of_wind_height_above_ground', 'V10': 'v-component_of_wind_height_above_ground', 'PRATE': 'Precipitation_rate_surface', 'TP': 'Total_precipitation_surface_3_Hour_Accumulation','TEMP': 'Temperature_height_above_ground', 'RH': 'Relative_humidity_height_above_ground', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi'},
    'NCEP': {'SOILM':'soilw', 'U10': 'uwnd', 'V10': 'vwnd', 'PRATE': 'prate', 'TEMP': 'air', 'RH': 'rhum', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi'},
    'UFS_S2S': {'PBL': 'hpbl', 'CAPE': 'cape', 'VSM':'soilw', 'U10': 'u10', 'V10': 'v10', 'GUST': 'gust', 'PRATE': 'prate', 'TEMP': 't2m', 'RH': 'r2', 'WINDSPEED': 'wspeed', 'WINDDIR': 'wdir', 'HDWI': 'hdwi', 'VPD': 'vpd', 'FOSBERG': 'ffwi'}}


# For the Database

### make final directory for files

In [6]:
for m in model_options:
    dir_maker(f'database_files_final/{m}')

## Final Clean Function

In [7]:
def special_NCEP_cleaner(db_file, v, f):

    ''' This function takes problematic NCEP Renalysis II files from the database 
        and adds a missing time dimension that got lost or selects one level, converts 
        longitudes to -180 to 0, and changes -inf values to np.nan for consistency. 
    
    
    Inputs:
    
    db_file: (xarray dataset) a NCEP Renalysis file opened with xarray
    v: (str) variable name
    f: (str) file name -- not path, just filename from final_clean_db function
    
    Outputs:
    
    db_file: (xarray dataset) the new, cleaned version of the input 
    
    '''
    

    if 'level' in list(db_file.dims):

        db_file = db_file.isel(level = 0)
        db_file = db_file.drop('level')

    if 'level' in list(db_file.coords):
        
        db_file = db_file.drop('level')        

    if v == 'vpd' and f.split('_')[0][-3:] == 'AVG':

        db_file['time'] = db_file[v].time.values

    db_file[v] = xr.where(db_file[v] == -np.Inf, np.nan, db_file[v])
    db_file = db_file.rename({'lat': 'latitude', 'lon': 'longitude'})
    db_file['longitude'] = ((db_file['longitude'] + 180) % 360) - 180

    return db_file

    

In [8]:
def final_clean_db(m, vars):

    ''' This function takes database files and unifies all variable and coordinate names, 
    alters the file out names, performs any cleaning still necessary, assigns attributes
    and information such as units, exporting to the database_files_final directory. 

    This function assumes that CONUS404, HRRR, NAM, and NARR have already been regridded. 
    These 4 models were already 'cleaned' through the regridding process. 
    
    Inputs:
    
    m: (s) model name
    vars: (list of strings) list of variable names for the model 
    
    Outputs:
    
    Nothing, files are just saved to specified output directory. 
    
    '''

    
    vars_dict = {}                                                                                                                           # Initialize a dictionary to store variable mappings
                                                              
    for v in vars:                                                                                                                           # Iterate over variables
        vars_dict[v.split('_')[0]] = v                                                                                                       # Map the first part of the variable name to the full variable name
                                                              
    if m == 'ERA5' or m == 'NCEP':                                                                                                           # Check if the model is ERA5 or NCEP
        input_dir = f'database_files/{m}'                                                                                                    # Set the input directory for standard models
    
    else:                                                          
        input_dir = f'database_files_regridded/{m}'                                                                                          # Set the input directory for regridded models

    bad = []
    
    for f in sorted(os.listdir(input_dir)):                                                                                                  # Iterate over sorted files in the input directory
        
        try:
        
            if f[-2:] == 'nc':                                                                                                                   # Check if the file is a NetCDF file
                file = f'{input_dir}/{f}'                                                                                                        # Construct the full file path
                db_file = xr.open_dataset(file)                                                                                                  
        
                try:
                    v = vars_dict[file.split('/')[-1].split('_')[0]]                                                                             # Extract the variable name from the file name
                    outfile_pre = f'{get_filename(m)}_Abs_{file.split('/')[-1].split('_')[-1]}'                                                  # Prepare the output file prefix
        
                    if m == 'NARR':                                                                                                              # Special handling for NARR model
                        if 'Daily' in file.split('/')[-1].split('_'):                                                                            # Check if the file is daily
                            outfile_pre = f'{get_filename(m)}_{file.split('/')[-1].split('_')[-5][-3:]}_Daily_{file.split('/')[-1].split('_')[-1]}'          
                
                except KeyError:                                                                                                                 # Handle cases where the variable is not found
                    v = vars_dict[file.split('/')[-1].split('_')[0][0:-3]]                                                                       # Adjust the variable extraction for special cases
                    outfile_pre = f'{get_filename(m)}_{file.split('/')[-1].split('_')[0][-3:]}_Daily_{file.split('/')[-1].split('_')[-1]}'          
        
                searchkey = list(variables_by_model[m].keys())[list(variables_by_model[m].values()).index(v)]                                    # Find the search key for the variable
                newkey = var_names_units[searchkey]                                                                                              # Get the new variable name and units
                                                                  
                if newkey['name'] != v:                                                                                                          # Check if the variable name needs to be updated
                    db_file[newkey['name']] = db_file[v]                                                                                         # Update the variable name
                    db_file = db_file.drop(v)                                                                                                    # Drop the old variable name
                                                                  
                attrs_to_del = []                                                                                                                # Clear old attributes 
    
                for key, item in db_file.attrs.items():
                    attrs_to_del.append(key)
                
                for i in attrs_to_del:
                    del db_file.attrs[i]
    
                attrs_to_del_var = []                                                                                                            # Clear old attributes             
                
                for key, item in db_file[newkey['name']].attrs.items():
                    attrs_to_del_var.append(key)
                
                for j in attrs_to_del_var:
                    del db_file[newkey['name']].attrs[j]  
                            
                db_file[newkey['name']] = db_file[newkey['name']].assign_attrs({'Variable Name': newkey['long'], 'Units': newkey['units']})    # Assign new attributes
        
                if m == 'ERA5' or m == 'NCEP':                                                                                                   # Handle attributes for ERA5 and NCEP
                    db_file = db_file.assign_attrs({'Original Grid': 'latlon', 'New Regrid': 'no regridding, latlon'})                      
                else:                      
                    db_file = db_file.assign_attrs({'Original Grid': 'Lambert Conformal', 'New Regrid': 'latlon'})                               # Handle attributes for other models
                              
                if m == 'CONUS404':                                                                                                              # Special handling for CONUS404 model
                    db_file = db_file.rename({'Time': 'time'})                                                            
                elif m == 'NCEP':                                                                                                                # Special cleaning for NCEP model
                    db_file = special_NCEP_cleaner(db_file, v, f)
    
                elif m == 'NAM':
                    db_file = db_file.groupby('time').first()
                                                                    
                db_file = db_file.sortby('time')                                                                                                 # Sort the dataset by time
                                                                    
                outfile = f'{newkey['name']}_{outfile_pre}'                                                                                      # Construct the final output file name
                db_file.to_netcdf(f'database_files_final/{m}/{outfile}')                                                                         # Save the dataset to a NetCDF file
                
                print(outfile)                                                                                                                   
                del outfile                                                                                                                      
        
        except:
    
            bad.append(f)
    
    return bad


### ERA5

In [13]:
m = 'ERA5'

In [14]:
era_vars = [value for key,value in variables_by_model[m].items()]

In [15]:
era_vars

['blh',
 'cape',
 'swvl1',
 'u10',
 'v10',
 'i10fg',
 'tp',
 'cp',
 'lsrr',
 't2m',
 'd2m',
 'rh',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [20]:
%%time

final_clean_db(m, era_vars)

pbl_ERA5_REANALYSIS_AVG_Daily_2011.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2012.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2013.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2014.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2015.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2016.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2017.nc
pbl_ERA5_REANALYSIS_AVG_Daily_2018.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2011.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2012.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2013.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2014.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2015.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2016.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2017.nc
pbl_ERA5_REANALYSIS_MAX_Daily_2018.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2011.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2012.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2013.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2014.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2015.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2016.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2017.nc
pbl_ERA5_REANALYSIS_MIN_Daily_2018.nc
pbl_ERA5_REANALYSIS_Abs_2011.nc
pbl_ERA5_REANALYSIS_Abs_2012.nc
pbl_ERA5_REANALYSIS_Abs_

### NCEP

In [21]:
m = 'NCEP'

In [22]:
ncep_vars = [value for key,value in variables_by_model[m].items()]

In [23]:
ncep_vars

['soilw',
 'uwnd',
 'vwnd',
 'prate',
 'air',
 'rhum',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [24]:
%%time

final_clean_db(m, ncep_vars)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/s2sfire/miniconda3/envs/noaa_s2s/lib/python3.12/site-packages/xarray/core/dataset.py", line 1446, in _construct_dataarray
    variable = self._variables[name]
               ~~~~~~~~~~~~~~~^^^^^^
KeyError: 'air'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/s2sfire/miniconda3/envs/noaa_s2s/lib/python3.12/site-packages/IPython/core/magics/execution.py", line 1332, in time
    out = eval(code, glob, local_ns)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<timed eval>", line 1, in <module>
  File "/tmp/ipykernel_27033/3566287398.py", line 83, in final_clean_db
    db_file = special_NCEP_cleaner(db_file, v, f)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_27033/171401216.py", line 30, in special_NCEP_cleaner
    db_file[v] = xr.where(db_file[v] == -np.Inf, np.nan, db_file[v])
                          ~~~~~~~^^^
  File "/home/s2sfire/miniconda3/envs/noaa

### NARR

In [25]:
m = 'NARR'

In [26]:
narr_vars = [value for key,value in variables_by_model[m].items()]

In [27]:
narr_vars

['Planetary_boundary_layer_height_surface',
 'Soil_moisture_content_layer_between_two_depths_below_surface_layer',
 'u-component_of_wind_height_above_ground',
 'v-component_of_wind_height_above_ground',
 'Precipitation_rate_surface',
 'Total_precipitation_surface_3_Hour_Accumulation',
 'Temperature_height_above_ground',
 'Relative_humidity_height_above_ground',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [28]:
%%time

final_clean_db(m, narr_vars)

pbl_NARR_REANALYSIS_AVG_Daily_2011.nc
pbl_NARR_REANALYSIS_AVG_Daily_2012.nc
pbl_NARR_REANALYSIS_AVG_Daily_2013.nc
pbl_NARR_REANALYSIS_AVG_Daily_2014.nc
pbl_NARR_REANALYSIS_MAX_Daily_2011.nc
pbl_NARR_REANALYSIS_MAX_Daily_2012.nc
pbl_NARR_REANALYSIS_MAX_Daily_2013.nc
pbl_NARR_REANALYSIS_MAX_Daily_2014.nc
pbl_NARR_REANALYSIS_MIN_Daily_2011.nc
pbl_NARR_REANALYSIS_MIN_Daily_2012.nc
pbl_NARR_REANALYSIS_MIN_Daily_2013.nc
pbl_NARR_REANALYSIS_MIN_Daily_2014.nc
pbl_NARR_REANALYSIS_Abs_2011.nc
pbl_NARR_REANALYSIS_Abs_2012.nc
pbl_NARR_REANALYSIS_Abs_2013.nc
pbl_NARR_REANALYSIS_Abs_2014.nc
prate_NARR_REANALYSIS_AVG_Daily_2011.nc
prate_NARR_REANALYSIS_AVG_Daily_2012.nc
prate_NARR_REANALYSIS_AVG_Daily_2013.nc
prate_NARR_REANALYSIS_AVG_Daily_2014.nc
prate_NARR_REANALYSIS_MAX_Daily_2011.nc
prate_NARR_REANALYSIS_MAX_Daily_2012.nc
prate_NARR_REANALYSIS_MAX_Daily_2013.nc
prate_NARR_REANALYSIS_MAX_Daily_2014.nc
prate_NARR_REANALYSIS_MIN_Daily_2011.nc
prate_NARR_REANALYSIS_MIN_Daily_2012.nc
prate_NARR_REANA

### NAM

In [17]:
m = 'NAM'

In [18]:
nam_vars = [value for key,value in variables_by_model[m].items()]

In [19]:
nam_vars

['hpbl',
 'cape',
 'sm',
 'u10',
 'v10',
 'gust',
 'tp',
 't2m',
 'r',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [24]:
%%time

final_clean_db(m, nam_vars)

cape_NAM_HISTORICAL_AVG_Daily_2017.nc
cape_NAM_HISTORICAL_AVG_Daily_2018.nc
cape_NAM_HISTORICAL_MAX_Daily_2017.nc
cape_NAM_HISTORICAL_MAX_Daily_2018.nc
cape_NAM_HISTORICAL_MIN_Daily_2017.nc
cape_NAM_HISTORICAL_MIN_Daily_2018.nc
cape_NAM_HISTORICAL_Abs_2017.nc
cape_NAM_HISTORICAL_Abs_2018.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2011.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2013.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2014.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2015.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2016.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2017.nc
ffwi_NAM_HISTORICAL_AVG_Daily_2018.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2011.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2013.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2014.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2015.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2016.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2017.nc
ffwi_NAM_HISTORICAL_MAX_Daily_2018.nc
ffwi_NAM_HISTORICAL_MIN_Daily_2011.nc
ffwi_NAM_HISTORICAL_MIN_Daily_2013.nc
ffwi_NAM_HISTORICAL_MIN_Daily_2014.nc
ffwi_NAM_HISTORICAL_MIN_Daily_2015.nc
ffwi_NAM_HISTORICAL_MIN_

### HRRR

In [38]:
m = 'HRRR'

In [39]:
hrrr_vars = [value for key,value in variables_by_model[m].items()]

In [40]:
hrrr_vars

['blh',
 'cape',
 'mstav',
 'u10',
 'v10',
 'gust',
 'prate',
 'tp',
 't2m',
 'd2m',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi',
 'rh']

In [45]:
%%time

final_clean_db(m, hrrr_vars)

pbl_HRRR_HISTORICAL_AVG_Daily_2014.nc
pbl_HRRR_HISTORICAL_AVG_Daily_2015.nc
pbl_HRRR_HISTORICAL_AVG_Daily_2016.nc
pbl_HRRR_HISTORICAL_AVG_Daily_2017.nc
pbl_HRRR_HISTORICAL_AVG_Daily_2018.nc
pbl_HRRR_HISTORICAL_MAX_Daily_2014.nc
pbl_HRRR_HISTORICAL_MAX_Daily_2015.nc
pbl_HRRR_HISTORICAL_MAX_Daily_2016.nc
pbl_HRRR_HISTORICAL_MAX_Daily_2017.nc
pbl_HRRR_HISTORICAL_MAX_Daily_2018.nc
pbl_HRRR_HISTORICAL_MIN_Daily_2014.nc
pbl_HRRR_HISTORICAL_MIN_Daily_2015.nc
pbl_HRRR_HISTORICAL_MIN_Daily_2016.nc
pbl_HRRR_HISTORICAL_MIN_Daily_2017.nc
pbl_HRRR_HISTORICAL_MIN_Daily_2018.nc
pbl_HRRR_HISTORICAL_Abs_2014.nc
pbl_HRRR_HISTORICAL_Abs_2015.nc
pbl_HRRR_HISTORICAL_Abs_2016.nc
pbl_HRRR_HISTORICAL_Abs_2017.nc
pbl_HRRR_HISTORICAL_Abs_2018.nc
cape_HRRR_HISTORICAL_AVG_Daily_2014.nc
cape_HRRR_HISTORICAL_AVG_Daily_2015.nc
cape_HRRR_HISTORICAL_AVG_Daily_2016.nc
cape_HRRR_HISTORICAL_AVG_Daily_2017.nc
cape_HRRR_HISTORICAL_AVG_Daily_2018.nc
cape_HRRR_HISTORICAL_MAX_Daily_2014.nc
cape_HRRR_HISTORICAL_MAX_Daily_2015.

['wspeed_HRRR_HISTORICAL_Abs_2018.nc']

### CONUS404 

In [26]:
m = 'CONUS404'

In [35]:
c44_vars = [value for key,value in variables_by_model[m].items()]

In [36]:
c44_vars

['PBLH',
 'SBCAPE',
 'MLCAPE',
 'SMOIS',
 'U10',
 'V10',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'PREC_ACC_NC',
 'T2',
 'rh',
 'ffwi',
 'TD2']

In [None]:
%%time

final_clean_db(m, c44_vars)

mlcape_CONUS404_ANALYSIS_AVG_Daily_2011.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2012.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2013.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2014.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2015.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2016.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2017.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2018.nc
mlcape_CONUS404_ANALYSIS_AVG_Daily_2017.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2011.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2012.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2013.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2014.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2015.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2016.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2017.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2018.nc
mlcape_CONUS404_ANALYSIS_MAX_Daily_2017.nc
mlcape_CONUS404_ANALYSIS_MIN_Daily_2011.nc
mlcape_CONUS404_ANALYSIS_MIN_Daily_2012.nc
mlcape_CONUS404_ANALYSIS_MIN_Daily_2013.nc
mlcape_CONUS404_ANALYSIS_MIN_Daily_2014.nc
mlcape_CONUS404_ANALYSIS_MIN_Daily_2015.nc
mlcape_CONU

# For Harmonic Normals and Anomalies 

In [9]:
harms = ['anoms', 'climos']

for m in model_options:
    for h in harms:
        dir_maker(f'database_files_final/{m}/{h}')

In [6]:
def get_harmonic(m, v, ac):


    '' This function is designed to generate specific attribute information for harmonics. 
    
    Inputs:
    
    m: (s) model name
    v: (s) variable name
    ac: (s) anomaly or normal harmonic 
    
    Outputs:
    
    harm_dict: (s) attribute information for harmonic
    
    '''


    k_vals = {'CONUS404': '5',
              'ERA5':  '5',
              'HRRR':  '5',
              'NAM':  '5',
              'NARR': '3 (only 2011-2014 available)',
              'NCEP':  '5',
              'UFS_S2S':  '5'}
  
    k_out = k_vals[m]

    if m == 'NAM':
        if v == 'cape' or v == 'gust':

            k_out = '1 (only 2017-2018 available)'

    if ac == 'anoms':

        harm_dict = f'Anomaly generated by taking the difference of the timeseries with the period normal which was computed as the sum of the first ({kout}) harmonics'

    else:

        harm_dict = f'Period normal computed as the sum of the first ({k_out}) harmonics'
            
    
    return harm_dict


In [7]:
def special_NCEP_cleaner_ac(db_file, v, f):

    ''' This function takes problematic NCEP Renalysis II files from the database 
        and adds a missing time dimension that got lost or selects one level, and 
        changes -inf values to np.nan for consistency. 
    
    
    Inputs:
    
    db_file: (xarray dataset) a NCEP Renalysis file opened with xarray
    v: (str) variable name
    f: (str) file name -- not path, just filename from final_clean_db function
    
    Outputs:
    
    db_file: (xarray dataset) the new, cleaned version of the input 
    
    '''
    

    if 'level' in list(db_file.dims):

        db_file = db_file.isel(level = 0)
        db_file = db_file.drop('level')

    if 'level' in list(db_file.coords):
        
        db_file = db_file.drop('level')        

    db_file[v] = xr.where(db_file[v] == -np.Inf, np.nan, db_file[v])

    try:
    
        db_file = db_file.rename({'lat': 'latitude', 'lon': 'longitude'})

    except:
        pass

    return db_file

    

In [59]:
def final_clean_db_ac(m, vars, ac):

    ''' This function takes database files and unifies all variable and coordinate names, 
    alters the file out names, performs any cleaning still necessary, assigns attributes
    and information such as units, exporting to the database_files_final directory. 

    This function assumes that CONUS404, HRRR, NAM, and NARR have already been regridded. 
    These 4 models were already 'cleaned' through the regridding process. 
    
    Inputs:
    
    m: (s) model name
    vars: (list of strings) list of variable names for the model 
    
    Outputs:
    
    Nothing, files are just saved to specified output directory. 
    
    '''

    
    vars_dict = {}                                                                                                                                # Initialize a dictionary to store variable mappings
                                                                   
    for v in vars:                                                                                                                                # Iterate over variables
        vars_dict[v.split('_')[0]] = v                                                                                                            # Map the first part of the variable name to the full variable name
                                                                   
    if m == 'ERA5' or m == 'NCEP':                                                                                                                # Check if the model is ERA5 or NCEP
        input_dir = f'database_files/{m}/{ac}'                                                                                                    # Set the input directory for standard models
    
    else:                                                          
        input_dir = f'database_files_regridded/{ac}/{m}'                                                                                          # Set the input directory for regridded models

    bad = []
    
    for f in sorted(os.listdir(input_dir)):                                                                                                       # Iterate over sorted files in the input directory
        
        try:
        
            if f[-2:] == 'nc':                                                                                                                    # Check if the file is a NetCDF file
                file = f'{input_dir}/{f}'                                                                                                         # Construct the full file path
                db_file = xr.open_dataset(file)                                                                                                  

                file_split = file.split('/')[-1].split('_')
                v = vars_dict[file_split[0]]                                                                                                      # Extract the variable name from the file name
        
                if ac == 'anoms':
        
                    ac_file = f'ANOM'
        
                else:
        
                    ac_file = f'NORM'
                  
                if 'Daily' in file.split('/')[-1].split('_'):
        
                    if m == 'NARR':
                        possible = ['MIN', 'MAX', 'AVG']
                        outfile_pre = f'{list(set(possible) & set(file_split))[0]}_{ac_file}_{get_filename(m)}_Daily_{file.split('/')[-1].split('_')[-1]}'           # Prepare the output file prefix
        
                    else:
                        
                        outfile_pre = f'{file_split[1]}_{ac_file}_{get_filename(m)}_Daily_{file.split('/')[-1].split('_')[-1]}'                                      # Prepare the output file prefix
        
                else:
                
                    outfile_pre = f'{ac_file}_{get_filename(m)}_Abs_{file.split('/')[-1].split('_')[-1]}'                                                            # Prepare the output file prefix
        
                searchkey = list(variables_by_model[m].keys())[list(variables_by_model[m].values()).index(v)]                                                        # Find the search key for the variable
                newkey = var_names_units[searchkey]                                                                                                                  # Get the new variable name and units

                try:

                    if newkey['name'] != v:
                        db_file[newkey['name']] = db_file[v]                                                                                                         # Update the variable name
                        db_file = db_file.drop(v)                                                                                                                    # Drop the old variable name

                    elif newkey['name'] == v:

                        try:
                            
                            db_file[newkey['name']] = db_file[v]                                                                                                     # Update the variable name

                        except KeyError:
                            
                            if ac == 'anoms':
                                v = f'{v}_anoms'
                
                            else:
                                v = f'{v}_climo'                        
                            
                            db_file[newkey['name']] = db_file[v]                                                                                                     # Update the variable name
                            db_file = db_file.drop(v)                                                                                                                # Drop the old variable name
                    
                
                except KeyError:
    
                    try:
                    
                        if ac == 'anoms':
                            v = f'{v}_anoms'
            
                        else:
                            v = f'{v}_climo'
                        
                        if newkey['name'] != v:
                            db_file[newkey['name']] = db_file[v]                                                                                                    # Update the variable name
                            db_file = db_file.drop(v)                                                                                                               # Drop the old variable name

                    except:
                        
                        raise                                                                  
                
                attrs_to_del = []                                                                                                                                   # Clear old attributes 
    
                for key, item in db_file.attrs.items():
                    attrs_to_del.append(key)
                
                for i in attrs_to_del:
                    del db_file.attrs[i]
    
                attrs_to_del_var = []                                                                                                                               # Clear old attributes             
                
                for key, item in db_file[newkey['name']].attrs.items():
                    attrs_to_del_var.append(key)
                
                for j in attrs_to_del_var:
                    del db_file[newkey['name']].attrs[j]  
                            
                db_file[newkey['name']] = db_file[newkey['name']].assign_attrs({'Variable Name': newkey['long'], 'Units': newkey['units'], 'Harmonic': get_harmonic(m, newkey['name'], ac)})    # Assign new attributes
        
                if m == 'ERA5' or m == 'NCEP':                                                                                                                               # Handle attributes for ERA5 and NCEP
                    db_file = db_file.assign_attrs({'Original Grid': 'latlon', 'New Regrid': 'no regridding, latlon'})                                                  
                else:                                                  
                    db_file = db_file.assign_attrs({'Original Grid': 'Lambert Conformal', 'New Regrid': 'latlon'})                                                           # Handle attributes for other models
                                                          
                if m == 'CONUS404':                                                                                                                                          # Special handling for CONUS404 model
                    db_file = db_file.rename({'Time': 'time'})                                                                                        
                elif m == 'NCEP':                                                                                                                                            # Special cleaning for NCEP model
                    db_file = special_NCEP_cleaner_ac(db_file, newkey['name'], f)                            
                                
                elif m == 'NAM':                            
                    try:                            
                        db_file = db_file.groupby('time').first()                            
                    except:                            
                        pass                            
                            
                vars_to_keep = ['latitude', 'longitude', 'time', newkey['name']]                            
                for vout in list(db_file.variables):                            
                    if vout not in vars_to_keep:                            
                        db_file = db_file.drop(vout)                            
                                            
                db_file = db_file.sortby('time')                                                                                                                             # Sort the dataset by time
                            
                outfile = f'{newkey['name']}_{outfile_pre}'                                                                                                                  # Construct the final output file name
                print(outfile)                                                                                                                                                                                                                   
                db_file.to_netcdf(f'database_files_final/{m}/{ac}/{outfile}')                                                                                                # Save the dataset to a NetCDF file
                                            
                                                                                                                                   
                del outfile                                                                                                                      
        
        except:
    
            bad.append(f)
    
    return bad


### ERA5

In [19]:
m = 'ERA5'

In [20]:
era_vars = [value for key,value in variables_by_model[m].items()]

In [21]:
era_vars

['blh',
 'cape',
 'swvl1',
 'u10',
 'v10',
 'i10fg',
 'tp',
 'cp',
 'lsrr',
 't2m',
 'd2m',
 'rh',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [27]:
%%time

bad_eras = []

for ac in harms:
    print(f'{ac} for {m}')
    bad_eras.append(final_clean_db_ac(m, era_vars, ac))

anoms for ERA5
pbl_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
pbl_ANOM_ERA5_REANALYSIS_Abs_period.nc
pbl_MAX_ANOM_ERA5_REANALYSIS_Daily_period.nc
pbl_MIN_ANOM_ERA5_REANALYSIS_Daily_period.nc
cape_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
cape_ANOM_ERA5_REANALYSIS_Abs_period.nc
cape_MAX_ANOM_ERA5_REANALYSIS_Daily_period.nc
cape_MIN_ANOM_ERA5_REANALYSIS_Daily_period.nc
tcp_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
tcp_ANOM_ERA5_REANALYSIS_Abs_period.nc
tcp_MAX_ANOM_ERA5_REANALYSIS_Daily_period.nc
tcp_MIN_ANOM_ERA5_REANALYSIS_Daily_period.nc
d2_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
d2_ANOM_ERA5_REANALYSIS_Abs_period.nc
d2_MAX_ANOM_ERA5_REANALYSIS_Daily_period.nc
d2_MIN_ANOM_ERA5_REANALYSIS_Daily_period.nc
ffwi_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
ffwi_ANOM_ERA5_REANALYSIS_Abs_period.nc
ffwi_MAX_ANOM_ERA5_REANALYSIS_Daily_period.nc
ffwi_MIN_ANOM_ERA5_REANALYSIS_Daily_period.nc
hdw_AVG_ANOM_ERA5_REANALYSIS_Daily_period.nc
hdw_ANOM_ERA5_REANALYSIS_Abs_period.nc
hdw_MAX_ANOM_ERA5_REANALYSI

In [28]:
bad_eras

[[], []]

### NCEP

In [60]:
m = 'NCEP'

In [61]:
ncep_vars = [value for key,value in variables_by_model[m].items()]

In [62]:
ncep_vars

['soilw',
 'uwnd',
 'vwnd',
 'prate',
 'air',
 'rhum',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [63]:
%%time

bad_nceps = []

for ac in harms[1:]:
    print(f'{ac} for {m}')
    bad_nceps.append(final_clean_db_ac(m, ncep_vars, ac))

climos for NCEP
t2_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
t2_MAX_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
t2_MIN_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
t2_NORM_NCEP_REANALYSIS_V2_Abs_period.nc
ffwi_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
ffwi_MAX_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
ffwi_MIN_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
ffwi_NORM_NCEP_REANALYSIS_V2_Abs_period.nc
hdw_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
hdw_MAX_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
hdw_MIN_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
hdw_NORM_NCEP_REANALYSIS_V2_Abs_period.nc
prate_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
prate_MAX_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
prate_MIN_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
prate_NORM_NCEP_REANALYSIS_V2_Abs_period.nc
rh_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
rh_MAX_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
rh_MIN_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
rh_NORM_NCEP_REANALYSIS_V2_Abs_period.nc
sm_AVG_NORM_NCEP_REANALYSIS_V2_Daily_period.nc
sm_

In [58]:
bad_nceps

[[]]

### NARR

In [29]:
m = 'NARR'

In [30]:
narr_vars = [value for key,value in variables_by_model[m].items()]

In [31]:
narr_vars

['Planetary_boundary_layer_height_surface',
 'Soil_moisture_content_layer_between_two_depths_below_surface_layer',
 'u-component_of_wind_height_above_ground',
 'v-component_of_wind_height_above_ground',
 'Precipitation_rate_surface',
 'Total_precipitation_surface_3_Hour_Accumulation',
 'Temperature_height_above_ground',
 'Relative_humidity_height_above_ground',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [32]:
%%time

bad_narr = []

for ac in harms:
    print(f'{ac} for {m}')
    bad_narr.append(final_clean_db_ac(m, narr_vars, ac))

anoms for NARR
pbl_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
pbl_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
pbl_MIN_ANOM_NARR_REANALYSIS_Daily_period.nc
pbl_ANOM_NARR_REANALYSIS_Abs_period.nc
prate_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
prate_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
prate_MIN_ANOM_NARR_REANALYSIS_Daily_period.nc
prate_ANOM_NARR_REANALYSIS_Abs_period.nc
rh_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
rh_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
rh_MIN_ANOM_NARR_REANALYSIS_Daily_period.nc
rh_ANOM_NARR_REANALYSIS_Abs_period.nc
sm_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
sm_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
sm_MIN_ANOM_NARR_REANALYSIS_Daily_period.nc
sm_ANOM_NARR_REANALYSIS_Abs_period.nc
t2_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
t2_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
t2_MIN_ANOM_NARR_REANALYSIS_Daily_period.nc
t2_ANOM_NARR_REANALYSIS_Abs_period.nc
tp_AVG_ANOM_NARR_REANALYSIS_Daily_period.nc
tp_MAX_ANOM_NARR_REANALYSIS_Daily_period.nc
tp_MIN_ANOM_NARR_REANALYSIS_Dai

In [None]:
bad_narr

### NAM

In [33]:
m = 'NAM'

In [34]:
nam_vars = [value for key,value in variables_by_model[m].items()]

In [35]:
nam_vars

['hpbl',
 'cape',
 'sm',
 'u10',
 'v10',
 'gust',
 'tp',
 't2m',
 'r',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [36]:
%%time

bad_nam = []

for ac in harms:
    print(f'{ac} for {m}')
    bad_nam.append(final_clean_db_ac(m, nam_vars, ac))

anoms for NAM
cape_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
cape_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
cape_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc
cape_ANOM_NAM_HISTORICAL_Abs_period.nc
ffwi_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
ffwi_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
ffwi_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc
ffwi_ANOM_NAM_HISTORICAL_Abs_period.nc
gust_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
gust_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
gust_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc
gust_ANOM_NAM_HISTORICAL_Abs_period.nc
hdw_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
hdw_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
hdw_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc
hdw_ANOM_NAM_HISTORICAL_Abs_period.nc
rh_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
rh_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
rh_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc
rh_ANOM_NAM_HISTORICAL_Abs_period.nc
sm_AVG_ANOM_NAM_HISTORICAL_Daily_period.nc
sm_MAX_ANOM_NAM_HISTORICAL_Daily_period.nc
sm_MIN_ANOM_NAM_HISTORICAL_Daily_period.nc

In [None]:
bad_nam

### HRRR

In [37]:
m = 'HRRR'

In [38]:
hrrr_vars = [value for key,value in variables_by_model[m].items()]

In [39]:
hrrr_vars

['blh',
 'cape',
 'mstav',
 'u10',
 'v10',
 'gust',
 'prate',
 'tp',
 't2m',
 'd2m',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi',
 'rh']

In [40]:
%%time 

bad_hrrr = []

for ac in harms:
    print(f'{ac} for {m}')
    bad_hrrr.append(final_clean_db_ac(m, hrrr_vars, ac))

anoms for HRRR
pbl_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
pbl_ANOM_HRRR_HISTORICAL_Abs_period.nc
pbl_MAX_ANOM_HRRR_HISTORICAL_Daily_period.nc
pbl_MIN_ANOM_HRRR_HISTORICAL_Daily_period.nc
cape_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
cape_ANOM_HRRR_HISTORICAL_Abs_period.nc
cape_MAX_ANOM_HRRR_HISTORICAL_Daily_period.nc
cape_MIN_ANOM_HRRR_HISTORICAL_Daily_period.nc
d2_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
d2_ANOM_HRRR_HISTORICAL_Abs_period.nc
d2_MAX_ANOM_HRRR_HISTORICAL_Daily_period.nc
d2_MIN_ANOM_HRRR_HISTORICAL_Daily_period.nc
ffwi_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
ffwi_ANOM_HRRR_HISTORICAL_Abs_period.nc
ffwi_MAX_ANOM_HRRR_HISTORICAL_Daily_period.nc
ffwi_MIN_ANOM_HRRR_HISTORICAL_Daily_period.nc
gust_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
gust_ANOM_HRRR_HISTORICAL_Abs_period.nc
gust_MAX_ANOM_HRRR_HISTORICAL_Daily_period.nc
gust_MIN_ANOM_HRRR_HISTORICAL_Daily_period.nc
hdw_AVG_ANOM_HRRR_HISTORICAL_Daily_period.nc
hdw_ANOM_HRRR_HISTORICAL_Abs_period.nc
hdw_MAX_ANOM_HRRR_HISTO

In [47]:
bad_hrrr

[[], []]

### CONUS404 

In [41]:
m = 'CONUS404'

In [42]:
c44_vars = [value for key,value in variables_by_model[m].items()]

In [43]:
c44_vars

['PBLH',
 'SBCAPE',
 'MLCAPE',
 'SMOIS',
 'U10',
 'V10',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'PREC_ACC_NC',
 'T2',
 'rh',
 'ffwi',
 'TD2']

In [44]:
%%time

bad_c44 = []

for ac in harms:
    print(f'{ac} for {m}')
    bad_c44.append(final_clean_db_ac(m, c44_vars, ac))

anoms for CONUS404
mlcape_AVG_ANOM_CONUS404_ANALYSIS_Daily_period.nc
mlcape_ANOM_CONUS404_ANALYSIS_Abs_period.nc
mlcape_MAX_ANOM_CONUS404_ANALYSIS_Daily_period.nc
mlcape_MIN_ANOM_CONUS404_ANALYSIS_Daily_period.nc
pbl_AVG_ANOM_CONUS404_ANALYSIS_Daily_period.nc
pbl_ANOM_CONUS404_ANALYSIS_Abs_period.nc
pbl_MAX_ANOM_CONUS404_ANALYSIS_Daily_period.nc
pbl_MIN_ANOM_CONUS404_ANALYSIS_Daily_period.nc
tp_ACC_ANOM_CONUS404_ANALYSIS_Daily_period.nc
tp_ANOM_CONUS404_ANALYSIS_Abs_period.nc
tp_ACC_ANOM_CONUS404_ANALYSIS_Daily_period.nc
tp_ACC_ANOM_CONUS404_ANALYSIS_Daily_period.nc
sbcape_AVG_ANOM_CONUS404_ANALYSIS_Daily_period.nc
sbcape_ANOM_CONUS404_ANALYSIS_Abs_period.nc
sbcape_MAX_ANOM_CONUS404_ANALYSIS_Daily_period.nc
sbcape_MIN_ANOM_CONUS404_ANALYSIS_Daily_period.nc
vsm_AVG_ANOM_CONUS404_ANALYSIS_Daily_period.nc
vsm_ANOM_CONUS404_ANALYSIS_Abs_period.nc
vsm_MAX_ANOM_CONUS404_ANALYSIS_Daily_period.nc
vsm_MIN_ANOM_CONUS404_ANALYSIS_Daily_period.nc
t2_AVG_ANOM_CONUS404_ANALYSIS_Daily_period.nc
t2_AN

In [49]:
bad_c44

[[], []]