<a name="top"></a>
<div style="width:1000px">

<div style="float:right; width:98px; height:98px;">
<img src="https://cdn.miami.edu/_assets-common/images/system/um-logo-gray-bg.png" alt="Miami Logo" style="height: 98px;">
</div>

<div style="float:right; width:98px; height:98px;">
<img src="https://media.licdn.com/dms/image/C4E0BAQFlOZSAJABP4w/company-logo_200_200/0/1548285168598?e=2147483647&v=beta&t=g4jl8rEhB7HLJuNZhU6OkJWHW4cul_y9Kj_aoD7p0_Y" alt="STI Logo" style="height: 98px;">
</div>


<h1>Finalize Database Files - UFS S2S</h1>
By: Kayla Besong, PhD
    <br>
Last Edited: 05/15/24
<br>
<br>    
<br>
This script takes all the downloaded forecast data in the database and generates the final form of each file. This includes changing variable names where needed, adding metadata, and unifying missing/nan values.
<br>
<br>
A second section for finalizing harmonic analysis output is provided.  
<br>
<br>
This code is not pretty, but it got the job done in the time available! Make whatever edits you need for your own version. It was adapted from a version that iterated over many models and changed everything to the sFWRD database. The original UFS S2S files provide plenty of metadata. 
<br>
<div style="clear:both"></div>
</div>

<hr style="height:2px;">

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import os
import glob

In [2]:
%run File_concat_mod_functions.ipynb

In [3]:
model_options = ['UFS_S2S']

In [4]:
### manually input the attributes you'd like to store as metadata in each netcdf file

var_names_units = {

          # Each entry maps a generic variable key to the metadata written into the final files:
          #   'name'  -> variable name used in the output dataset and output filename
          #   'long'  -> stored as the 'Variable Name' attribute
          #   'units' -> stored as the 'Units' attribute
          #   'other' -> free-text note (not written by the cleaning functions in this notebook)
          'PBL': {'name': 'pbl', 'long': 'Planetary Boundary Layer Height', 'units': 'm'},                   
          'CAPE': {'name': 'cape', 'long': 'Convective Available Potential Energy', 'units': 'J/kg'},
          'MLCAPE': {'name': 'mlcape',  'long': 'Mixed Layer Convective Available Potential Energy', 'units': 'J/kg'},
          'SBCAPE': {'name': 'sbcape', 'long': 'Surface Based Convective Available Potential Energy', 'units': 'J/kg'},
          'SOILM': {'name': 'sm', 'long': 'Soil Moisture', 'units': 'kg/m^3', 'other': 'The first layer below the surface, varies per sFWD to what depth'},
          'VSM': {'name': 'vsm', 'long': 'Volumetric Soil Moisture', 'units': 'm^3/m^3 (ratio)', 'other': 'The first layer below the surface, varies per sFWD to what depth'},
          'U10': {'name': 'u10', 'long': 'u-component of the 10-m wind', 'units': 'm/s'},
          'V10': {'name': 'v10', 'long': 'v-component of the 10-m wind', 'units': 'm/s'},
          'GUST':{'name':  'gust', 'long': 'Instantaneous 10-m Wind Gust', 'units': 'm/s'},
          'PRATE': {'name': 'prate', 'long': 'Precipitation Rate', 'units': 'kg/m^2s'},
          'TP': {'name': 'tp', 'long': 'Total Precipitation', 'units': 'kg/m^2'},
          'TCP': {'name': 'tcp', 'long': 'Total Convective Precipitation', 'units': 'm'},
          'LSPR': {'name': 'lspr', 'long': 'Large Scale Rain Rate', 'units': 'kg/m^2s'},
          'TEMP': {'name': 't2',  'long': '2-m Temperature', 'units': 'K'},
          'RH': {'name': 'rh', 'long': 'Relative Humidity', 'units': '%'},
          'DEWPOINT': {'name': 'd2', 'long': '2-m Dewpoint', 'units': 'K'},
          'WINDSPEED': {'name': 'ws',  'long': 'Wind Speed', 'units': 'm/s'},
          'WINDDIR': {'name': 'wd',  'long': 'Wind Direction', 'units': 'degrees'},
          'HDWI': {'name': 'hdw',  'long': 'Surface-Based Hot Dry Windy', 'units': 'hPa*m/s'},
          'VPD': {'name': 'vpd', 'long': 'Vapor Pressure Deficit', 'units': 'hPa'},
          'FOSBERG': {'name': 'ffwi', 'long': 'Fosberg Fire Weather Index', 'units': 'N/A [0-100 scale]'}}


In [5]:
### A manual list of variables whose keys link directly to the keys of the metadata dict above. That is why it is kept separate from the dictionaries stored in File_concat_mod_functions.ipynb. 

# Per model: generic variable key (matching var_names_units) -> variable name used in that model's files.
variables_by_model = {
    'UFS_S2S': {
        'PBL': 'hpbl',
        'CAPE': 'cape',
        'VSM': 'soilw',
        'U10': 'u10',
        'V10': 'v10',
        'GUST': 'gust',
        'PRATE': 'prate',
        'TEMP': 't2m',
        'RH': 'r2',
        'WINDSPEED': 'wspeed',
        'WINDDIR': 'wdir',
        'HDWI': 'hdwi',
        'VPD': 'vpd',
        'FOSBERG': 'ffwi',
    },
}


# For the Database

### make final directory for files

In [6]:
input_dir = 'database_files'
output_dir = 'database_files_final'

In [7]:
m = 'UFS_S2S'
prototypes = [5,6,7,8, 'MPM']

In [8]:
for m in model_options:
    dir_maker(f'{output_dir}/{m}')

In [9]:
# Mirror the input tree under output_dir: one folder per prototype, and inside it
# one folder per entry found in the matching input prototype folder.
# `m` is whatever model name the preceding cells left bound.
for p in prototypes:

    model_out = f'{output_dir}/{m}'
    dir_maker(model_out)
    dir_maker(os.path.join(model_out, str(p)))

    parent_dirs = glob.glob(os.path.join(f'{input_dir}/{m}/{p}/', '*'))
    prototype_out = f'{output_dir}/{m}/{p}'

    for i in parent_dirs:
        subdir = i.split('/')[-1]                                  # last path component of the globbed entry
        dir_maker(os.path.join(prototype_out, subdir))

## Final Clean Function

In [10]:
def final_clean_db(m, vars, p):

    ''' This function takes the database files found under database_files/{m}/{p}/<subdir>/,
    renames the data variable to the unified name in var_names_units, clears old attributes,
    assigns new attributes (long name, units, time-coordinate descriptions), drops coordinates
    that are not part of the final layout, sorts by valid_time and exports each file to
    database_files_final/{m}/{p}/<subdir>/.

    A file is recognised by a token in its underscore-separated name:
      * '..._Abs_...'   -> the first token is the model variable name
      * '..._Daily_...' -> the first token is the model variable name followed by a
                           3-character statistic (e.g. 'ffwiMAX'); the statistic is moved
                           into the output file name
    Anything else that ends in 'nc' is reported as failed.

    Note carried over from the multi-model version of this function: it assumes that
    CONUS404, HRRR, NAM, and NARR have already been regridded ('cleaned' through the
    regridding process).

    Relies on names defined elsewhere in the notebook: variables_by_model, var_names_units
    and get_filename (the latter comes from File_concat_mod_functions.ipynb).

    Inputs:

    m: (str) model name
    vars: (list of strings) list of variable names for the model
    p: (int or str) the prototype number, 5, 6, 7, 8, MPM

    Outputs:

    bad: (list of str) list of filenames that failed during execution

    '''

    vars_dict = {v.split('_')[0]: v for v in vars}                   # first '_'-separated token of each name -> full variable name

    input_dir = f'database_files/{m}/{p}'                            # where the files to finalize live
    output_root = f'database_files_final/{m}/{p}'                    # where the finalized files are written

    time_attrs = {'time': {'long_name': 'initial time of forecast','standard_name': 'forecast_reference_time'},
                  'step': {'long_name': 'time since forecast_reference_time','standard_name': 'forecast_period'},
                  'valid_time': {'long_name': 'time + step', 'standard_name': 'time'}}

    bad = []

    for subdir in sorted(os.listdir(input_dir)):

        if not os.path.isdir(f'{input_dir}/{subdir}'):               # a stray file here would otherwise abort the whole run
            continue

        for f in sorted(os.listdir(f'{input_dir}/{subdir}')):

            if f[-2:] != 'nc':                                       # only NetCDF files are processed
                continue

            try:

                file = f'{input_dir}/{subdir}/{f}'
                parts = f.split('_')

                if 'Abs' in parts:
                    v = vars_dict[parts[0]]
                    outfile_pre = f'{get_filename(m)}_Abs_{parts[-1]}'

                elif 'Daily' in parts:
                    v = vars_dict[parts[0][0:-3]]                    # strip the trailing 3-character statistic
                    outfile_pre = f'{get_filename(m)}_{parts[0][-3:]}_Daily_{parts[-1]}'

                else:
                    raise ValueError('file type does not match')

                model_vars = variables_by_model[m]
                searchkey = list(model_vars.keys())[list(model_vars.values()).index(v)]     # generic key whose model-specific name is v
                newkey = var_names_units[searchkey]
                out_name = newkey['name']

                # The context manager closes the underlying file even when a step below fails.
                with xr.open_dataset(file) as opened:

                    db_file = opened

                    if out_name != v:                                # rename the data variable to its unified name
                        db_file[out_name] = db_file[v]
                        db_file = db_file.drop_vars(v)

                    vars_to_keep = ['time', 'step', 'valid_time', 'latitude', 'longitude', out_name]

                    if 'step' not in db_file.variables:              # derive the lead time when the file does not carry it
                        db_file = db_file.assign_coords({'step': db_file['valid_time'] - db_file['time']})

                    for keepv in vars_to_keep:                       # clear old attributes
                        db_file[keepv].attrs.clear()

                    db_file[out_name] = db_file[out_name].assign_attrs({'Variable Name': newkey['long'], 'Units': newkey['units']})

                    for vout in list(db_file.coords):
                        if vout not in vars_to_keep:
                            db_file = db_file.drop_vars(vout)
                        elif vout in time_attrs:
                            db_file[vout] = db_file[vout].assign_attrs(time_attrs[vout])

                    db_file = db_file.sortby('valid_time')

                    outfile = f'{out_name}_{outfile_pre}'
                    db_file.to_netcdf(f'{output_root}/{subdir}/{outfile}')

                print(outfile)

            except Exception as err:                                 # keep going, but record the file and say why it failed
                print(f'{f} to bad ({type(err).__name__}: {err})')
                bad.append(f)

    return bad


### P = 5

In [11]:
m = 'UFS_S2S'
p = 5

In [12]:
ufs_vars = list(variables_by_model[m].values())          # model-specific variable names, in dict order

In [13]:
ufs_vars

['hpbl',
 'cape',
 'soilw',
 'u10',
 'v10',
 'gust',
 'prate',
 't2m',
 'r2',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [14]:
%%time

bad_ufs_5_1 = final_clean_db(m, ufs_vars, p)

Struct() takes at most 1 argument (3 given)


cape_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
cape_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
cape_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
cape_UFS_S2S_FORECAST_Abs_20110401.nc to bad
ffwi_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_Abs_20110401.nc
gust_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
gust_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
gust_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
gust_UFS_S2S_FORECAST_Abs_20110401.nc
hdw_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
hdw_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
hdw_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
hdw_UFS_S2S_FORECAST_Abs_20110401.nc
prate_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
prate_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
prate_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
prate_UFS_S2S_FORECAST_Abs_20110401.nc
rh_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
rh_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
rh_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
rh_UFS_S2S_FORECA

In [40]:
bad_ufs_5_1

['cape_UFS_S2S_FORECAST_Abs_20110401.nc']

### P = 6, 7, 8, MPM
Prototype 5 was used for testing.

In [36]:
m = 'UFS_S2S'
ps = [6, 7, 8, 'MPM']

In [37]:
ufs_vars = list(variables_by_model[m].values())          # model-specific variable names, in dict order

In [38]:
ufs_vars

['hpbl',
 'cape',
 'soilw',
 'u10',
 'v10',
 'gust',
 'prate',
 't2m',
 'r2',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [39]:
%%time

# One list of failed filenames per prototype, in the same order as `ps`.
bad_ufs_all_1 = []
for p in ps:
    failed_for_prototype = final_clean_db(m, ufs_vars, p)
    bad_ufs_all_1.append(failed_for_prototype)

cape_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
cape_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
cape_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
cape_UFS_S2S_FORECAST_Abs_20110401.nc
ffwi_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
ffwi_UFS_S2S_FORECAST_Abs_20110401.nc
gust_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
gust_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
gust_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
gust_UFS_S2S_FORECAST_Abs_20110401.nc
prate_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
prate_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
prate_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
prate_UFS_S2S_FORECAST_Abs_20110401.nc
rh_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
rh_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
rh_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
rh_UFS_S2S_FORECAST_Abs_20110401.nc
vsm_UFS_S2S_FORECAST_AVG_Daily_20110401.nc
vsm_UFS_S2S_FORECAST_MAX_Daily_20110401.nc
vsm_UFS_S2S_FORECAST_MIN_Daily_20110401.nc
vsm_UFS_S2S_FORECAST_Abs_

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [41]:
bad_ufs_all_1

[[],
 [],
 ['ffwiMAX_UFS_S2S_FORECAST_Daily_20171201.nc',
  'ffwiMIN_UFS_S2S_FORECAST_Daily_20171201.nc'],
 []]

# For Harmonic Normals and Anomalies 

### make final directory for files

In [19]:
input_dir = 'database_files'
output_dir = 'database_files_final'

In [20]:
m = 'UFS_S2S'
prototypes = [5,6,7,8, 'MPM']
dd = ['climos', 'anoms']
ddd = ['day', 'week']

In [21]:
for m in model_options:
    dir_maker(f'{output_dir}/{m}')

In [22]:
# Build output_dir/<model>/<climos|anoms>/<prototype>/<day|week>.
# `m` is whatever model name the preceding cells left bound.
for d in dd:

    harmonic_root = f'{output_dir}/{m}/{d}'
    dir_maker(harmonic_root)

    for p in prototypes:

        dir_maker(os.path.join(harmonic_root, str(p)))
        prototype_root = f'{output_dir}/{m}/{d}/{p}'

        for i in ddd:

            dir_maker(os.path.join(prototype_root, str(i)))

## Final Clean Functions

In [23]:
def get_harmonic(m, v, ac, subdir, k_out=5):


    ''' This function generates the text stored as the 'Harmonic' attribute of a
    harmonic-analysis output variable.

    Inputs:

    m: (str) model name (accepted for a uniform call signature; not used in the text)
    v: (str) variable name (accepted for a uniform call signature; not used in the text)
    ac: (str) 'anoms' selects the anomaly wording; any other value selects the period-normal wording
    subdir: (str) lead grouping inserted after "based on lead" (callers in this notebook pass the
            sub-directory name)
    k_out: (int) number of harmonics quoted in the text; defaults to 5, the value that was
           previously hard-coded

    Outputs:

    harm_dict: (str) attribute information for harmonic

    '''

    if ac == 'anoms':

        harm_dict = f'Anomaly generated by taking the difference of the timeseries with the period normal, based on lead {subdir}, which was computed as the sum of the first ({k_out}) harmonics'

    else:

        harm_dict = f'Period normal, based on lead {subdir}, computed as the sum of the first ({k_out}) harmonics'


    return harm_dict


In [24]:
def special_NCEP_cleaner_ac(db_file, v, f):

    ''' This function cleans a problematic NCEP Reanalysis II dataset from the database:
        it reduces a 'level' dimension to its first entry, removes any 'level' coordinate,
        changes -inf values of the data variable to np.nan for consistency, and renames
        'lat'/'lon' to 'latitude'/'longitude' when those names are present.

        When no 'level' selection or drop takes place, the variable replacement is made on
        the dataset object that was passed in.

    Inputs:

    db_file: (xarray dataset) a NCEP Reanalysis file opened with xarray
    v: (str) variable name
    f: (str) file name -- not path, just filename from final_clean_db function
       (currently unused; kept so existing calls keep working)

    Outputs:

    db_file: (xarray dataset) the new, cleaned version of the input

    '''

    if 'level' in db_file.dims:

        db_file = db_file.isel(level = 0)                            # keep only the first level

    # After isel() 'level' survives only as a scalar coordinate, and only if it was a coordinate
    # to begin with, so check before dropping instead of dropping unconditionally.
    if 'level' in db_file.coords:

        db_file = db_file.drop_vars('level')

    # np.inf (lower case) is the spelling that still exists in NumPy 2.x.
    db_file[v] = xr.where(db_file[v] == -np.inf, np.nan, db_file[v])

    # Rename only what is actually there, and never onto a name that already exists,
    # rather than attempting the rename and swallowing whatever error comes back.
    renames = {}
    for old, new in (('lat', 'latitude'), ('lon', 'longitude')):
        present = old in db_file.variables or old in db_file.dims
        taken = new in db_file.variables or new in db_file.dims
        if present and not taken:
            renames[old] = new

    if renames:
        db_file = db_file.rename(renames)

    return db_file

    

In [25]:
def final_clean_db_ac(m, vars, p, ac):

    ''' This function takes the harmonic-analysis files found under
    database_files/{m}/LEAD/{ac}/{p}/<subdir>/, renames the data variable to the unified name
    in var_names_units, attaches a lead_week / lead_day coordinate parsed from the file name,
    clears old attributes, assigns new attributes (long name, units, harmonic description,
    time-coordinate descriptions), drops coordinates that are not part of the final layout,
    sorts by valid_time and exports each file to database_files_final/{m}/{ac}/{p}/<subdir>/.

    File names are split on '_': the first token is the model variable name, the last token
    carries the lead (characters 4-5, e.g. 'lead07.nc' -> 7), and either an 'Abs' token or
    one of 'MIN' / 'MAX' / 'AVG' must be present.

    The data variable inside the file may be stored under the model variable name itself or
    under that name with an '_anoms' (ac == 'anoms') or '_climo' (otherwise) suffix.

    Note carried over from the multi-model version of this function: it assumes that
    CONUS404, HRRR, NAM, and NARR have already been regridded ('cleaned' through the
    regridding process).

    Relies on names defined elsewhere in the notebook: variables_by_model, var_names_units,
    get_harmonic and get_filename (the latter comes from File_concat_mod_functions.ipynb).

    Inputs:

    m: (str) model name
    vars: (list of str) list of variable names for the model
    p: (int or str) the prototype number, 5, 6, 7, 8, MPM
    ac: (str) anoms or climos subfolder or harmonic type

    Outputs:

    bad: (list of str) list of filenames that failed during execution

    '''

    vars_dict = {v.split('_')[0]: v for v in vars}                   # first '_'-separated token of each name -> full variable name

    input_dir = f'database_files/{m}/LEAD/{ac}/{p}'                  # where the files to finalize live
    output_root = f'database_files_final/{m}/{ac}/{p}'               # where the finalized files are written

    if ac == 'anoms':
        file_ac = 'ANOM'                                             # tag used in the output file name
        stored_suffix = '_anoms'                                     # suffix the variable may carry inside the file
    else:
        file_ac = 'NORM'
        stored_suffix = '_climo'

    time_attrs = {'time': {'long_name': 'initial time of forecast','standard_name': 'forecast_reference_time'},
                  'valid_time': {'long_name': 'time + lead', 'standard_name': 'time'}}

    bad = []

    for subdir in sorted(os.listdir(input_dir)):

        if not os.path.isdir(f'{input_dir}/{subdir}'):               # a stray file here would otherwise abort the whole run
            continue

        # Everything that is not the 'week' folder is treated as daily leads.
        unit = 'week' if subdir == 'week' else 'day'
        lead_name = f'lead_{unit}'
        lead_description = (f"We aligned all valid times that have the same lead time (in {unit}s) from the initialization date "
                            f"along the '{unit}' axis. This means that for each forecast, the valid times that are a specific "
                            f"number of {unit}s from the initialization date are grouped together, allowing us to analyze the "
                            f"forecast accuracy for each lead time.")

        for f in sorted(os.listdir(f'{input_dir}/{subdir}')):

            if f[-2:] != 'nc':                                       # only NetCDF files are processed
                continue

            try:

                file = f'{input_dir}/{subdir}/{f}'
                parts = f.split('_')

                v = vars_dict[parts[0]]

                if 'Abs' in parts:

                    outfile_pre = f'{file_ac}_{get_filename(m)}_Abs_{parts[-1]}'

                else:

                    stat = next((token for token in parts if token in ('MIN', 'MAX', 'AVG')), None)
                    if stat is None:
                        raise ValueError('file name has neither Abs nor MIN/MAX/AVG')
                    outfile_pre = f'{stat}_{file_ac}_{get_filename(m)}_{parts[-1]}'

                model_vars = variables_by_model[m]
                searchkey = list(model_vars.keys())[list(model_vars.values()).index(v)]     # generic key whose model-specific name is v
                newkey = var_names_units[searchkey]
                out_name = newkey['name']

                lead = int(parts[-1][4:6])                           # two digits after the 4-character prefix of the last token

                # The context manager closes the underlying file even when a step below fails.
                with xr.open_dataset(file) as opened:

                    db_file = opened

                    # The variable is stored either under its plain name or with the anomaly/climo suffix.
                    source_name = v if v in db_file.variables else f'{v}{stored_suffix}'

                    if source_name != out_name:                      # rename the data variable to its unified name
                        db_file[out_name] = db_file[source_name]     # KeyError here (neither spelling present) marks the file bad
                        db_file = db_file.drop_vars(source_name)

                    db_file = db_file.assign_coords({lead_name: lead})
                    vars_to_keep = ['time', 'valid_time', 'latitude', 'longitude', out_name, lead_name]

                    for keepv in vars_to_keep:                       # clear old attributes
                        db_file[keepv].attrs.clear()

                    db_file[out_name] = db_file[out_name].assign_attrs({'Variable Name': newkey['long'], 'Units': newkey['units'], 'Harmonic': get_harmonic(m, out_name, ac, subdir)})

                    db_file[lead_name] = db_file[lead_name].assign_attrs({'Description': lead_description})

                    for vout in list(db_file.coords):
                        if vout not in vars_to_keep:
                            db_file = db_file.drop_vars(vout)
                        elif vout in time_attrs:
                            db_file[vout] = db_file[vout].assign_attrs(time_attrs[vout])

                    db_file = db_file.sortby('valid_time')

                    outfile = f'{out_name}_{outfile_pre}'
                    db_file.to_netcdf(f'{output_root}/{subdir}/{outfile}')

                print(outfile)

            except Exception as err:                                 # keep going, but record the file and say why it failed
                print(f'{f} to bad ({type(err).__name__}: {err})')
                bad.append(f)

    return bad


### P = 5

In [26]:
m = 'UFS_S2S'
p = 5
harms = ['anoms', 'climos']

In [27]:
ufs_vars = list(variables_by_model[m].values())          # model-specific variable names, in dict order

In [28]:
ufs_vars

['hpbl',
 'cape',
 'soilw',
 'u10',
 'v10',
 'gust',
 'prate',
 't2m',
 'r2',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [29]:
%%time

# One list of failed filenames per harmonic type, in the same order as `harms`.
bad_ufs_5 = []
for ac in harms:
    print(ac)
    failed_for_type = final_clean_db_ac(m, ufs_vars, p, ac)
    bad_ufs_5.append(failed_for_type)


anoms
cape_AVG_ANOM_UFS_S2S_FORECAST_lead00.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead01.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead02.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead03.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead04.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead05.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead06.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead07.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead08.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead09.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead10.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead11.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead12.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead13.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead14.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead15.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead16.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead17.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead18.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead19.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead20.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead21.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead22.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead23.nc
cape_AVG_A

In [42]:
bad_ufs_5

[[], []]

### P = 6, 7, 8, MPM
Prototype 5 was used for testing.

In [31]:
m = 'UFS_S2S'
ps = [6, 7, 8, 'MPM']
harms = ['anoms', 'climos']

In [32]:
ufs_vars = list(variables_by_model[m].values())          # model-specific variable names, in dict order

In [33]:
ufs_vars

['hpbl',
 'cape',
 'soilw',
 'u10',
 'v10',
 'gust',
 'prate',
 't2m',
 'r2',
 'wspeed',
 'wdir',
 'hdwi',
 'vpd',
 'ffwi']

In [34]:
%%time

# Outer list follows `ps`, inner list follows `harms`; each leaf is a list of failed filenames.
bad_ufs_all = []
for p in ps:
    sub_bad = []
    for ac in harms:
        failed_for_type = final_clean_db_ac(m, ufs_vars, p, ac)
        sub_bad.append(failed_for_type)
    bad_ufs_all.append(sub_bad)

cape_AVG_ANOM_UFS_S2S_FORECAST_lead00.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead01.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead02.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead03.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead04.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead05.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead06.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead07.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead08.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead09.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead10.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead11.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead12.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead13.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead14.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead15.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead16.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead17.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead18.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead19.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead20.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead21.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead22.nc
cape_AVG_ANOM_UFS_S2S_FORECAST_lead23.nc
cape_AVG_ANOM_UF

In [35]:
bad_ufs_all

[[[], []], [[], []], [[], []], [[], []]]