In [5]:
# Utility imports
import importlib
import os
import sys

# Local imports
sys.path.insert(1, '/projects/GEOCLIM/gr7610/scripts')
import composite_TC, track_TC_GCM

In [6]:
def check_climatology_exists(model_name: str,
                             experiment_name: str,
                             field_name: str,
                             year_range: tuple[int, int],
                             pressure_level: int|None=None,
                             domain: str='atmos',
                             diagnostic: bool=False):

    ''' 
    Make sure a daily climatology exists for the model-experiment configuration in question. 
    Stops function execution if no climatological data are found.
    '''

    diagnostic_tag = '[check_climatology_exists()]'

    # Correct pressure level if None
    pressure_level = 'full' if pressure_level is None else pressure_level
    # Storage directory for climatological data
    storage_dirname = '/tigress/GEOCLIM/gr7610/analysis/model_out/'
    # Define the substring used to search for matching data in the storage directory
    search_substr = f'model_{model_name}-exp_{experiment_name}-type_{domain}-var_{field_name}-mean_daily-resample-{pressure_level}'

    # Helper function to check start and end years [filename minimum (maximum) should be less than (greater than) or equal to requested years]
    check_years = lambda x: (int(x.split('-')[-1].split('.')[0].split('_')[0]) <= min(year_range)) and (int(x.split('-')[-1].split('.')[0].split('_')[1]) >= max(year_range))
    # Check if file exists in the directory
    filenames = [f for f in os.listdir(storage_dirname)
                 if search_substr in f 
                 and f.endswith('.nc')
                 and check_years(f)]

    if diagnostic:
        print(f'{diagnostic_tag} filenames found: {filenames}')

    assert len(filenames) == 1, f'{diagnostic_tag} Multiple or insufficient filenames for model {model_name}, experiment {experiment_name}, field name {field_name} found: {filenames}. Please check the year ranges and remove any duplicate files as needed.'

In [7]:
def check_track_data_exists(model_name: str,
                            experiment_name: str,
                            year_range: tuple[int, int],
                            diagnostic: bool=False):

    ''' 
    Make sure track data exists for the model-experiment configuration in question. 
    Stops function execution if no track data are found.

    A file in the storage directory counts as a match when its name contains
    'model_{model_name}.experiment_{experiment_name}', ends with '.pkl', and the start/end
    years parsed from its second '.'-delimited token bracket `year_range`.

    Raises an AssertionError unless exactly one file matches. Returns None.
    If `diagnostic` is True, the list of matching filenames is printed before the check.
    '''

    diagnostic_tag = '[check_track_data_exists()]'

    # Storage directory for track data
    storage_dirname = '/projects/GEOCLIM/gr7610/analysis/tc_storage/track_data'
    # Define the substring used to search for matching data in the storage directory
    search_substr = f'model_{model_name}.experiment_{experiment_name}'

    # Helper function to check start and end years [filename minimum (maximum) should be less than (greater than) or equal to requested years]
    # The second '.'-delimited token is split on '_' and the first character of each part is dropped before integer conversion
    # (presumably a one-letter start/end prefix -- confirm against the track file naming convention).
    check_years = lambda x: (int(x.split('.')[1].split('_')[0][1:]) <= min(year_range)) and (int(x.split('.')[1].split('_')[1][1:]) >= max(year_range))
    # Check if file exists in the directory
    filenames = [f for f in os.listdir(storage_dirname)
                 if search_substr in f 
                 and f.endswith('.pkl') 
                 and check_years(f)]

    if diagnostic:
        print(f'{diagnostic_tag} filenames found: {filenames}')

    # The message only references names defined in this function, so a failed check
    # raises the intended AssertionError rather than a NameError.
    assert len(filenames) == 1, f'{diagnostic_tag} Multiple or insufficient filenames for model {model_name}, experiment {experiment_name} found: {filenames}. Please check the year ranges and remove any duplicate files as needed.'

In [12]:
def generate_TC_GCM_data(model_name: str,
                         experiment_name: str,
                         year_range: tuple[int, int],
                         GCM_data_type: str='atmos_4xdaily',
                         number_of_TCs: int=25,
                         override_user_input: bool=False,
                         diagnostic: bool=False):
    
    '''
    Method to generate GCM output corresponding to TCs for a given model-experiment configuration.

    Counts the '.nc' files already stored for the configuration. When more than
    `threshold_TC_count` exist and `override_user_input` is False, the user is asked whether
    additional data should be generated; otherwise generation proceeds without prompting.
    Generation is delegated to `track_TC_GCM.main`.

    Arguments:
    - model_name, experiment_name: identify the configuration.
    - year_range: forwarded to `track_TC_GCM.main`.
    - GCM_data_type: forwarded to `track_TC_GCM.main`.
    - number_of_TCs: NOTE(review): currently not forwarded -- the storm count passed to
      `track_TC_GCM.main` is fixed by `number_of_storms` below. Confirm whether this
      argument should drive it.
    - override_user_input: if True, skip the prompt and always generate.
    - diagnostic: currently unused in this function.

    Returns None.
    '''

    importlib.reload(track_TC_GCM)
    diagnostic_tag = '[generate_TC_GCM_data()]'

    # TC generation parameters
    intensity_parameter = 'min_slp'
    number_of_storms = 25
    intensity_range = (0, 1000)
    latitude_range = (-40, 40)
    storage_dirname = '/projects/GEOCLIM/gr7610/analysis/TC-AQP/data/individual_TCs'
    
    # Check how many TCs are currently saved. 
    # If at or below `threshold_TC_count`, generate automatically. Else, prompt the user.
    
    threshold_TC_count = 50 # minimum number of TCs needed for adequate compositing sample
    config_TC_substr = f'TC.model-{model_name}.experiment-{experiment_name}' # substring used to search for configuration-specific TCs
    filenames_TCs = [f for f in os.listdir(storage_dirname)
                     if config_TC_substr in f 
                     and f.endswith('.nc')]
    # Determine if new TCs will be generated
    if len(filenames_TCs) > threshold_TC_count and not override_user_input:
        prompt = input(f'{diagnostic_tag} {len(filenames_TCs)} TCs currently have data generated for configuration {model_name}-{experiment_name}. Should additional data be generated? (y/n)')
        # Only replies that start with 'y' (e.g. 'y', 'Yes') count as affirmative;
        # a substring test would also accept replies such as 'no way'.
        generate_new_TCs = prompt.strip().lower().startswith('y')
    else:
        generate_new_TCs = True

    # Generate new GCM data corresponding to tracked TCs for the iterand configuration
    if generate_new_TCs:
        track_TC_GCM.main(model_name=model_name, 
                          experiment_name=experiment_name,
                          year_range=year_range, 
                          intensity_parameter=intensity_parameter, 
                          intensity_range=intensity_range, 
                          latitude_range=latitude_range,
                          number_of_storms=number_of_storms,
                          GCM_data_type=GCM_data_type,
                          storage_dirname=storage_dirname)

In [13]:
def generate_TC_composites(model_name: str,
                           experiment_name: str,
                           year_range: tuple[int, int],
                           field_name: str,
                           number_of_snapshots: int,
                           diagnostic: bool=False):

    '''
    Run `composite_TC.main` for one model-experiment configuration and field.

    The `composite_TC` module is reloaded first. The configuration name passed on is
    '{model_name}-{experiment_name}'; the intensity filter, compositing mode, saving,
    parallel flag and directories are fixed inside this function.
    `diagnostic` is currently unused here. Returns None.
    '''

    importlib.reload(composite_TC)

    # Fixed compositing settings
    compositing_options = {
        'intensity_parameter': 'min_slp',
        'intensity_range': (0, 1000),
        'compositing_mode': 'anomaly',
        'save_data': True,
        'parallel': True,
        'TC_source_dirname': '/projects/GEOCLIM/gr7610/analysis/TC-AQP/data/individual_TCs',
        'storage_dirname': '/projects/GEOCLIM/gr7610/analysis/TC-AQP/data/composite',
    }

    composite_TC.main(configuration_name=f'{model_name}-{experiment_name}',
                      field_name=field_name,
                      year_range=year_range,
                      number_of_snapshots=number_of_snapshots,
                      **compositing_options)

In [14]:
def main(configuration_name: str,
         year_range: tuple[int, int],
         field_name: str,
         GCM_data_type: str='atmos_4xdaily',
         pressure_level: str|None=None,
         number_of_snapshots: int=50,
         processing_mode: str='generate_TC_GCM_data',
         diagnostic: bool=False):

    assert processing_mode in ['generate_TC_GCM_data', 'generate_TC_GCM_composites'], f"Processing mode must be one of ['generate_TC_GCM_data', 'generate_TC_GCM_composites']. "
    if diagnostic:
        print(f'Processing composite anomalies for configuration {configuration_name} for field {field_name}...')
    
    # Get model and experiment name
    assert len(configuration_name.split(':')) == 2, 'Configuration name must be of format {MODEL_NAME}:{EXPERIMENT_NAME}.'
    model_name, experiment_name = configuration_name.split(':')

    # 1. Check that track data for the configuration exists
    check_track_data_exists(model_name=model_name, 
                            experiment_name=experiment_name, 
                            year_range=year_range,
                            diagnostic=diagnostic)

    # 2. Generate TC-tracking GCM output for the iterand configuration
    generate_TC_GCM_data(model_name=model_name, 
                         experiment_name=experiment_name, 
                         year_range=year_range,
                         GCM_data_type=GCM_data_type,
                         number_of_TCs=number_of_snapshots,
                         diagnostic=diagnostic)

    if processing_mode == 'generate_TC_GCM_composites':

        # 3. Check if the climatology for the configuration exists
        check_climatology_exists(model_name=model_name, 
                                 experiment_name=experiment_name, 
                                 year_range=year_range, 
                                 field_name=field_name, 
                                 pressure_level=pressure_level,
                                 diagnostic=diagnostic)

        # 4. Perform anomaly compositing for a given model-experiment configuration
        generate_TC_composites(model_name=model_name,
                               experiment_name=experiment_name,
                               year_range=year_range,
                               field_name=field_name,
                               number_of_snapshots=number_of_snapshots,
                               diagnostic=diagnostic)

In [18]:
# Pick up any edits made to the tracking module since it was imported
importlib.reload(track_TC_GCM)

# Run settings: processing mode, fields to process, and per-configuration options
processing_mode = 'generate_TC_GCM_composites'
field_names = ['WVP']
configurations = {
    'HIRAM:CTL1990.15N': {'year_range': (2, 16),
                          'number_of_snapshots': 4000},
}

# Process every field for every configuration. Each configuration entry holds exactly the
# `year_range` and `number_of_snapshots` keyword arguments of `main`, so it is unpacked directly.
for configuration_name in configurations:
    for field_name in field_names:
        print('==========================================================================================')
        print(f'Processing {field_name} for {configuration_name}...')
        main(configuration_name=configuration_name,
             field_name=field_name,
             processing_mode=processing_mode,
             GCM_data_type='atmos_4xdaily',
             **configurations[configuration_name])

Processing WVP for HIRAM:CTL1990.15N...


[generate_TC_GCM_data()] 166 TCs currently have data generated for configuration HIRAM-CTL1990.15N. Should additional data be generated? (y/n) n


Processing composites for HIRAM, CTL1990.15N over years (2, 16)...
[load_track_data()] Number of unique storms in track dataset: 3008
[filter_track_data()] Number of unique storms in filtered track dataset: 166
[get_snapshots_from_track_data()] Number of snapshots: 4000; number of track dataset entries: 5817
Number of snapshots in container: 4000
Time elapsed for TCs: 44.95 s; per snapshot: 0.01 s.
Time elapsed for GCM data: 1039.48 s; per snapshot: 0.26 s.
Saving composite data to /projects/GEOCLIM/gr7610/analysis/TC-AQP/data/composite/TC.configuration.HIRAM-CTL1990.15N.field_name.WVP.year_range.2_16.intensity_range.0_1000.basin.global.nc...
Saving composite data to /projects/GEOCLIM/gr7610/analysis/TC-AQP/data/composite/GCM.configuration.HIRAM-CTL1990.15N.field_name.WVP.year_range.2_16.intensity_range.0_1000.basin.global.nc...
