In [None]:
# Block 0: Documentation

# Banner describing the notebook. Each print() ends its own line; the trailing '\n'
# inside a string leaves one blank line between paragraphs in the cell output.
print('Script to list and download GOES-16, -17, and -18 ABI L2 data files from Amazon Web Services (AWS)\n')
print('Version 4.0, August 25, 2022\n')
# Fixed duplicated word ("using using") in the displayed text
print('Written using Python v3.9 and s3fs v2022.5.0\n')
print('Author: Dr. Amy Huff (IMSG at NOAA/NESDIS/STAR), amy.huff@noaa.gov\n')
print('This script accesses the GOES ABI data archive on AWS and lists/downloads data files for the satellite, L2 product, and observation date and time period specified by the user.\n')
print('Block 1 imports modules and libraries. Blocks 2-5 are functions that require no input from the user; there is no visible output from these blocks. The user specifies search parameters in Block 6 and obtains output from Block 7.\n')
print('**Please acknowledge the NOAA/NESDIS/STAR Aerosols and Atmospheric Composition Science Team if using any of this code in your work/research!**')

In [None]:
# Block 1: Import Python packages

# Library to perform array operations
import numpy as np

# Module to interface with Amazon Simple Storage Service (S3)
import s3fs

# Module for manipulating dates and times
import datetime

# Library to create progress bars for loops/functions
from tqdm import tqdm

# Module for accessing system-specific parameters and functions
import sys

# Library to access core utilities for Python packages
from packaging.version import parse

# Module to set filesystem paths appropriate for user's operating system
from pathlib import Path

# Modules to create interactive menus in Jupyter Notebook
from IPython.display import display
import ipywidgets as widgets

# Import supporting functions needed to run script
# These functions moved to external .py file to make training Notebook cleaner
import supporting_functions

In [None]:
# Block 2: Find Julian day from user-specified observation year/month/day
# ABI data files are classified by Julian day, not Gregorian day/month, so Julian day needed to search AWS ABI archive
# "year", "month", "day": parameter variables from widget menus, set in main function

def find_julian(year, month, day):
    """Return the day of the year for a calendar date as a zero-padded string.

    Parameters
    ----------
    year, month, day : int
        Gregorian calendar date.

    Returns
    -------
    str
        Three-character day of year, '001' through '366'.

    Raises
    ------
    ValueError
        If the arguments do not form a valid calendar date.
    """
    ordinal_day = datetime.datetime(year, month, day).timetuple().tm_yday

    # Pad to three digits so the result matches the '%j' strftime convention
    return format(ordinal_day, '03d')

In [None]:
# Block 3: Find ABI L2 product abbreviation from user-specified product/scan sector
# Abbreviation is part of ABI file name; needed for AWS search
# "sector", "product": parameter variables from widget menus, set in main function

# Scan sector menu label -> single-letter suffix appended to the product stem.
# Both mesoscale sectors share the same 'M' abbreviation.
_SECTOR_SUFFIXES = {'Full Disk': 'F', 'CONUS': 'C', 'Meso 1': 'M', 'Meso 2': 'M'}

# Product menu label -> (abbreviation stem, sector suffixes for which an abbreviation is defined)
_PRODUCT_CODES = {
    'Aerosol Detection': ('ABI-L2-ADP', 'FCM'),
    'Aerosol Optical Depth': ('ABI-L2-AOD', 'FC'),
    'Clear Sky Mask': ('ABI-L2-ACM', 'FCM'),
    'Cloud & Moisture Imagery': ('ABI-L2-CMIP', 'FCM'),
    'Cloud & Moisture Imagery Multiband': ('ABI-L2-MCMIP', 'FCM'),
    'Cloud Optical Depth': ('ABI-L2-COD', 'FC'),
    'Cloud Particle Size': ('ABI-L2-CPS', 'FCM'),
    'Cloud Top Height': ('ABI-L2-ACHA', 'FCM'),
    'Cloud Top Phase': ('ABI-L2-ACTP', 'FCM'),
    'Cloud Top Pressure': ('ABI-L2-CTP', 'FC'),
    'Cloud Top Temperature': ('ABI-L2-ACHT', 'FM'),
    'Derived Motion Winds': ('ABI-L2-DMW', 'FCM'),
    'Derived Stability Indices': ('ABI-L2-DSI', 'FCM'),
    'Downward Shortwave Radiation': ('ABI-L2-DSR', 'FCM'),
    'Fire Hotspot Characterization': ('ABI-L2-FDC', 'FCM'),
    'Land Surface Temperature': ('ABI-L2-LST', 'FCM'),
    'Legacy Vertical Moisture Profile': ('ABI-L2-LVMP', 'FCM'),
    'Legacy Vertical Temperature Profile': ('ABI-L2-LVTP', 'FCM'),
    'Rainfall Rate/QPE': ('ABI-L2-RRQPE', 'F'),
    'Reflected Shortwave Radiation': ('ABI-L2-RSR', 'FC'),
    'Sea Surface Temperature': ('ABI-L2-SST', 'F'),
    'Total Precipitable Water': ('ABI-L2-TPW', 'FCM'),
    'Volcanic Ash': ('ABI-L2-VAA', 'F'),
}


def get_product_abbreviation(sector, product):
    """Look up the ABI L2 product abbreviation for a product/scan sector pair.

    Parameters
    ----------
    sector : str
        Scan sector menu label: 'Full Disk', 'CONUS', 'Meso 1' or 'Meso 2'.
    product : str
        Product menu label, e.g. 'Aerosol Optical Depth'.

    Returns
    -------
    str or None
        The abbreviation (e.g. 'ABI-L2-AODC'); the string 'None' when no
        abbreviation is defined for that product/sector combination; or the
        Python value None when "sector" is not one of the four known labels.

    Raises
    ------
    ValueError
        If "product" is not a known product label. Previously this case
        failed with an UnboundLocalError that did not name the bad value.
    """
    try:
        stem, generated_for = _PRODUCT_CODES[product]
    except KeyError:
        raise ValueError('Unknown ABI L2 product: ' + repr(product)) from None

    suffix = _SECTOR_SUFFIXES.get(sector)
    if suffix is None:
        # Unknown sector label: keep the historical "no entry" result
        return None

    # Callers compare against the *string* 'None' to detect unavailable combinations
    return stem + suffix if suffix in generated_for else 'None'

In [None]:
# Block 4: Create list containing ABI L2 data file names for user-specified satellite/product and date/time period
# "year", "month", "day", "start_hour", "start_min", "end_hour", "end_min", "satellite", "sector", "product": parameter
# variables from widget menus, set in main function

def aws_abi_list(year, month, day, start_hour, start_min, end_hour, end_min, satellite, sector, product):
    """List archive paths of ABI L2 files inside the requested observation window.

    Every hourly folder from "start_hour" through "end_hour" (inclusive) is
    listed for the given satellite, product abbreviation, year and day of year.
    The listing is then narrowed to files whose four characters at [-42:-38]
    fall between start_hour + start_min and end_hour + end_min (string
    comparison), and, for the 'Meso 1'/'Meso 2' sectors, to files whose sector
    digit matches the last character of "sector".

    Parameters
    ----------
    year, month, day : int
        Observation date.
    start_hour, start_min, end_hour, end_min : str
        Two-character hour/minute strings; hours must also be int()-convertible.
    satellite : int
        Satellite number used to build the bucket name ('noaa-goes' + number).
    sector, product : str
        Scan sector and product menu labels.

    Returns
    -------
    list of str
        Matching paths in the order the hourly listings returned them.
    """
    # Anonymous credentials are used to access the archive
    s3 = s3fs.S3FileSystem(anon=True)

    day_of_year = find_julian(year, month, day)
    abbreviation = get_product_abbreviation(sector, product)

    # Collect every file in each hourly folder covering the requested hours
    listing = []
    for hour in range(int(start_hour), int(end_hour) + 1):
        # Hour folder names are zero-padded to two digits; the empty last element
        # gives the trailing '/'
        folder = '/'.join(['noaa-goes' + str(satellite), abbreviation, str(year), day_of_year, f'{hour:02d}', ''])
        # "refresh=True" clears the cache so NRT files on the archive are retrievable
        listing.extend(s3.ls(folder, refresh=True))

    def within_window(path):
        # Reverse indexing counts from the end of the file name
        # (presumably the HHMM of the scan start time - confirm against the ABI naming convention)
        stamp = path[-42:-38]
        return (start_hour + start_min) <= stamp <= (end_hour + end_min)

    # Meso files must also match the requested sector number. Products with files
    # for individual ABI bands carry that digit three characters further from the end.
    if sector in ('Meso 1', 'Meso 2'):
        per_band = product in ('Cloud & Moisture Imagery', 'Derived Motion Winds')
        digit_index = -62 if per_band else -59
    else:
        digit_index = None

    return [
        path for path in listing
        if within_window(path) and (digit_index is None or path[digit_index] == sector[-1])
    ]

In [None]:
# Block 5: Print available ABI L2 data files that match user specifications, with option to download files
# "save_path": parameter variable assigned in main function

def get_abi_files(year, month, day, start_hour, start_min, end_hour, end_min, satellite, sector, product, save_path):
    """Print the ABI L2 files matching the search and optionally download them.

    The archive is queried with aws_abi_list(). Each matching file is printed
    with its approximate size in MB, the destination directory is shown, and
    the user is asked on the console whether to download. Files are downloaded
    only for the answers 'yes', 'YES', 'Yes', 'y' or 'Y'.

    Parameters
    ----------
    year, month, day, start_hour, start_min, end_hour, end_min, satellite, sector, product
        Search parameters, passed unchanged to aws_abi_list().
    save_path : pathlib.Path
        Directory that receives the downloaded files.

    Returns
    -------
    None
    """
    matches = aws_abi_list(year, month, day, start_hour, start_min, end_hour, end_min, satellite, sector, product)

    # Nothing to show or download
    if len(matches) == 0:
        print('No files retrieved. Check settings and try again.')
        return

    # Access AWS using anonymous credentials
    s3 = s3fs.S3FileSystem(anon=True)

    print('Available data files (approximate file size):')
    for remote_path in matches:
        megabytes = np.float16(s3.size(remote_path) / 1.0E6)
        size_text = np.format_float_positional(megabytes, unique=False, precision=1)
        print(remote_path.split('/')[-1] + ' (' + size_text + ' MB)')

    print('\nData files will be saved to: ' + str(save_path))

    # Ask before downloading; any answer outside the accepted list declines
    answer = input(f'Would you like to download the {len(matches)} files?\nType "yes" or "no" and hit "Enter"\n')
    if answer not in ('yes', 'YES', 'Yes', 'y', 'Y'):
        print('Files are not being downloaded.')
        return

    # Flush buffer if Python version < v3.9 to avoid glitch in tqdm library
    if parse(sys.version.split(' ')[0]) < parse('3.9'):
        sys.stdout.flush()

    progress = tqdm(matches, unit='files', bar_format="{desc}Downloading:{percentage:3.0f}%|{bar}|{n_fmt}/{total_fmt} [{elapsed}<{remaining}]")
    for remote_path in progress:
        # Join directory and bare file name as a pathlib.Path, then convert to a string for the download call
        destination = str(save_path / remote_path.split('/')[-1])
        s3.get(remote_path, destination)
    print('\nDownload complete!')

In [None]:
# Block 6: Enter satellite, ABI L2 product, view sector, observation date & start/end times for AWS search using interactive 
# Jupyter Notebook widgets

# Run this block *once* to generate menus
# When main function is run, it reads "(widget-menu-variable).value" of each menu selection
# Do NOT re-run block if you change menu selections! Re-running block resets menus to defaults!

# Formatting settings shared by all drop-down menus
style = {'description_width':'120px'}
layout = widgets.Layout(width='375px')


def _menu(description, options):
    # Build one drop-down menu with the shared style/layout
    return widgets.Dropdown(options=options, description=description, style=style, layout=layout)


def _zero_padded(count):
    # Two-character strings '00', '01', ... for hour/minute menus
    return [f'{value:02d}' for value in range(count)]


# Create drop-down menus using widgets
# (label, value) pairs show the label in the menu and return the value from ".value"
satellite = _menu('Satellite:', [(f'GOES-{number}', number) for number in (16, 17, 18)])
product = _menu('Product:', [
    'Aerosol Detection',
    'Aerosol Optical Depth',
    'Clear Sky Mask',
    'Cloud & Moisture Imagery',
    'Cloud & Moisture Imagery Multiband',
    'Cloud Optical Depth',
    'Cloud Particle Size',
    'Cloud Top Height',
    'Cloud Top Phase',
    'Cloud Top Pressure',
    'Cloud Top Temperature',
    'Derived Motion Winds',
    'Derived Stability Indices',
    'Downward Shortwave Radiation',
    'Fire Hotspot Characterization',
    'Land Surface Temperature',
    'Legacy Vertical Moisture Profile',
    'Legacy Vertical Temperature Profile',
    'Rainfall Rate/QPE',
    'Reflected Shortwave Radiation',
    'Sea Surface Temperature',
    'Total Precipitable Water',
    'Volcanic Ash',
])
sector = _menu('Scan Sector:', ['Full Disk', 'CONUS', 'Meso 1', 'Meso 2'])
year = _menu('Year:', [(str(value), value) for value in range(2019, 2026)])
month = _menu('Month:', [
    (name, number)
    for number, name in enumerate(
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], start=1)
])
day = _menu('Day:', [(str(value), value) for value in range(1, 32)])
shour = _menu('Start Hour (UTC):', _zero_padded(24))
smin = _menu('Start Minutes (UTC):', _zero_padded(60))
ehour = _menu('End Hour (UTC):', _zero_padded(24))
emin = _menu('End Minutes (UTC):', _zero_padded(60))

# Format observation start/end time hour and minutes menus to display side-by-side
start_time = widgets.HBox([shour, smin])
end_time = widgets.HBox([ehour, emin])

# Display drop-down menus
print('If you change menu selections (e.g., to run another search), do NOT re-run this block!\nRe-running will re-set all menus to their defaults!')
display(satellite, product, sector, year, month, day)
display(start_time, end_time)

# Create radiobutton menu to select directory to save downloaded files
# The text box starts disabled
caption = widgets.Label(value='SELECT A DIRECTORY TO SAVE DOWNLOADED FILES', layout=widgets.Layout(height='20px'))
radiobutton = widgets.RadioButtons(
    options=[('Current Working Directory', 1), ('Specify a Directory: (e.g., D://Data)', 2)],
    disabled=False,
    layout=widgets.Layout(height='40px'),
)
directory = widgets.Text(disabled=True, layout=widgets.Layout(width='500px', height='30px'))

# Callback: allow typing in the directory text box only while the
# "specify a directory" radiobutton (value 2) is selected
def handle_directory_change(change):
    """Enable the directory text box when the new radiobutton value is 2, otherwise disable it."""
    directory.disabled = change.new != 2

# Run the callback whenever the radiobutton's "value" changes
radiobutton.observe(handle_directory_change, names='value')

# Show the caption, radiobuttons, and directory text box
display(caption, radiobutton, directory)

In [None]:
# Block 7: Main Function (search AWS to find ABI L2 data files, with option to download files)
# Selections from widget menus (AWS search parameters) imported using "(widget-menu-variable).value"

# Main function
# Reads the current widget selections, validates them, and only then lists/downloads files.
# Validation order: end time before start time -> future date -> product not generated for
# sector -> problem with the save directory.
if __name__ == "__main__":

    # Set directory to save downloaded ABI files (as pathlib.Path object)
    if radiobutton.value == 1:  # 1=cwd, 2=user-specified directory
        save_path = Path.cwd()  # Set current working directory as pathlib.Path object
    else:
        # check_directory() result is compared against 0 (no error) and 1-3 (error codes) below
        save_path_error_message = supporting_functions.check_directory(directory.value)
        if save_path_error_message == 0:
            save_path = Path(directory.value)  # Set user-entered directory as pathlib.Path object
        else:
            # Sentinel string (not a Path) marks an unusable directory
            save_path = 'error'

    # Check user-specified information for errors; if none, proceed to list/download available ABI L2 files
    start, end = supporting_functions.check_times(shour.value, smin.value, ehour.value, emin.value)
    observation_date, today = supporting_functions.check_future(year.value, month.value, day.value)
    product_abbreviation = get_product_abbreviation(sector.value, product.value)

    # Notify user if entered observation end time is before start time
    if end < start:
        print('You entered an end time that is before the start time. Try again.')
    # Notify user if entered observation date is in the future
    elif observation_date > today:
        print('You entered a date that is in the future. Try again.')
    # Notify user if selected product is not generated for selected view sector
    # (get_product_abbreviation returns the string 'None' in that case)
    elif product_abbreviation == 'None':
        print('The selected product is not generated for the selected view sector. Try again.')
    # Notify user if errors in directory entered to save files
    elif save_path == 'error':
        if save_path_error_message == 1:
            print('The directory you entered to save files does not exist. Try again.')
        elif save_path_error_message == 2:
            print('You entered a directory to save files but the field is blank. Try again.')
        elif save_path_error_message == 3:
            # Fixed duplicated word ("the the") in the displayed message
            print('There is a syntax error in the directory name to save files. Try again.')
        else:
            # Previously an unrecognized error code produced no output at all
            print('The directory you entered to save files could not be used. Try again.')
    else:
        # List/download available ABI L2 data files
        get_abi_files(year.value, month.value, day.value, shour.value, smin.value, ehour.value, emin.value,
                      satellite.value, sector.value, product.value, save_path)