In [1]:
import earthaccess
import ipywidgets as widgets
from ipywidgets import GridspecLayout
import pandas as pd
from datetime import datetime, timezone

from typing import Any, Dict, List, Optional, Type, Union

In [2]:
# Authenticate with NASA Earthdata Login; `persist=True` asks earthaccess to keep the credentials for later sessions.
# NOTE(review): a credential-like string used to sit in this comment. Never store secrets in a notebook;
# if it was a real password, rotate it.
earthaccess.login(persist=True)

<earthaccess.auth.Auth at 0x111c36640>

# TEMPO search - Datasets

The flow for working with this data seems to be as follows:
1. Use `search_datasets` to find relevant collections
2. Once you have one or more relevant collections, filter the granules within that collection (there may be thousands in each returned collection, you don't want all of them).
3. Use `search_data` with the `concept_id`s of the granules you want, to get their download links.

The tricky part here is step 2. The user needs to select the collection they want, and then you need to give them a way to filter its granules. It might make sense to use [tabs, accordion or stack](https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html#accordion) in `ipywidgets` to better organize this (probably accordion or stack).

Display a table of collections, and have each row have a "select" button. Clicking that opens up a new section to filter the data. This will then populate that section with basic info on the data (number of granules, total size, etc.), as well as allow for filtering. Filtering will show the new reduced number of granules. 

In addition, they should be able to see basic data on each returned granule (size, time frame, spatial extent, etc.). Then a button allows them to click it and get the data link and/or download it directly.



## Quick test search and results

In [3]:
# Look up every collection registered under the TEMPO project
tempo_datasets = earthaccess.search_datasets(project="TEMPO")

Datasets found: 16


In [4]:
# Fetch the granule records of the first collection returned above
granule_data = earthaccess.search_data(concept_id=tempo_datasets[0].concept_id())

Granules found: 2238


In [5]:
# Keep the first granule record around for closer inspection
granule = granule_data[0]

# Bare last expression so the notebook renders the record
granule

In [6]:
# Preview a few granule records instead of dumping the whole result list into the output
granule_data[:3]

[Collection: {'ShortName': 'TEMPO_NO2_L2', 'Version': 'V01'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 60.370410919, 'Longitude': -41.201129913}, {'Latitude': 57.238494873, 'Longitude': -47.382972717}, {'Latitude': 53.402107239, 'Longitude': -52.791793823}, {'Latitude': 48.647026062, 'Longitude': -57.590606689}, {'Latitude': 43.638454437, 'Longitude': -61.240318298}, {'Latitude': 37.941421509, 'Longitude': -64.284980774}, {'Latitude': 31.616851807, 'Longitude': -66.739814758}, {'Latitude': 24.743545532, 'Longitude': -68.645263672}, {'Latitude': 17.406463623, 'Longitude': -70.047592163}, {'Latitude': 17.530563354, 'Longitude': -64.069190979}, {'Latitude': 24.212823868, 'Longitude': -62.385253906}, {'Latitude': 30.410648346, 'Longitude': -60.163841248}, {'Latitude': 36.03717041, 'Longitude': -57.39541626}, {'Latitude': 41.058563232, 'Longitude': -54.065185547}, {'Latitude': 45.462844849, 'Longitude': -50.162239075}, {

### Examining results

In [5]:
# Field names available in the `umm` section of the first collection
list(tempo_datasets[0]['umm'].keys())

['DataLanguage',
 'CollectionCitations',
 'AdditionalAttributes',
 'SpatialExtent',
 'CollectionProgress',
 'StandardProduct',
 'ScienceKeywords',
 'TemporalExtents',
 'ProcessingLevel',
 'DOI',
 'ShortName',
 'EntryTitle',
 'DirectDistributionInformation',
 'AccessConstraints',
 'MetadataLanguage',
 'RelatedUrls',
 'DataDates',
 'Abstract',
 'Purpose',
 'LocationKeywords',
 'MetadataDates',
 'VersionDescription',
 'Version',
 'Projects',
 'UseConstraints',
 'ContactPersons',
 'DataCenters',
 'Platforms',
 'MetadataSpecification',
 'ArchiveAndDistributionInformation']

In [6]:
# Field names available in the `meta` section of the first collection
list(tempo_datasets[0]['meta'].keys())

['revision-id',
 'deleted',
 'format',
 'provider-id',
 'has-combine',
 'user-id',
 'has-formats',
 'associations',
 'has-spatial-subsetting',
 'native-id',
 'has-transforms',
 'association-details',
 'has-variables',
 'concept-id',
 'revision-date',
 'granule-count',
 'has-temporal-subsetting',
 'concept-type']

In [7]:
# Condensed summary dict for the first collection (the input to the summary table below)
tempo_datasets[0].summary()

{'short-name': 'TEMPO_NO2_L2',
 'entity-title': 'TEMPO NO2 tropospheric, stratospheric, and total columns (UNVALIDATED)',
 'concept-id': 'C2724057189-LARC_CLOUD',
 'version': 'V01',
 'file-type': 'NetCDF-4',
 'file-size': 750.0,
 'num-granules': 2238,
 'get-data': 'https://search.earthdata.nasa.gov/search/granules?p=C2724057189-LARC_CLOUD',
 'cloud-info': {'Region': 'us-west-2',
  'S3CredentialsAPIEndpoint': 'https://data.asdc.earthdata.nasa.gov/s3credentials',
  'S3CredentialsAPIDocumentationURL': 'https://data.asdc.earthdata.nasa.gov/s3credentialsREADME'}}

In [8]:
# Query the granules of the first collection and render the result directly (nothing is stored)
earthaccess.search_data(concept_id=tempo_datasets[0].concept_id())

Granules found: 2238


[Collection: {'ShortName': 'TEMPO_NO2_L2', 'Version': 'V01'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 60.370410919, 'Longitude': -41.201129913}, {'Latitude': 57.238494873, 'Longitude': -47.382972717}, {'Latitude': 53.402107239, 'Longitude': -52.791793823}, {'Latitude': 48.647026062, 'Longitude': -57.590606689}, {'Latitude': 43.638454437, 'Longitude': -61.240318298}, {'Latitude': 37.941421509, 'Longitude': -64.284980774}, {'Latitude': 31.616851807, 'Longitude': -66.739814758}, {'Latitude': 24.743545532, 'Longitude': -68.645263672}, {'Latitude': 17.406463623, 'Longitude': -70.047592163}, {'Latitude': 17.530563354, 'Longitude': -64.069190979}, {'Latitude': 24.212823868, 'Longitude': -62.385253906}, {'Latitude': 30.410648346, 'Longitude': -60.163841248}, {'Latitude': 36.03717041, 'Longitude': -57.39541626}, {'Latitude': 41.058563232, 'Longitude': -54.065185547}, {'Latitude': 45.462844849, 'Longitude': -50.162239075}, {

## Pretty summary results

A simple function that takes the list of search results and turns it into a nicely formatted table. Later I will a) move this into a more reusable form (perhaps its own class), and b) change this table into being displayed as an `ipywidget` with additional functionality, like buttons. For now, this section just formats the results.

In [5]:
# Build the HTML for one cell of the buttons column
def create_button_html(link):
    """
    Return the HTML for a "View Data" button that opens `link` in a new tab.

    Parameters
    ----------
    link : str
        URL passed to `window.open` when the button is clicked. It is inserted
        verbatim (no escaping).

    Returns
    -------
    str
        A `<button>` element as an HTML string.
    """
    on_click_js = "window.open('{}', '_blank')".format(link)
    return '<button onclick="' + on_click_js + '">View Data</button>'

def results_summary_table(results: List) -> None:
    """
    Display a summary table of search results.

    Parameters
    ----------
    results : List
        List of search results from `earthaccess.search_datasets` or `earthaccess.search_data`.
        Every item must provide a `summary()` method returning a dict with a
        'get-data' entry. An empty list renders an empty table.

    Returns
    -------
    None
        The table is rendered with `show`; nothing is returned.
    """
    # Most useful columns first; summary keys not listed here are left out of the table
    col_order = ['view-data', 'entity-title', 'short-name', 'file-size', 'file-type', 'num-granules', 'cloud-hosted']

    summary_rows = []
    for result in results:
        # Copy so the dict returned by `summary()` is never modified
        summary = dict(result.summary())

        # Change cloud hosting info to a bool. A missing or non-dict 'cloud-info'
        # counts as not cloud-hosted instead of raising.
        cloud_info = summary.pop('cloud-info', None)
        summary['cloud-hosted'] = isinstance(cloud_info, dict) and 'Region' in cloud_info

        # Convert the data link to a button
        summary['view-data'] = create_button_html(summary.pop('get-data'))

        summary_rows.append(summary)

    # `columns` selects and orders the columns, and keeps the headers even
    # when there are no rows at all
    summary_df = pd.DataFrame(summary_rows, columns=col_order)

    # NOTE(review): `show` is not imported in the notebook's import cell;
    # presumably it is `itables.show` - confirm and add the import there.
    show(summary_df)

In [6]:
# Render the summary table for the TEMPO collections found earlier
results_summary_table(tempo_datasets)

view-data,entity-title,short-name,file-size,file-type,num-granules,cloud-hosted
Loading ITables v2.0.1 from the internet... (need help?),,,,,,


## Pretty granules

Similar to above, but now it's showing the granules associated with a project. Down the line, the user will click a button to select the project they want, and it will open up a new accordion section for the granules. The project and accordion sections will be linked. But for right now, I'm just formatting and displaying the results.

In [8]:
# Top-level sections of a granule record
granule_data[0].keys()

dict_keys(['meta', 'umm', 'size'])

In [10]:
# `meta` section of the first granule (holds the concept-id used as dictionary key further down)
granule_data[0]['meta']

{'concept-type': 'granule',
 'concept-id': 'G2810637431-LARC_CLOUD',
 'revision-id': 2,
 'native-id': 'TEMPO_NO2_L2_V01_20230802T151249Z_S001G01.nc',
 'collection-concept-id': 'C2724057189-LARC_CLOUD',
 'provider-id': 'LARC_CLOUD',
 'format': 'application/vnd.nasa.cmr.umm+json',
 'revision-date': '2024-02-05T20:30:07.493Z'}

In [11]:
# `umm` section of the first granule (holds the temporal and spatial extents used further down)
granule_data[0]['umm']

{'TemporalExtent': {'RangeDateTime': {'BeginningDateTime': '2023-08-02T15:12:49+00:00',
   'EndingDateTime': '2023-08-02T15:19:01+00:00'}},
 'GranuleUR': 'TEMPO_NO2_L2_V01_20230802T151249Z_S001G01.nc',
 'SpatialExtent': {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 60.370410919,
         'Longitude': -41.201129913},
        {'Latitude': 57.238494873, 'Longitude': -47.382972717},
        {'Latitude': 53.402107239, 'Longitude': -52.791793823},
        {'Latitude': 48.647026062, 'Longitude': -57.590606689},
        {'Latitude': 43.638454437, 'Longitude': -61.240318298},
        {'Latitude': 37.941421509, 'Longitude': -64.284980774},
        {'Latitude': 31.616851807, 'Longitude': -66.739814758},
        {'Latitude': 24.743545532, 'Longitude': -68.645263672},
        {'Latitude': 17.406463623, 'Longitude': -70.047592163},
        {'Latitude': 17.530563354, 'Longitude': -64.069190979},
        {'Latitude': 24.212823868, 'Longitude': -62.385253

Extract spatial extent for all granules. Then I can plot the unique ones.

In [36]:
def unique_lists_of_lists_of_lists(list_of_lists_of_lists):
    """
    Remove duplicate entries from a list of lists of lists.

    Two entries are duplicates when they contain equal inner lists in the same
    order. The first occurrence of each entry is kept and the input order is
    preserved.

    Parameters
    ----------
    list_of_lists_of_lists : iterable
        Entries shaped like `[[a, b], [c, d], ...]`; the innermost items must
        be hashable.

    Returns
    -------
    list
        New list of lists of lists without duplicates; the input is not modified.
    """
    # Lists are not hashable, so each entry is keyed by a tuple of tuples.
    # `dict.fromkeys` drops repeats while keeping first-seen order, whereas
    # iterating a set would return the entries in arbitrary order.
    unique_tuples = dict.fromkeys(
        tuple(tuple(inner_list) for inner_list in outer_list)
        for outer_list in list_of_lists_of_lists
    )

    # Convert the unique tuples back to lists of lists
    return [[list(inner_tuple) for inner_tuple in outer_tuple] for outer_tuple in unique_tuples]

In [82]:
# Quick check: repeated entries collapse to one, while the same points in a
# different order count as a different entry
unique_lists_of_lists_of_lists(
    [
        [[1, 2], [3, 4]],
        [[3, 4], [1, 2]],
        [[1, 2], [3, 4]],
        [[3, 4], [1, 2]],
    ]
)

[[[1, 2], [3, 4]], [[3, 4], [1, 2]]]

In [103]:
# Map each granule's concept-id to its footprint as [longitude, latitude] pairs.
# Only the first GPolygon of every granule is read.
# A comprehension is used so that no loop variable is left behind in the notebook
# namespace (a plain `for granule in ...` loop would rebind the `granule` inspected earlier).
spatial_extents = {
    granule_record['meta']['concept-id']: [
        [point['Longitude'], point['Latitude']]
        for point in granule_record['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons'][0]['Boundary']['Points']
    ]
    for granule_record in granule_data
}

## ipyleaflet

The goal of this section is to see if I can use an `ipyleaflet` map to plot the spatial extent of each granule, and allow simple interactivity (print the granule id) when clicking on them.

In [117]:
from ipyleaflet import Map, GeoJSON
from shapely.geometry import shape, Point

# Create the base map that the granule footprints are added to below
m = Map(center=[42, -90], zoom=3)

# Callback registered on each footprint layer
def handle_click(event, feature, **kwargs):
    """
    Print the granule id stored on a clicked feature.

    Parameters
    ----------
    event : str
        Name of the event; anything other than "click" is ignored.
    feature : dict
        Feature whose `properties['index']` holds the granule id.
    **kwargs
        Any further keyword arguments passed by the caller; unused.
    """
    # Ignore everything that is not a click
    if event != "click":
        return

    print(feature['properties']['index'])

# Wrap one footprint in a GeoJSON Feature
def create_polygon_geojson(coords, granule_id):
    """
    Build a GeoJSON Polygon feature for a single granule footprint.

    Parameters
    ----------
    coords : list
        Coordinate pairs of the polygon's ring; stored as the only ring of the
        polygon, without copying or validation.
    granule_id : Any
        Identifier stored under the feature's `properties['index']`.

    Returns
    -------
    dict
        GeoJSON-style Feature dictionary.
    """
    geometry = dict(type="Polygon", coordinates=[coords])
    properties = dict(index=granule_id)
    return dict(type="Feature", geometry=geometry, properties=properties)

# Draw a ten-granule sample (entries 20-29 of `spatial_extents`)
for granule_id, coords in list(spatial_extents.items())[20:30]:
    polygon_geojson = create_polygon_geojson(coords, granule_id)

    # One GeoJSON layer per footprint, each wired to the click handler
    polygon_layer = GeoJSON(data=polygon_geojson)
    m.add_layer(polygon_layer)
    polygon_layer.on_click(handle_click)

# Display the map
m


Map(center=[42, -90], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_te…

## Temporal filtering

It looks like a lot of the granules scan the same locations, but presumably at different dates and times? So below I work on allowing filtering based on the date and time.

In [138]:
# Timestamp layout of the granule metadata, e.g. '2023-08-02T15:12:49+00:00'
str_format = "%Y-%m-%dT%H:%M:%S%z"

# Map each granule's concept-id to [beginning, ending] as timezone-aware datetimes.
# A comprehension is used so that no loop variable is left behind in the notebook
# namespace (a plain `for granule in ...` loop would rebind the `granule` inspected earlier).
temporal_extents = {
    granule_record['meta']['concept-id']: [
        datetime.strptime(granule_record['umm']['TemporalExtent']['RangeDateTime'][bound], str_format)
        for bound in ('BeginningDateTime', 'EndingDateTime')
    ]
    for granule_record in granule_data
}

In [143]:
# Peek at the first (concept-id, [beginning, ending]) entry
list(temporal_extents.items())[0]

('G2810637431-LARC_CLOUD',
 [datetime.datetime(2023, 8, 2, 15, 12, 49, tzinfo=datetime.timezone.utc),
  datetime.datetime(2023, 8, 2, 15, 19, 1, tzinfo=datetime.timezone.utc)])

In [157]:
def datetime_range_within(range1, start=None, finish=None):
    """
    Check whether a range lies entirely inside an optional window.

    Parameters
    ----------
    range1 : sequence
        Range to test; its first item is taken as the beginning and its last
        item as the end.
    start : optional
        Lower bound of the window (inclusive). `None` means no lower bound.
    finish : optional
        Upper bound of the window (inclusive). `None` means no upper bound.

    Returns
    -------
    bool
        True when the range does not begin before `start` and does not end
        after `finish`. With neither bound supplied the result is True.
    """
    range_begin = range1[0]
    range_end = range1[-1]

    # A lower bound was supplied and the range begins before it
    if start is not None and not range_begin >= start:
        return False

    # Without an upper bound there is nothing left that could be violated
    if finish is None:
        return True

    return range_end <= finish

In [156]:
# Create start and finish datetime pickers
start_datetime_picker = widgets.DatetimePicker(description='Start Datetime (UTC):')

finish_datetime_picker = widgets.DatetimePicker(description='Finish Datetime (UTC):')

# Create an output widget to display the result
output = widgets.Output()

# Function to update finish datetime picker minimum value based on start datetime picker value
def update_finish_min(change):
    """Set the finish picker's minimum to the currently selected start datetime."""
    finish_datetime_picker.min = start_datetime_picker.value

# Attach event handler to start datetime picker value change
start_datetime_picker.observe(update_finish_min, 'value')

# Function to handle button click event
def handle_button_click(button):
    """
    Count the granules whose temporal extent lies inside the selected window.

    The count is printed into `output`. Nothing is printed unless both a start
    and a finish datetime have been picked.
    """
    with output:
        output.clear_output()

        start_datetime_utc = None
        finish_datetime_utc = None

        # The pickers are labelled as UTC, so the picked wall-clock time is
        # stamped as UTC; `replace` swaps the tzinfo without converting the time.
        if start_datetime_picker.value is not None:
            start_datetime_utc = start_datetime_picker.value.replace(tzinfo=timezone.utc)
        if finish_datetime_picker.value is not None:
            finish_datetime_utc = finish_datetime_picker.value.replace(tzinfo=timezone.utc)

        if start_datetime_utc is not None and finish_datetime_utc is not None:
            # The bounds must be passed as two separate arguments; passing them
            # as one list would make `datetime_range_within` compare a datetime
            # against a list and raise TypeError.
            num_granules_within_range = sum(
                1
                for temporal_extent in temporal_extents.values()
                if datetime_range_within(temporal_extent, start_datetime_utc, finish_datetime_utc)
            )
            print(f"Number of granules within the specified temporal range: {num_granules_within_range}")


# Create a button widget
button = widgets.Button(description='Print Dates')

# Attach event handler to button click
button.on_click(handle_button_click)

# Create a VBox to hold the widgets
app = widgets.VBox([start_datetime_picker, finish_datetime_picker, button, output])

# Display the app
app

VBox(children=(DatetimePicker(value=None, description='Start Datetime (UTC):'), DatetimePicker(value=None, des…

## Search box
- Project name (initially the only option available)
- Date range (via a date picker)
- Location range (via an interactive map, ideally)
- Instrument (autofills after selecting a project and extracting the instruments)

In [12]:
class SearchWidget:
    """
    Small search form for `earthaccess.search_datasets`.

    The form has a project/campaign text box, start and end date pickers, an
    instrument dropdown, Search and Reset buttons, and an output area that
    shows messages and search results.

    Attributes
    ----------
    style : dict
        Widget style shared by all inputs.
    params : dict
        Values read from the inputs on the most recent search.
    results : list or None
        Results of the most recent search; None before the first search and
        after a reset.
    widgets : dict
        The input widgets and the output area, keyed by name.
    """

    def __init__(self):
        # Display the whole description, don't truncate it
        self.style = {'description_width': 'initial'}

        self.params = dict()
        self.results = None
        self.widgets = dict()

        self.widgets['project'] = self._project_widget()
        self.widgets['start_date'] = self._start_date_widget()
        self.widgets['end_date'] = self._end_date_widget()
        self.widgets['instrument'] = self._instrument_widget()
        self.widgets['output'] = self._output_widget()

        # Every widget that is read or written by the callbacks must be part of
        # the grid, otherwise it is never shown. In particular the output area in
        # the grid has to be the same object that `display_results` writes to.
        self._grid = GridspecLayout(5, 2)
        self._grid[0, 0] = self.widgets['project']
        self._grid[1, 0] = self.widgets['start_date']
        self._grid[1, 1] = self.widgets['end_date']
        self._grid[2, 0] = self.widgets['instrument']
        self._grid[3, 0] = self._search_button()
        self._grid[3, 1] = self._reset_button()
        self._grid[4, :] = self.widgets['output']

    def __repr__(self) -> str:
        return "Run in a Jupyter Notebook to see the widget."

    def _ipython_display_(self):
        # Display the widget
        display(self._grid)

    def _output_widget(self) -> widgets.Output:
        """Create the area that shows messages and search results."""
        output_widget = widgets.Output()
        return output_widget

    def _project_widget(self) -> widgets.Text:
        """Create the project/campaign text box."""
        project_widget = widgets.Text(
            description="Project/Campaign:",
            style=self.style,
            # Evaluated once, when the widget is created
            disabled=self.results is not None,
        )
        return project_widget

    def _start_date_widget(self) -> widgets.DatePicker:
        """Create the picker for the start of the date range."""
        start_date_widget = widgets.DatePicker(
            description="Start Date:",
            style=self.style,
        )
        return start_date_widget

    def _end_date_widget(self) -> widgets.DatePicker:
        """Create the picker for the end of the date range."""
        end_date_widget = widgets.DatePicker(
            description="End Date:",
            style=self.style,
        )
        return end_date_widget

    def _instrument_widget(self) -> widgets.Dropdown:
        """Create the instrument dropdown (starts without options)."""
        instrument_widget = widgets.Dropdown(
            options=[],
            description="Instrument:",
            style=self.style,
        )
        return instrument_widget

    def _search_button(self) -> widgets.Button:
        """Create the Search button and connect it to `on_search`."""
        search_widget = widgets.Button(
            description="Search",
            button_style="primary",
        )
        search_widget.on_click(self.on_search)
        return search_widget

    def _reset_button(self) -> widgets.Button:
        """Create the Reset button and connect it to `on_reset`."""
        reset_widget = widgets.Button(
            description="Reset",
            button_style="warning",
        )
        reset_widget.on_click(self.on_reset)
        return reset_widget

    def _search_kwargs(self) -> Optional[Dict[str, Any]]:
        """
        Translate `self.params` into keyword arguments for `search_datasets`.

        Only filters that are actually set are included, so no None or empty
        value is ever sent. Returns None when no project has been entered.
        """
        project = (self.params.get('project') or '').strip()
        if not project:
            return None

        search_kwargs: Dict[str, Any] = {'project': project}

        # The date range is passed as one `temporal=(start, end)` pair of ISO
        # date strings; an unset side stays None (open-ended).
        start_date = self.params.get('start_date')
        end_date = self.params.get('end_date')
        if start_date is not None or end_date is not None:
            search_kwargs['temporal'] = (
                start_date.isoformat() if start_date is not None else None,
                end_date.isoformat() if end_date is not None else None,
            )

        instrument = self.params.get('instrument')
        if instrument:
            search_kwargs['instrument'] = instrument

        return search_kwargs

    def on_search(self, button):
        """Read the inputs, run the dataset search and show the outcome in the output area."""
        # Assign parameter values
        self.params['project'] = self.widgets['project'].value
        self.params['start_date'] = self.widgets['start_date'].value
        self.params['end_date'] = self.widgets['end_date'].value
        self.params['instrument'] = self.widgets['instrument'].value

        # Start from a clean output area for every search
        output = self.widgets['output']
        output.clear_output(wait=True)

        # Everything printed (or raised) inside this block is shown in the output area
        with output:
            print(f"Project: {self.widgets['project'].value}")
            print(f"Start date: {self.widgets['start_date'].value}")
            print(f"End date: {self.widgets['end_date'].value}")
            print(f"Instrument: {self.widgets['instrument'].value}")

            search_kwargs = self._search_kwargs()
            if search_kwargs is None:
                print("Enter a project/campaign name before searching.")
                return

            start_date = self.params['start_date']
            end_date = self.params['end_date']
            if start_date is not None and end_date is not None and end_date < start_date:
                print("The end date must not be earlier than the start date.")
                return

            # Execute the search
            self.results = earthaccess.search_datasets(**search_kwargs)

            if len(self.results) == 0:
                print("No datasets matched the search.")
                return

        # Called after the block above has been left: `display_results` opens
        # the output area itself.
        self.display_results(self.results)

    def on_reset(self, button):
        """Clear the inputs, the stored parameters, the results and the output area."""
        # Reset widgets to empty values
        self.widgets['project'].value = ''
        self.widgets['start_date'].value = None
        self.widgets['end_date'].value = None
        self.widgets['instrument'].value = None

        # Reset parameters to empty
        self.params['project'] = None
        self.params['start_date'] = None
        self.params['end_date'] = None
        self.params['instrument'] = None

        # Forget the previous search and what it displayed
        self.results = None
        self.widgets['output'].clear_output()

        # Re-enable the project widget and set focus to it
        self.widgets['project'].disabled = False
        self.widgets['project'].focus()

    def display_results(self, results):
        """Render `results` in the output area."""
        with self.widgets['output']:
            display(results)

In [13]:
# Instantiate the search form (note: `grid` holds a SearchWidget, not the layout itself)
grid = SearchWidget()

# Bare last expression so the notebook renders the form
grid

GridspecLayout(children=(Text(value='', description='Project/Campaign:', layout=Layout(grid_area='widget001'),…