In [8]:
import requests
from requests.auth import HTTPBasicAuth
from datetime import datetime, timedelta
import pandas as pd
import getpass

def search_omi_no2_granules(date, bbox, page_size=5):
    """
    Search NASA Earthdata CMR API for OMI NO2 granules.
    date: 'YYYY-MM-DD'
    bbox: [west, south, east, north] (e.g., [-125, 25, -65, 50] for CONUS)
    Returns: List of download URLs
    """
    cmr_url = "https://cmr.earthdata.nasa.gov/search/granules.json"
    params = {
        'short_name': 'OMNO2d',  # OMI/Aura NO2 Daily L3 Global Gridded 0.25x0.25 degree V003
        'version': '003',
        'temporal': f"{date}T00:00:00Z,{date}T23:59:59Z",
        'bounding_box': ','.join(map(str, bbox)),
        'page_size': page_size,
        'provider': 'GES_DISC'
    }
    response = requests.get(cmr_url, params=params)
    response.raise_for_status()
    results = response.json()['feed']['entry']
    urls = []
    for granule in results:
        for link in granule.get('links', []):
            if link.get('type', '').startswith('application/x-hdf') or link.get('type', '').startswith('application/octet-stream'):
                urls.append(link['href'])
    return urls

def download_granule(url, username, password, out_path):
    """Download a granule file with Earthdata authentication."""
    with requests.get(url, auth=HTTPBasicAuth(username, password), stream=True) as r:
        r.raise_for_status()
        with open(out_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"Downloaded: {out_path}")

# Prompt for credentials
username = input('Enter your NASA Earthdata username: ')
password = getpass.getpass('Enter your NASA Earthdata password: ')

# Example usage:
date = '2024-10-01'
# Example bounding box for Los Angeles area
bbox = [-119, 33, -117, 35]
urls = search_omi_no2_granules(date, bbox)
print('OMI NO2 granule URLs for', date, ':')
for url in urls:
    print(url)

# Download the first granule as an example (if available)
if urls:
    granule_url = urls[0]
    out_file = 'omi_no2_sample.h5'
    download_granule(granule_url, username, password, out_file)
else:
    print('No granule URLs found for this date/bbox.')

# Convert the URLs to a DataFrame and print it
df = pd.DataFrame({'granule_url': urls})
print('\nGranule DataFrame:')
print(df)

OMI NO2 granule URLs for 2024-10-01 :
https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMNO2d.003/2024/OMI-Aura_L3-OMNO2d_2024m1001_v003-2024m1110t123552.he5.ncml


HTTPError: 403 Client Error: 403 for url: https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMNO2d.003/2024/OMI-Aura_L3-OMNO2d_2024m1001_v003-2024m1110t123552.he5.ncml