# PACE Spectra Extraction

**Description**: This notebook demonstrates how to extract PACE remote sensing reflectances at sites of interest via cloud access. 

In [1]:
# Load Modules
import earthaccess
import xarray as xr
from xarray.backends.api import open_datatree
import matplotlib.pyplot as plt
import cartopy
import numpy as np
import h5netcdf
from IPython.display import JSON
import geopandas as gpd
from shapely.geometry import Point, LineString
import re
import pandas as pd
import seaborn as sns
import cartopy.crs as ccrs
import pace_func
import pandas as pd
import os
import glob

In [2]:
# Authentification
auth = earthaccess.Auth()
auth.login(strategy="interactive", persist=True)
print('Authenticated:' + str(auth.authenticated))

Enter your Earthdata Login username:  m11keluis
Enter your Earthdata password:  ········
Enter your Earthdata Login username:  m11keluis
Enter your Earthdata password:  ········


# Search for PACE Data in Region of Interest

In [10]:
# PACE Search Criteria
tspan = ("2025-02-17", "2025-02-21")
bbox=(-84.951187, 9.645435, -84.653903, 9.954900)

# Search for PACE Imagery
results = earthaccess.search_data(
    short_name="PACE_OCI_L2_AOP_NRT",
    temporal=tspan,
    bounding_box=bbox,
)

# Open Results
paths = earthaccess.open(results)

[Collection: {'Version': '3.0', 'ShortName': 'PACE_OCI_L2_AOP_NRT'}
Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 14.12882, 'Longitude': -65.80718}, {'Latitude': 9.02838, 'Longitude': -89.92632}, {'Latitude': -14.10494, 'Longitude': -85.17632}, {'Latitude': -8.99002, 'Longitude': -60.96164}, {'Latitude': 14.12882, 'Longitude': -65.80718}]}}]}}}
Temporal coverage: {'RangeDateTime': {'EndingDateTime': '2025-02-17T18:04:22Z', 'BeginningDateTime': '2025-02-17T17:59:23Z'}}
Size(MB): 75.64469909667969
Data: ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20250217T175923.L2.OC_AOP.V3_0.NRT.nc'], Collection: {'ShortName': 'PACE_OCI_L2_AOP_NRT', 'Version': '3.0'}
Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 14.50219, 'Longitude': -74.73515}, {'Latitude': 9.39925, 'Longitude': -98.88442}, {'Latitude': -8.39359, 'Longitude': -95.0526}, {

AttributeError: 'NoneType' object has no attribute 'open'

## Load In Situ Stations

In [9]:
# Load In Situ Sites
df = pd.read_csv('../data/cast_info.csv').dropna()
df['timestamp'] = pd.to_datetime(df['timestamp'])

## Spectra Loop

In [1]:
# Extract Specta
output_dir = "spectra_results"
os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists

for temp_path in paths:
    try:
        # Extract filename for saving
        match = re.search(r"(PACE_OCI\.[^/,]+\.nc)", temp_path.details['name'])
        if not match:
            print(f"Skipping: Could not extract filename from {temp_path}")
            continue
        
        filename = match.group(1)
        output_path = os.path.join(output_dir, f"{filename}.csv")

        # **Skip if file already exists**
        if os.path.exists(output_path):
            print(f"Skipping {filename}, already processed.")
            continue

        # Process the file if it hasn't been saved
        dataset = process_l2(temp_path)
        wl_dataset = xr.open_dataset(temp_path, group="sensor_band_parameters")
        wl = wl_dataset['wavelength_3d'].values
        date = re.search(r"\d{8}", filename).group()

        site_df_list = []
        for site_name, data in sites_df.iterrows():
            nearest_rrs = target_spectra(dataset, data['latitude'], data['longitude'])
            site_df = pd.DataFrame({
                "cast": site_name,
                "wl": wl,
                "date": date,
                "filename": filename,
                "rrs": nearest_rrs.values
            })
            site_df_list.append(site_df)

        # Save processed data as CSV
        combined_df = pd.concat(site_df_list)
        combined_df.to_csv(output_path, index=False)
        print(f"Saved: {output_path}")

    except Exception as e:
        print(f"Error processing {temp_path}: {e}")

NameError: name 'os' is not defined