# PACE Spectra Extraction

**Description**: This notebook demonstrates how to extract PACE remote sensing reflectances at sites of interest via cloud access. 

In [57]:
# Load Modules
import earthaccess
import xarray as xr
from xarray.backends.api import open_datatree
import matplotlib.pyplot as plt
import cartopy
import numpy as np
import h5netcdf
from IPython.display import JSON
import geopandas as gpd
from shapely.geometry import Point, LineString
import re
import pandas as pd
import seaborn as sns
import cartopy.crs as ccrs
from pace_func import process_l2, subset_xr, target_spectra
import pandas as pd
import os
import glob

!pip install openpyxl



In [63]:
# Authentication
# Log in to NASA Earthdata once with an interactive prompt and keep the returned
# auth object. persist=True asks earthaccess to save the credentials so later
# sessions do not have to prompt again.
auth = earthaccess.login(strategy='interactive', persist=True)

## Search for PACE Data in Region of Interest

In [64]:
# PACE search criteria
# tspan is the (start, end) date pair passed as `temporal`.
tspan = ("2024-03-01", "2025-02-19")
# bbox is passed as `bounding_box`; presumably (west, south, east, north) in
# decimal degrees — confirm against the earthaccess documentation.
bbox = (-84.951187, 9.645435, -84.653903, 9.954900)

# Find every granule of the PACE_OCI_L2_AOP collection that matches the time
# span and bounding box, then open the matches for remote reading.
results = earthaccess.search_data(short_name="PACE_OCI_L2_AOP", temporal=tspan, bounding_box=bbox)
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/282 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/282 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/282 [00:00<?, ?it/s]

## Load In Situ Stations

In [65]:
# Read the in situ cast table; header=1 takes the column names from the
# second row of the sheet.
sites_df = pd.read_excel(io="/home/jovyan/avuelo-cr/data/PCOLOR_cast_info.xlsx", header=1)
sites_df.head()

Unnamed: 0,cast_number,latitude,longitude,timestamp
0,1,9.751767,-84.68565,Feb 17 2025 13:58:00
1,2,9.918883,-84.76025,Feb 17 2025 15:10:00
2,3,9.952317,-84.956583,Feb 17 2025 16:37:00
3,4,9.872467,-84.885083,Feb 17 2025 17:39:00
4,5,9.708533,-84.922283,Feb 17 2025 18:55:00


## Spectra Loop

In [None]:
# Extract spectra
# For each opened PACE granule, look up the spectrum returned by target_spectra
# at every in situ site and write one CSV per granule into output_dir.
# Granules whose CSV already exists are skipped, so re-running this cell
# resumes an interrupted extraction instead of starting over.
output_dir = "spectra_results"
os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists

for temp_path in paths:
    try:
        # Extract filename for saving
        match = re.search(r"(PACE_OCI\.[^/,]+\.nc)", temp_path.details['name'])
        if not match:
            print(f"Skipping: Could not extract filename from {temp_path}")
            continue

        filename = match.group(1)
        output_path = os.path.join(output_dir, f"{filename}.csv")

        # Skip if file already exists
        if os.path.exists(output_path):
            print(f"Skipping {filename}, already processed.")
            continue

        # Process the file if it hasn't been saved
        dataset = process_l2(temp_path)

        # Read the wavelength axis and release the handle straight away: the
        # context manager closes the dataset, so looping over hundreds of
        # granules does not accumulate open handles. `.values` materialises a
        # NumPy array, so `wl` stays valid after the dataset is closed.
        with xr.open_dataset(temp_path, group="sensor_band_parameters") as wl_dataset:
            wl = wl_dataset['wavelength_3d'].values

        # First run of eight digits in the filename (the YYYYMMDD part of the
        # granule timestamp, e.g. 20240305).
        date = re.search(r"\d{8}", filename).group()

        site_df_list = []
        for site_name, data in sites_df.iterrows():
            nearest_rrs = target_spectra(dataset, data['latitude'], data['longitude'])
            # NOTE(review): site_name is the sites_df row index yielded by
            # iterrows(), not the 'cast_number' column — confirm which
            # identifier downstream code expects in "cast".
            site_df = pd.DataFrame({
                "cast": site_name,
                "wl": wl,
                "date": date,
                "filename": filename,
                "rrs": nearest_rrs.values
            })
            site_df_list.append(site_df)

        # Save processed data as CSV
        combined_df = pd.concat(site_df_list)
        combined_df.to_csv(output_path, index=False)
        print(f"Saved: {output_path}")

    except Exception as e:
        # Best effort: report the failure and continue with the next granule
        # so a single bad file does not abort the whole run.
        print(f"Error processing {temp_path}: {e}")

Skipping PACE_OCI.20240305T180043.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240306T183549.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240307T191056.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240308T180742.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240309T184249.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240310T173935.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240310T191755.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240311T181444.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240316T175334.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240317T182840.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240318T190345.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240319T180030.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240320T183536.L2.OC_AOP.V3_0.nc, already processed.
Skipping PACE_OCI.20240321T191041.L2.OC_AOP.V3_0.nc, already pro