In [15]:
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime
import earthaccess
from tqdm import tqdm

# === Authenticate ===
auth = earthaccess.login()
# Explicit raise rather than `assert`: assertions are stripped when Python runs
# with -O, which would let the script continue without a valid login.
if not auth.authenticated:
    raise RuntimeError("Earthaccess login failed.")

# === Load simplified catch data ===
# The main loop below reads the TOWDATETIME_EST, LAT and LON columns of this table.
df = pd.read_csv("simplified_catch_summary.csv")

# === Helper: convert date to 8-day PACE period ===
def get_8day_range(date_str):
    dt = pd.to_datetime(date_str)
    doy = (dt - pd.Timestamp("2024-01-01")).days
    start_day = 8 * (doy // 8)
    start_date = pd.Timestamp("2024-01-01") + pd.Timedelta(days=start_day)
    end_date = start_date + pd.Timedelta(days=7)
    return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")

# === Caching logic ===
# One cache per product, filled by load_cached_dataset() and keyed by
# (short_name, start_date), so tows that fall in the same 8-day period reuse
# the dataset that was already opened.
rrs_cache = {}
avw_cache = {}

def load_cached_dataset(short_name, start_date, end_date, cache):
    """Open the first 8-day, 4 km granule of a product for a period, memoised in *cache*.

    Args:
        short_name: Earthdata collection short name to search.
        start_date: First day of the period; also part of the cache key.
        end_date: Last day of the period.
        cache: Dict mapping (short_name, start_date) to an opened dataset, or to
            None when a previous search for that key found no granules.

    Returns:
        The opened `xarray.Dataset`, or None when no granule matched or the
        granule could not be opened.
    """
    key = (short_name, start_date)
    if key in cache:
        return cache[key]

    results = earthaccess.search_data(
        short_name=short_name,
        temporal=(start_date, end_date),
        granule_name="*.8D.*.4km.*",
    )
    if not results:
        # Remember the miss so later tows in the same period do not repeat
        # the network search.
        cache[key] = None
        return None

    try:
        # Only the first granule is used, so only that one is opened.
        ds = xr.open_dataset(earthaccess.open(results[:1])[0])
    except Exception as e:
        # Open failures are not cached; a later call may retry.
        print(f"Failed to open dataset for {short_name} on {start_date}: {e}")
        return None

    cache[key] = ds
    return ds

# === Main loop ===
# For every tow, sample the 8-day Rrs spectrum and AVW averaged over a 5x5
# pixel box centred on the grid cell nearest to the tow position.
rrs_data = []
# Row labels of `df` matching each entry of `rrs_data`, so results can be
# joined back one-to-one even when several tows share a timestamp.
rrs_rows = []

for row_label, row in tqdm(df.iterrows(), total=len(df)):
    tow_date = row["TOWDATETIME_EST"]
    lat, lon = row["LAT"], row["LON"]

    # A NaN coordinate would make argmin() below silently return index 0,
    # and a missing date cannot be mapped to an 8-day period.
    if pd.isna(tow_date) or pd.isna(lat) or pd.isna(lon):
        print(f"Skipping {tow_date} due to missing date or position.")
        continue

    start_date, end_date = get_8day_range(tow_date)

    # === Load datasets with caching ===
    ds_rrs = load_cached_dataset("PACE_OCI_L3M_RRS", start_date, end_date, rrs_cache)
    ds_avw = load_cached_dataset("PACE_OCI_L3M_AVW", start_date, end_date, avw_cache)

    if ds_rrs is None or ds_avw is None:
        print(f"Skipping {tow_date} due to missing datasets.")
        continue

    try:
        # === Find nearest grid point ===
        lat_idx = np.abs(ds_rrs["lat"].values - lat).argmin()
        lon_idx = np.abs(ds_rrs["lon"].values - lon).argmin()

        # 5x5 window around the nearest pixel, clipped at the grid edges.
        lat_slice = slice(max(lat_idx - 2, 0), min(lat_idx + 3, ds_rrs.sizes["lat"]))
        lon_slice = slice(max(lon_idx - 2, 0), min(lon_idx + 3, ds_rrs.sizes["lon"]))

        # === Rrs: mean over 5x5 box ===
        # Index by dimension name so the result does not depend on the
        # order in which the file stores its dimensions.
        rrs_vals = (
            ds_rrs["Rrs"]
            .isel(lat=lat_slice, lon=lon_slice)
            .mean(dim=("lat", "lon"))
            .values
        )
        wavelengths = ds_rrs["wavelength"].values

        if np.all(np.isnan(rrs_vals)):
            print(f"All Rrs NaN for {tow_date}")
            continue

        # === AVW: mean over 5x5 box ===
        # NOTE(review): reuses indices computed on the Rrs grid; assumes the
        # AVW product shares that grid - confirm.
        avw_val = ds_avw["avw"].isel(lat=lat_slice, lon=lon_slice).mean().item()

        # === Store result ===
        result = {
            "TOWDATETIME_EST": tow_date,
            "AVW": avw_val,
        }

        for wl, val in zip(wavelengths, rrs_vals):
            result[f"Rrs_{int(wl)}"] = val

        rrs_data.append(result)
        rrs_rows.append(row_label)

    except Exception as e:
        # Best effort: one bad tow must not abort the whole run.
        print(f"Skipping {tow_date} at {lat:.2f}, {lon:.2f} due to error: {e}")

# === Merge with fisheries data and save ===
rrs_df = pd.DataFrame(rrs_data, index=rrs_rows)
if rrs_df.empty:
    # Nothing matched: keep the catch table as-is instead of failing on a
    # merge key that does not exist in an empty frame.
    print("No PACE matches found; writing catch data without Rrs/AVW columns.")
    merged = df.copy()
else:
    # Join on the row label, not the timestamp: tows sharing a timestamp would
    # otherwise be cross-joined and duplicated.
    merged = pd.merge(
        df,
        rrs_df.drop(columns="TOWDATETIME_EST", errors="ignore"),
        left_index=True,
        right_index=True,
        how="left",
    )
merged.to_csv("fisheries_with_pace_rrs_avw.csv", index=False)

print("✅ Done! Output saved to: fisheries_with_pace_rrs_avw.csv")

import pandas as pd
import numpy as np

# Load the merged file
df = pd.read_csv("fisheries_with_pace_rrs_avw.csv")

# --- Identify Rrs columns from 400 to 700 nm ---
# Only "Rrs_<integer>" columns are spectral bands. Checking the suffix keeps
# derived columns such as "Rrs_brightness" (written below, so present whenever
# this step is re-run) from crashing the int() conversion.
rrs_cols = [
    col for col in df.columns
    if col.startswith("Rrs_") and col[len("Rrs_"):].isdigit()
]

# Extract wavelengths and keep only those between 400 and 700 nm, sorted by wavelength
rrs_wavelengths = [int(col[len("Rrs_"):]) for col in rrs_cols]
rrs_filtered = sorted(
    (wl, col) for wl, col in zip(rrs_wavelengths, rrs_cols) if 400 <= wl <= 700
)

if rrs_filtered:
    wavelengths, rrs_ordered_cols = zip(*rrs_filtered)

    # Convert to NumPy array for vectorized integration
    rrs_values = df[list(rrs_ordered_cols)].values
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    _trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    rrs_brightness = _trapezoid(rrs_values, x=wavelengths, axis=1)
else:
    # No band in range (e.g. no tow matched any PACE data): keep the column
    # present but empty rather than failing on zip(*[]).
    wavelengths, rrs_ordered_cols = (), ()
    rrs_brightness = np.full(len(df), np.nan)
    print("No Rrs columns between 400 and 700 nm found; 'Rrs_brightness' set to NaN.")

# Add to DataFrame
df["Rrs_brightness"] = rrs_brightness

# Save updated CSV
df.to_csv("fisheries_with_pace_rrs_avw.csv", index=False)

print("✅ Added 'Rrs_brightness' and saved updated CSV.")




Unauthorized access. Please check your Earthdata credentials.


  0%|          | 0/368 [00:00<?, ?it/s]

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/368 [00:54<?, ?it/s]


TypeError: download_sst_data() missing 2 required positional arguments: 'earthdata_username' and 'earthdata_password'