# EMIT L2A Reflectance – end‑to‑end example
This notebook follows the same **flow** as your original: setup → search → pick → download → open/flatten → mask → (optional) orthorectify → visualize → (optional) export/merge.

It uses the refactored helpers in the `EMIT_data` package (`EMIT_utils`, `geo`, `viz`, `emit_proj`) plus your original `emit_tools` (vendored in the same package).

## 1) Environment & imports

In [None]:
! pip install -r requirements.txt

In [None]:
# If you unzipped the package next to this notebook, ensure Python can import it.
# Adjust the path below if needed.
import sys, pathlib
p = pathlib.Path().resolve()
# Example: sys.path.append(str(p / "emitref_refactor_with_emit_tools"))
# If you've installed as a package (pip -e .), you can skip sys.path fiddling.

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

# interactive (optional)
import hvplot.xarray  # noqa: F401
import holoviews as hv
hv.extension('bokeh')

from datetime import datetime, timezone, timedelta

# Our helpers (search/download/open, geometry, visualization) from the EMIT_data package
from EMIT_data.EMIT_utils import (
login, search, choose_nearest, download_reflectance,
open_reflectance, attach_wavelengths
)
from EMIT_data.geo import get_pixel_center_coords, apply_glt, ortho_xr
from EMIT_data.viz import plot_band_nm, rgb_from_nm, hv_quicklook
# NOTE(review): `ortho_xr` is imported from both EMIT_data.geo (above) and
# EMIT_data.emit_tools (below). The later import rebinds the name, so the
# emit_tools version is the one in effect after this cell runs — confirm that
# is the intended one.
from EMIT_data.emit_tools import (
    quality_mask, band_mask,
    spatial_subset,
    ortho_xr 
)


In [None]:
# Search target. LON/LAT are later passed to `search` as point=(LON, LAT).
LAT = 50.3794
LON = 43.1656
DATE = "2024-06-04"  # centre date, ISO format YYYY-MM-DD
DAYS_BEFORE = 5
DAYS_AFTER = 5
SEARCH_BUFFER_M = 500_000  # passed to `search` as buffer_m (presumably metres — confirm)

# Derive the centre datetime from DATE so the two can never drift apart
# (previously the same date was hard-coded a second time here).
target_dt = datetime.strptime(DATE, "%Y-%m-%d").replace(tzinfo=timezone.utc)
start = target_dt - timedelta(days=DAYS_BEFORE)
end = target_dt + timedelta(days=DAYS_AFTER)

print(start, "→", end)


In [None]:
# This opens a browser/device-code flow on first run and caches your token (persist=True)
# `login` is the helper imported from EMIT_data.EMIT_utils in the imports cell.
login(persist=True)


## 4) Search EMIT L2A reflectance and pick nearest granule

In [None]:
# Search around (LON, LAT) with the full cloud-cover range (0.0, 100.0).
# NOTE(review): start/end are passed as None, so the `start`/`end` window computed
# earlier is NOT used here — presumably the search is then unbounded in time;
# confirm against `search`.
picks = search(point=(LON, LAT), buffer_m=SEARCH_BUFFER_M, start=None, end=None, cloud_cover = (0.0,100.0))

In [None]:
# Same search restricted to cloud_cover=(0.0, 0.01). This rebinds `picks`, replacing
# the result of the broader search in the previous cell. start/end are again None.
picks = search(point=(LON, LAT), buffer_m=SEARCH_BUFFER_M, start=None, end=None, cloud_cover = (0.0,0.01))

In [None]:
# Inspect the CloudCover entry in the first result's "umm" metadata.
picks[0]["umm"]["CloudCover"]

In [None]:
# List the top-level keys of the first result record.
picks[0].keys()

In [None]:
# Inspect the "Date" of the first ProviderDates entry in the first result's "umm" metadata.
picks[0]["umm"]["ProviderDates"][0]["Date"]

In [None]:
# Select one granule from `picks` relative to `target_dt` (presumably the one closest
# in time, per the helper's name — confirm in `choose_nearest`).
pick = choose_nearest(picks, target_dt)
if pick is None:
    # NOTE(review): the message mentions "the window", but the searches above were run
    # with start=None/end=None, so no date window was passed to `search`.
    raise SystemExit("No EMIT granules found in the window.")
pick

## 5) Download reflectance (NetCDF)

In [None]:
from pathlib import Path
# Destination folder handed to download_reflectance.
# NOTE: `out_dir` is rebound to the string "emit_out" by later cells; re-run this cell
# before re-running any download cell.
out_dir = Path("data/emit")
# assets=['_RFL_'] — presumably restricts the download to the reflectance file(s);
# confirm in download_reflectance.
paths = download_reflectance(pick, out_dir, assets=['_RFL_'])
paths

In [None]:
# Open the first downloaded file with the h5netcdf engine, then pass it through
# attach_wavelengths and display the result.
# NOTE(review): `ds` is not referenced again in the cells visible here; later cells
# work on `ds_emit`, which is opened separately with engine="netcdf4".
ds = open_reflectance(paths[0], engine='h5netcdf')      
ds = attach_wavelengths(ds)           
ds


In [None]:
# Second download for the same granule, this time with assets=['_MASK_'], into the same out_dir.
mask_paths = download_reflectance(pick, out_dir, assets=['_MASK_'])

In [None]:
from EMIT_data.emit_tools import emit_xarray

In [None]:
# Build masks from the first downloaded mask file.
# NOTE(review): `qmask` and `bmask` are computed here but never applied to `ds_emit`
# (or used anywhere else) in the cells visible in this notebook.
qmask = quality_mask(mask_paths[0], quality_bands=[0,1,2,3,4])
bmask = band_mask(mask_paths[0])  

# Re-open the reflectance file through emit_tools with ortho=True. Note the engine here
# is "netcdf4", whereas the earlier open_reflectance call used 'h5netcdf'.
ds_emit = emit_xarray(paths[0], ortho=True, engine="netcdf4")

In [None]:
# Shape of the 'wavelengths' variable/coordinate of the dataset opened above.
ds_emit['wavelengths'].values.shape

In [None]:
# `ds_vis` is just another name for `ds_emit` (no copy is made, and no mask is applied here).
ds_vis = ds_emit


In [None]:
# Use the "reflectance" data variable when present, otherwise fall back to the first
# data variable in the dataset; then show that variable's dimension names.
rfl_var = "reflectance" if "reflectance" in ds_vis.data_vars else list(ds_vis.data_vars)[0]
ds_vis[rfl_var].dims

In [None]:
from EMIT_data.viz import hv_quicklook_nearest_valid, emit_quicklook_matplotlib

In [None]:
import sys

import hvplot.xarray  # noqa: F401  (imported for its side effects, not used by name)
import jupyter_bokeh  # noqa: F401
import holoviews as hv
import panel as pn

# comms='colab' targets the Google Colab frontend, so only request it when this
# kernel is actually running in Colab; elsewhere let Panel pick its default comms.
if "google.colab" in sys.modules:
    pn.extension(comms='colab')
else:
    pn.extension()
hv.extension('bokeh')

In [None]:
# Matplotlib quicklook of the opened dataset, with white balancing off and gamma = 1.
emit_quicklook_matplotlib(ds_vis, white_balance=False, gamma = 1)


In [None]:
import os

In [None]:
# !pip install brain_pipe

In [None]:
# !pip uninstall -y hytools

In [None]:
# !pip install hy-tools-lite

In [None]:
!pip install "git+https://github.com/EnSpec/hytools.git"

In [None]:
from EMIT_data.emit_proj import nc_to_envi

In [None]:
# Inputs for the NetCDF → ENVI conversion.
# NOTE: these paths are hard-coded; they are not taken from the `paths` / `mask_paths`
# returned by the download cells, so they must be kept in sync by hand.
emit_rfl_nc = "data/emit/EMIT_L2A_RFL_001_20240529T123132_2415008_008.nc"      # or EMIT L1B radiance
# NOTE(review): this is an L2A MASK file, yet it is passed below as `obs_file` —
# confirm that nc_to_envi accepts a MASK product for that argument.
emit_obs_nc = "data/emit/EMIT_L2A_MASK_001_20240529T123132_2415008_008.nc"      # optional
s2_tif_path = "data/s2/S2A_37UGR_20230417_0_L2A_visual.tif"       # your Sentinel-2 GeoTIFF
# NOTE: rebinds `out_dir`, which the download cells earlier set to Path("data/emit").
out_dir      = "emit_out"             # or any writable folder
tmp_dir      = "emit_tmp"
# Create both folders up front; exist_ok=True makes re-running the cell harmless.
os.makedirs(out_dir, exist_ok=True)
os.makedirs(tmp_dir, exist_ok=True)

# Run: match CRS to S2, keep EMIT at 60 m (set match_res=True to copy S2 pixel size)
nc_to_envi(
    img_file=emit_rfl_nc,
    out_dir=out_dir,
    temp_dir=tmp_dir,
    obs_file=emit_obs_nc,     # or None
    export_loc=True,          # also produce *_LOC
    crid="000",
    s2_tif_path=s2_tif_path,  # copy CRS from S2
    match_res=False,          # True → resample to S2 pixel size
    write_xml=True            # write XML sidecars
)

In [None]:
import os, glob, re, ast
import numpy as np
import rasterio
import matplotlib.pyplot as plt

def _parse_wavelengths_from_tags(tags: dict):
    """
    ENVI stores wavelengths as a header key like:
      'wavelength': '{ 400.0, 401.0, ... }'
    This parses it into a list[float]. Returns None if absent.
    """
    w_txt = tags.get('wavelength') or tags.get('WAVELENGTH')
    if not w_txt:
        return None
    # normalize to Python list literal
    w_txt = w_txt.strip()
    if w_txt.startswith('{') and w_txt.endswith('}'):
        w_txt = '[' + w_txt[1:-1] + ']'
    # remove potential line breaks, duplicate spaces
    w_txt = re.sub(r'\s+', ' ', w_txt)
    try:
        vals = ast.literal_eval(w_txt)
        return [float(v) for v in vals]
    except Exception:
        return None

def _nearest_band_indices(wavelengths, targets_nm):
    """Return indices in `wavelengths` nearest to each nm in `targets_nm`."""
    wl = np.asarray(wavelengths, dtype=float)
    idxs, picked = [], []
    for t in targets_nm:
        i = int(np.argmin(np.abs(wl - float(t))))
        idxs.append(i)
        picked.append(wl[i])
    return idxs, picked

def _percentile_stretch(img, p_low=2, p_high=98):
    """Apply per-channel percentile stretch to 0..1."""
    img = img.astype(np.float32)
    out = np.zeros_like(img, dtype=np.float32)
    if img.ndim == 3:
        for c in range(3):
            ch = img[..., c]
            finite = np.isfinite(ch)
            if not np.any(finite):
                continue
            lo, hi = np.percentile(ch[finite], (p_low, p_high))
            if hi <= lo:
                continue
            out[..., c] = np.clip((ch - lo) / (hi - lo), 0, 1)
    else:
        finite = np.isfinite(img)
        if np.any(finite):
            lo, hi = np.percentile(img[finite], (p_low, p_high))
            if hi > lo:
                out = np.clip((img - lo) / (hi - lo), 0, 1)
    return out

def show_emit_rgb_from_envi(out_dir,
                            pattern="SISTER_EMIT_L2A_RFL_*_000.bin",
                            targets_nm=(665.0, 560.0, 492.0),
                            percentile=(2, 98),
                            gamma=1/2.2,
                            white_balance=True):
    """
    Display an RGB composite of an ENVI reflectance file found in `out_dir`.

    Parameters
    ----------
    out_dir : str
        Folder that is globbed for `pattern`.
    pattern : str
        Glob pattern of the ENVI binary; when several files match, the last
        one in sorted order is used.
    targets_nm : sequence of three floats
        Wavelengths for the red, green and blue channels; the nearest band is
        picked for each.
    percentile : (low, high)
        Percentiles forwarded to `_percentile_stretch`.
    gamma : float
        Exponent applied to the stretched, clipped RGB before display.
    white_balance : bool
        When True, scale each channel so its mean matches the mean of the
        three channel means.

    Raises
    ------
    FileNotFoundError
        If nothing in `out_dir` matches `pattern`.
    ValueError
        If neither file-level nor per-band tags provide any wavelength.
    """
    # Locate the input: last match in lexicographic order.
    candidates = sorted(glob.glob(os.path.join(out_dir, pattern)))
    if len(candidates) == 0:
        raise FileNotFoundError(f"No files matching {pattern} in {out_dir}")
    envi_path = candidates[-1]
    print(f"Reading: {os.path.basename(envi_path)}")

    with rasterio.open(envi_path) as src:
        # Wavelengths: file-level tags first, per-band tags as a fallback.
        band_wl = _parse_wavelengths_from_tags(src.tags())
        if band_wl is None:
            band_wl = []
            for band_no in range(1, src.count + 1):
                band_tags = src.tags(band_no)
                raw = band_tags.get('wavelength') or band_tags.get('WAVELENGTH')
                band_wl.append(np.nan if not raw else float(raw))
            if not np.isfinite(band_wl).any():
                raise ValueError("No wavelengths found in ENVI header tags.")

        band_idx, picked = _nearest_band_indices(band_wl, targets_nm)

        # Read R, G, B in that order; rasterio band numbers start at 1.
        channels = [src.read(band_idx[k] + 1).astype(np.float32) for k in range(3)]

        # Blank out nodata pixels so they are excluded from the statistics.
        fill = src.nodata
        if fill is not None:
            for ch in channels:
                ch[ch == fill] = np.nan

        # Clamp to the 0..1 range first (safety against outliers), then stretch.
        rgb = np.dstack([np.clip(ch, 0, 1) for ch in channels])
        rgb = _percentile_stretch(rgb, *percentile)

        if white_balance:
            channel_means = np.nanmean(rgb.reshape(-1, 3), axis=0)
            gains = np.nanmean(channel_means) / np.maximum(channel_means, 1e-6)
            rgb = np.clip(rgb * gains, 0, 1)

        rgb_disp = np.clip(rgb, 0, 1) ** gamma

        # Plot while the dataset is still open so its CRS can go in the title.
        plt.figure(figsize=(8, 8))
        plt.imshow(rgb_disp, origin="upper")
        epsg = src.crs.to_string() if src.crs else "unknown CRS"
        plt.title(f"EMIT True Color ~ {picked[0]:.0f}/{picked[1]:.0f}/{picked[2]:.0f} nm\n{epsg}")
        plt.axis("off")
        plt.show()




In [None]:
# ---- call it ----
# Uses the function's defaults: pattern "SISTER_EMIT_L2A_RFL_*_000.bin", RGB targets
# 665/560/492 nm, 2–98 percentile stretch, gamma 1/2.2, white balance on.
out_dir = "emit_out"  # <- change to your out_dir
show_emit_rgb_from_envi(out_dir)

In [None]:
# Same matplotlib quicklook call as earlier in the notebook (white balance off, gamma = 1),
# repeated here next to the ENVI-based RGB.
emit_quicklook_matplotlib(ds_vis, white_balance=False, gamma = 1)