### Plot combined variables against original data 
To run this Notebook follow instructions at https://github.com/mbari-org/auv-python.

Processed files must be available locally. Execute `uv run src/data/process_lrauv.py` with the `--no_cleanup` option to create them, e.g.:
```
src/data/process_lrauv.py -v --log_file pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4 --no_cleanup
```

In [None]:
import os
import sys
# Put ../src/data on the import path so local modules such as nc42netcdfs can be imported
module_path = os.path.abspath('../src/data')
if module_path not in sys.path:
    sys.path.append(module_path)
import xarray as xr
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
import ipywidgets as widgets
import pandas as pd
from pathlib import Path
import netCDF4 as nc4
import logging
from nc42netcdfs import BASE_LRAUV_PATH

# Notebook-wide logging: timestamped records at INFO level.
# Switch the level to logging.DEBUG for more detailed output.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Load the bokeh plotting extension used by the hvplot calls
hv.extension("bokeh")

# Get time coordinate for each variable by introspection
def get_time_coord(var):
    """Return the name of the time dimension of a variable.

    The first dimension whose name contains "time" (case-insensitive) wins;
    if no dimension name matches, the variable's first dimension is returned.

    Args:
        var: An object exposing its dimension names either as ``.dims``
            (xarray.DataArray) or as ``.dimensions`` (netCDF4.Variable).

    Returns:
        str: Name of the time coordinate/dimension.

    Raises:
        TypeError: If ``var`` has neither a ``dims`` nor a ``dimensions``
            attribute.
    """
    # xarray exposes .dims, netCDF4 exposes .dimensions; .dims takes precedence
    if hasattr(var, 'dims'):
        dim_names = var.dims
    elif hasattr(var, 'dimensions'):
        dim_names = var.dimensions
    else:
        raise TypeError(f"Unsupported variable type: {type(var)}")

    candidates = [name for name in dim_names if 'time' in name.lower()]
    if candidates:
        return candidates[0]
    # No dimension looks like a time axis: fall back to the leading dimension
    return dim_names[0]

# Dropdown of vehicle names: every entry under BASE_LRAUV_PATH except .DS_Store, sorted
auv_name = widgets.Dropdown(
    description='auv_name:',
    options=sorted(entry for entry in os.listdir(BASE_LRAUV_PATH) if entry != ".DS_Store"),
    disabled=False,
)
display(auv_name)


In [None]:
# List the .nc4 log files available for the vehicle selected in the auv_name dropdown.
# On-disk layout: {lrauv_name}/missionlogs/{year}/{date_range}/{mission_start}/{log_file}.nc4
# The [0-9] before the extension keeps only files whose stem ends in a digit.

lrauv_name = auv_name.value
log_glob = f"{lrauv_name}/missionlogs/*/*/*/*[0-9].nc4"
log_files = sorted(Path(BASE_LRAUV_PATH).glob(log_glob))

# Show paths relative to BASE_LRAUV_PATH in the picker
log_file_options = [str(found.relative_to(BASE_LRAUV_PATH)) for found in log_files]

log_file_picker = widgets.Select(
    description='Log File:',
    options=log_file_options,
    rows=15,
    disabled=False,
    layout=widgets.Layout(width='800px'),
)
display(log_file_picker)

In [None]:
# Locate the per-Group .nc files that sit next to the selected log file
log_file = log_file_picker.value
log_path = Path(BASE_LRAUV_PATH) / log_file
log_stem = log_path.stem
log_dir = log_path.parent

# File naming: {log_stem}_Group_{GroupName}.nc
group_files = sorted(log_dir.glob(f"{log_stem}_Group_*.nc"))

# Key each file by its group name with underscores removed and lowercased,
# the same transformation used for group_name_mapping further down
group_file_dict = {
    found.stem.split("_Group_")[1].replace("_", "").lower(): found
    for found in group_files
}

logger.debug(f"Found {len(group_file_dict)} extracted group files for {log_file}:")
for group_name, file_path in sorted(group_file_dict.items()):
    logger.debug(f"  {group_name} -> {file_path.name}")

# The combined file shares the log file's name with a _combined suffix
combined_file = log_file.replace('.nc4', '_combined.nc4')

# Open the log file with netCDF4 (gives access to every group) and with xarray
log_file_path = os.path.join(BASE_LRAUV_PATH, log_file)
log_nc = nc4.Dataset(log_file_path)
logger.debug("Log file: " + log_file_path)
log_ds = xr.open_dataset(log_file_path)

# The xarray view is only displayed when DEBUG logging is on
if logger.isEnabledFor(logging.DEBUG):
    logger.debug("Only root group (universals):")
    display(log_ds)

# Map converted group names (underscores removed, lowercased) back to the
# group names as they appear in the log file
group_name_mapping = {
    group.replace("_", "").lower(): group
    for group in log_nc.groups
}
logger.debug("Group name mapping (converted -> original) with extracted NetCDF3 file listed underneath.")
logger.debug("File is created for items put into the SCIENG_PARMS group in the nc42netcdfs.py script: ")
logger.debug("")
for converted, original in sorted(group_name_mapping.items()):
    logger.debug(f"{converted} -> {original}")
    extracted = group_file_dict.get(converted)
    if extracted is not None:
        logger.debug(f"File: {extracted}")
logger.debug("")

combined_file_path = os.path.join(BASE_LRAUV_PATH, combined_file)
combined_ds = xr.open_dataset(combined_file_path)

# Display the combined dataset only when DEBUG logging is on
if logger.isEnabledFor(logging.DEBUG):
    logger.debug("\nCombined file: " + combined_file_path)
    display(combined_ds)

def plot_nudged_position(variable_name):
    """Plot nudged_longitude or nudged_latitude against GPS fixes and universals.

    Shows the relationship between:
    - universals (dead-reckoned positions)
    - nal9602 GPS fixes
    - nudged (corrected positions)

    All data is read from the module-level ``combined_ds``. The GPS fix
    variable is optional: when it is absent from ``combined_ds`` a warning is
    logged and only the two line plots are overlaid.

    Args:
        variable_name: Either 'nudged_longitude' or 'nudged_latitude'

    Returns:
        Overlay plot of universals, nudged data and (when available) GPS fixes,
        with the legend placed at the top right
    """
    # 'nudged_longitude' -> 'longitude', 'nudged_latitude' -> 'latitude'
    coord_name = variable_name.split('_')[1]

    universals_var = f'universals_{coord_name}'
    gps_var = f'nal9602_{coord_name}_fix'

    logger.debug(f"Plotting nudged position: {variable_name}")
    logger.debug(f"Using: {universals_var}, {gps_var}, {variable_name}")

    universals_data = combined_ds[universals_var]
    nudged_data = combined_ds[variable_name]

    # Label the y axis as "name (units)" when the nudged variable declares units
    units = nudged_data.attrs.get('units', '')
    ylabel = f"{coord_name} ({units})" if units else coord_name

    # Options shared by every layer so the three plots cannot drift apart
    shared_opts = dict(width=900, height=400, ylabel=ylabel, title=log_file)

    universals_plot = universals_data.hvplot.line(
        x=get_time_coord(universals_data),
        label='Universals (Dead Reckoned)',
        color='#007BFF',  # Blue
        alpha=0.7,
        **shared_opts,
    )
    nudged_plot = nudged_data.hvplot.line(
        x=get_time_coord(nudged_data),
        label='Nudged',
        color='#DC3545',  # Red
        alpha=0.7,
        **shared_opts,
    )
    overlay = universals_plot * nudged_plot

    # GPS fixes go last so the scatter markers are drawn on top of the lines.
    # The layer is appended only when the variable exists, so no None sentinel
    # (and no truthiness test on a plot object) is needed.
    if gps_var in combined_ds:
        gps_data = combined_ds[gps_var]
        gps_plot = gps_data.hvplot.scatter(
            x=get_time_coord(gps_data),
            label='GPS Fixes',
            color='#FFC107',  # Yellow
            alpha=0.9,
            size=50,
            **shared_opts,
        )
        overlay = overlay * gps_plot
    else:
        logger.warning(f"GPS fix variable {gps_var} not found - skipping GPS plot")

    logger.debug("Position plots created successfully")

    # Set legend inside the plot area
    return overlay.opts(legend_position='top_right')

def plot_original_vs_combined(variable_name):
    """Overlay one variable as stored in the log file, the group file and the combined file.

    The combined variable name is split at its first underscore into a
    converted group name and a variable name. The group name is mapped back
    through ``group_name_mapping`` to find the group in ``log_nc``, and the
    variable is matched case-insensitively inside that group. The nudged
    longitude/latitude variables are handed to ``plot_nudged_position``.

    Args:
        variable_name: Name of the variable from combined dataset (format: {group}_{variable})

    Returns:
        Overlay plot of the original, group file and combined data
    """
    # Nudged positions are a special case with their own plot
    if variable_name in ('nudged_longitude', 'nudged_latitude'):
        return plot_nudged_position(variable_name)

    # {converted_group}_{original_variable}; without an underscore the whole
    # name serves as both the group and the variable
    converted_group, separator, remainder = variable_name.partition('_')
    original_var = remainder if separator else variable_name

    # Unknown converted names fall through unchanged
    original_group = group_name_mapping.get(converted_group, converted_group)

    logger.debug(f"Processing variable: {variable_name}")
    logger.debug(f"Converted group: {converted_group}, Original group: {original_group}")

    # "universals" is the root group, i.e. the Dataset itself rather than an entry in .groups
    group_ds = log_nc if original_group == "universals" else log_nc.groups[original_group]

    # Recover the variable's spelling in the log file via a case-insensitive match
    wanted = original_var.lower()
    for candidate in group_ds.variables:
        if candidate.lower() == wanted:
            original_var = candidate
            break
    logger.debug(f"Original variable: {original_var}")

    logger.debug(f"Plotting '{original_var}' from group '{original_group}' vs combined variable '{variable_name}'")

    source_var = group_ds.variables[original_var]
    combined_var = combined_ds[variable_name]

    original_time_coord = get_time_coord(source_var)
    combined_time_coord = get_time_coord(combined_var)
    logger.debug(f"Time coords - Original: {original_time_coord}, Combined: {combined_time_coord}")

    # Pull the raw arrays out of the netCDF4 variables
    original_time_data = group_ds.variables[original_time_coord][:]
    original_var_data = source_var[:]
    logger.debug(f"Data shape - Original: {original_var_data.shape}, time: {original_time_data.shape}")

    # Times are interpreted as seconds and converted to datetimes so the
    # series lines up with the datetime axes of the other two plots
    original_series = pd.Series(
        original_var_data,
        index=pd.to_datetime(original_time_data, unit='s'),
        name=f'{original_var} (Original)',
    )

    # The group file dictionary is keyed by the converted (lowercase) group name
    group_file_path = group_file_dict[converted_group]
    logger.debug(f"Reading from group file: {group_file_path}")
    group_file_ds = xr.open_dataset(group_file_path)
    group_file_var = group_file_ds[original_var]
    group_file_time_coord = get_time_coord(group_file_var)
    logger.debug(f"Group file time coordinate: {group_file_time_coord}")

    # getncattr raises AttributeError when the variable has no units attribute
    try:
        original_units = source_var.getncattr('units')
    except AttributeError:
        original_units = ''

    # Axis label in the form "name (units)", or just the name without units
    if original_units:
        ylabel = f"{original_var} ({original_units})"
    else:
        ylabel = original_var

    # Same size, labels and transparency for all three layers
    plot_opts = dict(width=900, height=400, ylabel=ylabel, title=log_file, alpha=0.7)

    # The pandas Series carries its own datetime index; the xarray variables
    # need their time coordinate named explicitly
    original_plot = original_series.hvplot.line(label='Original', color='#007BFF', **plot_opts)
    group_file_plot = group_file_var.hvplot.line(
        x=group_file_time_coord, label='Group File', color='#FFC107', **plot_opts
    )
    combined_plot = combined_var.hvplot.line(
        x=combined_time_coord, label='Combined', color='#DC3545', **plot_opts
    )

    logger.debug("Plots created successfully")

    # Overlay order sets the legend order: Original, Group File, Combined
    return original_plot * group_file_plot * combined_plot

# Multi-select list of every data variable in the combined dataset, sorted by name
variable_picker = widgets.SelectMultiple(
    description='Variables:',
    options=sorted(combined_ds.data_vars),
    rows=15,
    disabled=False,
    layout=widgets.Layout(width='800px'),
)
display(variable_picker)

In [None]:
# Build one overlay per selected variable and stack them in a single column with shared axes
plots = [plot_original_vs_combined(selected) for selected in variable_picker.value]
display(hv.Layout(plots).cols(1).opts(shared_axes=True))
