### Plot combined variables against original data 
To run this notebook, follow the instructions at https://github.com/mbari-org/auv-python.

In [None]:
import os
import sys
module_path = os.path.abspath(os.path.join('../src/data'));
if module_path not in sys.path:
    sys.path.append(module_path)
import xarray as xr
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
import ipywidgets as widgets
import pylab as plt
import pandas as pd
from pathlib import Path
import netCDF4 as nc4
import textwrap
from nc42netcdfs import BASE_LRAUV_PATH

# Activate the bokeh backend so hvplot renders interactive figures
hv.extension('bokeh')

# Dropdown for choosing the vehicle: one option per entry found directly under
# BASE_LRAUV_PATH, in sorted order, with the ".DS_Store" entry left out
vehicle_names = [
    entry
    for entry in sorted(os.listdir(BASE_LRAUV_PATH))
    if entry != ".DS_Store"
]
auv_name = widgets.Dropdown(options=vehicle_names, description='auv_name:', disabled=False)
display(auv_name)

In [None]:
# List every log file of the chosen vehicle in a scrollable selection box
# On-disk layout: {lrauv_name}/missionlogs/{year}/{date_range}/{mission_start}/{log_file}.nc4

lrauv_name = auv_name.value

# The glob only matches names whose last character before ".nc4" is a digit
log_glob = f"{lrauv_name}/missionlogs/*/*/*/*[0-9].nc4"
log_files = sorted(Path(BASE_LRAUV_PATH).glob(log_glob))

# Options are shown (and later read back) as paths relative to BASE_LRAUV_PATH
log_file_options = [str(path.relative_to(BASE_LRAUV_PATH)) for path in log_files]

picker_layout = widgets.Layout(width='800px')
log_file_picker = widgets.Select(
    options=log_file_options,
    description='Log File:',
    disabled=False,
    rows=15,
    layout=picker_layout,
)
display(log_file_picker)

In [None]:
# Locate the per-group .nc files that nc42netcdfs.py wrote next to the selected log file
log_file = log_file_picker.value
log_path = Path(BASE_LRAUV_PATH) / log_file
log_dir, log_stem = log_path.parent, log_path.stem

# Extracted files are named {log_stem}_Group_{GroupName}.nc
group_files = sorted(log_dir.glob(f"{log_stem}_Group_*.nc"))

# Key each extracted file by the GroupName portion of its file name
group_file_dict = {
    extracted.stem.split("_Group_")[1]: extracted
    for extracted in group_files
}

print(f"Found {len(group_file_dict)} extracted group files for {log_file}:")
for name in sorted(group_file_dict):
    print(f"  {name} -> {group_file_dict[name].name}")


In [None]:
# Open the selected log file and its "_combined" counterpart for comparison
log_file = log_file_picker.value
combined_file = log_file.replace('.nc4', '_combined.nc4')
log_file_path = os.path.join(BASE_LRAUV_PATH, log_file)

# The netCDF4 handle exposes every group of the log file; it stays open because
# later cells read variables from log_nc.groups
log_nc = nc4.Dataset(log_file_path)
print(f"Log file: {log_file_path}")
groups_str = ', '.join(log_nc.groups)
# Optional debug output:
# print(f"Groups in log file:\n{textwrap.fill(groups_str, width=80)}\n")

# Opened without a group argument, so this holds only the root group (universals)
log_ds = xr.open_dataset(log_file_path)
# Optional debug output:
# print("Only root group (universals):")
# display(log_ds)

# Combined variable names are prefixed with a converted group name. Per the
# conversion in combine.py that is the group name with underscores stripped and
# lowercased, so build the reverse lookup: converted name -> original group name
group_name_mapping = {
    name.replace("_", "").lower(): name for name in log_nc.groups
}
print("Group name mapping (converted -> original) with extracted NetCDF3 file listed underneath.")
print("File is created for items put into the SCIENG_PARMS group in the nc42netcdfs.py script: ")
print()
for converted_name in sorted(group_name_mapping):
    original_name = group_name_mapping[converted_name]
    print(f"{converted_name} -> {original_name}")
    # Only groups that were extracted to their own file have an entry here
    if original_name in group_file_dict:
        print(f"File: {group_file_dict[original_name]}")
print()


combined_file_path = os.path.join(BASE_LRAUV_PATH, combined_file)
combined_ds = xr.open_dataset(combined_file_path)
# Optional debug output:
# print("\nCombined file: " + os.path.join(BASE_LRAUV_PATH, combined_file))
# display(combined_ds)

In [None]:
# Dropdown listing every data variable of the combined dataset; the plotting
# cells read the chosen name from variable_picker.value
combined_var_names = list(combined_ds.data_vars)
variable_picker = widgets.Dropdown(options=combined_var_names, description='Variable:', disabled=False)
display(variable_picker)

In [None]:
# Get time coordinate for each variable by introspection
def get_time_coord(var):
    """Return the name of the dimension that acts as the time axis of *var*.

    The dimension names are read from ``var.dims`` (xarray.DataArray) or, when
    that attribute is absent, from ``var.dimensions`` (netCDF4.Variable). The
    first name containing "time" (case-insensitive) wins; if no name matches,
    the first dimension is returned instead.

    Args:
        var: Either an xarray.DataArray or a netCDF4.Variable

    Returns:
        str: Name of the time coordinate/dimension

    Raises:
        TypeError: If *var* has neither a ``dims`` nor a ``dimensions`` attribute
    """
    # Probe the xarray attribute first, then the netCDF4 one
    for attr_name in ('dims', 'dimensions'):
        if hasattr(var, attr_name):
            dim_names = getattr(var, attr_name)
            break
    else:
        raise TypeError(f"Unsupported variable type: {type(var)}")

    time_like = [name for name in dim_names if 'time' in name.lower()]
    if time_like:
        return time_like[0]
    # No dimension mentions "time": fall back to the leading dimension
    return dim_names[0]


In [None]:
# Make a time series plot of the selected variable from both original group and combined file
variable_name = variable_picker.value

# Parse variable name to extract converted group name and original variable name
# Format: {converted_group}_{original_variable}
# Converted group names contain no underscores, so the first underscore is the separator
parts = variable_name.split('_', 1)
converted_group = parts[0]
original_var = parts[1] if len(parts) > 1 else variable_name

# Map back to original group name (falls back to the converted name when unmapped)
original_group = group_name_mapping.get(converted_group, converted_group)

print(f"Variable: {variable_name}")
print(f"Converted group: {converted_group}")
print(f"Original group: {original_group}")

# Not every combined variable has to originate from a log-file group; explain the
# problem instead of failing with a bare KeyError on the group name
if original_group not in log_nc.groups:
    raise KeyError(
        f"Combined variable '{variable_name}' does not map to a group in the log file: "
        f"no group named '{original_group}'. Available groups: "
        f"{', '.join(sorted(log_nc.groups))}"
    )

# Get the original group dataset from log_nc
group_ds = log_nc.groups[original_group]

# Find the original variable from the group in log_file ignoring case
original_var_lower = original_var.lower()
matching_vars = [var for var in group_ds.variables if var.lower() == original_var_lower]
if not matching_vars:
    # Without a match the lookups below could only fail; say why up front
    raise KeyError(
        f"No variable matching '{original_var}' (case-insensitive) in group "
        f"'{original_group}'. Available variables: {', '.join(sorted(group_ds.variables))}"
    )
original_var = matching_vars[0]
print(f"Original variable: {original_var}")

print(f"Plotting variable '{original_var}' from original group '{original_group}' and combined variable '{variable_name}'.")

# Get time coordinates
original_time_coord = get_time_coord(group_ds.variables[original_var])
combined_time_coord = get_time_coord(combined_ds[variable_name])
print(f"Original time coord: {original_time_coord}, Combined time coord: {combined_time_coord}")

# Extract data from netCDF4 as arrays
original_time_data = group_ds.variables[original_time_coord][:]
original_var_data = group_ds.variables[original_var][:]

# The original time values are interpreted as seconds since the Unix epoch and
# converted to datetimes so both curves share a datetime x-axis
original_time_datetime = pd.to_datetime(original_time_data, unit='s')
original_series = pd.Series(original_var_data, index=original_time_datetime, name=f'{original_var} (Original)')

# Create time series plots with modern colors
original_plot = original_series.hvplot.line(label='Original', color='#2E86AB', width=900, height=400)
combined_plot = combined_ds[variable_name].hvplot.line(x=combined_time_coord, label='Combined', color='#E63946', width=900, height=400)

# Overlay the plots on the same axes (must stay the last expression so the cell displays it)
original_plot * combined_plot


In [None]:
# Second dropdown over the same combined data variables; its choice is shown in
# the lower panel of the stacked plots
second_var_names = list(combined_ds.data_vars)
variable_picker2 = widgets.Dropdown(options=second_var_names, description='Variable:', disabled=False)
display(variable_picker2)

In [None]:
# Show the two picked combined variables as vertically stacked line plots
variable_name = variable_picker.value
variable_name2 = variable_picker2.value

first_var = combined_ds[variable_name]
second_var = combined_ds[variable_name2]

# Each variable may sit on its own time dimension, so look both up
time_coord1 = get_time_coord(first_var)
time_coord2 = get_time_coord(second_var)

print(f"{variable_name} uses time coordinate: {time_coord1}")
print(f"{variable_name2} uses time coordinate: {time_coord2}")

# Both panels use the same size so they line up when stacked
panel_size = dict(width=900, height=300)

# Upper panel
plot1 = first_var.hvplot.line(x=time_coord1, title=f'{variable_name}', **panel_size)

# Lower panel
plot2 = second_var.hvplot.line(x=time_coord2, title=f'{variable_name2}', **panel_size)

# One-column layout with shared axes requested for synchronized zoom/pan.
# NOTE(review): axis sharing presumably keys on dimension names, so panels whose
# time coordinates are named differently may not pan together - confirm.
stacked = (plot1 + plot2).cols(1)
stacked.opts(shared_axes=True)
