## First program: Create empty CSV files tracking profile status

In [None]:
"""
Generate CSV file tracking CTD temperature profile status for OOI RCA Slope Base shallow profiler.
Creates rca_sb_ctd_temp_profile_status.csv with daily profile availability (2014-2025).
"""

import csv
import datetime
from pathlib import Path

def is_leap_year(year):
    """Return True when ``year`` is a leap year under the Gregorian rules."""
    # Multiples of 400 are always leap years; other century years never are.
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0

def get_days_in_year(year):
    """Return the number of days in ``year``: 366 for a leap year, otherwise 365."""
    if is_leap_year(year):
        return 366
    return 365

def julian_to_date(year, julian_day):
    """Return day-of-year ``julian_day`` of ``year`` as an upper-case DD-MON-YYYY string."""
    # Day 1 is 1 January, so the offset from New Year's Day is julian_day - 1.
    new_years_day = datetime.date(year, 1, 1)
    offset = datetime.timedelta(days=julian_day - 1)
    return (new_years_day + offset).strftime("%d-%b-%Y").upper()

def generate_profile_status_csv(output_file="rca_sb_ctd_temp_profile_status.csv",
                                start_year=2014, end_year=2025):
    """Write a zero-filled daily profile-status CSV and print a short summary.

    One row is written for every day of every year from ``start_year``
    through ``end_year`` (both inclusive). Each row holds the year, the day
    of year, the date as DD-MON-YYYY, nine per-profile columns ('1'..'9'),
    and 'Total', 'Noon' and 'Midnight' columns. All counts are written as 0;
    the file is a template meant to be populated from real profile data.

    Args:
        output_file: Destination path (str or Path). Defaults to the
            original fixed file name in the current working directory.
        start_year: First year to include.
        end_year: Last year to include.

    Returns:
        None. The CSV is written to ``output_file`` and diagnostics are printed.
    """
    output_file = Path(output_file)

    # Column headers
    headers = ['year', 'julian_day', 'date', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Total', 'Noon', 'Midnight']

    total_days = 0
    total_profiles = 0

    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)

        for year in range(start_year, end_year + 1):
            for julian_day in range(1, get_days_in_year(year) + 1):
                date_str = julian_to_date(year, julian_day)

                # Profile columns 1-9 start at 0; real data fills them in later.
                profiles = [0] * 9
                total_profiles_day = sum(profiles)

                # Placeholders: the noon/midnight profile indices depend on
                # actual profile timing, which is not known at this stage.
                noon_profile = 0
                midnight_profile = 0

                writer.writerow(
                    [year, julian_day, date_str]
                    + profiles
                    + [total_profiles_day, noon_profile, midnight_profile]
                )

                total_days += 1
                total_profiles += total_profiles_day

    # An empty year range (start_year > end_year) writes only the header;
    # avoid dividing by zero when reporting the mean in that case.
    mean_profiles = total_profiles / total_days if total_days else 0.0

    # Print diagnostics
    print(f"Generated {output_file}")
    print(f"Total days: {total_days}")
    print(f"Date range: {start_year} - {end_year}")
    print(f"Years covered: {end_year - start_year + 1}")
    print(f"Current mean profiles per day: {mean_profiles:.2f}")
    print("Expected profiles per day when populated: 9")
    print("File ready for population with actual profile data")

# Generate the CSV only when this code is run directly, not when it is imported.
if __name__ == "__main__":
    generate_profile_status_csv()

## Update the profile status program, write extracted profile files, create a timeline file 

In [None]:
"""
Extract individual temperature profiles from CTD NetCDF files to redux files.
"""

import pandas as pd
import xarray as xr
from pathlib import Path

def analyze_source_file(netcdf_file):
    """Open a CTD NetCDF file, print its time coverage, and return the dataset.

    The dataset is re-dimensioned from ``obs`` to ``time``. The first and
    last time values are taken as the start and end of the file, and the
    profile estimate assumes 9 profiles per covered day.

    Returns:
        Tuple ``(ds, start_time, end_time)``: the time-dimensioned dataset
        and the two bounding timestamps.
    """
    ds = xr.open_dataset(netcdf_file).swap_dims({'obs': 'time'})

    time_values = ds.time.values
    start_time = pd.to_datetime(time_values[0])
    end_time = pd.to_datetime(time_values[-1])

    # Count both end days, hence the +1.
    time_range_days = (end_time - start_time).days + 1
    estimated_profiles = time_range_days * 9

    report_lines = (
        "=== SOURCE FILE ANALYSIS ===",
        f"File: {netcdf_file}",
        f"Start time: {start_time}",
        f"End time: {end_time}",
        f"Time range: {time_range_days} days",
        f"Estimated profiles (9/day): {estimated_profiles}",
        "================================\n",
    )
    for line in report_lines:
        print(line)

    return ds, start_time, end_time

def load_profile_indices(year):
    """Return the profile-index table for ``year`` as a DataFrame, or None if its CSV is missing."""
    index_csv = Path(f"~/profileIndices/RS01SBPS_profiles_{year}.csv").expanduser()
    return pd.read_csv(index_csv) if index_csv.exists() else None

def extract_profiles(ds, start_time, end_time, output_dir):
    """Write one temperature NetCDF file per indexed profile found in ``ds``.

    For every year touched by ``start_time``..``end_time`` the profile index
    table is loaded, and each row's start-to-peak time window is cut out of
    ``ds``. Windows with no samples, or without ``sea_water_temperature``,
    are skipped. Every index row counts as an attempt, whether or not it
    yields a file.

    Returns:
        Tuple ``(attempted, successful)`` of profile counts.
    """
    n_attempted = 0
    n_written = 0

    for year in range(start_time.year, end_time.year + 1):
        index_table = load_profile_indices(year)
        if index_table is None:
            print(f"No profile indices for {year}")
            continue

        # Running count of profiles seen per calendar day (by profile start date).
        seen_per_day = {}

        for _, entry in index_table.iterrows():
            n_attempted += 1

            profile_index = entry['profile']
            start_str = entry['start']
            peak_str = entry['peak']

            window_start = pd.to_datetime(start_str)
            window_peak = pd.to_datetime(peak_str)

            # Position of this profile within its day, counted from 1.
            day = window_start.date()
            daily_sequence = seen_per_day.get(day, 0) + 1
            seen_per_day[day] = daily_sequence

            try:
                window = ds.sel(time=slice(window_start, window_peak))

                # Nothing to write if the window is empty or lacks temperature.
                if len(window.time) == 0 or 'sea_water_temperature' not in window.data_vars:
                    continue

                # Store the temperature under the shorter name 'temperature'.
                temp_ds = xr.Dataset({'temperature': window['sea_water_temperature']})

                # Carry the depth coordinate along when the source has one.
                if 'depth' in window.coords:
                    temp_ds = temp_ds.assign_coords(depth=window['depth'])

                # File name pattern: AAA_SSS_TTT_BBB_YYYY_DDD_PPPP_Q_VVVV.nc
                julian_day = window_start.timetuple().tm_yday
                filename = f"RCA_OSB_Profiler_Temp_{year}_{julian_day:03d}_{profile_index}_{daily_sequence}_V1.nc"

                temp_ds.to_netcdf(output_dir / filename)
                n_written += 1

                if n_written % 50 == 0:
                    print(f"Extracted {n_written} profiles...")

            except Exception as e:
                # Best effort: report the failing profile and move on to the next.
                print(f"Error processing profile {profile_index}: {e}")

    return n_attempted, n_written

def main():
    """Extract profiles from one fixed CTD source file into ``~/redux`` and print a summary."""

    redux_dir = Path("~/redux").expanduser()
    redux_dir.mkdir(exist_ok=True)

    ctd_file = Path("~/ooidata/rca/sb/scalar/2015_2025_ctd/deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180208T000000.840174-20180226T115959.391002.nc").expanduser()

    # Nothing to do without the source file.
    if not ctd_file.exists():
        print(f"CTD file not found: {ctd_file}")
        return

    # Report the file's time coverage, then cut it into per-profile files.
    ds, start_time, end_time = analyze_source_file(ctd_file)
    attempted, successful = extract_profiles(ds, start_time, end_time, redux_dir)

    print("\n=== EXTRACTION COMPLETE ===")
    print(f"Profiles attempted: {attempted}")
    print(f"Profiles successfully extracted: {successful}")
    if attempted > 0:
        print(f"Success rate: {successful/attempted*100:.1f}%")
    else:
        print("No profiles attempted")
    print(f"Redux files written to: {redux_dir}")

# Run the extraction only when this code is run directly, not when it is imported.
if __name__ == "__main__":
    main()


In [None]:
import xarray as xr

# Open one extracted redux profile file for inspection.
ds = xr.open_dataset('~/redux/RCA_OSB_Profiler_Temp_2018_048_5440_9_V1.nc')

In [None]:
# Bare expression: show the dataset as the cell's output.
ds

In [None]:
"""
Plot temperature profiles with temperature on x-axis and depth on y-axis.
"""

import matplotlib.pyplot as plt
import xarray as xr
from pathlib import Path
import sys

    
# Name the profile file once so the data and the plot title come from the same path.
profile_path = '~/redux/RCA_OSB_Profiler_Temp_2018_048_5440_9_V1.nc'
ds = xr.open_dataset(profile_path)

# Pull the two plotted quantities out as plain arrays.
temperature = ds['temperature'].values
depth = ds['depth'].values

# Temperature along x, depth along y.
fig, ax = plt.subplots(figsize=(8, 10))
ax.plot(temperature, depth, 'b-', linewidth=2, marker='o', markersize=2)

ax.set_xlabel('Temperature (°C)', fontsize=12)
ax.set_ylabel('Depth (m)', fontsize=12)
ax.set_ylim(200, 0)  # inverted axis: 200 m at the bottom, surface at the top
ax.grid(True, alpha=0.3)

# Title the plot with the file name minus its extension.
profile_name = Path(profile_path).stem
ax.set_title(f'Temperature Profile: {profile_name}', fontsize=14)

fig.tight_layout()
plt.show()


## Generate Temperature Mixed Layer Depth estimates: Interactive 


This code does not run in a Jupyter notebook, apparently because of how mouse events are handled there.
It does run in IDLE or from the PowerShell command line.
The file is called `tmld_selector.py`.
The output file is `tmld_estimates.csv`.
It lives in the home directory of the `argosy` repository.
Eventually it will be renamed `MLDSelector.py`, for Mixed Layer Depth Selector.


However, there is a major **bug** in the code: the bundle plotter gets the profile index
wrong, so the MLD marker shows up in the wrong place.

In [None]:
# This code does not run in IPython (incongruous, no?)

## Bundle plotter: includes TMLD estimates


If the TMLD CSV file exists and the "Show TMLD" toggle is on, the bottom of the mixed layer is marked on each profile with a red dot.

In [13]:
import matplotlib.pyplot as plt
import xarray as xr
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import pandas as pd

# Load TMLD data; without the CSV the plotter still works, just without markers.
try:
    tmld_df = pd.read_csv('tmld_estimates.csv')
except FileNotFoundError:
    print("TMLD file not found, TMLD markers will be disabled")
    tmld_df = pd.DataFrame()


def profile_sort_key(path):
    """Return a sort key that orders redux files by their numeric name fields.

    Redux file names have the form
    ``RCA_OSB_Profiler_Temp_<year>_<doy>_<profile>_<sequence>_V1.nc``.
    The profile and sequence fields are not zero-padded, so a plain string
    sort puts ``..._10000_...`` before ``..._9999_...``. Comparing the four
    fields as integers avoids that. Files whose names do not fit the pattern
    sort after all well-formed ones, by name.
    """
    fields = path.stem.split('_')
    try:
        numeric = tuple(int(fields[k]) for k in (4, 5, 6, 7))
    except (IndexError, ValueError):
        return (1, (), path.name)
    return (0, numeric, path.name)


# Get list of redux files in chronological (numeric) order.
# NOTE(review): anything that refers to profiles by their position in this
# list must be built with the same ordering; confirm for external index files.
redux_dir = Path("~/redux").expanduser()
profile_files = sorted(redux_dir.glob("*.nc"), key=profile_sort_key)
total_profiles = len(profile_files)

def extract_profile_info(filename):
    """Return ``(year, doy, profile_num)`` parsed from a redux file name.

    The stem is split on underscores; fields 4, 5 and 7 are converted to
    int. In the names produced by the sharding code in this notebook
    (``..._<year>_<doy>_<profile>_<sequence>_V1``) field 7 is the
    within-day sequence number, so that is what ``profile_num`` holds.
    """
    fields = filename.stem.split('_')
    year, doy, profile_num = (int(fields[k]) for k in (4, 5, 7))
    return year, doy, profile_num

def _show_placeholder(message):
    """Show an otherwise empty figure with ``message`` centred in it."""
    plt.figure(figsize=(10, 8))
    plt.text(0.5, 0.5, message, ha='center', va='center', transform=plt.gca().transAxes)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()


def plot_bundle_with_tmld_toggle(nProfiles, index0, show_tmld):
    """Plot a bundle of consecutive profiles with optional TMLD markers.

    Args:
        nProfiles: Number of consecutive profiles to draw; 0 shows a hint
            figure instead of a plot.
        index0: 1-based position in ``profile_files`` of the first profile.
        show_tmld: When true and TMLD estimates were loaded, mark each
            profile's estimated TMLD with a red dot.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    if nProfiles == 0:
        _show_placeholder('Select nProfiles > 0')
        return

    start_idx = index0 - 1
    end_idx = min(start_idx + nProfiles, total_profiles)

    if start_idx >= total_profiles:
        _show_placeholder(f'Index {index0} exceeds available profiles ({total_profiles})')
        return

    plt.figure(figsize=(12, 8))

    all_temps = []

    # Plot each profile in the bundle
    for i in range(start_idx, end_idx):
        try:
            # Read the arrays and close the file straight away: a redraw can
            # touch many files and none of them is needed once loaded.
            with xr.open_dataset(profile_files[i]) as ds:
                temperature = ds['temperature'].values
                depth = ds['depth'].values

            valid_mask = ~(np.isnan(temperature) | np.isnan(depth))
            if np.any(valid_mask):
                temp_clean = temperature[valid_mask]
                depth_clean = depth[valid_mask]

                plt.plot(temp_clean, depth_clean, '-', markersize=1, alpha=0.7, linewidth=1)
                all_temps.extend(temp_clean)

                # Add TMLD marker if toggle is on
                if show_tmld and not tmld_df.empty:
                    # NOTE(review): the lookup key is the 1-based position in
                    # the file list, not a number parsed from the file name.
                    # The notebook text reports TMLD markers landing on the
                    # wrong profile; whether tmld_estimates.csv uses this same
                    # numbering is not visible here. Confirm before relying on it.
                    profile_idx = i + 1
                    tmld_row = tmld_df[tmld_df['profile_index'] == profile_idx]
                    if not tmld_row.empty and not np.isnan(tmld_row.iloc[0]['Estimated_TMLD']):
                        tmld_depth = tmld_row.iloc[0]['Estimated_TMLD']
                        tmld_temp = tmld_row.iloc[0]['temperature_at_TMLD']
                        plt.plot(tmld_temp, tmld_depth, 'ro', markersize=4, alpha=0.8)

        except Exception as e:
            # Best effort: one unreadable file should not blank the whole bundle.
            print(f"Error loading {profile_files[i]}: {e}")
            continue

    plt.xlabel('Temperature (°C)', fontsize=12)
    plt.ylabel('Depth (m)', fontsize=12)
    plt.ylim(200, 0)  # inverted axis: 200 m at the bottom, surface at the top

    if all_temps:
        temp_min, temp_max = min(all_temps), max(all_temps)
        temp_range = temp_max - temp_min
        # Pad by 10% of the range; fall back to a fixed pad when every value
        # is identical so the two x-limits never coincide.
        pad = 0.1 * temp_range if temp_range > 0 else 0.1
        plt.xlim(temp_min - pad, temp_max + pad)

    plt.grid(True, alpha=0.3)

    if end_idx > start_idx:
        first_year, first_doy, first_profile = extract_profile_info(profile_files[start_idx])
        last_year, last_doy, last_profile = extract_profile_info(profile_files[end_idx-1])
        tmld_status = " (TMLD ON)" if show_tmld else " (TMLD OFF)"
        title = f'Bundle Plot{tmld_status}: {first_year}-{first_doy:03d}-{first_profile} to {last_year}-{last_doy:03d}-{last_profile}'
        plt.title(title, fontsize=14)

    plt.tight_layout()
    plt.show()

# Create interactive widgets
nProfiles_slider = widgets.IntSlider(value=1, min=0, max=100, step=1, description='nProfiles:', continuous_update=False)
# Keep max >= min even when no redux files were found; the plot function
# then reports that the index exceeds the available profiles.
index0_slider = widgets.IntSlider(value=1, min=1, max=max(total_profiles, 1), step=1, description='index0:', continuous_update=False)
tmld_toggle = widgets.ToggleButton(value=False, description='Show TMLD', button_style='', tooltip='Toggle TMLD markers on/off')

# Redraw the bundle plot whenever one of the three controls changes.
interactive_plot = widgets.interactive(plot_bundle_with_tmld_toggle,
                                       nProfiles=nProfiles_slider,
                                       index0=index0_slider,
                                       show_tmld=tmld_toggle)
display(interactive_plot)


interactive(children=(IntSlider(value=1, continuous_update=False, description='nProfiles:'), IntSlider(value=1…

In [5]:
import os
from pathlib import Path

# Folder holding the source CTD NetCDF files
ctd_folder = Path('~/ooidata/rca/sb/scalar/2015_2025_ctd').expanduser()

# Text file that receives the folder path followed by one file name per line
output_file = 'source_ctd_filelist.txt'

with open(output_file, 'w') as listing:
    # The first line is always the folder path, even if the folder is missing.
    listing.write(f"{ctd_folder}\n")

    if not ctd_folder.exists():
        print(f'Folder not found: {ctd_folder}')
    else:
        # Keep only NetCDF files whose name contains 'CTDPF'.
        ctd_files = [entry.name for entry in ctd_folder.glob('*.nc') if 'CTDPF' in entry.name]

        # Names are written in sorted order, one per line.
        listing.writelines(f"{name}\n" for name in sorted(ctd_files))

        print(f'Created {output_file} with {len(ctd_files)} CTD files')
        print(f'Folder: {ctd_folder}')

Created source_ctd_filelist.txt with 47 CTD files
Folder: /home/rob/ooidata/rca/sb/scalar/2015_2025_ctd


## Shard a collection of source files into redux profile files

In [8]:
import pandas as pd
import xarray as xr
from pathlib import Path
import numpy as np

def load_profile_indices(year):
    """Read the profile-index CSV for ``year``; return None when the file does not exist."""
    csv_path = Path(f"~/profileIndices/RS01SBPS_profiles_{year}.csv").expanduser()
    if csv_path.exists():
        return pd.read_csv(csv_path)
    return None

def get_user_input(prompt):
    """Prompt until the user answers C or H (any case); return the upper-case letter."""
    answer = input(prompt).strip().upper()
    while answer not in ('C', 'H'):
        print("Please enter 'C' for Continue or 'H' for Halt")
        answer = input(prompt).strip().upper()
    return answer

def _shard_dataset(ds, start_time, end_time, output_dir):
    """Write one redux file per indexed profile found in ``ds``.

    Returns:
        Tuple ``(profile_count, total_samples)``: files written and the total
        number of time samples they contain.
    """
    profile_count = 0
    total_samples = 0

    for year in range(start_time.year, end_time.year + 1):
        profiles_df = load_profile_indices(year)
        if profiles_df is None:
            continue

        # Running count of profiles seen per calendar day (by profile start date).
        daily_profiles = {}

        for _, profile_row in profiles_df.iterrows():
            profile_index = profile_row['profile']
            start_time_profile = pd.to_datetime(profile_row['start'])
            peak_time_profile = pd.to_datetime(profile_row['peak'])

            # Position of this profile within its day, counted from 1.
            date_key = start_time_profile.date()
            daily_sequence = daily_profiles.get(date_key, 0) + 1
            daily_profiles[date_key] = daily_sequence

            try:
                profile_data = ds.sel(time=slice(start_time_profile, peak_time_profile))

                if len(profile_data.time) == 0:
                    continue

                # Store the temperature under the shorter name 'temperature'.
                temp_ds = xr.Dataset({
                    'temperature': profile_data['sea_water_temperature']
                })

                if 'depth' in profile_data.coords:
                    temp_ds = temp_ds.assign_coords(depth=profile_data['depth'])

                julian_day = start_time_profile.timetuple().tm_yday
                output_filename = f"RCA_OSB_Profiler_Temp_{year}_{julian_day:03d}_{profile_index}_{daily_sequence}_V1.nc"

                temp_ds.to_netcdf(output_dir / output_filename)

                profile_count += 1
                total_samples += len(profile_data.time)

                # Progress report every 90 profiles
                if profile_count % 90 == 0:
                    print(f"Profiles written so far: {profile_count}")

            except Exception as e:
                # Best effort: report the failing profile, then carry on with
                # the next one instead of dropping it silently.
                print(f"Error processing profile {profile_index}: {e}")

    return profile_count, total_samples


def process_ctd_files():
    """Shard every CTD file listed in ``source_ctd_filelist.txt`` into redux files.

    The first non-blank line of the list file is the source folder and each
    following non-blank line is a file name inside it. For each file the
    date range and a per-file summary are printed. When a file lacks
    ``sea_water_temperature`` or yields no profiles, the user is asked
    whether to continue or halt. Output goes to ``~/redux``.

    Returns:
        None.
    """
    # Read the file list, ignoring blank lines and stray whitespace so they
    # are not mistaken for file names.
    with open('source_ctd_filelist.txt', 'r') as f:
        lines = [line.strip() for line in f if line.strip()]

    if not lines:
        print("source_ctd_filelist.txt is empty; nothing to process")
        return

    ctd_folder = Path(lines[0])
    ctd_files = lines[1:]

    output_dir = Path("~/redux").expanduser()
    output_dir.mkdir(exist_ok=True)

    print(ctd_folder)

    for filename in ctd_files:
        print(f"\n{filename}")

        file_path = ctd_folder / filename

        if not file_path.exists():
            print(f"File not found: {filename}")
            continue

        ask_user = False

        # The context manager closes the source file once this iteration is
        # done with it, so handles do not pile up across a long file list.
        with xr.open_dataset(file_path) as raw_ds:
            ds = raw_ds.swap_dims({'obs': 'time'})

            if ds.time.size == 0:
                print("File contains no time samples")
                continue

            start_time = pd.to_datetime(ds.time.values[0])
            end_time = pd.to_datetime(ds.time.values[-1])

            print(f"{start_time.strftime('%Y-%m-%d')} - {end_time.strftime('%Y-%m-%d')}")

            # Check for sea_water_temperature
            if 'sea_water_temperature' not in ds.data_vars:
                print("sea_water_temperature is NOT present")
                print(f"Data variables present: {list(ds.data_vars.keys())}")
                ask_user = True
            else:
                print("sea_water_temperature is present")

                profile_count, total_samples = _shard_dataset(ds, start_time, end_time, output_dir)

                # File completion diagnostics
                if profile_count == 0:
                    print("Zero profile files were written for this input file")
                    ask_user = True
                else:
                    print(f"Profile files written: {profile_count}")
                    print(f"Average data values per profile: {total_samples / profile_count:.1f}")

        # Ask only after the file has been closed.
        if ask_user and get_user_input("Continue [C] or Halt [H]? ") == 'H':
            break

    print("\nProcessing complete!")

# Run the processing
# NOTE: this starts as soon as the cell executes and may prompt on stdin
# (Continue/Halt) through get_user_input.
process_ctd_files()


/home/rob/ooidata/rca/sb/scalar/2015_2025_ctd

deployment0001_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20141231T235959.533995-20150513T000000.462476.nc
2014-12-31 - 2015-05-13
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0001_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20150512T235959.461849-20150705T235959.089334.nc
2015-05-12 - 2015-07-05
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0002_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20150708T220539.700577-20151121T000000.896973.nc
2015-07-08 - 2015-11-21
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0002_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20151120T235959.896548-20160320T000000.236721.nc
2015-11-20 - 2016-03-20
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0002_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20160319T235959.237131-20160716T111049.607585.nc
2016-03-19 - 2016-07-16
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0003_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20160718T174252.421331-20161115T000000.080347.nc
2016-07-18 - 2016-11-15
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0003_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20161114T235959.079720-20170130T014303.019857.nc
2016-11-14 - 2017-01-30
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20170804T200441.032162-20171204T000000.184699.nc
2017-08-04 - 2017-12-04
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20171203T235959.184690-20180321T001400.555731.nc
2017-12-03 - 2018-03-21
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180101T153820.335574-20180120T115959.203770.nc
2018-01-01 - 2018-01-20
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 160
Average data values per profile: 3837.3

deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180120T120000.203881-20180207T235959.840792.nc
2018-01-20 - 2018-02-07
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 164
Average data values per profile: 3819.8

deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180208T000000.840174-20180226T115959.391002.nc
2018-02-08 - 2018-02-26
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 157
Average data values per profile: 3886.2

deployment0004_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180226T120000.390906-20180318T235959.986256.nc
2018-02-26 - 2018-03-18
sea_water_temperature is present
Pro

Continue [C] or Halt [H]?  c



deployment0005_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180624T165600.731931-20180701T213819.372833.nc
2018-06-24 - 2018-07-01
sea_water_temperature is present
Profile files written: 40
Average data values per profile: 3674.9

deployment0005_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180624T165600.731931-20180709T001301.994718.nc
2018-06-24 - 2018-07-09
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0005_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180701T000000.867511-20180709T001301.994718.nc
2018-07-01 - 2018-07-09
sea_water_temperature is present
Zero profile files were written for this input file


Continue [C] or Halt [H]?  c



deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180710T170527.140820-20180728T235959.413343.nc
2018-07-10 - 2018-07-28
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 161
Average data values per profile: 3995.0

deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180710T170527.140820-20181217T000000.485697.nc
2018-07-10 - 2018-12-17
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180729T000000.413354-20180820T235959.957187.nc
2018-07-29 - 2018-08-20
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 162
Average data values per profile: 4119.9

deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180821T000000.957092-20180925T115959.978965.nc
2018-08-21 - 2018-09-25
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 93
Average data values per profile: 3994.7

deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20180925T120000.979288-20181018T084220.403580.nc
2018-09-25 - 2018-10-18
sea_water_temperature is present
Profiles written so far: 90
Profile files written: 136
Average data values per profile: 4035.5

deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20181019T201406.543980-20181106T235959.302943.nc
2018-10-19 - 2018-11-06
sea_water_temperature is present
Prof

Continue [C] or Halt [H]?  c



deployment0006_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20190409T235959.420066-20190614T051242.692351.nc
2019-04-09 - 2019-06-14
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0007_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20190614T231354.539487-20190927T184152.175125.nc
2019-06-14 - 2019-09-27
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0008_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20200803T164413.888856-20201206T000000.627631.nc
2020-08-03 - 2020-12-06
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0008_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20201205T235959.628357-20210405T000000.346239.nc
2020-12-05 - 2021-04-05
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0008_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20210404T235959.346341-20210803T000000.617449.nc
2021-04-04 - 2021-08-03
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0008_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20210802T235959.617448-20210804T190841.180708.nc
2021-08-02 - 2021-08-04
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0009_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20210824T233256.569856-20211231T000000.353722.nc
2021-08-24 - 2021-12-31
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0009_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20211230T235959.353507-20220430T000000.341125.nc
2021-12-30 - 2022-04-30
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0009_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20220429T235959.341431-20220902T204459.535113.nc
2022-04-29 - 2022-09-02
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0010_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20220915T181349.594332-20230119T000000.425078.nc
2022-09-15 - 2023-01-19
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0010_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20230118T235959.425181-20230224T084327.668331.nc
2023-01-18 - 2023-02-24
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0011_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20230817T212929.521060-20240219T000000.666761.nc
2023-08-17 - 2024-02-19
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0011_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20240218T235959.666650-20240724T000000.487004.nc
2024-02-18 - 2024-07-24
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0011_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20240723T235959.487308-20240809T063045.498432.nc
2024-07-23 - 2024-08-09
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0012_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20240809T203132.739326-20241209T000000.897501.nc
2024-08-09 - 2024-12-09
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0012_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20241208T235959.897597-20250408T000000.426260.nc
2024-12-08 - 2025-04-08
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0012_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20250407T235959.426149-20250805T142747.771470.nc
2025-04-07 - 2025-08-05
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0013_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20250812T163844.062361-20250812T204805.918593.nc
2025-08-12 - 2025-08-12
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0014_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20250814T232320.067702-20251210T000000.284807.nc
2025-08-14 - 2025-12-10
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



deployment0014_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20251209T235959.284384-20251211T082826.980834.nc
2025-12-09 - 2025-12-11
sea_water_temperature is NOT present
Data variables present: ['preferred_timestamp', 'sea_water_pressure', 'ingestion_timestamp', 'port_timestamp', 'deployment', 'id', 'internal_timestamp', 'driver_timestamp']


Continue [C] or Halt [H]?  c



Processing complete!


In [12]:
# Open one deployment-14 CTD file with netCDF4 (rather than xarray) and list
# the names of any groups it contains.
# xarray alternative kept for reference:
#   ds = xr.open_dataset('/home/rob/ooidata/rca/sb/scalar/2015_2025_ctd/deployment0014_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20250814T232320.067702-20251210T000000.284807.nc')
#   ds
import netCDF4 as nc

deployment14_path = '/home/rob/ooidata/rca/sb/scalar/2015_2025_ctd/deployment0014_RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample_20250814T232320.067702-20251210T000000.284807.nc'
with nc.Dataset(deployment14_path) as ds:
    group_names = list(ds.groups.keys())
    print(group_names)

[]


## Bundle plot animation generator

In [16]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import xarray as xr
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def get_input_with_default(prompt, default):
    """Ask the user a question and fall back to ``default`` on an empty reply.

    The prompt is displayed followed by one space. Leading and trailing
    whitespace is stripped from the reply; if nothing remains, ``default``
    is returned unchanged, otherwise the stripped reply string is returned.
    """
    answer = input(f"{prompt} ").strip()
    if not answer:
        return default
    return answer

def load_tmld_data():
    """Read ``tmld_estimates.csv`` from the current working directory.

    Returns the parsed DataFrame, or an empty DataFrame when the file does
    not exist. Any other read or parse error propagates to the caller.
    """
    try:
        estimates = pd.read_csv('tmld_estimates.csv')
    except FileNotFoundError:
        # A missing file is expected; signal "no TMLD data" with an empty frame.
        estimates = pd.DataFrame()
    return estimates

def check_time_gap(files, start_idx, end_idx):
    """Report whether any two neighbouring profiles are more than 48 hours apart.

    Each entry of ``files`` must expose a ``stem`` whose underscore-separated
    fields carry the year at position 4 and the day of year at position 5.
    Neighbouring pairs within ``files[start_idx:end_idx]`` are compared.
    Dates are resolved to whole days only, so a gap is reported once two
    neighbours are three or more calendar days apart.

    Returns True at the first such gap, otherwise False (including when the
    range holds fewer than two files).
    """
    def profile_day(path):
        # Turn "<...>_<year>_<day-of-year>_<...>" into a midnight datetime.
        fields = path.stem.split('_')
        return datetime(int(fields[4]), 1, 1) + timedelta(days=int(fields[5]) - 1)

    limit = timedelta(hours=48)
    for k in range(start_idx, end_idx - 1):
        earlier = profile_day(files[k])
        later = profile_day(files[k + 1])
        if later - earlier > limit:
            return True

    return False

def create_animated_bundle_file():
    """Create an animated "bundle" plot of temperature profiles and save it as MP4.

    The user is prompted (with defaults) for: whether to overlay TMLD
    markers, the number of profiles per bundle, the delay between frames,
    and a start/end date in ``dd-MON-yyyy`` form. Profile files are the
    ``*.nc`` files in ``~/redux`` whose name encodes the year and day of
    year as underscore-separated fields 4 and 5 of the stem. Each frame
    draws ``n_profiles`` consecutive profiles (temperature vs. depth); the
    window advances by one profile per frame. A "Time Gap" label is shown
    when ``check_time_gap`` reports a gap inside the window.

    The animation is written to ``temp_bundle_animation.mp4`` in the
    current directory via the ffmpeg writer. Nothing is returned; progress
    and problems are reported with ``print``.

    Raises:
        ValueError: if the profile count, delay or dates typed by the user
            cannot be parsed.
    """
    # Axis limits, also used to decide whether a TMLD marker is visible.
    temp_min, temp_max = 7, 19

    def profile_date(path):
        """Return the date encoded in a profile file name (midnight)."""
        fields = path.stem.split('_')
        return datetime(int(fields[4]), 1, 1) + timedelta(days=int(fields[5]) - 1)

    # Get user inputs
    show_tmld = get_input_with_default("Include TMLD estimate in the visualization? Default is no. [y/n]", "n").lower() == 'y'
    n_profiles = int(get_input_with_default("How many profiles in the bundle? Default is 18 (two days)", "18"))
    delay = float(get_input_with_default("How many seconds delay between frames? (0.1 sec):", "0.1"))
    start_date = get_input_with_default("Start date (default 01-JAN-2018):", "01-JAN-2018")
    end_date = get_input_with_default("End date (default 31-DEC-2018):", "31-DEC-2018")

    # Reject values that would otherwise fail later with a misleading
    # message (fps = 1 / delay) or produce a nonsensical frame count.
    if n_profiles < 1:
        print("Number of profiles in the bundle must be at least 1")
        return
    if not delay > 0:
        print("Delay between frames must be greater than zero")
        return

    # Parse dates
    start_dt = datetime.strptime(start_date, "%d-%b-%Y")
    end_dt = datetime.strptime(end_date, "%d-%b-%Y")

    # Load data
    redux_dir = Path("~/redux").expanduser()
    profile_files = sorted(redux_dir.glob("*.nc"))
    tmld_df = load_tmld_data() if show_tmld else pd.DataFrame()

    # Filter files by date range (both ends inclusive)
    filtered_files = []
    for file in profile_files:
        try:
            file_date = profile_date(file)
        except (IndexError, ValueError):
            # A stray .nc file that does not follow the naming scheme should
            # not abort the whole run.
            print(f"Skipping {file.name}: cannot read year/day-of-year from its name")
            continue
        if start_dt <= file_date <= end_dt:
            filtered_files.append(file)

    if len(filtered_files) < n_profiles:
        print(f"Only {len(filtered_files)} profiles found in date range")
        return

    print(f"Creating animation with {len(filtered_files)} profiles...")

    # Build a profile_index -> (temperature, depth) lookup once instead of
    # filtering the DataFrame for every profile of every frame.
    # NOTE(review): the index used below is the 1-based position within the
    # date-filtered list, as in the original code. If 'profile_index' in
    # tmld_estimates.csv counts over all files, the markers are misaligned
    # whenever the date filter drops earlier files - confirm.
    tmld_points = {}
    if show_tmld and not tmld_df.empty:
        needed = ['profile_index', 'Estimated_TMLD', 'temperature_at_TMLD']
        absent = [col for col in needed if col not in tmld_df.columns]
        if absent:
            print(f"TMLD markers disabled: tmld_estimates.csv lacks columns {absent}")
        else:
            rows = zip(
                tmld_df['profile_index'],
                pd.to_numeric(tmld_df['Estimated_TMLD'], errors='coerce'),
                pd.to_numeric(tmld_df['temperature_at_TMLD'], errors='coerce'),
            )
            for index_value, depth_value, temp_value in rows:
                # The first row wins when a profile index is repeated.
                tmld_points.setdefault(index_value, (temp_value, depth_value))

    # Each profile appears in up to n_profiles frames; read every file once,
    # close it immediately, and reuse the cleaned arrays afterwards.
    profile_cache = {}

    def load_profile(path):
        """Return (temperature, depth) without NaNs, or None if unusable."""
        if path not in profile_cache:
            try:
                with xr.open_dataset(path) as ds:
                    temperature = ds['temperature'].values
                    depth = ds['depth'].values
                valid_mask = ~(np.isnan(temperature) | np.isnan(depth))
                if np.any(valid_mask):
                    profile_cache[path] = (temperature[valid_mask], depth[valid_mask])
                else:
                    profile_cache[path] = None
            except Exception as exc:
                # Best effort: one unreadable file must not abort the
                # animation, but say so (once) instead of hiding it.
                print(f"Skipping {path.name}: {exc}")
                profile_cache[path] = None
        return profile_cache[path]

    # Set up the figure
    fig, ax = plt.subplots(figsize=(12, 8))

    def animate(frame):
        """Draw the bundle of profiles that starts at index ``frame``."""
        ax.clear()
        ax.set_xlim(temp_min, temp_max)
        ax.set_ylim(200, 0)  # inverted axis: depth increases downwards
        ax.set_xlabel('Temperature (°C)', fontsize=12)
        ax.set_ylabel('Depth (m)', fontsize=12)
        ax.grid(True, alpha=0.3)

        start_idx = frame
        end_idx = min(start_idx + n_profiles, len(filtered_files))

        if start_idx >= len(filtered_files):
            return

        # Check for time gap
        has_time_gap = check_time_gap(filtered_files, start_idx, end_idx)

        # Plot profiles in current frame
        for i in range(start_idx, end_idx):
            profile = load_profile(filtered_files[i])
            if profile is None:
                continue
            temp_clean, depth_clean = profile
            ax.plot(temp_clean, depth_clean, '-', linewidth=1, alpha=0.7)

            # Add TMLD marker if one is known for this profile
            point = tmld_points.get(i + 1)
            if point is not None:
                tmld_temp, tmld_depth = point
                if not np.isnan(tmld_depth) and temp_min <= tmld_temp <= temp_max:
                    ax.plot(tmld_temp, tmld_depth, 'ro', markersize=4, alpha=0.8)

        # Add Time Gap warning if needed
        if has_time_gap:
            ax.text(0.95, 0.05, 'Time Gap', transform=ax.transAxes,
                    fontsize=20, fontweight='bold', ha='right', va='bottom',
                    bbox=dict(boxstyle='round', facecolor='white', edgecolor='black', linewidth=2))

        # Set title with date range
        if end_idx > start_idx:
            first_date = profile_date(filtered_files[start_idx])
            last_date = profile_date(filtered_files[end_idx - 1])

            tmld_status = " (TMLD)" if show_tmld else ""
            title = f'Bundle Animation{tmld_status}: {first_date.strftime("%d-%b-%Y")} to {last_date.strftime("%d-%b-%Y")}'
            ax.set_title(title, fontsize=14)

    try:
        # Create animation: one frame per possible window start
        total_frames = len(filtered_files) - n_profiles + 1
        anim = animation.FuncAnimation(fig, animate, frames=total_frames,
                                       interval=delay * 1000, repeat=True, blit=False)

        # Save animation
        output_file = 'temp_bundle_animation.mp4'
        print(f"Saving animation to {output_file}...")

        try:
            anim.save(output_file, writer='ffmpeg', fps=1 / delay, dpi=100)

            # Check file status
            output_path = Path(output_file)
            if output_path.exists():
                file_size = output_path.stat().st_size / (1024 * 1024)  # MB
                print("Animation saved successfully!")
                print(f"File: {output_file}")
                print(f"Size: {file_size:.1f} MB")
                print(f"Frames: {total_frames}")
            else:
                print("Error: Output file was not created")

        except Exception as e:
            print(f"Error saving animation: {e}")
            print("Note: ffmpeg must be installed for MP4 output")
    finally:
        # Always release the figure, even if the run is interrupted.
        plt.close(fig)

# Run the animation creation: prompts for the settings interactively, then
# writes temp_bundle_animation.mp4 relative to the current working directory.
create_animated_bundle_file()


Include TMLD estimate in the visualization? Default is no. [y/n]  
How many profiles in the bundle? Default is 18 (two days)  
How many seconds delay between frames? (0.1 sec):  
Start date (default 01-JAN-2018):  
End date (default 31-DEC-2018):  


Creating animation with 1849 profiles...
Saving animation to temp_bundle_animation.mp4...
Animation saved successfully!
File: temp_bundle_animation.mp4
Size: 14.0 MB
Frames: 1832
