# Simple Sensor Data Visualization v2

**Purpose**: Load sensor data around sync events with individual time axes for manual sync event identification.

**Features**:
- Load 4 hours around sync start time (configurable)
- Each sensor has its own independent time axis
- Time shifts controlled by Sync_Parameters.yaml
- Preprocessing done before plotting
- Simple and focused approach

## 1. Configuration

In [1]:
# Configuration parameters
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import yaml
from datetime import datetime, timedelta
import ipywidgets as widgets
from IPython.display import display, clear_output
import random
import matplotlib.colors as mcolors

# ========== CONFIGURATION ==========
SUBJECT_ID = "OutSense-608"  # Change this to your subject
HOURS_AROUND_SYNC = 120  # Hours to load around sync start time (2 hours before, 2 hours after)
TARGET_FREQUENCY = 25  # Hz for resampling

# Paths
script_dir = os.path.dirname(os.path.abspath('.'))
project_root = os.path.dirname(script_dir)
sync_params_path = os.path.join(project_root, 'Sync_Parameters.yaml')
sync_events_path = os.path.join(project_root, 'Sync_Events_Times.csv')
config_path = os.path.join(project_root, 'config.yaml')
labels_path = os.path.join(project_root, 'Final_Labels.csv')

print(f"📋 Configuration:")
print(f"  Subject: {SUBJECT_ID}")
print(f"  Time window: ±{HOURS_AROUND_SYNC} hours around sync start")
print(f"  Target frequency: {TARGET_FREQUENCY} Hz")
print(f"  Project root: {project_root}")
print(f"  Labels file: {labels_path}")

# Generate a consistent set of colors for labels
def generate_label_colors(labels_list):
    """Generate consistent random colors for each unique label"""
    unique_labels = list(set(labels_list))
    random.seed(42)  # For consistent colors across runs
    colors = []
    
    for i, label in enumerate(unique_labels):
        # Use HSV color space for better color distribution
        hue = (i * 137.5) % 360  # Golden angle for good distribution
        saturation = 0.7 + (i % 3) * 0.1  # Vary saturation
        value = 0.8 + (i % 2) * 0.15  # Vary brightness
        
        # Convert HSV to RGB
        rgb = mcolors.hsv_to_rgb([hue/360, saturation, value])
        colors.append(rgb)
    
    return dict(zip(unique_labels, colors))

📋 Configuration:
  Subject: OutSense-608
  Time window: ±120 hours around sync start
  Target frequency: 25 Hz
  Project root: /home/muff_an/scai_data_process
  Labels file: /home/muff_an/scai_data_process/Final_Labels.csv


## 2. Load Configuration and Sync Parameters

In [2]:
# Load main configuration
with open('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/config.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

# Load sync parameters
with open('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/Sync_Parameters_andre.yaml', 'r') as f:
    sync_params = yaml.safe_load(f)
# Load sync events
sync_events_df = pd.read_csv('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/Sync_Events_Times.csv')

# Load Final_Labels.csv
try:
    labels_df = pd.read_csv('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/Final_Labels.csv')
    print(f"✅ Loaded Final_Labels.csv with {len(labels_df)} entries")
    
    # Filter labels for the current subject
    subject_labels = labels_df[labels_df['Video_File'].str.contains(SUBJECT_ID, na=False)]
    print(f"📊 Found {len(subject_labels)} labels for subject {SUBJECT_ID}")
    
    if len(subject_labels) > 0:
        # Parse the Real_Start_Time and Real_End_Time columns
        subject_labels = subject_labels.copy()
        subject_labels['Real_Start_Time'] = pd.to_datetime(subject_labels['Real_Start_Time'], errors='coerce')
        subject_labels['Real_End_Time'] = pd.to_datetime(subject_labels['Real_End_Time'], errors='coerce')
        
        # Remove any rows with invalid timestamps
        valid_labels = subject_labels.dropna(subset=['Real_Start_Time', 'Real_End_Time'])
        print(f"📅 {len(valid_labels)} labels have valid timestamps")
        
        # Generate colors for labels
        label_colors = generate_label_colors(valid_labels['Label'].tolist())
        print(f"🎨 Generated colors for {len(label_colors)} unique labels")
        
        # Show label summary
        label_summary = valid_labels['Label'].value_counts()
        print(f"\n📋 Label distribution:")
        for label, count in label_summary.head(10).items():
            print(f"  {label}: {count} instances")
        if len(label_summary) > 10:
            print(f"  ... and {len(label_summary) - 10} more labels")
    else:
        valid_labels = pd.DataFrame()
        label_colors = {}
        
except FileNotFoundError:
    print("⚠️ Final_Labels.csv not found - plots will show without labels")
    valid_labels = pd.DataFrame()
    label_colors = {}
except Exception as e:
    print(f"⚠️ Error loading Final_Labels.csv: {e}")
    valid_labels = pd.DataFrame()
    label_colors = {}

print(f"✅ Loaded configurations:")
print(f"  Main config: {len(cfg)} sections")
print(f"  Sync parameters: {len(sync_params)} subjects")
print(f"  Sync events: {len(sync_events_df)} entries")
print(f"  Labels: {len(valid_labels)} for {SUBJECT_ID}")

# Get sync start time for the subject
subject_sync = sync_events_df[sync_events_df['Subject'] == SUBJECT_ID]
if subject_sync.empty:
    raise ValueError(f"No sync events found for subject {SUBJECT_ID}")

sync_start_str = subject_sync.iloc[0]['Sync Start']
sync_end_str = subject_sync.iloc[0]['Sync End']

# Parse sync times
sync_start_time = pd.to_datetime(sync_start_str, format='%d.%m.%Y.%H.%M.%S')
sync_end_time = pd.to_datetime(sync_end_str, format='%d.%m.%Y.%H.%M.%S')

print(f"\n🎯 Sync times for {SUBJECT_ID}:")
print(f"  Sync Start: {sync_start_time}")
print(f"  Sync End: {sync_end_time}")
print(f"  Duration: {sync_end_time - sync_start_time}")

# Calculate data window
data_window_start = sync_start_time - pd.Timedelta(hours=HOURS_AROUND_SYNC//2)
data_window_end = sync_start_time + pd.Timedelta(hours=HOURS_AROUND_SYNC//2)

print(f"\n📊 Data window ({HOURS_AROUND_SYNC}h around sync start):")
print(f"  Window Start: {data_window_start}")
print(f"  Window End: {data_window_end}")
print(f"  Total Duration: {data_window_end - data_window_start}")

# Show labels in the data window
if len(valid_labels) > 0:
    window_labels = valid_labels[
        (valid_labels['Real_Start_Time'] <= data_window_end) & 
        (valid_labels['Real_End_Time'] >= data_window_start)
    ]
    print(f"\n🏷️ Labels in data window: {len(window_labels)}")
    if len(window_labels) > 0:
        window_label_summary = window_labels['Label'].value_counts()
        for label, count in window_label_summary.head(5).items():
            print(f"  {label}: {count} instances")

✅ Loaded Final_Labels.csv with 7214 entries
📊 Found 638 labels for subject OutSense-608
📅 638 labels have valid timestamps
🎨 Generated colors for 28 unique labels

📋 Label distribution:
  self_propulsion: 187 instances
  sitting_wheelchair: 150 instances
  conversation: 68 instances
  wheelchair _in_storage: 59 instances
  watching_tv: 31 instances
  assisted_propulsion: 28 instances
  pressure_relief: 24 instances
  handling: 21 instances
  reading: 17 instances
  toilet_routine: 12 instances
  ... and 18 more labels
✅ Loaded configurations:
  Main config: 62 sections
  Sync parameters: 16 subjects
  Sync events: 16 entries
  Labels: 638 for OutSense-608

🎯 Sync times for OutSense-608:
  Sync Start: 2024-06-04 09:39:30
  Sync End: 2024-06-06 10:57:40
  Duration: 2 days 01:18:10

📊 Data window (120h around sync start):
  Window Start: 2024-06-01 21:39:30
  Window End: 2024-06-06 21:39:30
  Total Duration: 5 days 00:00:00

🏷️ Labels in data window: 638
  self_propulsion: 187 instances
 

## 3. Load and Import Required Functions

In [3]:
# Import data loading functions from the original notebook/scripts
import sys
sys.path.append(project_root)

# Import necessary functions (you may need to adjust these based on your actual module structure)
try:
    from raw_data_processor import (
        select_data_loader,
        modify_modality_names,
        process_modality_duplicates,
        handle_missing_data_interpolation,
        correct_timestamp_drift
    )
    print("✅ Imported functions from raw_data_processor")
except ImportError as e:
    print(f"⚠️ Could not import from raw_data_processor: {e}")
    print("You may need to adjust the import paths or copy the required functions")
    
    # Define minimal data loader selection function
    def select_data_loader(sensor_name):
        """Simple data loader selector - you may need to implement based on your data structure"""
        def simple_csv_loader(subject_dir, sensor_name, sensor_settings):
            # This is a placeholder - implement based on your actual data structure
            csv_path = os.path.join(subject_dir, f"{sensor_name}.csv")
            if os.path.exists(csv_path):
                return pd.read_csv(csv_path)
            else:
                return pd.DataFrame()
        return simple_csv_loader
    
    def modify_modality_names(data, sensor_name):
        """Simple modality name modifier"""
        return sensor_name, data
    
    def process_modality_duplicates(data, sample_rate):
        """Simple duplicate processor"""
        return data.drop_duplicates()
    
    def handle_missing_data_interpolation(data, max_interp_gap_s=2, target_freq=50):
        """Simple interpolation"""
        return data.interpolate(method='linear', limit=int(max_interp_gap_s * target_freq))
    
    def correct_timestamp_drift(timestamp, t0, t1, drift_secs):
        """Simple drift correction"""
        if t0 <= timestamp <= t1:
            progress = (timestamp - t0) / (t1 - t0)
            return timestamp + (drift_secs * progress)
        return timestamp
    
    print("📝 Using simplified placeholder functions")

# Get raw data configuration
raw_data_parsing_config = cfg.get('raw_data_parsing_config', {})
raw_data_base_dir = os.path.join(project_root, cfg.get('raw_data_input_dir', 'data'))
subject_dir = os.path.join(raw_data_base_dir, SUBJECT_ID)

print(f"\n📂 Data paths:")
print(f"  Raw data dir: {raw_data_base_dir}")
print(f"  Subject dir: {subject_dir}")
print(f"  Available sensors: {list(raw_data_parsing_config.keys())}")

✅ Imported functions from raw_data_processor

📂 Data paths:
  Raw data dir: /scai_data2/scai_datasets/interim/scai-outsense/
  Subject dir: /scai_data2/scai_datasets/interim/scai-outsense/OutSense-608
  Available sensors: ['corsano_wrist_acc', 'cosinuss_ear_acc_x_acc_y_acc_z', 'mbient_imu_wc_accelerometer', 'mbient_imu_wc_gyroscope', 'vivalnk_vv330_acceleration', 'sensomative_bottom_logger', 'sensomative_back_logger', 'corsano_bioz_acc']


## 4. Load and Process Sensor Data

In [4]:
# Load and process each sensor with time shifts from Sync_Parameters.yaml
print(f"\n=== LOADING SENSOR DATA ===")
print(f"Processing sensors for subject: {SUBJECT_ID}")
print(f"Time window: {data_window_start} to {data_window_end}")

processed_sensors = {}
subject_correction_params = sync_params.get(SUBJECT_ID, {})

for sensor_name, sensor_settings in raw_data_parsing_config.items():
    print(f"\n--- Processing sensor: {sensor_name} ---")
    
    try:
        # Load raw sensor data
        loader = select_data_loader(sensor_name)
        sensor_data_raw = loader(subject_dir, sensor_name, sensor_settings)
        
        if sensor_data_raw.empty or 'time' not in sensor_data_raw.columns:
            print(f"❌ No data loaded for {sensor_name}")
            continue
        
        print(f"📊 Loaded {len(sensor_data_raw)} raw samples")
        
        # Get time correction parameters for this sensor
        sensor_corr_params = subject_correction_params.get(sensor_name, {'unit': 's'})
        time_unit = sensor_corr_params.get('unit', 's')
        shift_val = sensor_corr_params.get('shift', 0)
        
        # Apply time corrections
        time_col_num = sensor_data_raw['time'].astype(float)
        
        # Convert to seconds if needed
        if time_unit == 'ms':
            time_col_num = time_col_num / 1000.0
        
        # Apply shift correction
        if shift_val != 0:
            time_col_num = time_col_num + shift_val
            print(f"⏱️ Applied time shift: {shift_val}s")
        
        # Apply drift correction if available
        drift_params = sensor_corr_params.get('drift')
        if drift_params and all(k in drift_params for k in ['t0', 't1', 'drift_secs']):
            t0_ts = pd.Timestamp(drift_params['t0'])
            t1_ts = pd.Timestamp(drift_params['t1'])
            if not pd.isna(t0_ts) and not pd.isna(t1_ts):
                t0, t1 = t0_ts.timestamp(), t1_ts.timestamp()
                drift = drift_params['drift_secs']
                time_col_num = time_col_num.apply(correct_timestamp_drift, args=(t0, t1, drift))
                print(f"📐 Applied drift correction: {drift}s over {t1-t0:.1f}s interval")
        
        # Convert to datetime
        corrected_timestamps = pd.to_datetime(time_col_num, unit='s', errors='coerce')
        sensor_data_corrected = sensor_data_raw.drop(columns=['time']).copy()
        sensor_data_corrected['time'] = corrected_timestamps
        sensor_data_corrected.dropna(subset=['time'], inplace=True)
        
        if sensor_data_corrected.empty:
            print(f"❌ No valid data after time correction for {sensor_name}")
            continue
        
        # Filter to data window
        original_count = len(sensor_data_corrected)
        time_mask = (sensor_data_corrected['time'] >= data_window_start) & (sensor_data_corrected['time'] <= data_window_end)
        sensor_data_filtered = sensor_data_corrected[time_mask].copy()
        
        filtered_count = len(sensor_data_filtered)
        retention_pct = (filtered_count / original_count * 100) if original_count > 0 else 0
        print(f"🔍 Filtered from {original_count} to {filtered_count} samples ({retention_pct:.1f}% retained)")
        
        if sensor_data_filtered.empty:
            print(f"❌ No data in time window for {sensor_name}")
            continue
        
        # Set time as index
        sensor_data_filtered.set_index('time', inplace=True)
        sensor_data_filtered.sort_index(inplace=True)
        
        # Apply basic preprocessing
        sample_rate = sensor_settings.get('sample_rate', TARGET_FREQUENCY)
        processed_data = process_modality_duplicates(sensor_data_filtered, sample_rate)
        processed_data = handle_missing_data_interpolation(processed_data, max_interp_gap_s=2, target_freq=TARGET_FREQUENCY)
        
        # Apply column renaming
        new_name, processed_data = modify_modality_names(processed_data, sensor_name)
        
        if processed_data.empty:
            print(f"❌ No data after preprocessing for {sensor_name}")
            continue
        
        print(f"✅ Final shape: {processed_data.shape}")
        print(f"✅ Time range: {processed_data.index.min()} to {processed_data.index.max()}")
        
        processed_sensors[new_name] = processed_data
        
    except Exception as e:
        print(f"❌ Error processing sensor {sensor_name}: {e}")
        import traceback
        traceback.print_exc()

print(f"\n📈 Successfully processed {len(processed_sensors)} sensors:")
for sensor_name, data in processed_sensors.items():
    duration = data.index.max() - data.index.min()
    print(f"  📊 {sensor_name}: {len(data)} samples, duration {duration}")

if not processed_sensors:
    raise ValueError("No sensor data was successfully processed!")


=== LOADING SENSOR DATA ===
Processing sensors for subject: OutSense-608
Time window: 2024-06-01 21:39:30 to 2024-06-06 21:39:30

--- Processing sensor: corsano_wrist_acc ---
📊 Loaded 5033216 raw samples
⏱️ Applied time shift: 1s
🔍 Filtered from 5033216 to 5033216 samples (100.0% retained)
✅ Final shape: (5033216, 3)
✅ Time range: 2024-06-04 07:37:36 to 2024-06-06 09:32:09.967999935

--- Processing sensor: cosinuss_ear_acc_x_acc_y_acc_z ---
📊 Loaded 1297834 raw samples
🔍 Filtered from 1297834 to 1297834 samples (100.0% retained)
✅ Final shape: (1297834, 3)
✅ Time range: 2024-06-04 11:01:15.559000015 to 2024-06-05 15:43:43.335999966

--- Processing sensor: mbient_imu_wc_accelerometer ---
📊 Loaded 9179034 raw samples
⏱️ Applied time shift: 7218s
📐 Applied drift correction: -15s over 177490.0s interval
🔍 Filtered from 9179034 to 9179034 samples (100.0% retained)
✅ Final shape: (9179034, 3)
✅ Time range: 2024-06-04 08:32:28.987851620 to 2024-06-06 11:02:18.006235361

--- Processing sensor

## 5. Interactive Plotting with Independent Time Axes

In [None]:
# Create interactive plotting tool with independent time axes
print("=== INTERACTIVE SENSOR VISUALIZATION ===")
print("🎯 Each sensor has its own independent time axis")
print("🔍 Perfect for manual sync event identification")
print("🏷️ Labels from Final_Labels.csv will be shown as shaded areas")

# === Initial Sync Parameters ===
sync_start_ref = sync_start_time  # for relative adjustments
label_offset_seconds = 0


# Create controls
sensor_names = list(processed_sensors.keys())

# Sensor selection
sensor_selection = widgets.SelectMultiple(
    options=sensor_names,
    value=sensor_names[:3] if len(sensor_names) >= 3 else sensor_names,  # Select first 3 by default
    description='Select Sensors:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(height='200px', width='300px')
)

# Label display controls
show_labels = widgets.Checkbox(
    value=True,
    description='Show Labels',
    style={'description_width': 'initial'}
)

label_alpha = widgets.FloatSlider(
    value=0.3,
    min=0.1,
    max=0.8,
    step=0.1,
    description='Label Alpha:',
    style={'description_width': 'initial'}
)

# Label filter (show only specific labels)
if len(valid_labels) > 0:
    unique_labels_in_window = sorted(valid_labels['Label'].unique())
    label_filter = widgets.SelectMultiple(
        options=unique_labels_in_window,
        value=unique_labels_in_window #[:10] if len(unique_labels_in_window) > 10 else unique_labels_in_window,
        description='Show Labels:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(height='150px', width='300px')
    )
else:
    label_filter = widgets.SelectMultiple(
        options=[],
        value=[],
        description='Show Labels:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(height='150px', width='300px')
    )

# Time window controls
center_time_text = widgets.Text(
    value=sync_start_time.strftime('%Y-%m-%d %H:%M:%S'),
    description='Center Time:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='300px')
)

window_minutes = widgets.IntSlider(
    value=2,  
    min=1,
    max=60,  # 4 hours max
    step=1,
    description='Window (min):',
    style={'description_width': 'initial'}
)
label_offset_slider = widgets.IntSlider(
    value=0, min=-300, max=300, step=1,
    description='Label Offset (s):',
    layout=widgets.Layout(width='80%')
)

label_offset_label = widgets.Label(value="Current label offset: 0 seconds")

# Quick jump buttons
jump_sync_start = widgets.Button(description='🎯 Jump to Sync Start', button_style='success')
jump_sync_end = widgets.Button(description='🎯 Jump to Sync End', button_style='warning')
jump_data_start = widgets.Button(description='📊 Jump to Data Start', button_style='info')
jump_data_end = widgets.Button(description='📊 Jump to Data End', button_style='info')

# Plot button
plot_button = widgets.Button(description='📈 Plot Sensors', button_style='primary', layout=widgets.Layout(width='150px'))

# Offset slider and label
sync_offset_slider = widgets.IntSlider(
    value=0, min=-300, max=300, step=1,
    description='Offset (s):',
    layout=widgets.Layout(width='80%')
)
sync_offset_label = widgets.Label(value="Current offset: 0 seconds")


# Output area
plot_output = widgets.Output()

def get_center_time():
    """Get center time from text widget"""
    try:
        return pd.to_datetime(center_time_text.value)
    except:
        return sync_start_time

def update_center_time(new_time):
    """Update center time text widget"""
    center_time_text.value = new_time.strftime('%Y-%m-%d %H:%M:%S')

def plot_sensors(btn):
    """Plot selected sensors with independent time axes and label overlays"""
    with plot_output:
        clear_output(wait=True)
        
        try:
            selected_sensors = list(sensor_selection.value)
            if not selected_sensors:
                print("❌ Please select at least one sensor")
                return
            
            center_time = get_center_time()
            window_mins = window_minutes.value
            
            # Calculate time window
            half_window = pd.Timedelta(minutes=window_mins/2)
            plot_start = center_time - half_window
            plot_end = center_time + half_window
            
            print(f"📊 Plotting {len(selected_sensors)} sensors")
            print(f"⏱️ Time window: {plot_start} to {plot_end} ({window_mins} minutes)")
            print(f"🎯 Center time: {center_time}")
            
            # Filter labels for the current plot window
            if show_labels.value and len(valid_labels) > 0:
                selected_label_types = list(label_filter.value)
                plot_labels = valid_labels.copy()

                # Apply global label offset
                # if label_offset_seconds != 0:
                plot_labels['Shifted_Start_Time'] = plot_labels['Real_Start_Time'] + pd.Timedelta(seconds=label_offset_seconds)
                plot_labels['Shifted_End_Time'] = plot_labels['Real_End_Time'] + pd.Timedelta(seconds=label_offset_seconds)

                # Filter to window
                plot_labels = plot_labels[
                    (plot_labels['Shifted_Start_Time'] <= plot_end) &
                    (plot_labels['Shifted_End_Time'] >= plot_start) &
                    (plot_labels['Label'].isin(selected_label_types))
                ]
                print(f"🏷️ Showing {len(plot_labels)} labels in window")
            else:
                plot_labels = pd.DataFrame()
            
            # Create plot with INDEPENDENT time axes for each sensor
            fig, axes = plt.subplots(len(selected_sensors), 1, 
                                   figsize=(16, 3*len(selected_sensors)), 
                                   sharex=False)  # Independent time axes!
            if len(selected_sensors) == 1:
                axes = [axes]
            
            for i, sensor_name in enumerate(selected_sensors):
                ax = axes[i]
                
                if sensor_name not in processed_sensors:
                    ax.text(0.5, 0.5, f'No data for {sensor_name}', 
                           ha='center', va='center', transform=ax.transAxes)
                    ax.set_title(f'{sensor_name} - No Data')
                    continue
                
                sensor_data = processed_sensors[sensor_name]
                
                # Filter to plot window
                mask = (sensor_data.index >= plot_start) & (sensor_data.index <= plot_end)
                plot_data = sensor_data[mask]
                
                if plot_data.empty:
                    ax.text(0.5, 0.5, f'No data in time window for {sensor_name}', 
                           ha='center', va='center', transform=ax.transAxes)
                    ax.set_title(f'{sensor_name} - No Data in Window')
                    continue
                
                # Plot all numeric columns
                numeric_cols = plot_data.select_dtypes(include=[np.number]).columns
                for col in numeric_cols:
                    ax.plot(plot_data.index, plot_data[col], 
                           label=col, alpha=0.7, linewidth=1)
                
                # Add label shading BEHIND the sensor data
                if show_labels.value and len(plot_labels) > 0:
                    y_min, y_max = ax.get_ylim() if len(numeric_cols) > 0 else (0, 1)
                    
                    # Group labels by type for better visualization
                    label_count = {}
                    for _, label_row in plot_labels.iterrows():
                        label_name = label_row['Label']
                        start_time = max(label_row['Shifted_Start_Time'], plot_start)
                        end_time = min(label_row['Shifted_End_Time'], plot_end)
                        
                        if start_time < end_time:  # Valid time range
                            color = label_colors.get(label_name, 'gray')
                            
                            # Count occurrences for label positioning
                            if label_name not in label_count:
                                label_count[label_name] = 0
                            label_count[label_name] += 1
                            
                            # Add shaded region
                            ax.axvspan(start_time, end_time, 
                                     alpha=label_alpha.value, 
                                     color=color,
                                     label=f'{label_name}' if label_count[label_name] == 1 else "")
                            
                            # Add label text for longer labels
                            duration = end_time - start_time
                            if duration > pd.Timedelta(minutes=2):  # Only show text for longer labels
                                mid_time = start_time + (end_time - start_time) / 2
                                if len(numeric_cols) > 0:
                                    # Position text at top of the plot
                                    y_pos = y_max - (y_max - y_min) * 0.05
                                else:
                                    y_pos = 0.5
                                
                                ax.text(mid_time, y_pos, label_name, 
                                       ha='center', va='top', rotation=0,
                                       fontsize=8, alpha=0.8,
                                       bbox=dict(boxstyle='round,pad=0.2', 
                                               facecolor='white', alpha=0.7))
                
                # Mark sync events
                if plot_start <= sync_start_time <= plot_end:
                    ax.axvline(sync_start_time, color='red', linestyle='--', 
                             linewidth=2, alpha=0.8, label='🎯 Sync Start')
                
                if plot_start <= sync_end_time <= plot_end:
                    ax.axvline(sync_end_time, color='darkred', linestyle='--', 
                             linewidth=2, alpha=0.8, label='🎯 Sync End')
                
                # Mark center time
                ax.axvline(center_time, color='green', linestyle=':', 
                         linewidth=1, alpha=0.6, label='Center')
                
                # Mark data boundaries for this sensor
                sensor_start = sensor_data.index.min()
                sensor_end = sensor_data.index.max()
                
                if plot_start <= sensor_start <= plot_end:
                    ax.axvline(sensor_start, color='blue', linestyle='-', 
                             linewidth=1, alpha=0.4, label='Data Start')
                
                if plot_start <= sensor_end <= plot_end:
                    ax.axvline(sensor_end, color='orange', linestyle='-', 
                             linewidth=1, alpha=0.4, label='Data End')
                
                # Formatting for EACH sensor's independent time axis
                title_text = f'{sensor_name} ({len(numeric_cols)} channels)'
                if show_labels.value and len(plot_labels) > 0:
                    title_text += f' | {len(plot_labels)} labels'
                ax.set_title(title_text)
                ax.set_ylabel('Value')
                ax.set_xlabel('Time')
                ax.grid(True, alpha=0.3)
                
                # Format time axis for THIS sensor
                ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
                ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=max(1, window_mins//10)))
                plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
                
                # Legend if not too many columns
                handles, labels = ax.get_legend_handles_labels()
                if len(handles) <= 15:  # Reasonable number for legend
                    ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=8)
                
                print(f"  📈 {sensor_name}: {len(plot_data)} samples in window")
            
            title_text = f'Sensor Data - Independent Time Axes\nWindow: {plot_start} to {plot_end}'
            if show_labels.value and len(plot_labels) > 0:
                title_text += f' | {len(plot_labels)} labels shown'
            
            plt.suptitle(title_text, fontsize=14, y=0.98)
            
            plt.tight_layout()
            plt.subplots_adjust(right=0.85, top=0.92)
            plt.show()
            
            # Show sync event info
            print(f"\n🎯 Sync Event Information:")
            print(f"  📍 Sync Start: {sync_start_time}")
            print(f"  📍 Sync End: {sync_end_time}")
            print(f"  ⏱️ Sync Duration: {sync_end_time - sync_start_time}")
            print(f"🕒 Offset from original: {(sync_start_time - sync_start_ref).total_seconds()} seconds")
            
            if plot_start <= sync_start_time <= plot_end:
                print(f"  ✅ Sync Start is visible in current window")
            else:
                print(f"  ❌ Sync Start is outside current window")
                
            if plot_start <= sync_end_time <= plot_end:
                print(f"  ✅ Sync End is visible in current window")
            else:
                print(f"  ❌ Sync End is outside current window")
            
            # Show label information
            if show_labels.value and len(plot_labels) > 0:
                print(f"\n🏷️ Label Information:")
                label_summary = plot_labels['Label'].value_counts()
                for label, count in label_summary.items():
                    print(f"  📋 {label}: {count} instances")
                
                total_duration = 0
                for _, label_row in plot_labels.iterrows():
                    duration = label_row['Real_End_Time'] - label_row['Real_Start_Time']
                    total_duration += duration.total_seconds()
                
                avg_duration = total_duration / len(plot_labels) if len(plot_labels) > 0 else 0
                print(f"  ⏱️ Average label duration: {avg_duration:.1f}s")
            
        except Exception as e:
            print(f"❌ Error creating plot: {e}")
            import traceback
            traceback.print_exc()
# Sync offset handler
def on_sync_offset_slider_change(change):
    global sync_start_time
    offset_seconds = change['new']
    sync_start_time = sync_start_ref + pd.Timedelta(seconds=offset_seconds)
    sync_offset_label.value = f"Current offset: {offset_seconds:+d} seconds"
    plot_sensors(None)

sync_offset_slider.observe(on_sync_offset_slider_change, names='value')

def on_label_offset_slider_change(change):
    global label_offset_seconds
    label_offset_seconds = change['new']
    label_offset_label.value = f"Current label offset: {label_offset_seconds:+d} seconds"
    plot_sensors(None)
    
label_offset_slider.observe(on_label_offset_slider_change, names='value')
# Button functions
def jump_to_sync_start(btn):
    update_center_time(sync_start_time)
    plot_sensors(None)

def jump_to_sync_end(btn):
    update_center_time(sync_end_time)
    plot_sensors(None)

def jump_to_data_start(btn):
    all_starts = [data.index.min() for data in processed_sensors.values()]
    earliest = min(all_starts)
    update_center_time(earliest + pd.Timedelta(minutes=window_minutes.value/2))
    plot_sensors(None)

def jump_to_data_end(btn):
    all_ends = [data.index.max() for data in processed_sensors.values()]
    latest = max(all_ends)
    update_center_time(latest - pd.Timedelta(minutes=window_minutes.value/2))
    plot_sensors(None)

# Connect buttons
plot_button.on_click(plot_sensors)
jump_sync_start.on_click(jump_to_sync_start)
jump_sync_end.on_click(jump_to_sync_end)
jump_data_start.on_click(jump_to_data_start)
jump_data_end.on_click(jump_to_data_end)

# Layout
label_controls = widgets.VBox([
    widgets.HTML("<h4>🏷️ Label Controls</h4>"),
    show_labels,
    label_alpha,
    label_filter
]) if len(valid_labels) > 0 else widgets.HTML("<p>No labels available</p>")

controls = widgets.VBox([
    widgets.HTML("<h3>🎛️ Controls</h3>"),
    sensor_selection,
    label_controls,
    center_time_text,
    window_minutes,
    widgets.HBox([jump_sync_start, jump_sync_end]),
    widgets.HBox([jump_data_start, jump_data_end]),
    plot_button
])

full_ui = (widgets.VBox([
    controls,
    widgets.HTML("<h4>🔧 Adjust Sync Start Time</h4>"),
    sync_offset_slider,
    sync_offset_label,
    widgets.HTML("<h4>🏷️ Adjust Label Offset</h4>"),
    label_offset_slider,
    label_offset_label,
    plot_output
]))



display(full_ui)

print("\n🚀 Interactive visualization ready!")
print("\n📝 Instructions:")
print("  1. Select sensors to visualize")
print("  2. Choose which labels to display")
print("  3. Set center time and window size") 
print("  4. Use quick jump buttons to navigate")
print("  5. Each sensor has its own independent time axis")
print("  6. Labels are shown as colored shaded areas")
print("  7. Look for sync events marked with red dashed lines")
print("\n💡 Key Features:")
print("  ✅ Independent time axes per sensor")
print("  ✅ Time shifts from Sync_Parameters.yaml applied")
print("  ✅ Preprocessing completed before plotting")
print("  ✅ Labels from Final_Labels.csv overlaid as shaded regions")
print("  ✅ Perfect for manual sync event identification")
print("  ✅ Adjustable label transparency and filtering")

=== INTERACTIVE SENSOR VISUALIZATION ===
🎯 Each sensor has its own independent time axis
🔍 Perfect for manual sync event identification
🏷️ Labels from Final_Labels.csv will be shown as shaded areas


VBox(children=(VBox(children=(HTML(value='<h3>🎛️ Controls</h3>'), SelectMultiple(description='Select Sensors:'…


🚀 Interactive visualization ready!

📝 Instructions:
  1. Select sensors to visualize
  2. Choose which labels to display
  3. Set center time and window size
  4. Use quick jump buttons to navigate
  5. Each sensor has its own independent time axis
  6. Labels are shown as colored shaded areas
  7. Look for sync events marked with red dashed lines

💡 Key Features:
  ✅ Independent time axes per sensor
  ✅ Time shifts from Sync_Parameters.yaml applied
  ✅ Preprocessing completed before plotting
  ✅ Labels from Final_Labels.csv overlaid as shaded regions
  ✅ Perfect for manual sync event identification
  ✅ Adjustable label transparency and filtering


## 6. Summary Information

In [6]:
# Display summary information
print("=== SUMMARY ===")
print(f"Subject: {SUBJECT_ID}")
print(f"Data window: {HOURS_AROUND_SYNC}h around sync start")
print(f"Sync start: {sync_start_time}")
print(f"Sync end: {sync_end_time}")
print(f"Processed sensors: {len(processed_sensors)}")
print(f"Available labels: {len(valid_labels)}")

print("\n📊 Sensor Details:")
for sensor_name, data in processed_sensors.items():
    # Get time shift applied
    original_sensor_name = sensor_name  # May be modified by modify_modality_names
    for orig_name in raw_data_parsing_config.keys():
        if orig_name in sensor_name:
            original_sensor_name = orig_name
            break
    
    sensor_corr_params = subject_correction_params.get(original_sensor_name, {})
    shift_applied = sensor_corr_params.get('shift', 0)
    
    print(f"  📈 {sensor_name}:")
    print(f"    Samples: {len(data)}")
    print(f"    Time range: {data.index.min()} to {data.index.max()}")
    print(f"    Duration: {data.index.max() - data.index.min()}")
    print(f"    Columns: {list(data.columns)}")
    print(f"    Time shift applied: {shift_applied}s")

if len(valid_labels) > 0:
    print(f"\n🏷️ Label Details:")
    print(f"  Total labels for {SUBJECT_ID}: {len(valid_labels)}")
    
    # Time range of labels
    label_start = valid_labels['Real_Start_Time'].min()
    label_end = valid_labels['Real_End_Time'].max()
    print(f"  Label time range: {label_start} to {label_end}")
    print(f"  Label duration span: {label_end - label_start}")
    
    # Most common labels
    print(f"  Top 5 most common labels:")
    for label, count in valid_labels['Label'].value_counts().head(5).items():
        total_duration = 0
        label_instances = valid_labels[valid_labels['Label'] == label]
        for _, row in label_instances.iterrows():
            duration = row['Real_End_Time'] - row['Real_Start_Time']
            total_duration += duration.total_seconds()
        avg_duration = total_duration / count if count > 0 else 0
        print(f"    📋 {label}: {count} instances (avg: {avg_duration:.1f}s)")
    
    # Labels in the data window
    window_labels = valid_labels[
        (valid_labels['Real_Start_Time'] <= data_window_end) & 
        (valid_labels['Real_End_Time'] >= data_window_start)
    ]
    print(f"  Labels in current data window: {len(window_labels)}")
    
    # Color legend
    print(f"\n🎨 Label Colors ({len(label_colors)} unique labels):")
    for i, (label, color) in enumerate(sorted(label_colors.items())[:10]):
        print(f"  🟦 {label}")
    if len(label_colors) > 10:
        print(f"  ... and {len(label_colors) - 10} more labels")

print("\n🎯 Ready for manual sync event identification with label overlay!")
print("Use the interactive plot above to examine each sensor independently.")
print("Labels from Final_Labels.csv will be displayed as colored shaded areas.")

=== SUMMARY ===
Subject: OutSense-608
Data window: 120h around sync start
Sync start: 2024-06-04 09:39:30
Sync end: 2024-06-06 10:57:40
Processed sensors: 8
Available labels: 638

📊 Sensor Details:
  📈 corsano_wrist:
    Samples: 5033216
    Time range: 2024-06-04 07:37:36 to 2024-06-06 09:32:09.967999935
    Duration: 2 days 01:54:33.967999935
    Columns: ['wrist_acc_x', 'wrist_acc_y', 'wrist_acc_z']
    Time shift applied: 0s
  📈 cosinuss_ear:
    Samples: 1297834
    Time range: 2024-06-04 11:01:15.559000015 to 2024-06-05 15:43:43.335999966
    Duration: 1 days 04:42:27.776999951
    Columns: ['ear_acc_x', 'ear_acc_y', 'ear_acc_z']
    Time shift applied: 0s
  📈 mbient_acc:
    Samples: 9179034
    Time range: 2024-06-04 08:32:28.987851620 to 2024-06-06 11:02:18.006235361
    Duration: 2 days 02:29:49.018383741
    Columns: ['x_axis_g', 'y_axis_g', 'z_axis_g']
    Time shift applied: 0s
  📈 mbient_gyro:
    Samples: 9179036
    Time range: 2024-06-04 08:32:28.993169069 to 2024-06-0