# Combined Data Checker and Interactive Visualizer

**Purpose**: Load and interactively visualize the combined sensor data with labels created by debug_labels_v2.ipynb.

**Features**:
- Load combined PKL data with all sensors and labels
- Interactive plotting with time navigation
- Label visualization as shaded areas
- Data quality inspection
- Sensor selection and filtering
- Time window controls

## 1. Configuration and Setup

In [17]:
# Import required libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as mcolors
import pickle
import json
from datetime import datetime, timedelta
import ipywidgets as widgets
from IPython.display import display, clear_output
import random

# --- Subject and directory layout --------------------------------------------
SUBJECT_ID = "OutSense-713"  # Edit this to inspect a different subject

base_dir = '/scai_data3/scratch/stirnimann_r'
results_dir = os.path.join(base_dir, 'results', SUBJECT_ID)

# --- Sync events (optional, used for navigation) -----------------------------
# Both timestamps stay None unless the CSV exists, contains a row for this
# subject, and the corresponding value parses.
sync_events_path = os.path.join(base_dir, 'Sync_Events_Times.csv')
sync_start_time = None
sync_end_time = None

try:
    if not os.path.exists(sync_events_path):
        print(f"⚠️ Sync events file not found: {sync_events_path}")
    else:
        sync_events_df = pd.read_csv(sync_events_path)
        subject_sync = sync_events_df[sync_events_df['Subject'] == SUBJECT_ID]

        if subject_sync.empty:
            print(f"⚠️ No sync events found for {SUBJECT_ID}")
        else:
            # Only the first matching row is used
            sync_start_str = subject_sync.iloc[0]['Sync Start']
            sync_end_str = subject_sync.iloc[0]['Sync End']

            # Timestamps are stored dot-separated: day.month.year.hour.minute.second
            sync_start_time = pd.to_datetime(sync_start_str, format='%d.%m.%Y.%H.%M.%S')
            sync_end_time = pd.to_datetime(sync_end_str, format='%d.%m.%Y.%H.%M.%S')

            for line in (
                f"✅ Loaded sync events for {SUBJECT_ID}:",
                f"   Sync Start: {sync_start_time}",
                f"   Sync End: {sync_end_time}",
                f"   Duration: {sync_end_time - sync_start_time}",
            ):
                print(line)
except Exception as e:
    # Sync events are a convenience; report the problem and carry on without them
    print(f"❌ Error loading sync events: {e}")

# --- Input files for this subject ---------------------------------------------
combined_data_path = os.path.join(results_dir, f'{SUBJECT_ID}_combined_data.pkl')
metadata_path = os.path.join(results_dir, f'{SUBJECT_ID}_combined_data_metadata.json')


for line in (
    "📋 Configuration:",
    f"  Subject: {SUBJECT_ID}",
    f"  Base directory: {base_dir}",
    f"  Results directory: {results_dir}",
    f"  Combined data file: {combined_data_path}",
    f"  Metadata file: {metadata_path}",
):
    print(line)

✅ Loaded sync events for OutSense-713:
   Sync Start: 2024-07-26 10:55:00
   Sync End: 2024-07-28 10:13:00
   Duration: 1 days 23:18:00
📋 Configuration:
  Subject: OutSense-713
  Base directory: /scai_data3/scratch/stirnimann_r
  Results directory: /scai_data3/scratch/stirnimann_r/results/OutSense-713
  Combined data file: /scai_data3/scratch/stirnimann_r/results/OutSense-713/OutSense-713_combined_data.pkl
  Metadata file: /scai_data3/scratch/stirnimann_r/results/OutSense-713/OutSense-713_combined_data_metadata.json


## 2. Load Combined Data and Metadata

In [18]:
# Load the combined data
print("=== LOADING COMBINED DATA ===")

# The pickle is mandatory; the metadata JSON is optional
if not os.path.exists(combined_data_path):
    raise FileNotFoundError(f"Combined data file not found: {combined_data_path}")
    
if not os.path.exists(metadata_path):
    print(f"⚠️ Metadata file not found: {metadata_path}")
    metadata = {}
else:
    # Read JSON as UTF-8 explicitly instead of relying on the locale default
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    print(f"✅ Loaded metadata")

# Load the combined data
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file.
# Only load pickles that were produced by a trusted pipeline.
print(f"📊 Loading combined data...")
with open(combined_data_path, 'rb') as f:
    combined_data = pickle.load(f)

# Compute the index bounds once; each min()/max() scans the whole index
first_timestamp = combined_data.index.min()
last_timestamp = combined_data.index.max()

print(f"✅ Successfully loaded combined data!")
print(f"📊 Data shape: {combined_data.shape}")
print(f"⏱️ Time range: {first_timestamp} to {last_timestamp}")
print(f"⏱️ Duration: {last_timestamp - first_timestamp}")
print(f"📈 Total columns: {len(combined_data.columns)}")

# Show file size
file_size = os.path.getsize(combined_data_path)
file_size_mb = file_size / (1024 * 1024)
print(f"📁 File size: {file_size_mb:.1f} MB")

# Display metadata summary if available
if metadata:
    sampling_info = metadata.get('sampling', {})
    print(f"\n📋 Metadata Summary:")
    print(f"  Creation time: {metadata.get('creation_time', 'Unknown')}")
    print(f"  Sampling frequency: {sampling_info.get('target_frequency_hz', 'Unknown')} Hz")
    print(f"  Total samples: {sampling_info.get('total_samples', 'Unknown')}")
    
    sensors_info = metadata.get('sensors', {})
    print(f"  Sensors processed: {len(sensors_info)}")
    
    labels_info = metadata.get('labels', {})
    print(f"  Labels available: {labels_info.get('total_labels_available', 'Unknown')}")
    print(f"  Labeled timestamps: {labels_info.get('labeled_timestamps', 'Unknown')}")
    print(f"  Label coverage: {labels_info.get('label_coverage_percent', 0):.1f}%")

=== LOADING COMBINED DATA ===
✅ Loaded metadata
📊 Loading combined data...
✅ Successfully loaded combined data!
📊 Data shape: (4420268, 30)
⏱️ Time range: 2024-07-26 10:06:09.261336565 to 2024-07-28 11:12:59.994135618
⏱️ Duration: 2 days 01:06:50.732799053
📈 Total columns: 30
📁 File size: 1032.8 MB

📋 Metadata Summary:
  Creation time: 2025-07-15T13:36:18.071323
  Sampling frequency: 25 Hz
  Total samples: 4420268
  Sensors processed: 7
  Labels available: 425
  Labeled timestamps: 3732432
  Label coverage: 84.4%


## 3. Data Analysis and Column Organization

In [19]:
# Inspect the layout of the combined frame: sensor channels vs. the label column
print("=== DATA STRUCTURE ANALYSIS ===")

label_column = 'Label'
sensor_columns = [col for col in combined_data.columns if col != label_column]

print(f"📊 Column breakdown:")
print(f"  Sensor columns: {len(sensor_columns)}")
print(f"  Label column: 1 ('{label_column}')")

# Bucket channels by everything before their final underscore-separated token.
# A column without an underscore forms its own group.
# NOTE(review): columns whose last token is a unit rather than an axis
# (e.g. 'mbient_acc_x_axis_g') each end up in a single-channel group.
sensor_groups = {}
for col in sensor_columns:
    sensor_name = col.rsplit('_', 1)[0]
    sensor_groups.setdefault(sensor_name, []).append(col)

print(f"\n📈 Sensor groups identified:")
for group_name, group_cols in sensor_groups.items():
    ellipsis = '...' if len(group_cols) > 3 else ''
    print(f"  📊 {group_name}: {len(group_cols)} channels")
    print(f"    Columns: {group_cols[:3]}{ellipsis}")

# Label statistics: the empty string marks unlabeled samples
if label_column not in combined_data.columns:
    print(f"⚠️ No label column found in data")
    non_empty_labels = pd.Series()
else:
    label_stats = combined_data[label_column].value_counts()
    non_empty_labels = label_stats[label_stats.index != '']
    empty_count = label_stats.get('', 0)

    print(f"\n🏷️ Label analysis:")
    print(f"  Empty labels: {empty_count} ({empty_count/len(combined_data)*100:.1f}%)")
    print(f"  Unique activities: {len(non_empty_labels)}")

    if not non_empty_labels.empty:
        # Sample counts are converted to minutes using the metadata sampling
        # rate, falling back to 25 Hz when the metadata does not provide one
        sampling_hz = metadata.get('sampling', {}).get('target_frequency_hz', 25)
        print(f"  Top 10 activities:")
        for label, count in non_empty_labels.head(10).items():
            duration_min = count / sampling_hz / 60
            print(f"    🏷️ {label}: {count} samples ({duration_min:.1f} min)")
# Generate colors for labels
def generate_label_colors(labels_list):
    """Assign a distinct RGB colour to each unique label, reproducibly.

    Args:
        labels_list: Iterable of hashable labels; duplicates are allowed.

    Returns:
        dict mapping each unique label to the RGB value produced by
        ``mcolors.hsv_to_rgb`` for that label's position in the sorted
        label order.
    """
    # Sort the de-duplicated labels so the label -> colour mapping does not
    # depend on set iteration order. String hashes are randomized per
    # interpreter session, so iterating a bare set() yields a different order
    # (and therefore different colours) from one run to the next.
    unique_labels = sorted(set(labels_list), key=str)

    label_to_color = {}
    for i, label in enumerate(unique_labels):
        # Step the hue by the golden angle so neighbouring indices land far
        # apart on the colour wheel; cycle saturation and brightness a little
        # to separate labels whose hues end up close together.
        hue = (i * 137.5) % 360
        saturation = 0.7 + (i % 3) * 0.1
        value = 0.8 + (i % 2) * 0.15

        label_to_color[label] = mcolors.hsv_to_rgb([hue/360, saturation, value])

    return label_to_color

# Build the label -> colour lookup used when shading activities
if len(non_empty_labels) == 0:
    label_colors = {}
    print(f"\n🎨 No labels to color")
else:
    label_colors = generate_label_colors(non_empty_labels.index.tolist())
    print(f"\n🎨 Generated colors for {len(label_colors)} unique labels")

# Count missing values per sensor channel
print(f"\n🔍 Data quality check:")
missing_data = combined_data[sensor_columns].isnull().sum()
total_missing = missing_data.sum()

if total_missing <= 0:
    print(f"  ✅ No missing data in sensor columns")
else:
    print(f"  ⚠️ Missing data detected:")
    affected_channels = missing_data[missing_data > 0]
    for col, missing_count in affected_channels.items():
        missing_pct = missing_count / len(combined_data) * 100
        print(f"    {col}: {missing_count} samples ({missing_pct:.1f}%)")

# Preview the first rows
print(f"\n👀 Data sample (first 5 rows):")
display(combined_data.head())

# List at most the first 20 column names, then summarise the remainder
max_listed = 20
print(f"\n📋 All columns ({len(combined_data.columns)}):")
for position, col in enumerate(combined_data.columns[:max_listed], start=1):
    print(f"  {position:2d}. {col}")
remaining = len(combined_data.columns) - max_listed
if remaining > 0:
    print(f"      ... and {remaining} more columns")

=== DATA STRUCTURE ANALYSIS ===
📊 Column breakdown:
  Sensor columns: 29
  Label column: 1 ('Label')

📈 Sensor groups identified:
  📊 corsano_wrist_wrist_acc: 3 channels
    Columns: ['corsano_wrist_wrist_acc_x', 'corsano_wrist_wrist_acc_y', 'corsano_wrist_wrist_acc_z']
  📊 cosinuss_ear_ear_acc: 3 channels
    Columns: ['cosinuss_ear_ear_acc_x', 'cosinuss_ear_ear_acc_y', 'cosinuss_ear_ear_acc_z']
  📊 mbient_acc_x_axis: 1 channels
    Columns: ['mbient_acc_x_axis_g']
  📊 mbient_acc_y_axis: 1 channels
    Columns: ['mbient_acc_y_axis_g']
  📊 mbient_acc_z_axis: 1 channels
    Columns: ['mbient_acc_z_axis_g']
  📊 mbient_gyro_x_axis: 1 channels
    Columns: ['mbient_gyro_x_axis_dps']
  📊 mbient_gyro_y_axis: 1 channels
    Columns: ['mbient_gyro_y_axis_dps']
  📊 mbient_gyro_z_axis: 1 channels
    Columns: ['mbient_gyro_z_axis_dps']
  📊 vivalnk_acc_vivalnk_acc: 3 channels
    Columns: ['vivalnk_acc_vivalnk_acc_x', 'vivalnk_acc_vivalnk_acc_y', 'vivalnk_acc_vivalnk_acc_z']
  📊 sensomative_botto

Unnamed: 0,corsano_wrist_wrist_acc_x,corsano_wrist_wrist_acc_y,corsano_wrist_wrist_acc_z,cosinuss_ear_ear_acc_x,cosinuss_ear_ear_acc_y,cosinuss_ear_ear_acc_z,mbient_acc_x_axis_g,mbient_acc_y_axis_g,mbient_acc_z_axis_g,mbient_gyro_x_axis_dps,...,sensomative_bottom_bottom_value_6,sensomative_bottom_bottom_value_7,sensomative_bottom_bottom_value_8,sensomative_bottom_bottom_value_9,sensomative_bottom_bottom_value_10,sensomative_bottom_bottom_value_11,corsano_bioz_bioz_acc_x,corsano_bioz_bioz_acc_y,corsano_bioz_bioz_acc_z,Label
2024-07-26 10:06:09.261336565,,,,,,,0.026,-0.022,1.024,0.0915,...,,,,,,,,,,
2024-07-26 10:06:09.301336576,,,,,,,0.027,-0.0225,1.0325,0.0,...,,,,,,,,,,
2024-07-26 10:06:09.341336588,,,,,,,0.0265,-0.023,1.03,-0.0305,...,,,,,,,,,,
2024-07-26 10:06:09.381336600,,,,,,,0.025,-0.023,1.0315,-0.061,...,,,,,,,,,,
2024-07-26 10:06:09.421336612,,,,,,,0.026,-0.023,1.03,0.0915,...,,,,,,,,,,



📋 All columns (30):
   1. corsano_wrist_wrist_acc_x
   2. corsano_wrist_wrist_acc_y
   3. corsano_wrist_wrist_acc_z
   4. cosinuss_ear_ear_acc_x
   5. cosinuss_ear_ear_acc_y
   6. cosinuss_ear_ear_acc_z
   7. mbient_acc_x_axis_g
   8. mbient_acc_y_axis_g
   9. mbient_acc_z_axis_g
  10. mbient_gyro_x_axis_dps
  11. mbient_gyro_y_axis_dps
  12. mbient_gyro_z_axis_dps
  13. vivalnk_acc_vivalnk_acc_x
  14. vivalnk_acc_vivalnk_acc_y
  15. vivalnk_acc_vivalnk_acc_z
  16. sensomative_bottom_bottom_value_1
  17. sensomative_bottom_bottom_value_2
  18. sensomative_bottom_bottom_value_3
  19. sensomative_bottom_bottom_value_4
  20. sensomative_bottom_bottom_value_5
      ... and 10 more columns


## 4. Interactive Data Visualization

In [20]:
# Build every widget used by the interactive viewer
print("=== INTERACTIVE DATA VISUALIZATION ===")
print("🎯 Interactive plotting with sensor selection and label overlay")
print("🔍 Navigate through the combined synchronized data")

# Shared description styling: let each description take the width it needs
_desc_style = {'description_width': 'initial'}


def _make_button(description, button_style='', width=None):
    """Create a Button; a fixed-width layout is attached only if width is given."""
    if width is None:
        return widgets.Button(description=description, button_style=button_style)
    return widgets.Button(description=description, button_style=button_style,
                          layout=widgets.Layout(width=width))


# --- Sensor / channel pickers -------------------------------------------------
_group_names = list(sensor_groups.keys())
sensor_group_selection = widgets.SelectMultiple(
    options=_group_names,
    value=_group_names[:2],  # pre-select up to the first two groups
    description='Select Sensors:',
    style=_desc_style,
    layout=widgets.Layout(height='150px', width='300px')
)

# Options are replaced whenever the sensor-group selection changes
channel_selection = widgets.SelectMultiple(
    options=sensor_columns[:20],
    value=sensor_columns[:5],
    description='Select Channels:',
    style=_desc_style,
    layout=widgets.Layout(height='200px', width='350px')
)

# --- Label overlay controls ---------------------------------------------------
show_labels = widgets.Checkbox(
    value=True,
    description='Show Labels',
    style=_desc_style
)

label_alpha = widgets.FloatSlider(
    value=0.3,
    min=0.1,
    max=0.8,
    step=0.1,
    description='Label Alpha:',
    style=_desc_style
)

# Offer every non-empty label and pre-select at most the first ten; with no
# labels available both lists are simply empty
_label_names = list(non_empty_labels.index)
label_filter = widgets.SelectMultiple(
    options=_label_names,
    value=_label_names[:10],
    description='Show Labels:',
    style=_desc_style,
    layout=widgets.Layout(height='150px', width='300px')
)

# --- Time window --------------------------------------------------------------
data_start = combined_data.index.min()
data_end = combined_data.index.max()
data_center = data_start + (data_end - data_start) / 2

center_time_text = widgets.Text(
    value=data_center.strftime('%Y-%m-%d %H:%M:%S'),
    description='Center Time:',
    style=_desc_style,
    layout=widgets.Layout(width='300px')
)

window_minutes = widgets.IntSlider(
    value=10,   # default window: 10 minutes
    min=1,
    max=120,    # upper limit: 2 hours
    step=1,
    description='Window (min):',
    style=_desc_style
)

# --- Step navigation ----------------------------------------------------------
nav_backward_5min = _make_button('⏪ -5min', width='90px')
nav_forward_5min = _make_button('⏩ +5min', width='90px')
nav_backward_30min = _make_button('⏪ -30min', width='100px')
nav_forward_30min = _make_button('⏩ +30min', width='100px')

# --- Quick jumps --------------------------------------------------------------
jump_data_start = _make_button('🔝 Data Start', button_style='info')
jump_data_center = _make_button('🎯 Data Center', button_style='success')
jump_data_end = _make_button('🔚 Data End', button_style='info')

# Sync jump buttons exist only when the matching sync timestamp was loaded
sync_jump_buttons = []
if sync_start_time is not None:
    jump_sync_start = _make_button('🎯 Sync Start', button_style='warning', width='110px')
    sync_jump_buttons.append(jump_sync_start)

if sync_end_time is not None:
    jump_sync_end = _make_button('🎯 Sync End', button_style='warning', width='110px')
    sync_jump_buttons.append(jump_sync_end)

# --- Plot controls ------------------------------------------------------------
plot_style = widgets.Dropdown(
    options=['overlay', 'subplots'],
    value='overlay',
    description='Plot Style:',
    style=_desc_style
)

auto_plot = widgets.Checkbox(
    value=True,
    description='Auto-plot on navigation',
    style=_desc_style
)

plot_button = _make_button('📈 Plot Data', button_style='primary', width='150px')

# Output area that receives the rendered figure and status messages
plot_output = widgets.Output()

# Function to update channel selection based on sensor groups
def update_channel_selection(*args):
    """Refresh the channel picker from the currently selected sensor groups.

    The channel options become the concatenated channels of every selected
    group (in selection order), and up to the first ten of them are selected.
    Positional arguments are accepted and ignored so the function can be used
    both as a widget observer and as a plain call.
    """
    available_channels = [
        channel
        for sensor in sensor_group_selection.value
        if sensor in sensor_groups
        for channel in sensor_groups[sensor]
    ]

    # Options must be assigned before the value, which has to be a subset of them
    channel_selection.options = available_channels
    channel_selection.value = available_channels[:10]

# Re-run update_channel_selection whenever the 'value' trait of the sensor
# group picker changes, so the channel picker lists the selected groups' channels
sensor_group_selection.observe(update_channel_selection, names='value')

# Helper functions
def get_center_time():
    """Return the window centre entered in the text box as a timestamp.

    Returns:
        The parsed value of ``center_time_text``, or ``data_center`` when the
        text cannot be parsed or parses to a missing timestamp (``NaT``),
        which is what an empty text box produces.
    """
    try:
        parsed = pd.to_datetime(center_time_text.value)
    except (ValueError, TypeError, OverflowError):
        # Unparseable user input: fall back to the middle of the recording.
        # Deliberately narrower than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        return data_center

    # NaT compares False against everything, which would break the clamping
    # and masking done by callers, so treat it like invalid input.
    if pd.isna(parsed):
        return data_center
    return parsed

def update_center_time(new_time):
    """Write ``new_time`` into the centre-time text box, clamped to the data range.

    Args:
        new_time: Requested centre timestamp; values outside
            ``[data_start, data_end]`` are pulled back to the nearest bound.
    """
    upper_bounded = min(data_end, new_time)
    clamped = max(data_start, upper_bounded)
    center_time_text.value = clamped.strftime('%Y-%m-%d %H:%M:%S')

def _find_label_segments(label_series, selected_labels):
    """Return ``(label, start, end)`` for each contiguous run of a selected label.

    A run is a maximal stretch of consecutive samples in ``label_series`` that
    carry the same label. Runs are detected on the unfiltered series, so an
    unlabeled or deselected stretch always ends the run that precedes it.

    Args:
        label_series: Label values of the plot window, indexed by timestamp.
        selected_labels: Labels to report; runs of any other value are skipped.

    Returns:
        List of ``(label, start, end)`` tuples in time order. ``end`` is the
        timestamp of the first sample after the run, or the timestamp of the
        run's last sample when the run reaches the end of ``label_series``.
    """
    n_samples = len(label_series)
    wanted = set(selected_labels)
    if n_samples == 0 or not wanted:
        return []

    # Integer codes make run detection independent of the label dtype;
    # missing values all receive the code -1.
    codes, uniques = pd.factorize(label_series)
    run_starts = np.flatnonzero(np.r_[True, codes[1:] != codes[:-1]])
    run_stops = np.r_[run_starts[1:], n_samples]  # exclusive end positions

    timestamps = label_series.index
    segments = []
    for first, stop in zip(run_starts, run_stops):
        code = codes[first]
        if code < 0:
            continue  # run of missing labels
        label = uniques[code]
        if label not in wanted:
            continue
        # Extend to the next sample so adjacent runs share a boundary
        end_pos = min(stop, n_samples - 1)
        segments.append((label, timestamps[first], timestamps[end_pos]))
    return segments


def plot_data(btn):
    """Plot the selected channels for the current time window with label shading.

    Reads the current widget state (channels, centre time, window length, plot
    style, label options), slices ``combined_data`` to the window and renders
    the figure into ``plot_output``. Any exception is caught and printed with
    its traceback inside the output area instead of propagating.

    Args:
        btn: Click-callback argument; not used. Navigation helpers pass None.
    """
    with plot_output:
        clear_output(wait=True)
        
        try:
            from matplotlib.patches import Patch  # For creating legend handles
            selected_channels = list(channel_selection.value)
            if not selected_channels:
                print("❌ Please select at least one channel")
                return
            
            center_time = get_center_time()
            window_mins = window_minutes.value
            
            # Window around the centre, clipped to the available data range
            half_window = pd.Timedelta(minutes=window_mins/2)
            plot_start = max(center_time - half_window, data_start)
            plot_end = min(center_time + half_window, data_end)
            
            print(f"📊 Plotting {len(selected_channels)} channels")
            print(f"⏱️ Time window: {plot_start} to {plot_end} ({window_mins} minutes)")
            print(f"🎯 Center time: {center_time}")
            
            # Boolean-mask indexing already returns a new frame, so no extra copy
            mask = (combined_data.index >= plot_start) & (combined_data.index <= plot_end)
            window_df = combined_data[mask]
            
            if window_df.empty:
                print("❌ No data in selected time window")
                print(f"   Requested window: {plot_start} to {plot_end}")
                print(f"   Data available: {data_start} to {data_end}")
                
                # Explain where the window lies relative to the data
                if plot_start > data_end:
                    time_diff = plot_start - data_end
                    print(f"   Window starts {time_diff} after data ends")
                elif plot_end < data_start:
                    time_diff = data_start - plot_end
                    print(f"   Window ends {time_diff} before data starts")
                else:
                    print(f"   Window overlaps with data range but no samples found")
                    # Look one hour either side for the nearest samples
                    one_hour = pd.Timedelta(hours=1)
                    extended_mask = (combined_data.index >= plot_start - one_hour) & (combined_data.index <= plot_end + one_hour)
                    nearby_data = combined_data[extended_mask]
                    if not nearby_data.empty:
                        before = nearby_data[nearby_data.index <= plot_start]
                        after = nearby_data[nearby_data.index >= plot_end]
                        print(f"   Found {len(nearby_data)} samples within ±1 hour")
                        print(f"   Closest before: {before.index.max() if len(before) > 0 else 'None'}")
                        print(f"   Closest after: {after.index.min() if len(after) > 0 else 'None'}")
                
                return
            
            print(f"📊 Plotting {len(window_df)} samples")
            
            if plot_style.value == 'overlay':
                # Single axes: each channel is scaled to [0, 1] and stacked in
                # its own unit-high band starting at its index
                fig, ax = plt.subplots(1, 1, figsize=(16, 8))
                
                for i, channel in enumerate(selected_channels):
                    if channel not in window_df.columns:
                        continue
                    data_col = window_df[channel].dropna()
                    if len(data_col) == 0:
                        continue
                    
                    data_min, data_max = data_col.min(), data_col.max()
                    if data_max > data_min:
                        normalized_data = (data_col - data_min) / (data_max - data_min) + i
                    else:
                        # Constant signal: draw it flat at the bottom of its
                        # band rather than at its raw value, which would fall
                        # outside the stacked layout
                        normalized_data = data_col - data_min + i
                    
                    ax.plot(data_col.index, normalized_data, 
                           label=channel, alpha=0.8, linewidth=1)
                
                axes = [ax]
                
            else:
                # One subplot per channel on a shared time axis
                fig, axes = plt.subplots(len(selected_channels), 1, 
                                       figsize=(16, 3*len(selected_channels)), 
                                       sharex=True)
                if len(selected_channels) == 1:
                    axes = [axes]  # plt.subplots returns a bare Axes for one row
                
                for i, (ax, channel) in enumerate(zip(axes, selected_channels)):
                    if channel not in window_df.columns:
                        continue
                    data_col = window_df[channel].dropna()
                    if len(data_col) > 0:
                        ax.plot(data_col.index, data_col, 
                               color=f'C{i}', alpha=0.8, linewidth=1)
                        ax.set_ylabel(channel.split('_')[-1])  # Use last part as ylabel
                        ax.set_title(channel)
                        ax.grid(True, alpha=0.3)
            
            # Shade activity labels on every axes
            visible_activity_labels = []
            if show_labels.value and len(non_empty_labels) > 0:
                selected_label_types = list(label_filter.value)
                
                # Runs are detected on the full label column of the window so
                # that shading stops where a label actually stops
                label_segments = _find_label_segments(window_df[label_column],
                                                      selected_label_types)
                
                if label_segments:
                    visible_activity_labels = list({name for name, _, _ in label_segments})
                    
                    for ax in axes:
                        y_min, y_max = ax.get_ylim()
                        
                        for label_name, start_time, end_time in label_segments:
                            color = label_colors.get(label_name, 'gray')
                            ax.axvspan(start_time, end_time, 
                                     alpha=label_alpha.value, 
                                     color=color,
                                     zorder=0)
                            
                            # Only segments longer than a minute get a text tag
                            duration = end_time - start_time
                            if duration > pd.Timedelta(minutes=1):
                                mid_time = start_time + duration / 2
                                y_pos = y_max - (y_max - y_min) * 0.05
                                ax.text(mid_time, y_pos, label_name, 
                                       ha='center', va='top', rotation=0,
                                       fontsize=9, alpha=0.9,
                                       bbox=dict(boxstyle='round,pad=0.2', 
                                               facecolor='white', alpha=0.8))
                    
                    print(f"🏷️ Showing {len(label_segments)} label segments")
            
            # Format all axes
            for ax in axes:
                ax.set_xlim(plot_start, plot_end)
                ax.grid(True, alpha=0.3)
                ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
                ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=max(1, window_mins//10)))
                plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
            
            # The channel legend is drawn only in overlay mode with at most 15 channels
            has_sensor_legend = plot_style.value == 'overlay' and len(selected_channels) <= 15
            # visible_activity_labels is non-empty only when labels are shown
            has_activity_legend = bool(visible_activity_labels)
            
            if has_sensor_legend:
                sensor_legend = axes[0].legend(bbox_to_anchor=(1.02, 1), loc='upper left', 
                                             fontsize=8, title='Sensor Channels')
                axes[0].add_artist(sensor_legend)  # Keep this legend when adding activity legend
            
            if has_activity_legend:
                activity_handles = [
                    Patch(facecolor=label_colors.get(label_name, 'gray'),
                          alpha=label_alpha.value, label=label_name)
                    for label_name in sorted(visible_activity_labels)
                ]
                
                if plot_style.value == 'overlay':
                    # Place below sensor legend if both exist
                    y_pos = 0.7 if has_sensor_legend else 1.0
                else:
                    # Subplots mode: top of the first subplot
                    y_pos = 1
                activity_legend = axes[0].legend(handles=activity_handles, 
                                                bbox_to_anchor=(1.02, y_pos), 
                                                loc='upper left', fontsize=8, 
                                                title='Activities')
                
                print(f"🎨 Added legend for {len(visible_activity_labels)} visible activities")
            
            # Title and layout
            title = f'Combined Data Visualization - {plot_style.value.title()} Mode\n'
            title += f'Window: {plot_start.strftime("%H:%M:%S")} to {plot_end.strftime("%H:%M:%S")} '
            title += f'({len(selected_channels)} channels, {len(window_df)} samples)'
            
            plt.suptitle(title, fontsize=14, y=0.98)
            plt.tight_layout()
            
            # Reserve room on the right for whichever legends were drawn
            if plot_style.value == 'overlay':
                if has_sensor_legend and has_activity_legend:
                    plt.subplots_adjust(right=0.75, top=0.92)  # Space for both legends
                elif has_sensor_legend or has_activity_legend:
                    plt.subplots_adjust(right=0.85, top=0.92)  # Space for one legend
                else:
                    plt.subplots_adjust(top=0.92)  # No legends
            else:
                if has_activity_legend:
                    plt.subplots_adjust(right=0.85, top=0.95)  # Space for activity legend
                else:
                    plt.subplots_adjust(top=0.95)  # No legends
            
            plt.show()
            
        except Exception as e:
            # Keep the UI alive: report the failure inside the output area
            print(f"❌ Error creating plot: {e}")
            import traceback
            traceback.print_exc()

# Navigation functions
def _shift_center_time(delta_minutes):
    """Move the window centre by ``delta_minutes`` and re-plot if auto-plot is on."""
    new_time = get_center_time() + pd.Timedelta(minutes=delta_minutes)
    update_center_time(new_time)
    if auto_plot.value:
        plot_data(None)


def navigate_backward_5min(btn):
    """Button callback: move the window centre 5 minutes earlier."""
    _shift_center_time(-5)


def navigate_forward_5min(btn):
    """Button callback: move the window centre 5 minutes later."""
    _shift_center_time(5)


def navigate_backward_30min(btn):
    """Button callback: move the window centre 30 minutes earlier."""
    _shift_center_time(-30)


def navigate_forward_30min(btn):
    """Button callback: move the window centre 30 minutes later."""
    _shift_center_time(30)

def jump_to_data_start(btn):
    """Button callback: centre the window half a window-length after the first sample."""
    half_window = pd.Timedelta(minutes=window_minutes.value/2)
    update_center_time(data_start + half_window)
    if not auto_plot.value:
        return
    plot_data(None)

def jump_to_data_center(btn):
    """Button callback: centre the window on the midpoint of the data range."""
    update_center_time(data_center)
    if not auto_plot.value:
        return
    plot_data(None)

def jump_to_data_end(btn):
    """Button callback: centre the window half a window-length before the last sample."""
    half_window = pd.Timedelta(minutes=window_minutes.value/2)
    update_center_time(data_end - half_window)
    if not auto_plot.value:
        return
    plot_data(None)

def _jump_to_sync_time(sync_time, which):
    """Centre the window on a sync timestamp, falling back to the nearest data bound.

    Args:
        sync_time: Sync timestamp to jump to, or None when it was not loaded
            (in which case nothing happens).
        which: ``'start'`` or ``'end'``; only used in the printed messages.
    """
    if sync_time is None:
        return

    print(f"🎯 Jumping to sync {which}: {sync_time}")

    if data_start <= sync_time <= data_end:
        # Report how many samples fall inside the window around the sync time.
        # Summing the boolean mask counts rows without materialising a slice.
        window = pd.Timedelta(minutes=window_minutes.value/2)
        mask = (combined_data.index >= sync_time - window) & (combined_data.index <= sync_time + window)
        data_count = int(mask.sum())
        print(f"   Data available in ±{window_minutes.value/2}min window: {data_count} samples")

        update_center_time(sync_time)
    else:
        print(f"⚠️ Warning: Sync {which} time is outside data range!")
        print(f"   Data range: {data_start} to {data_end}")
        print(f"   Sync time: {sync_time}")

        # Use whichever end of the data lies closest to the sync time
        if sync_time < data_start:
            print(f"   Using data start instead: {data_start}")
            update_center_time(data_start)
        else:
            print(f"   Using data end instead: {data_end}")
            update_center_time(data_end)

    if auto_plot.value:
        plot_data(None)


def jump_to_sync_start(btn):
    """Jump to sync start time"""
    _jump_to_sync_time(sync_start_time, 'start')


def jump_to_sync_end(btn):
    """Jump to sync end time"""
    _jump_to_sync_time(sync_end_time, 'end')

# Pair every always-present control with its click handler.
_click_handlers = [
    (plot_button, plot_data),
    (nav_backward_5min, navigate_backward_5min),
    (nav_forward_5min, navigate_forward_5min),
    (nav_backward_30min, navigate_backward_30min),
    (nav_forward_30min, navigate_forward_30min),
    (jump_data_start, jump_to_data_start),
    (jump_data_center, jump_to_data_center),
    (jump_data_end, jump_to_data_end),
]

# The sync jump buttons are only referenced when the matching sync time was loaded.
if sync_start_time is not None:
    _click_handlers.append((jump_sync_start, jump_to_sync_start))
if sync_end_time is not None:
    _click_handlers.append((jump_sync_end, jump_to_sync_end))

for _button, _handler in _click_handlers:
    _button.on_click(_handler)

# Initialize channel selection
update_channel_selection()

print("✅ Interactive controls ready!")

=== INTERACTIVE DATA VISUALIZATION ===
🎯 Interactive plotting with sensor selection and label overlay
🔍 Navigate through the combined synchronized data
✅ Interactive controls ready!


## 5. Interactive Control Panel

In [21]:
# Build and display the interactive control panel, then print usage notes.
print("🎛️ Interactive Control Panel")
print("Use the controls below to explore your combined sensor data")

# --- Sensor section -------------------------------------------------------
sensor_controls = widgets.VBox([
    widgets.HTML("<h4>📊 Sensor Selection</h4>"),
    sensor_group_selection,
    channel_selection,
    plot_style,
])

# --- Label section: replaced by a short notice when there are no labels ----
if len(non_empty_labels) > 0:
    label_controls = widgets.VBox([
        widgets.HTML("<h4>🏷️ Label Controls</h4>"),
        show_labels,
        label_alpha,
        label_filter,
    ])
else:
    label_controls = widgets.HTML("<p>No labels available</p>")

# --- Time section ----------------------------------------------------------
# Without sync buttons, two empty HTML placeholders keep the layout slots.
if sync_jump_buttons:
    _sync_header = widgets.HTML("<b>Sync Events:</b>")
    _sync_row = widgets.HBox(sync_jump_buttons)
else:
    _sync_header = widgets.HTML("")
    _sync_row = widgets.HTML("")

time_controls = widgets.VBox([
    widgets.HTML("<h4>⏱️ Time Navigation</h4>"),
    center_time_text,
    window_minutes,
    widgets.HTML("<b>Fine Navigation:</b>"),
    widgets.HBox([nav_backward_5min, nav_forward_5min]),
    widgets.HTML("<b>Coarse Navigation:</b>"),
    widgets.HBox([nav_backward_30min, nav_forward_30min]),
    widgets.HTML("<b>Quick Jumps:</b>"),
    widgets.HBox([jump_data_start, jump_data_center, jump_data_end]),
    _sync_header,
    _sync_row,
    auto_plot,
])

# --- Plot section ----------------------------------------------------------
_plot_style_help = "".join([
    "<p><b>Plot Styles:</b></p>",
    "<p>• <b>Overlay</b>: All channels on one plot (normalized)</p>",
    "<p>• <b>Subplots</b>: Each channel in separate subplot</p>",
])
plot_controls = widgets.VBox([
    widgets.HTML("<h4>📈 Plot Controls</h4>"),
    plot_button,
    widgets.HTML(_plot_style_help),
])

# Place the four sections side by side.
all_controls = widgets.HBox([
    sensor_controls,
    label_controls,
    time_controls,
    plot_controls,
])

# Title, controls, a divider, then the plot output area.
display(widgets.VBox([
    widgets.HTML("<h3>🎛️ Combined Data Interactive Visualizer</h3>"),
    all_controls,
    widgets.HTML("<hr>"),
    plot_output,
]))

# Usage notes; the sync-related lines only appear when sync buttons exist.
_instructions = [
    "  1. Select sensor groups to focus on specific sensor types",
    "  2. Choose individual channels to plot",
    "  3. Set time window and center time",
    "  4. Use navigation buttons to move through data",
    "  5. Toggle labels and adjust transparency",
    "  6. Choose between overlay or subplot visualization",
]
if sync_jump_buttons:
    _instructions.append("  7. Use sync jump buttons to quickly navigate to sync events")

_tips = [
    "  • Use auto-plot for seamless navigation",
    "  • Overlay mode normalizes data for comparison",
    "  • Subplot mode shows actual data values",
    "  • Labels appear as shaded background areas",
]
if sync_jump_buttons:
    _tips.append("  • Sync buttons jump directly to synchronization events")
_tips.append("  • Click 'Plot Data' to refresh manually")

print("\n🚀 Interactive visualizer ready!")
print("\n📝 Instructions:")
print("\n".join(_instructions))
print("\n💡 Tips:")
print("\n".join(_tips))

🎛️ Interactive Control Panel
Use the controls below to explore your combined sensor data


VBox(children=(HTML(value='<h3>🎛️ Combined Data Interactive Visualizer</h3>'), HBox(children=(VBox(children=(H…


🚀 Interactive visualizer ready!

📝 Instructions:
  1. Select sensor groups to focus on specific sensor types
  2. Choose individual channels to plot
  3. Set time window and center time
  4. Use navigation buttons to move through data
  5. Toggle labels and adjust transparency
  6. Choose between overlay or subplot visualization
  7. Use sync jump buttons to quickly navigate to sync events

💡 Tips:
  • Use auto-plot for seamless navigation
  • Overlay mode normalizes data for comparison
  • Subplot mode shows actual data values
  • Labels appear as shaded background areas
  • Sync buttons jump directly to synchronization events
  • Click 'Plot Data' to refresh manually


## 6. Data Statistics and Quality Report

In [22]:
# Print a quality report for the combined data: time span and sampling rate,
# per-sensor-group coverage and value ranges, label statistics, memory use and
# an overall verdict.
print("=== COMBINED DATA QUALITY REPORT ===")

# Time statistics
total_samples = len(combined_data)
total_duration = data_end - data_start
total_duration_seconds = total_duration.total_seconds()
# An empty or single-timestamp frame has no time span; report NaN/NaT instead
# of raising ZeroDivisionError.
sampling_freq = total_samples / total_duration_seconds if total_duration_seconds > 0 else float('nan')
time_resolution = total_duration / total_samples if total_samples > 0 else pd.NaT
print(f"\n⏱️ Temporal Statistics:")
print(f"  📅 Start time: {data_start}")
print(f"  📅 End time: {data_end}")
print(f"  ⏱️ Total duration: {total_duration}")
print(f"  📊 Total samples: {total_samples:,}")
print(f"  🔄 Actual sampling frequency: {sampling_freq:.2f} Hz")
print(f"  📏 Time resolution: {time_resolution}")

# Sensor statistics
print(f"\n📈 Sensor Statistics:")
print(f"  🔧 Total sensor groups: {len(sensor_groups)}")
print(f"  📊 Total sensor channels: {len(sensor_columns)}")

for sensor_name, channels in sensor_groups.items():
    # Coverage = share of non-null cells across all channels of the group.
    sensor_data = combined_data[channels]
    total_values = sensor_data.size
    missing_values = sensor_data.isnull().sum().sum()
    # A group without cells (no rows or no channels) counts as 0% covered.
    coverage = (total_values - missing_values) / total_values * 100 if total_values > 0 else 0.0

    print(f"    📊 {sensor_name}:")
    print(f"      Channels: {len(channels)}")
    print(f"      Coverage: {coverage:.1f}%")
    print(f"      Missing values: {missing_values:,}")

    # Value ranges (numeric channels only)
    numeric_data = sensor_data.select_dtypes(include=[np.number])
    if not numeric_data.empty:
        overall_min = numeric_data.min().min()
        overall_max = numeric_data.max().max()
        # Mean of the per-channel means / standard deviations, not pooled stats.
        overall_mean = numeric_data.mean().mean()
        overall_std = numeric_data.std().mean()

        print(f"      Value range: [{overall_min:.3f}, {overall_max:.3f}]")
        print(f"      Mean ± Std: {overall_mean:.3f} ± {overall_std:.3f}")

# Label statistics
if len(non_empty_labels) > 0:
    print(f"\n🏷️ Label Statistics:")
    labels = combined_data[label_column]
    # NOTE(review): NaN labels compare unequal to '' and are counted as
    # labeled here; confirm the label column uses '' for "no label".
    total_labeled = (labels != '').sum()
    label_coverage = total_labeled / len(combined_data) * 100

    print(f"  📋 Unique activities: {len(non_empty_labels)}")
    print(f"  📊 Labeled samples: {total_labeled:,} ({label_coverage:.1f}%)")
    print(f"  📊 Unlabeled samples: {len(combined_data) - total_labeled:,} ({100-label_coverage:.1f}%)")

    # Convert per-label sample counts to durations via the sampling frequency.
    print(f"\n🏷️ Activity Durations:")
    for label, count in non_empty_labels.head(15).items():
        duration_seconds = count / sampling_freq
        duration_minutes = duration_seconds / 60
        percentage = count / len(combined_data) * 100

        if duration_minutes >= 1:
            print(f"    🏷️ {label}: {duration_minutes:.1f} min ({percentage:.1f}%)")
        else:
            print(f"    🏷️ {label}: {duration_seconds:.1f} sec ({percentage:.1f}%)")

    if len(non_empty_labels) > 15:
        remaining = len(non_empty_labels) - 15
        print(f"    ... and {remaining} more activities")

    # Label transitions.
    # shift() leaves NaN in the first row, so the comparison always flags row 0
    # as a change: the raw count is the number of segments, and the number of
    # transitions between segments is one less.
    segment_count = int((labels != labels.shift()).sum())
    label_changes = max(segment_count - 1, 0)
    print(f"\n🔄 Label Transitions: {label_changes:,}")
    avg_segment_length = len(combined_data) / segment_count if segment_count > 0 else 0
    avg_segment_duration = avg_segment_length / sampling_freq
    print(f"  📊 Average segment length: {avg_segment_length:.0f} samples ({avg_segment_duration:.1f}s)")

else:
    print(f"\n🏷️ Label Statistics: No labels available")

# Memory usage
memory_usage = combined_data.memory_usage(deep=True).sum()
memory_mb = memory_usage / (1024 * 1024)
print(f"\n💾 Memory Usage:")
print(f"  📊 DataFrame size: {memory_mb:.1f} MB")
print(f"  💽 File size: {file_size_mb:.1f} MB")
if file_size_mb > 0:
    print(f"  📈 Compression ratio: {memory_mb/file_size_mb:.1f}x")
else:
    print("  📈 Compression ratio: N/A")

# Data quality summary
total_missing = combined_data[sensor_columns].isnull().sum().sum()
total_sensor_values = len(combined_data) * len(sensor_columns)
overall_coverage = (
    (total_sensor_values - total_missing) / total_sensor_values * 100
    if total_sensor_values > 0 else 0.0
)

print(f"\n✅ Overall Data Quality:")
print(f"  📊 Sensor data coverage: {overall_coverage:.2f}%")
print(f"  🏷️ Label coverage: {label_coverage:.2f}%" if len(non_empty_labels) > 0 else "  🏷️ Label coverage: 0%")

# Check sampling regularity instead of asserting it: the largest gap between
# consecutive index timestamps must stay within a tolerance of the median gap.
REGULAR_GAP_TOLERANCE = 1.5
sample_gaps = combined_data.index.to_series().diff().dropna()
if sample_gaps.empty:
    print("  ⏱️ Temporal consistency: ⚠️ Not enough samples to assess")
else:
    median_gap = sample_gaps.median()
    max_gap = sample_gaps.max()
    if max_gap <= REGULAR_GAP_TOLERANCE * median_gap:
        print(f"  ⏱️ Temporal consistency: ✅ Regular {sampling_freq:.1f}Hz sampling")
    else:
        print(f"  ⏱️ Temporal consistency: ⚠️ Irregular sampling (median gap {median_gap}, max gap {max_gap})")

# NOTE(review): sensor alignment is stated, not checked, in this cell.
print(f"  🔄 Synchronization: ✅ All sensors aligned")

if overall_coverage > 95:
    print(f"  🌟 Excellent data quality! Ready for AI preprocessing.")
elif overall_coverage > 90:
    print(f"  ✅ Good data quality. Minor gaps acceptable for most applications.")
elif overall_coverage > 80:
    print(f"  ⚠️ Moderate data quality. Consider gap filling strategies.")
else:
    print(f"  ❌ Poor data quality. Significant missing data detected.")

# NOTE: the printed snippet uses pickle.load, which can execute arbitrary code;
# only load files you produced yourself.
print(f"\n📁 Data ready for use! Load with:")
print("import pickle")
print(f"with open('{combined_data_path}', 'rb') as f:")
print("    data = pickle.load(f)")

=== COMBINED DATA QUALITY REPORT ===

⏱️ Temporal Statistics:
  📅 Start time: 2024-07-26 10:06:09.261336565
  📅 End time: 2024-07-28 11:12:59.994135618
  ⏱️ Total duration: 2 days 01:06:50.732799053
  📊 Total samples: 4,420,268
  🔄 Actual sampling frequency: 25.00 Hz
  📏 Time resolution: 0 days 00:00:00.040000002

📈 Sensor Statistics:
  🔧 Total sensor groups: 11
  📊 Total sensor channels: 29
    📊 corsano_wrist_wrist_acc:
      Channels: 3
      Coverage: 85.3%
      Missing values: 1,947,960
      Value range: [-3746.000, 4088.000]
      Mean ± Std: 135.376 ± 227.108
    📊 cosinuss_ear_ear_acc:
      Channels: 3
      Coverage: 11.2%
      Missing values: 11,781,309
      Value range: [-2.285, 1.560]
      Mean ± Std: -0.249 ± 0.321
    📊 mbient_acc_x_axis:
      Channels: 1
      Coverage: 100.0%
      Missing values: 0
      Value range: [-3.990, 2.832]
      Mean ± Std: 0.557 ± 0.502
    📊 mbient_acc_y_axis:
      Channels: 1
      Coverage: 100.0%
      Missing values: 0
      Val

## 7. Sync Events Data Availability Check

In [23]:
# Diagnose whether the combined data covers the sync events: range checks,
# per-window sample/sensor/label coverage, timezone info, hour-offset probes
# and the nearest timestamps in the index.
print("=== SYNC EVENTS DATA AVAILABILITY CHECK ===")

if sync_start_time is not None and sync_end_time is not None:
    print(f"🎯 Checking data availability around sync events for {SUBJECT_ID}")
    print(f"📅 Sync Start: {sync_start_time}")
    print(f"📅 Sync End: {sync_end_time}")
    print(f"⏱️ Sync Duration: {sync_end_time - sync_start_time}")
    
    # Check data time bounds
    print(f"\n📊 Combined Data Time Bounds:")
    print(f"  📅 Data Start: {data_start}")
    print(f"  📅 Data End: {data_end}")
    print(f"  ⏱️ Data Duration: {data_end - data_start}")
    
    # Check if sync times fall within data bounds
    sync_start_in_data = data_start <= sync_start_time <= data_end
    sync_end_in_data = data_start <= sync_end_time <= data_end
    
    print(f"\n🔍 Sync Event Coverage:")
    print(f"  🎯 Sync Start in data range: {'✅' if sync_start_in_data else '❌'} {sync_start_in_data}")
    print(f"  🎯 Sync End in data range: {'✅' if sync_end_in_data else '❌'} {sync_end_in_data}")
    
    if not sync_start_in_data:
        if sync_start_time < data_start:
            time_diff = data_start - sync_start_time
            print(f"    ⚠️ Sync start is {time_diff} before data start")
        else:
            time_diff = sync_start_time - data_end
            print(f"    ⚠️ Sync start is {time_diff} after data end")
    
    if not sync_end_in_data:
        if sync_end_time < data_start:
            time_diff = data_start - sync_end_time
            print(f"    ⚠️ Sync end is {time_diff} before data start")
        else:
            time_diff = sync_end_time - data_end
            print(f"    ⚠️ Sync end is {time_diff} after data end")
    
    # Check data availability around sync events with different window sizes.
    # Each size is the TOTAL window width; the window spans half of it on
    # either side of the event.
    window_sizes = [1, 5, 10, 30]  # minutes
    
    for event_name, event_time in [("Sync Start", sync_start_time), ("Sync End", sync_end_time)]:
        print(f"\n📊 Data availability around {event_name} ({event_time}):")
        
        for window_min in window_sizes:
            half_window = pd.Timedelta(minutes=window_min/2)
            window_start = event_time - half_window
            window_end = event_time + half_window
            
            # Select data points in window
            mask = (combined_data.index >= window_start) & (combined_data.index <= window_end)
            data_in_window = combined_data[mask]
            
            # Check sensor data availability.
            # Window-specific names keep these temporaries from overwriting
            # the report-level `coverage` / `label_coverage` globals.
            sensor_data_in_window = data_in_window[sensor_columns]
            non_null_count = sensor_data_in_window.notna().sum().sum()
            total_possible = len(data_in_window) * len(sensor_columns)
            window_coverage = (non_null_count / total_possible * 100) if total_possible > 0 else 0
            
            # Check label data availability; an explicit dtype avoids the
            # empty-Series default-dtype warning on older pandas.
            label_data_in_window = data_in_window[label_column] if label_column in data_in_window.columns else pd.Series(dtype=object)
            labeled_count = (label_data_in_window != '').sum() if len(label_data_in_window) > 0 else 0
            window_label_coverage = (labeled_count / len(data_in_window) * 100) if len(data_in_window) > 0 else 0
            
            status = "✅" if len(data_in_window) > 0 else "❌"
            # Label the window with its real half-width (window_min / 2).
            print(f"    {status} ±{window_min / 2:g}min window: {len(data_in_window)} samples, {window_coverage:.1f}% sensor data, {window_label_coverage:.1f}% labeled")
            
            # Show exact time bounds that would be plotted
            actual_start = max(window_start, data_start)
            actual_end = min(window_end, data_end)
            print(f"        Time range: {actual_start} to {actual_end}")
            
            if len(data_in_window) > 0:
                # Show first and last timestamps with data
                print(f"        First data: {data_in_window.index.min()}")
                print(f"        Last data: {data_in_window.index.max()}")
                
                # Check for gaps in data around event
                time_diffs = data_in_window.index.to_series().diff()
                max_gap = time_diffs.max()
                avg_gap = time_diffs.mean()
                print(f"        Sampling: avg gap {avg_gap}, max gap {max_gap}")
                
                # Show sample of available sensor channels (those with at
                # least one non-null value in the window)
                available_sensors = sensor_data_in_window.dropna(axis=1, how='all').columns
                print(f"        Available sensors: {len(available_sensors)}/{len(sensor_columns)} ({list(available_sensors[:5])}{'...' if len(available_sensors) > 5 else ''})")
    
    # Additional checks for timezone issues.
    # NOTE(review): if the sync times and the data index disagreed on
    # tz-awareness, the range comparisons above would presumably raise before
    # this section is reached — confirm and consider printing this earlier.
    print(f"\n🌍 Timezone and Format Analysis:")
    print(f"  📅 Sync Start timezone: {sync_start_time.tz}")
    print(f"  📅 Data Start timezone: {data_start.tz}")
    print(f"  📅 Data timezone info: {combined_data.index.tz}")
    
    # Try different time formats or offsets
    print(f"\n🔧 Testing Alternative Time Interpretations:")
    
    # Test if there's a timezone offset issue: shift both sync times by whole
    # hours and report every offset that lands at least one of them in range.
    for offset_hours in [-12, -6, -3, -1, 0, 1, 3, 6, 12]:
        adjusted_sync_start = sync_start_time + pd.Timedelta(hours=offset_hours)
        adjusted_sync_end = sync_end_time + pd.Timedelta(hours=offset_hours)
        
        start_in_range = data_start <= adjusted_sync_start <= data_end
        end_in_range = data_start <= adjusted_sync_end <= data_end
        
        if start_in_range or end_in_range:
            print(f"    🎯 With {offset_hours:+}h offset: Start in range: {start_in_range}, End in range: {end_in_range}")
            
            # Check data around adjusted times (here the window really is
            # five minutes on each side)
            for event_name, adjusted_time in [("Adjusted Start", adjusted_sync_start), ("Adjusted End", adjusted_sync_end)]:
                if data_start <= adjusted_time <= data_end:
                    window = pd.Timedelta(minutes=5)
                    mask = (combined_data.index >= adjusted_time - window) & (combined_data.index <= adjusted_time + window)
                    data_count = combined_data[mask].shape[0]
                    print(f"      📊 {event_name} ({adjusted_time}): {data_count} samples in ±5min window")
    
    # Check for exact timestamp matches
    print(f"\n🎯 Exact Timestamp Matching:")
    exact_sync_start_match = sync_start_time in combined_data.index
    exact_sync_end_match = sync_end_time in combined_data.index
    
    print(f"  📍 Exact sync start match: {'✅' if exact_sync_start_match else '❌'} {exact_sync_start_match}")
    print(f"  📍 Exact sync end match: {'✅' if exact_sync_end_match else '❌'} {exact_sync_end_match}")
    
    # Find nearest timestamps
    if len(combined_data) > 0:
        # Absolute distance of every index timestamp to each sync event
        time_diffs_start = abs(combined_data.index - sync_start_time)
        time_diffs_end = abs(combined_data.index - sync_end_time)
        
        closest_start_idx = time_diffs_start.argmin()
        closest_end_idx = time_diffs_end.argmin()
        
        closest_start_time = combined_data.index[closest_start_idx]
        closest_end_time = combined_data.index[closest_end_idx]
        
        start_diff = abs(closest_start_time - sync_start_time)
        end_diff = abs(closest_end_time - sync_end_time)
        
        print(f"  🎯 Closest to sync start: {closest_start_time} (diff: {start_diff})")
        print(f"  🎯 Closest to sync end: {closest_end_time} (diff: {end_diff})")
        
        # Show the first five sensor values at the closest timestamps, but
        # only when they are within an hour of the event
        if start_diff <= pd.Timedelta(hours=1):
            print(f"    📊 Data at closest start timestamp: Available")
            sample_data = combined_data.loc[closest_start_time, sensor_columns[:5]]
            print(f"    📊 Sample values: {sample_data.values}")
        
        if end_diff <= pd.Timedelta(hours=1):
            print(f"    📊 Data at closest end timestamp: Available")
            sample_data = combined_data.loc[closest_end_time, sensor_columns[:5]]
            print(f"    📊 Sample values: {sample_data.values}")

else:
    print("⚠️ No sync events available for analysis")
    print("This could mean:")
    print("  1. Sync events file doesn't exist")
    print("  2. No sync events found for this subject")
    print("  3. Sync events are not properly formatted")

# Recommend actions based on findings
print(f"\n💡 Recommendations:")
if sync_start_time is not None and sync_end_time is not None:
    if not (data_start <= sync_start_time <= data_end and data_start <= sync_end_time <= data_end):
        print("  🔧 Sync events are outside data time range:")
        print("     - Check if sync events file has correct timestamps")
        print("     - Verify timezone consistency between sync events and sensor data")
        print("     - Consider if data preprocessing removed sync event periods")
        print("     - Check if subject ID matches between sync events and data files")
    else:
        print("  ✅ Sync events are within data time range")
        print("     - Data should be available for plotting around sync events")
        print("     - Use the interactive visualizer to navigate to sync times")

print("  📊 To investigate further:")
print("     - Use the sync jump buttons in the interactive visualizer")
print("     - Check the original data files before preprocessing")
print("     - Verify the Data_Preprocessing script alignment logic")

=== SYNC EVENTS DATA AVAILABILITY CHECK ===
🎯 Checking data availability around sync events for OutSense-713
📅 Sync Start: 2024-07-26 10:55:00
📅 Sync End: 2024-07-28 10:13:00
⏱️ Sync Duration: 1 days 23:18:00

📊 Combined Data Time Bounds:
  📅 Data Start: 2024-07-26 10:06:09.261336565
  📅 Data End: 2024-07-28 11:12:59.994135618
  ⏱️ Data Duration: 2 days 01:06:50.732799053

🔍 Sync Event Coverage:
  🎯 Sync Start in data range: ✅ True
  🎯 Sync End in data range: ✅ True

📊 Data availability around Sync Start (2024-07-26 10:55:00):
    ✅ ±1min window: 1500 samples, 100.0% sensor data, 73.3% labeled
        Time range: 2024-07-26 10:54:30 to 2024-07-26 10:55:30
        First data: 2024-07-26 10:54:30.022202787
        Last data: 2024-07-26 10:55:29.982220692
        Sampling: avg gap 0 days 00:00:00.040000011, max gap 0 days 00:00:00.040000012
        Available sensors: 29/29 (['corsano_wrist_wrist_acc_x', 'corsano_wrist_wrist_acc_y', 'corsano_wrist_wrist_acc_z', 'cosinuss_ear_ear_acc_x', 'c

## 8. Quick Sync Event Data Test

In [24]:
# Quick test function to check sync event data
def quick_sync_check():
    """Report whether data exists around both sync events.

    For each sync event this prints whether the event lies inside
    [data_start, data_end] and how many samples fall into windows of 1, 5 and
    10 minutes total width centred on it (clipped to the data bounds).

    Returns:
        bool: True when both events are in range and every window holds at
        least one sample; False otherwise, including when either sync time
        is missing.
    """
    print("=== QUICK SYNC EVENT DATA TEST ===")
    
    if sync_start_time is None or sync_end_time is None:
        print("❌ No sync events available for testing")
        return False
    
    success = True
    
    for event_name, event_time in [("Sync Start", sync_start_time), ("Sync End", sync_end_time)]:
        print(f"\n🎯 Testing {event_name}: {event_time}")
        
        # Check if event is in data range
        in_range = data_start <= event_time <= data_end
        print(f"   In data range: {'✅' if in_range else '❌'} {in_range}")
        
        if not in_range:
            success = False
            if event_time < data_start:
                print(f"   Event is {data_start - event_time} before data starts")
            else:
                print(f"   Event is {event_time - data_end} after data ends")
            continue
        
        # Test different window sizes; window_min is the total width, so the
        # window reaches window_min / 2 minutes to each side of the event.
        for window_min in [1, 5, 10]:
            half_window = pd.Timedelta(minutes=window_min/2)
            window_start = max(event_time - half_window, data_start)
            window_end = min(event_time + half_window, data_end)
            
            mask = (combined_data.index >= window_start) & (combined_data.index <= window_end)
            # Count matches on the mask; no need to copy the selected rows.
            samples_in_window = int(mask.sum())
            
            # Label the window with its real half-width.
            if samples_in_window > 0:
                print(f"   ✅ ±{window_min / 2:g}min window: {samples_in_window} samples available")
            else:
                print(f"   ❌ ±{window_min / 2:g}min window: No data available")
                success = False
    
    if success:
        print(f"\n✅ SYNC EVENT DATA TEST PASSED")
        print("   Data is available around sync events")
        print("   You should be able to plot sync event data in the interactive visualizer")
    else:
        print(f"\n❌ SYNC EVENT DATA TEST FAILED")
        print("   Data is NOT available around sync events")
        print("   Check the detailed analysis above for troubleshooting")
    
    return success

# Execute the check once; the verdict selects one of the follow-ups below.
test_result = quick_sync_check()

if sync_start_time is not None:
    if not test_result:
        # Failure path: dump the timestamps found near the sync start.
        print(f"\n🔍 ADDITIONAL DEBUGGING INFO:")

        print(f"\nData timestamps near sync start time:")
        sync_area_mask = (
            (combined_data.index >= sync_start_time - pd.Timedelta(hours=2))
            & (combined_data.index <= sync_start_time + pd.Timedelta(hours=2))
        )
        sync_area_data = combined_data[sync_area_mask]

        if len(sync_area_data) == 0:
            print(f"   No data found within ±2 hours of sync start")

            # Nothing nearby: show how far the sync time is from the data bounds.
            print(f"\nTime comparison:")
            print(f"   Sync start: {sync_start_time}")
            print(f"   Data start: {data_start}")
            print(f"   Data end: {data_end}")
            print(f"   Difference to data start: {data_start - sync_start_time}")
            print(f"   Difference to data end: {sync_start_time - data_end}")
        else:
            print(f"   Found {len(sync_area_data)} samples within ±2 hours of sync start")
            print(f"   First 5 timestamps: {sync_area_data.index[:5].tolist()}")
            print(f"   Last 5 timestamps: {sync_area_data.index[-5:].tolist()}")
    else:
        # Success path: point the visualizer controls at the sync start.
        print(f"\n📊 TESTING PLOT AROUND SYNC START")

        center_time_text.value = sync_start_time.strftime('%Y-%m-%d %H:%M:%S')
        window_minutes.value = 10  # 10 minute window

        if len(sensor_columns) == 0:
            print(f"   ⚠️ No sensor columns available for testing")
        else:
            # Preselect up to the first three sensor channels.
            test_channels = sensor_columns[:3]
            channel_selection.value = test_channels
            print(f"   Selected channels for test: {test_channels}")

            # Count the samples inside the matching ten-minute span.
            half_window = pd.Timedelta(minutes=5)
            plot_start = sync_start_time - half_window
            plot_end = sync_start_time + half_window

            mask = (combined_data.index >= plot_start) & (combined_data.index <= plot_end)
            test_plot_data = combined_data[mask]

            print(f"   Test plot window: {plot_start} to {plot_end}")
            print(f"   Samples in plot window: {len(test_plot_data)}")

            if len(test_plot_data) == 0:
                print(f"   ❌ No plot data available in test window")
            else:
                print(f"   ✅ Plot data available - ready for interactive visualization!")
                print(f"   💡 Click 'Sync Start' button in the interactive visualizer to view")

=== QUICK SYNC EVENT DATA TEST ===

🎯 Testing Sync Start: 2024-07-26 10:55:00
   In data range: ✅ True
   ✅ ±1min window: 1500 samples available
   ✅ ±5min window: 7500 samples available
   ✅ ±10min window: 15000 samples available

🎯 Testing Sync End: 2024-07-28 10:13:00
   In data range: ✅ True
   ✅ ±1min window: 1500 samples available
   ✅ ±5min window: 7500 samples available
   ✅ ±10min window: 15000 samples available

✅ SYNC EVENT DATA TEST PASSED
   Data is available around sync events
   You should be able to plot sync event data in the interactive visualizer

📊 TESTING PLOT AROUND SYNC START
   Selected channels for test: ['corsano_wrist_wrist_acc_x', 'corsano_wrist_wrist_acc_y', 'corsano_wrist_wrist_acc_z']
   Test plot window: 2024-07-26 10:50:00 to 2024-07-26 11:00:00
   Samples in plot window: 15000
   ✅ Plot data available - ready for interactive visualization!
   💡 Click 'Sync Start' button in the interactive visualizer to view
