# Simple Sensor Data Visualization v2

**Purpose**: Load sensor data around sync events with individual time axes for manual sync event identification.

**Features**:
- Load 4 hours around sync start time (configurable)
- Each sensor has its own independent time axis
- Time shifts controlled by Sync_Parameters.yaml
- Preprocessing done before plotting
- Simple and focused approach

## 1. Configuration

In [116]:
# Configuration parameters
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import yaml
from datetime import datetime, timedelta
import ipywidgets as widgets
from IPython.display import display, clear_output

# ========== CONFIGURATION ==========
SUBJECT_ID = "OutSense-425_48h"  # Change this to your subject
HOURS_AROUND_SYNC = 4  # Hours to load around sync start time (2 hours before, 2 hours after)
TARGET_FREQUENCY = 25  # Hz for resampling

# Paths
script_dir = os.path.dirname(os.path.abspath('.'))
project_root = os.path.dirname(script_dir)
sync_params_path = os.path.join(project_root, 'Sync_Parameters.yaml')
sync_events_path = os.path.join(project_root, 'Sync_Events_Times.csv')
config_path = os.path.join(project_root, 'config.yaml')

print(f"📋 Configuration:")
print(f"  Subject: {SUBJECT_ID}")
print(f"  Time window: ±{HOURS_AROUND_SYNC} hours around sync start")
print(f"  Target frequency: {TARGET_FREQUENCY} Hz")
print(f"  Project root: {project_root}")

📋 Configuration:
  Subject: OutSense-425_48h
  Time window: ±4 hours around sync start
  Target frequency: 25 Hz
  Project root: /home/muff_an/scai_data_process


## 2. Load Configuration and Sync Parameters

In [117]:
# Load main configuration
with open('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/config.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

# Load sync parameters
with open('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/Sync_Parameters.yaml', 'r') as f:
    sync_params = yaml.safe_load(f)

# Load sync events
sync_events_df = pd.read_csv('/home/muff_an/scai_data_process/Master_Thesis_Ricardo/Sync_Events_Times.csv')

print(f"✅ Loaded configurations:")
print(f"  Main config: {len(cfg)} sections")
print(f"  Sync parameters: {len(sync_params)} subjects")
print(f"  Sync events: {len(sync_events_df)} entries")

# Get sync start time for the subject
subject_sync = sync_events_df[sync_events_df['Subject'] == SUBJECT_ID]
if subject_sync.empty:
    raise ValueError(f"No sync events found for subject {SUBJECT_ID}")

sync_start_str = subject_sync.iloc[0]['Sync Start']
sync_end_str = subject_sync.iloc[0]['Sync End']

# Parse sync times
sync_start_time = pd.to_datetime(sync_start_str, format='%d.%m.%Y.%H.%M.%S')
sync_end_time = pd.to_datetime(sync_end_str, format='%d.%m.%Y.%H.%M.%S')

print(f"\n🎯 Sync times for {SUBJECT_ID}:")
print(f"  Sync Start: {sync_start_time}")
print(f"  Sync End: {sync_end_time}")
print(f"  Duration: {sync_end_time - sync_start_time}")

# Calculate data window
data_window_start = sync_start_time - pd.Timedelta(hours=HOURS_AROUND_SYNC//2)
data_window_end = sync_start_time + pd.Timedelta(hours=HOURS_AROUND_SYNC//2)

print(f"\n📊 Data window ({HOURS_AROUND_SYNC}h around sync start):")
print(f"  Window Start: {data_window_start}")
print(f"  Window End: {data_window_end}")
print(f"  Total Duration: {data_window_end - data_window_start}")

✅ Loaded configurations:
  Main config: 62 sections
  Sync parameters: 16 subjects
  Sync events: 16 entries

🎯 Sync times for OutSense-425_48h:
  Sync Start: 2023-06-27 13:59:30
  Sync End: 2023-06-29 13:41:40
  Duration: 1 days 23:42:10

📊 Data window (4h around sync start):
  Window Start: 2023-06-27 11:59:30
  Window End: 2023-06-27 15:59:30
  Total Duration: 0 days 04:00:00


## 3. Load and Import Required Functions

In [118]:
# Import data loading functions from the original notebook/scripts
import sys
sys.path.append(project_root)

# Import necessary functions (you may need to adjust these based on your actual module structure)
try:
    from raw_data_processor import (
        select_data_loader,
        modify_modality_names,
        process_modality_duplicates,
        handle_missing_data_interpolation,
        correct_timestamp_drift
    )
    print("✅ Imported functions from raw_data_processor")
except ImportError as e:
    print(f"⚠️ Could not import from raw_data_processor: {e}")
    print("You may need to adjust the import paths or copy the required functions")
    
    # Define minimal data loader selection function
    def select_data_loader(sensor_name):
        """Simple data loader selector - you may need to implement based on your data structure"""
        def simple_csv_loader(subject_dir, sensor_name, sensor_settings):
            # This is a placeholder - implement based on your actual data structure
            csv_path = os.path.join(subject_dir, f"{sensor_name}.csv")
            if os.path.exists(csv_path):
                return pd.read_csv(csv_path)
            else:
                return pd.DataFrame()
        return simple_csv_loader
    
    def modify_modality_names(data, sensor_name):
        """Simple modality name modifier"""
        return sensor_name, data
    
    def process_modality_duplicates(data, sample_rate):
        """Simple duplicate processor"""
        return data.drop_duplicates()
    
    def handle_missing_data_interpolation(data, max_interp_gap_s=2, target_freq=50):
        """Simple interpolation"""
        return data.interpolate(method='linear', limit=int(max_interp_gap_s * target_freq))
    
    def correct_timestamp_drift(timestamp, t0, t1, drift_secs):
        """Simple drift correction"""
        if t0 <= timestamp <= t1:
            progress = (timestamp - t0) / (t1 - t0)
            return timestamp + (drift_secs * progress)
        return timestamp
    
    print("📝 Using simplified placeholder functions")

# Get raw data configuration
raw_data_parsing_config = cfg.get('raw_data_parsing_config', {})
raw_data_base_dir = os.path.join(project_root, cfg.get('raw_data_input_dir', 'data'))
subject_dir = os.path.join(raw_data_base_dir, SUBJECT_ID)

print(f"\n📂 Data paths:")
print(f"  Raw data dir: {raw_data_base_dir}")
print(f"  Subject dir: {subject_dir}")
print(f"  Available sensors: {list(raw_data_parsing_config.keys())}")

✅ Imported functions from raw_data_processor

📂 Data paths:
  Raw data dir: /scai_data2/scai_datasets/interim/scai-outsense/
  Subject dir: /scai_data2/scai_datasets/interim/scai-outsense/OutSense-425_48h
  Available sensors: ['corsano_wrist_acc', 'cosinuss_ear_acc_x_acc_y_acc_z', 'mbient_imu_wc_accelerometer', 'mbient_imu_wc_gyroscope', 'vivalnk_vv330_acceleration', 'sensomative_bottom_logger', 'sensomative_back_logger', 'corsano_bioz_acc']


## 4. Load and Process Sensor Data

In [119]:
# Load and process each sensor with time shifts from Sync_Parameters.yaml
print(f"\n=== LOADING SENSOR DATA ===")
print(f"Processing sensors for subject: {SUBJECT_ID}")
print(f"Time window: {data_window_start} to {data_window_end}")

processed_sensors = {}
subject_correction_params = sync_params.get(SUBJECT_ID, {})

for sensor_name, sensor_settings in raw_data_parsing_config.items():
    print(f"\n--- Processing sensor: {sensor_name} ---")
    
    try:
        # Load raw sensor data
        loader = select_data_loader(sensor_name)
        sensor_data_raw = loader(subject_dir, sensor_name, sensor_settings)
        
        if sensor_data_raw.empty or 'time' not in sensor_data_raw.columns:
            print(f"❌ No data loaded for {sensor_name}")
            continue
        
        print(f"📊 Loaded {len(sensor_data_raw)} raw samples")
        
        # Get time correction parameters for this sensor
        sensor_corr_params = subject_correction_params.get(sensor_name, {'unit': 's'})
        time_unit = sensor_corr_params.get('unit', 's')
        shift_val = sensor_corr_params.get('shift', 0)
        
        # Apply time corrections
        time_col_num = sensor_data_raw['time'].astype(float)
        
        # Convert to seconds if needed
        if time_unit == 'ms':
            time_col_num = time_col_num / 1000.0
        
        # Apply shift correction
        if shift_val != 0:
            time_col_num = time_col_num + shift_val
            print(f"⏱️ Applied time shift: {shift_val}s")
        
        # Apply drift correction if available
        drift_params = sensor_corr_params.get('drift')
        if drift_params and all(k in drift_params for k in ['t0', 't1', 'drift_secs']):
            t0_ts = pd.Timestamp(drift_params['t0'])
            t1_ts = pd.Timestamp(drift_params['t1'])
            if not pd.isna(t0_ts) and not pd.isna(t1_ts):
                t0, t1 = t0_ts.timestamp(), t1_ts.timestamp()
                drift = drift_params['drift_secs']
                time_col_num = time_col_num.apply(correct_timestamp_drift, args=(t0, t1, drift))
                print(f"📐 Applied drift correction: {drift}s over {t1-t0:.1f}s interval")
        
        # Convert to datetime
        corrected_timestamps = pd.to_datetime(time_col_num, unit='s', errors='coerce')
        sensor_data_corrected = sensor_data_raw.drop(columns=['time']).copy()
        sensor_data_corrected['time'] = corrected_timestamps
        sensor_data_corrected.dropna(subset=['time'], inplace=True)
        
        if sensor_data_corrected.empty:
            print(f"❌ No valid data after time correction for {sensor_name}")
            continue
        
        # Filter to data window
        original_count = len(sensor_data_corrected)
        time_mask = (sensor_data_corrected['time'] >= data_window_start) & (sensor_data_corrected['time'] <= data_window_end)
        sensor_data_filtered = sensor_data_corrected[time_mask].copy()
        
        filtered_count = len(sensor_data_filtered)
        retention_pct = (filtered_count / original_count * 100) if original_count > 0 else 0
        print(f"🔍 Filtered from {original_count} to {filtered_count} samples ({retention_pct:.1f}% retained)")
        
        if sensor_data_filtered.empty:
            print(f"❌ No data in time window for {sensor_name}")
            continue
        
        # Set time as index
        sensor_data_filtered.set_index('time', inplace=True)
        sensor_data_filtered.sort_index(inplace=True)
        
        # Apply basic preprocessing
        sample_rate = sensor_settings.get('sample_rate', TARGET_FREQUENCY)
        processed_data = process_modality_duplicates(sensor_data_filtered, sample_rate)
        processed_data = handle_missing_data_interpolation(processed_data, max_interp_gap_s=2, target_freq=TARGET_FREQUENCY)
        
        # Apply column renaming
        new_name, processed_data = modify_modality_names(processed_data, sensor_name)
        
        if processed_data.empty:
            print(f"❌ No data after preprocessing for {sensor_name}")
            continue
        
        print(f"✅ Final shape: {processed_data.shape}")
        print(f"✅ Time range: {processed_data.index.min()} to {processed_data.index.max()}")
        
        processed_sensors[new_name] = processed_data
        
    except Exception as e:
        print(f"❌ Error processing sensor {sensor_name}: {e}")
        import traceback
        traceback.print_exc()

print(f"\n📈 Successfully processed {len(processed_sensors)} sensors:")
for sensor_name, data in processed_sensors.items():
    duration = data.index.max() - data.index.min()
    print(f"  📊 {sensor_name}: {len(data)} samples, duration {duration}")

if not processed_sensors:
    raise ValueError("No sensor data was successfully processed!")


=== LOADING SENSOR DATA ===
Processing sensors for subject: OutSense-425_48h
Time window: 2023-06-27 11:59:30 to 2023-06-27 15:59:30

--- Processing sensor: corsano_wrist_acc ---
📊 Loaded 5389984 raw samples
⏱️ Applied time shift: 7203s
📐 Applied drift correction: -10s over 162290.0s interval
🔍 Filtered from 5389984 to 249908 samples (4.6% retained)
✅ Final shape: (249908, 3)
✅ Time range: 2023-06-27 13:49:20.862529993 to 2023-06-27 15:59:29.974317789

--- Processing sensor: cosinuss_ear_acc_x_acc_y_acc_z ---
❌ No data loaded for cosinuss_ear_acc_x_acc_y_acc_z

--- Processing sensor: mbient_imu_wc_accelerometer ---
📊 Loaded 9060010 raw samples
⏱️ Applied time shift: 7217s
📐 Applied drift correction: -15s over 162290.0s interval
🔍 Filtered from 9060010 to 674417 samples (7.4% retained)
✅ Final shape: (674417, 3)
✅ Time range: 2023-06-27 12:16:52.649641514 to 2023-06-27 15:59:29.979947567

--- Processing sensor: mbient_imu_wc_gyroscope ---
📊 Loaded 9060012 raw samples
⏱️ Applied time sh

## 5. Interactive Plotting with Independent Time Axes

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from IPython.display import display, clear_output
import ipywidgets as widgets


# === Initial Sync Parameters ===
# sync_start_time = pd.Timestamp('2022-01-01 00:05:00')
# sync_end_time = pd.Timestamp('2022-01-01 00:06:00')
sync_start_ref = sync_start_time  # for relative adjustments

# === Widgets ===
sensor_names = list(processed_sensors.keys())
sensor_selection = widgets.SelectMultiple(
    options=sensor_names,
    value=sensor_names[:2],
    description='Select Sensors:',
    layout=widgets.Layout(height='150px', width='300px')
)

center_time_text = widgets.Text(
    value=sync_start_time.strftime('%Y-%m-%d %H:%M:%S'),
    description='Center Time:',
    layout=widgets.Layout(width='300px')
)

window_minutes = widgets.IntSlider(
    value=2,  # 1 hour window
    min=1,
    max=60,  # 4 hours max
    step=1,
    description='Window (min):',
    style={'description_width': 'initial'}
)

# Jump buttons
jump_sync_start = widgets.Button(description='🎯 Jump to Sync Start', button_style='success')
jump_sync_end = widgets.Button(description='🎯 Jump to Sync End', button_style='warning')
jump_data_start = widgets.Button(description='📊 Jump to Data Start', button_style='info')
jump_data_end = widgets.Button(description='📊 Jump to Data End', button_style='info')
plot_button = widgets.Button(description='📈 Plot Sensors', button_style='primary', layout=widgets.Layout(width='150px'))

# Offset slider and label
sync_offset_slider = widgets.IntSlider(
    value=0, min=-300, max=300, step=1,
    description='Offset (s):',
    layout=widgets.Layout(width='80%')
)
sync_offset_label = widgets.Label(value="Current offset: 0 seconds")

plot_output = widgets.Output()

def get_center_time():
    try:
        return pd.to_datetime(center_time_text.value)
    except:
        return sync_start_time

def update_center_time(new_time):
    center_time_text.value = new_time.strftime('%Y-%m-%d %H:%M:%S')

def plot_sensors(_):
    with plot_output:
        clear_output(wait=True)
        selected_sensors = list(sensor_selection.value)
        if not selected_sensors:
            print("❌ Please select at least one sensor")
            return

        center_time = get_center_time()
        window_mins = window_minutes.value
        half_window = pd.Timedelta(minutes=window_mins/2)
        plot_start = center_time - half_window
        plot_end = center_time + half_window

        fig, axes = plt.subplots(len(selected_sensors), 1,
                                 figsize=(16, 3*len(selected_sensors)),
                                 sharex=False)
        if len(selected_sensors) == 1:
            axes = [axes]

        for i, sensor_name in enumerate(selected_sensors):
            ax = axes[i]
            sensor_data = processed_sensors.get(sensor_name, pd.DataFrame())
            mask = (sensor_data.index >= plot_start) & (sensor_data.index <= plot_end)
            plot_data = sensor_data[mask]

            if plot_data.empty:
                ax.text(0.5, 0.5, f'No data in time window for {sensor_name}',
                        ha='center', va='center', transform=ax.transAxes)
                ax.set_title(f'{sensor_name} - No Data in Window')
                continue

            numeric_cols = plot_data.select_dtypes(include=[np.number]).columns
            for col in numeric_cols:
                ax.plot(plot_data.index, plot_data[col], label=col, alpha=0.7, linewidth=1)

            # Plot lines
            if plot_start <= sync_start_time <= plot_end:
                ax.axvline(sync_start_time, color='red', linestyle='--', linewidth=2, alpha=0.8, label='🎯 Sync Start')
            if plot_start <= sync_end_time <= plot_end:
                ax.axvline(sync_end_time, color='darkred', linestyle='--', linewidth=2, alpha=0.8, label='🎯 Sync End')

            ax.axvline(center_time, color='green', linestyle=':', linewidth=1, alpha=0.6, label='Center')

            ax.set_title(f'{sensor_name} ({len(numeric_cols)} channels)')
            ax.set_ylabel('Value')
            ax.set_xlabel('Time')
            ax.grid(True, alpha=0.3)
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
            ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=max(1, window_mins // 4)))
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
            if len(numeric_cols) <= 6:
                ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=8)

        plt.suptitle(f'Sensor Data - Independent Time Axes\nWindow: {plot_start} to {plot_end}',
                     fontsize=14, y=0.98)
        plt.tight_layout()
        plt.subplots_adjust(right=0.85, top=0.92)
        plt.show()

        print(f"🎯 Sync Start: {sync_start_time}")
        print(f"🎯 Sync End:   {sync_end_time}")
        print(f"🕒 Offset from original: {(sync_start_time - sync_start_ref).total_seconds()} seconds")

# Sync offset handler
def on_sync_offset_slider_change(change):
    global sync_start_time
    offset_seconds = change['new']
    sync_start_time = sync_start_ref + pd.Timedelta(seconds=offset_seconds)
    sync_offset_label.value = f"Current offset: {offset_seconds:+d} seconds"
    plot_sensors(None)

sync_offset_slider.observe(on_sync_offset_slider_change, names='value')

# Navigation button actions
def jump_to_sync_start(_): update_center_time(sync_start_time); plot_sensors(None)
def jump_to_sync_end(_): update_center_time(sync_end_time); plot_sensors(None)
def jump_to_data_start(_):
    update_center_time(min(d.index.min() for d in processed_sensors.values()))
    plot_sensors(None)
def jump_to_data_end(_):
    update_center_time(max(d.index.max() for d in processed_sensors.values()))
    plot_sensors(None)

plot_button.on_click(plot_sensors)
jump_sync_start.on_click(jump_to_sync_start)
jump_sync_end.on_click(jump_to_sync_end)
jump_data_start.on_click(jump_to_data_start)
jump_data_end.on_click(jump_to_data_end)

# === Display Layout ===
controls = widgets.VBox([
    widgets.HTML("<h3>🎛️ Controls</h3>"),
    sensor_selection,
    center_time_text,
    window_minutes,
    widgets.HBox([jump_sync_start, jump_sync_end]),
    widgets.HBox([jump_data_start, jump_data_end]),
    plot_button
])

display(widgets.VBox([
    controls,
    widgets.HTML("<h4>🔧 Adjust Sync Start Time</h4>"),
    sync_offset_slider,
    sync_offset_label,
    plot_output
]))

print("✅ Interactive visualization tool is ready.")


VBox(children=(VBox(children=(HTML(value='<h3>🎛️ Controls</h3>'), SelectMultiple(description='Select Sensors:'…

✅ Interactive visualization tool is ready.


## 6. Summary Information

In [63]:
# Display summary information
print("=== SUMMARY ===")
print(f"Subject: {SUBJECT_ID}")
print(f"Data window: {HOURS_AROUND_SYNC}h around sync start")
print(f"Sync start: {sync_start_time}")
print(f"Sync end: {sync_end_time}")
print(f"Processed sensors: {len(processed_sensors)}")

print("\n📊 Sensor Details:")
for sensor_name, data in processed_sensors.items():
    # Get time shift applied
    original_sensor_name = sensor_name  # May be modified by modify_modality_names
    for orig_name in raw_data_parsing_config.keys():
        if orig_name in sensor_name:
            original_sensor_name = orig_name
            break
    
    sensor_corr_params = subject_correction_params.get(original_sensor_name, {})
    shift_applied = sensor_corr_params.get('shift', 0)
    
    print(f"  📈 {sensor_name}:")
    print(f"    Samples: {len(data)}")
    print(f"    Time range: {data.index.min()} to {data.index.max()}")
    print(f"    Duration: {data.index.max() - data.index.min()}")
    print(f"    Columns: {list(data.columns)}")
    print(f"    Time shift applied: {shift_applied}s")

print("\n🎯 Ready for manual sync event identification!")
print("Use the interactive plot above to examine each sensor independently.")

=== SUMMARY ===
Subject: OutSense-608
Data window: 4h around sync start
Sync start: 2024-06-04 09:39:30
Sync end: 2024-06-06 10:57:40
Processed sensors: 8

📊 Sensor Details:
  📈 corsano_wrist:
    Samples: 460801
    Time range: 2024-06-04 07:39:30 to 2024-06-04 11:39:30
    Duration: 0 days 04:00:00
    Columns: ['wrist_acc_x', 'wrist_acc_y', 'wrist_acc_z']
    Time shift applied: 0s
  📈 cosinuss_ear:
    Samples: 205445
    Time range: 2024-06-04 11:01:15.559000015 to 2024-06-04 11:39:29.996000051
    Duration: 0 days 00:38:14.437000036
    Columns: ['ear_acc_x', 'ear_acc_y', 'ear_acc_z']
    Time shift applied: 0s
  📈 mbient_acc:
    Samples: 727051
    Time range: 2024-06-04 07:39:30.003653765 to 2024-06-04 11:39:29.989685059
    Duration: 0 days 03:59:59.986031294
    Columns: ['x_axis_g', 'y_axis_g', 'z_axis_g']
    Time shift applied: 0s
  📈 mbient_gyro:
    Samples: 727059
    Time range: 2024-06-04 07:39:30.015814066 to 2024-06-04 11:39:29.996567965
    Duration: 0 days 03:59: