In [1]:
import datetime
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
# Storage locations: the raw CSV recordings plus one pickle directory per plot type
# ('ECG', 'RR', 'EDR') read by load_pickle_data.
base_path = './storage/0_RAW_ecg_csv/'
pkl_base_path = './storage/1_CLEAN_ecg/'
pkl_rr_base_path = './storage/2_RR_HR_intervals/'
pkl_edr_base_path = './storage/3_EDR/'

# Stimulus schedule for each video part.
# Every entry is (start offset in seconds, label, duration in seconds).
stimuli_info = dict(
    part1=[
        (0, "Calm -> Beach", 35),
        (35, "Shark Attack", 120),
        (160, "Rollercoaster", 120),
        (285, "Calm -> Nature", 30),
        (320, "Night Walk", 60),
        (385, "Calm -> Beach", 30),
    ],
    part2=[
        (30, "Stimuli 1", 120),
        (155, "Stimuli 2", 120),
        (280, "Stimuli 3", 120),
        (405, "Stimuli 4", 120),
    ],
)


In [3]:

# Maps each processed pickle filename to the subject and video part it belongs to.
# Labels use the short forms 'Sub N' and 'ptN'; load_ecg_data translates the CSV mapping's
# 'Subject N' / 'partN' labels into these forms to find the pickle for a given CSV.
pkl_files_to_subject_video = {
    'ecg_2024-07-27_10-15-17_200.pkl': {'subject': 'Sub 8', 'video_part': 'pt1'},
    'ecg_2024-07-27_10-27-07_200.pkl': {'subject': 'Sub 8', 'video_part': 'pt2'},
    'ecg_2024-07-27_11-03-03_200.pkl': {'subject': 'Sub 9', 'video_part': 'pt1'},
    'ecg_2024-07-27_11-11-36_200.pkl': {'subject': 'Sub 9', 'video_part': 'pt2'},
    'ecg_2024-07-27_18-59-10_200.pkl': {'subject': 'Sub 10', 'video_part': 'pt1'},
    'ecg_2024-07-27_19-13-00_200.pkl': {'subject': 'Sub 10', 'video_part': 'pt2'},
    'ecg_2024-07-27_20-14-24_200.pkl': {'subject': 'Sub 11', 'video_part': 'pt1'},
    'ecg_2024-07-27_20-23-26_200.pkl': {'subject': 'Sub 11', 'video_part': 'pt2'},
    'ecg_2024-07-27_20-58-56_200.pkl': {'subject': 'Sub 12', 'video_part': 'pt1'},
    'ecg_2024-07-27_21-07-41_200.pkl': {'subject': 'Sub 12', 'video_part': 'pt2'},
    'ecg_2024-07-29_10-44-20_200.pkl': {'subject': 'Sub 13', 'video_part': 'pt1'},
    'ecg_2024-07-29_10-52-58_200.pkl': {'subject': 'Sub 13', 'video_part': 'pt2'},
    'ecg_2024-07-29_11-39-04_200.pkl': {'subject': 'Sub 14', 'video_part': 'pt1'},
    'ecg_2024-07-29_11-52-32_200.pkl': {'subject': 'Sub 14', 'video_part': 'pt2'},
    'ecg_2024-07-29_18-57-02_200.pkl': {'subject': 'Sub 15', 'video_part': 'pt1'},
    'ecg_2024-07-29_19-05-53_200.pkl': {'subject': 'Sub 15', 'video_part': 'pt2'},
    'ecg_2024-07-30_11-45-20_200.pkl': {'subject': 'Sub 18', 'video_part': 'pt1'},
    'ecg_2024-07-30_11-54-40_200.pkl': {'subject': 'Sub 18', 'video_part': 'pt2'},
    'ecg_2024-08-01_11-50-02_200.pkl': {'subject': 'Sub 23', 'video_part': 'pt1'},
    'ecg_2024-08-01_11-59-37_200.pkl': {'subject': 'Sub 23', 'video_part': 'pt2'},
    'ecg_2024-08-01_18-51-25_200.pkl': {'subject': 'Sub 24', 'video_part': 'pt1'},
    'ecg_2024-08-01_19-00-15_200.pkl': {'subject': 'Sub 24', 'video_part': 'pt2'},
    'ecg_2024-08-01_20-01-18_200.pkl': {'subject': 'Sub 22', 'video_part': 'pt1'},
    'ecg_2024-08-01_20-48-56_200.pkl': {'subject': 'Sub 22', 'video_part': 'pt2'},
    'ecg_2024-08-02_09-59-46_200.pkl': {'subject': 'Sub 27', 'video_part': 'pt1'},
    'ecg_2024-08-02_10-09-03_200.pkl': {'subject': 'Sub 27', 'video_part': 'pt2'},
    'ecg_2024-08-02_10-40-16_200.pkl': {'subject': 'Sub 28', 'video_part': 'pt1'},
    'ecg_2024-08-02_10-49-30_200.pkl': {'subject': 'Sub 28', 'video_part': 'pt2'},
    'ecg_2024-08-02_16-46-07_200.pkl': {'subject': 'Sub 29', 'video_part': 'pt1'},
    'ecg_2024-08-02_16-54-26_200.pkl': {'subject': 'Sub 29', 'video_part': 'pt2'},
    'ecg_2024-08-02_17-45-32_200.pkl': {'subject': 'Sub 30', 'video_part': 'pt1'},
    'ecg_2024-08-02_17-55-18_200.pkl': {'subject': 'Sub 30', 'video_part': 'pt2'},
    'ecg_2024-08-02_18-44-02_200.pkl': {'subject': 'Sub 31', 'video_part': 'pt1'},
    'ecg_2024-08-02_19-02-31_200.pkl': {'subject': 'Sub 31', 'video_part': 'pt2'},
    'ecg_2024-08-02_19-36-12_200.pkl': {'subject': 'Sub 32', 'video_part': 'pt1'},
    'ecg_2024-08-02_19-45-41_200.pkl': {'subject': 'Sub 32', 'video_part': 'pt2'},
    'ecg_2024-08-02_20-37-55_200.pkl': {'subject': 'Sub 33', 'video_part': 'pt1'},
    'ecg_2024-08-02_20-47-57_200.pkl': {'subject': 'Sub 33', 'video_part': 'pt2'}
}

# Maps each raw CSV filename to the subject and video part it belongs to.
# Labels use the long forms 'Subject N' and 'partN'. This mapping drives the dropdown
# options and the CSV lookup in update_plot; it lists fewer recordings than
# pkl_files_to_subject_video.
files_to_subject_video = {
    'ecg_2024-07-27_10-15-17_200.csv': {'subject': 'Subject 8', 'video_part': 'part1'},
    'ecg_2024-07-27_10-27-07_200.csv': {'subject': 'Subject 8', 'video_part': 'part2'},
    'ecg_2024-07-27_11-03-03_200.csv': {'subject': 'Subject 9', 'video_part': 'part1'},
    'ecg_2024-07-27_11-11-36_200.csv': {'subject': 'Subject 9', 'video_part': 'part2'},
    # NOTE(review): the recording 'ecg_2024-07-27_18-59-10_200' is labelled Sub 10 / pt1 in
    # pkl_files_to_subject_video but Subject 19 / part2 here. As written, Subject 19 has no
    # pickle entry at all and Subject 10 has no part1 CSV, so load_ecg_data cannot find a
    # matching pickle for the Subject 19 rows. Confirm the intended subject/part labels.
    'ecg_2024-07-27_18-55-03_200.csv': {'subject': 'Subject 19', 'video_part': 'part1'},
    'ecg_2024-07-27_18-59-10_200.csv': {'subject': 'Subject 19', 'video_part': 'part2'},
    'ecg_2024-07-27_19-13-00_200.csv': {'subject': 'Subject 10', 'video_part': 'part2'},
    'ecg_2024-07-27_20-14-24_200.csv': {'subject': 'Subject 11', 'video_part': 'part1'},
    'ecg_2024-07-27_20-23-26_200.csv': {'subject': 'Subject 11', 'video_part': 'part2'},
    'ecg_2024-07-27_20-58-56_200.csv': {'subject': 'Subject 12', 'video_part': 'part1'},
    'ecg_2024-07-27_21-07-41_200.csv': {'subject': 'Subject 12', 'video_part': 'part2'},
    'ecg_2024-07-29_10-44-20_200.csv': {'subject': 'Subject 13', 'video_part': 'part1'},
    'ecg_2024-07-29_10-52-58_200.csv': {'subject': 'Subject 13', 'video_part': 'part2'},
    'ecg_2024-07-29_11-39-04_200.csv': {'subject': 'Subject 14', 'video_part': 'part1'},
    'ecg_2024-07-29_11-52-32_200.csv': {'subject': 'Subject 14', 'video_part': 'part2'},
    'ecg_2024-07-29_18-57-02_200.csv': {'subject': 'Subject 15', 'video_part': 'part1'},
    'ecg_2024-07-29_19-05-53_200.csv': {'subject': 'Subject 15', 'video_part': 'part2'},
    'ecg_2024-08-02_20-47-57_200.csv': {'subject': 'Subject 33', 'video_part': 'part2'}
    # Add more entries if there are other files.
}

In [4]:
def organize_files_by_subject_and_video(file_dict):
    """
    Group filenames into a two-level lookup: subject first, then video part.

    Parameters:
    - file_dict (dict): Maps each filename to a dict holding 'subject' and 'video_part'.

    Returns:
    - dict: {subject: {video_part: [filename, ...]}}; filenames keep the order in which
      they appear in file_dict.
    """
    grouped = {}

    for name, meta in file_dict.items():
        who, part = meta['subject'], meta['video_part']
        # setdefault creates the nested containers the first time a key is seen.
        grouped.setdefault(who, {}).setdefault(part, []).append(name)

    return grouped


# Build the subject -> video part -> [CSV filenames] lookup from the CSV mapping;
# update_plot and the dropdown options read from it.
organized_files = organize_files_by_subject_and_video(files_to_subject_video)


In [5]:
def load_pickle_data(filename, plot_type='ECG'):
    """
    Read one pickle file and extract the entry that matches the requested plot type.

    Each plot type has its own directory, its own key inside the pickle, and the column
    label handed back to the caller:
    'ECG' -> pkl_base_path / 'data' / 'data',
    'RR'  -> pkl_rr_base_path / 'rpeaks_indices' / 'rpeaks_indices',
    'EDR' -> pkl_edr_base_path / 'edr_rate' / 'edr'.

    Parameters:
    - filename (str): Name of the pickle file to load.
    - plot_type (str): Type of data to load ('ECG', 'RR', 'EDR').

    Returns:
    - pd.DataFrame: The extracted data, or an empty DataFrame when loading fails or the
      plot type is not one of the three above.
    - str: The column label for the loaded data, or None in the failure cases.
    """
    try:
        # Pick the per-type settings first, then share a single read path.
        if plot_type == 'ECG':
            folder, key, label = pkl_base_path, 'data', 'data'
        elif plot_type == 'RR':
            folder, key, label = pkl_rr_base_path, 'rpeaks_indices', 'rpeaks_indices'
        elif plot_type == 'EDR':
            folder, key, label = pkl_edr_base_path, 'edr_rate', 'edr'
        else:
            # Unrecognised plot types yield the empty fallback without a message.
            return pd.DataFrame(), None

        payload = pd.read_pickle(os.path.join(folder, filename))
        return pd.DataFrame(payload[key]), label

    except FileNotFoundError:
        print(f"File {filename} not found.")
    except KeyError as e:
        print(f"Key error: {e} not found in the file {filename}.")
    except Exception as e:
        # Any other failure is reported rather than raised.
        print(f"Failed to load {filename}: {e}")

    return pd.DataFrame(), None

In [6]:
def load_ecg_data(filename, plot_type='ECG'):
    """
    Load ECG data from a CSV file and merge it with corresponding pickle data.

    The pickle is found by translating the CSV mapping's labels ('Subject N', 'partN')
    into the pickle mapping's labels ('Sub N', 'ptN') and taking the first match.

    Parameters:
    - filename (str): The name of the CSV file to load.
    - plot_type (str): The type of data to load from the pickle file ('ECG', 'RR', 'EDR').

    Returns:
    - pd.DataFrame: The CSV data (first row dropped) with the pickle data added as a column.
    - int: The sampling rate extracted from the filename.
    - pd.Timestamp: The start time extracted from the filename.
    - str: The column name corresponding to the data added from the pickle file.

    Failure cases:
    - (None, None, None, None) when the CSV has no entry in files_to_subject_video or no
      pickle matches its subject/video part.
    - (empty DataFrame, None, None, None) when the matching pickle could not be loaded.
    """
    csv_file_path = os.path.join(base_path, filename)
    # Skip the first row. .copy() detaches the slice so the column added below is written
    # to an independent frame instead of a slice of the parsed CSV.
    csv_data = pd.read_csv(csv_file_path).iloc[1:, :].copy()

    subject_video_info = files_to_subject_video.get(filename)
    if not subject_video_info:
        print("No subject and video part info found for this CSV file.")
        return None, None, None, None

    # Translate 'Subject 8' -> 'Sub 8' and 'part1' -> 'pt1' to match the pickle mapping.
    pkl_subject = f"Sub {subject_video_info['subject'].split()[-1]}"
    pkl_video_part = f"pt{subject_video_info['video_part'][-1]}"
    matching_pkl = [pkl for pkl, info in pkl_files_to_subject_video.items()
                    if info['subject'] == pkl_subject and info['video_part'] == pkl_video_part]
    if not matching_pkl:
        print("No matching pickle file found.")
        return None, None, None, None

    pkl_data, col = load_pickle_data(matching_pkl[0], plot_type)
    if col is None:
        # load_pickle_data has already printed the reason. Return an empty frame so callers
        # that test `df.empty or col is None` report the failure instead of crashing on an
        # attempt to add a column named None.
        return pd.DataFrame(), None, None, None

    # When the lengths differ, drop the pickle's first row as was done for the CSV.
    if len(pkl_data) != len(csv_data):
        pkl_data = pkl_data.iloc[1:, :]
    # NOTE(review): this assignment aligns on index labels, not position, and csv_data's
    # labels no longer start at 0 after the slice above — confirm the pickle data's index
    # lines up as intended in the equal-length case.
    csv_data[col] = pkl_data

    # Filename layout: <type>_<YYYY-MM-DD>_<HH-MM-SS>_<rate>.<ext>
    reading_type = filename[:3]
    date_str = filename[4:14]
    start_time_str = filename[15:23]
    rate = int(filename.split('_')[-1].split('.')[0])

    try:
        start_time = pd.to_datetime(start_time_str, format='%H-%M-%S')
    except ValueError:
        print(f"Warning: Failed to parse time '{start_time_str}'. Using default value of 00:00:00.")
        start_time = pd.to_datetime('00:00:00', format='%H:%M:%S')

    print(f"Reading Type: {reading_type}, Date: {date_str}, Start Time: {start_time.time()}, Sampling Rate: {rate} Hz")

    return csv_data, rate, start_time, col

In [7]:

def calculate_rr_intervals(r_peaks_indices, sampling_rate):
    """
    Turn consecutive R-peak sample positions into R-R intervals.

    The gap between neighbouring indices is divided by the sampling rate, so the result
    is in seconds when sampling_rate is given in Hz. The output has one element fewer
    than the input.
    """
    sample_gaps = np.diff(r_peaks_indices)
    return sample_gaps / sampling_rate

def update_plot(subject_video, plot_type='ECG'):
    """
    Render an interactive Plotly figure for one subject/video recording.

    For plot_type 'RR' a histogram of R-R intervals is shown. For any other plot type the
    loaded signal is drawn as a line using every `rate`-th sample, with the flagged
    recording window and the stimulus labels from `stimuli_info` marked on it.

    Parameters:
    - subject_video (str): Selection in the form "<subject> Video Part <n>".
    - plot_type (str): Data type to load and plot ('ECG', 'RR', 'EDR').

    Returns:
    - None: The figure is displayed with fig.show(); problems are reported with print().
    """
    # Parse the subject and video part from the dropdown selection
    subject, video_part = subject_video.split(" Video Part ")
    video_part_key = f'part{video_part}'

    # Set plot titles and other metadata based on plot type
    titles = {
        'ECG': 'Heart Rate',
        'RR': 'Distribution of R-R Intervals',
        'EDR': 'ECG-Derived Respiration Rate'
    }
    title = titles.get(plot_type, 'Plot')

    # Look up the CSV for this selection; an unknown subject is treated like a missing part
    files = organized_files.get(subject, {}).get(video_part_key)
    if not files:
        print(f"CSV file not found for {subject}'s {video_part_key}!")
        return
    file = files[0]

    # Load data. load_ecg_data returns None placeholders when it cannot resolve the
    # recording, so None must be ruled out before touching df.empty.
    df, rate, start_time, col = load_ecg_data(file, plot_type)
    if df is None or df.empty or col is None:
        print(f"Data for {plot_type} could not be loaded.")
        return

    fig = make_subplots(rows=1, cols=1, subplot_titles=[title])

    # R-R interval specific handling
    if plot_type == 'RR':
        # Calculate R-R intervals and plot histogram
        r_peaks_indices = df[col].dropna().values.astype(int)
        rr_intervals = calculate_rr_intervals(r_peaks_indices, rate)
        fig.add_trace(go.Histogram(x=rr_intervals, nbinsx=20, showlegend=True), row=1, col=1)
        fig.update_yaxes(title_text="Frequency", row=1, col=1)
        fig.update_xaxes(title_text="R-R Interval (seconds)", row=1, col=1)
        fig.update_layout(title=f'{plot_type} Data for {subject}, Video: {video_part}', showlegend=False)
        fig.show()
        return  # R-R plot finished here

    # Non-RR plots (ECG, EDR) logic
    data = df[col].values
    flag_data = df['Recording Flag'].values

    # Keep every `rate`-th sample (one point per second of recording) and format
    # timestamps only for the samples that are actually plotted.
    time = np.arange(len(data)) / rate
    time_downsampled = [(start_time + datetime.timedelta(seconds=t)).time().strftime('%H:%M:%S')
                        for t in time[::rate]]
    data_downsampled = data[::rate]
    flag_data_downsampled = flag_data[::rate]
    y_low, y_high = min(data_downsampled), max(data_downsampled)

    # Plot ECG or EDR data as a line plot
    fig.add_trace(go.Scatter(x=time_downsampled, y=data_downsampled, mode='lines', showlegend=True), row=1, col=1)

    flagged = np.where(flag_data_downsampled)[0]
    if flagged.size:
        # Mark video start/end at the first and last flagged points
        start_index = int(flagged[0])
        end_index = int(flagged[-1])
        fig.add_trace(go.Scatter(x=[time_downsampled[start_index]], y=[data_downsampled[start_index]],
                                 mode='markers', marker=dict(color='green', size=10), name='Video Start',
                                 showlegend=True), row=1, col=1)
        fig.add_trace(go.Scatter(x=[time_downsampled[end_index]], y=[data_downsampled[end_index]],
                                 mode='markers', marker=dict(color='red', size=10), name='Video End',
                                 showlegend=True), row=1, col=1)
        fig.add_shape(go.layout.Shape(type="rect", x0=time_downsampled[start_index], x1=time_downsampled[end_index],
                                      y0=y_low, y1=y_high,
                                      fillcolor="rgba(0, 255, 0, 0.1)", opacity=0.7, line_width=1), row=1, col=1)

        # Parse the plotted timestamps once; every stimulus is matched against the same axis.
        axis_offsets = pd.to_timedelta(time_downsampled)

        # Add stimuli annotations, offset from the video start (start_index is in seconds
        # because the downsampled series has one point per second).
        for start_sec, label, duration_sec in stimuli_info.get(video_part_key, []):
            stimulus_start_time = start_time + pd.to_timedelta(start_index, unit='s') + pd.to_timedelta(start_sec, unit='s')
            stimulus_start_str = stimulus_start_time.strftime('%H:%M:%S')
            gaps = (axis_offsets - pd.to_timedelta(stimulus_start_str)).to_numpy()
            # argmin returns the first closest point, matching min() over the index range.
            closest_start_idx = int(np.abs(gaps).argmin())

            fig.add_annotation(x=time_downsampled[closest_start_idx], y=y_high,
                               text=label, showarrow=True, arrowhead=2, row=1, col=1,
                               font=dict(size=10), xanchor='left', yanchor='bottom', textangle=-20)
    else:
        # Without a flagged sample there is no video start to anchor markers or stimuli to.
        print("No samples with 'Recording Flag' set; video markers and stimuli annotations are skipped.")

    # Finalize plot layout
    # NOTE(review): this y-axis label is also applied to EDR plots — confirm that is intended.
    fig.update_yaxes(title_text="ECG Amplitude (mV)", row=1, col=1)
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_layout(title=f'{plot_type} Data for {subject}, Video: {video_part}', showlegend=True)
    fig.show()


# Create a list of combined subject and video part options for the dropdown,
# one entry per (subject, video part) pair present in organized_files.
subject_video_options = [
    f"{subject} Video Part {part[-1]}"
    for subject, parts in organized_files.items()
    for part in parts
]

# Create the widget with combined subject and video part options
subject_video_widget = widgets.Dropdown(options=subject_video_options, description='Subject & Video:')

# One interactive plot per plot type, all driven by the same dropdown.
# plot_type is wrapped in widgets.fixed: a bare string is a widget abbreviation that
# becomes an editable text box, so the plot type would not actually be pinned.
interact(update_plot, subject_video=subject_video_widget, plot_type=widgets.fixed('ECG'))
interact(update_plot, subject_video=subject_video_widget, plot_type=widgets.fixed('RR'))
# The trailing semicolon suppresses the function repr that interact returns.
interact(update_plot, subject_video=subject_video_widget, plot_type=widgets.fixed('EDR'));


interactive(children=(Dropdown(description='Subject & Video:', options=('Subject 8 Video Part 1', 'Subject 8 V…

interactive(children=(Dropdown(description='Subject & Video:', options=('Subject 8 Video Part 1', 'Subject 8 V…

interactive(children=(Dropdown(description='Subject & Video:', options=('Subject 8 Video Part 1', 'Subject 8 V…

<function __main__.update_plot(subject_video, plot_type='ECG')>