In [1]:
import os
import shutil
import glob
import datetime
import time
import pandas as pd
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure

In [2]:
# Read raw PPG files as a list of dataframes and store them in a list
def read_raw_ppg_files():
    files = glob.glob('../../prof_data/resteaze/*.csv')
    df_list = []
    for file in files:
        df = pd.read_csv(file)
        df_list.append(df)
    return df_list

data  = read_raw_ppg_files()

In [3]:
len(data)

16

In [18]:
#@title data prcessing for each dataframe

import heartpy as hp

# Bandpass Filter
from scipy.signal import butter, sosfilt, sosfreqz


def remove_extreme_values(signal, threshold):
    # Compute the absolute difference between the signal and its mean
    absolute_difference = np.abs(signal - np.mean(signal))
    # Compute the median of the absolute difference
    median_absolute_difference = np.median(absolute_difference)
    # Compute the median absolute deviation
    median_absolute_deviation = 1.4826 * median_absolute_difference
    # Compute the z-score
    z_score = 0.6745 * (absolute_difference / median_absolute_deviation)
    # Remove the extreme values
    signal = signal[z_score < threshold]
    return signal

def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    sos = butter(order, [low, high], analog=False, btype='band', output='sos')
    return sos


def butter_bandpass_filter(dat, lowcut, highcut, fs, order=5):
    sos = butter_bandpass(lowcut, highcut, fs, order=order)
    y = sosfilt(sos, dat)
    return y


def process_data(mdf):
    all_df = mdf[[
        'unixTimes', 'accelerometerX', 'accelerometerY', 'accelerometerZ',
        'gyroscopeX', 'gyroscopeY', 'gyroscopeZ', 'ledGreen', 'sleep_state'
    ]]

    all_df = all_df[all_df['sleep_state'].notna()]
    all_df = all_df.interpolate()

    all_df = all_df[all_df.sleep_state != -1].reset_index()

    all_df = ((all_df + all_df.shift(-1)) / 2)[::2]

    # Binary Classification
    all_df["sleep_state"] = np.where(all_df["sleep_state"] == 0, 0, 1)

    # fill NaN values
    all_df = all_df.dropna()

    # apply bandpass filter

    fs = 25
    lowcut = 0.55
    highcut = 3.0

    all_df['ledGreen'] = butter_bandpass_filter(all_df['ledGreen'],
                                                lowcut,
                                                highcut,
                                                fs,
                                                order=3)

    # proecess segmentwise HR df


    fs = 25  # Hz
    epoch_length = 60  # seconds

    # Calculate the breathing rate for each epoch
    wd_seg, m_seg = hp.process_segmentwise(np.array(all_df['ledGreen']),
                                        sample_rate=fs,
                                        segment_width=epoch_length)

    t_wd_dict = {'segment_indices': m_seg['segment_indices']}

    df_wd = pd.DataFrame.from_dict(t_wd_dict)
    df_m = pd.DataFrame(m_seg)

    # merge the two dataframes with common column as segment_indices
    df_segmented = pd.merge(df_m, df_wd, on='segment_indices')

    # make the segment_indices as index with first value of it
    df_segmented['segment_indices'] = df_segmented['segment_indices'].apply(
        lambda x: x[0])
    df_segmented = df_segmented.set_index('segment_indices')

    # remove index name
    df_segmented.index.name = None

    tt = df_segmented

    # add new row with index 0
    tt.loc[0] = np.nan

    tt = tt.reindex(list(range(tt.index.min(),tt.index.max()+1)),fill_value='NaN')

    # merge two dataframes with common index
    pre_final = pd.merge(all_df.reset_index(),
                         tt,
                         left_index=True,
                         right_index=True)

    # select first 15000 rows
    sed_df = pre_final.astype(float)

    sed_df = sed_df[sed_df.sleep_state != -1]

    # Binary Classification
    sed_df["sleep_state"] = np.where(sed_df["sleep_state"] == 0, 0, 1)

    sed_df = sed_df[['ledGreen', 'bpm', 'rmssd', 'pnn20', 'pnn50', 'hr_mad', 'sd1', 'sd2', 'accelerometerX', 'accelerometerY', 'accelerometerZ', 'gyroscopeX', 'gyroscopeY', 'gyroscopeZ', 'sleep_state']]

    # df = ((df + df.shift(-1)) / 2)[::2]

    # fill NaN values
    sed_df = sed_df.fillna(method='bfill', limit=1500).dropna()
    # remove extreme values from the signal (outliers)

    # Example usage
    filtered_ppg = remove_extreme_values(sed_df['ledGreen'].astype(float), threshold=3.0)
    filtered_bpm = remove_extreme_values(sed_df['bpm'].astype(float), threshold=3.0)
    filtered_rmssd = remove_extreme_values(sed_df['rmssd'].astype(float), threshold=3.0)
    filtered_pnn20 = remove_extreme_values(sed_df['pnn20'].astype(float), threshold=3.0)
    filtered_pnn50 = remove_extreme_values(sed_df['pnn50'].astype(float), threshold=3.0)
    filtered_hr_mad = remove_extreme_values(sed_df['hr_mad'].astype(float), threshold=3.0)
    filtered_sd1 = remove_extreme_values(sed_df['sd1'].astype(float), threshold=3.0)
    filtered_sd2 = remove_extreme_values(sed_df['sd2'].astype(float),
                                         threshold=3.0)
    filtered_accelerometerX = remove_extreme_values(sed_df['accelerometerX'].astype(float),
                                                    threshold=3.0)
    filtered_accelerometerY = remove_extreme_values(
        sed_df['accelerometerY'].astype(float), threshold=3.0)
    filtered_accelerometerZ = remove_extreme_values(sed_df['accelerometerZ'].astype(float),
                                                    threshold=3.0)
    filtered_gyroscopeX = remove_extreme_values(sed_df['gyroscopeX'].astype(float), threshold=3.0)
    filtered_gyroscopeY = remove_extreme_values(sed_df['gyroscopeY'].astype(float), threshold=3.0)
    filtered_gyroscopeZ = remove_extreme_values(
        sed_df['gyroscopeZ'].astype(float), threshold=3.0)

    sed_df['ledGreen'] = filtered_ppg
    sed_df['bpm'] = filtered_bpm
    sed_df['rmssd'] = filtered_rmssd
    sed_df['pnn20'] = filtered_pnn20
    sed_df['pnn50'] = filtered_pnn50
    sed_df['hr_mad'] = filtered_hr_mad
    sed_df['sd1'] = filtered_sd1
    sed_df['sd2'] = filtered_sd2
    sed_df['accelerometerX'] = filtered_accelerometerX
    sed_df['accelerometerY'] = filtered_accelerometerY
    sed_df['accelerometerZ'] = filtered_accelerometerZ
    sed_df['gyroscopeX'] = filtered_gyroscopeX
    sed_df['gyroscopeY'] = filtered_gyroscopeY
    sed_df['gyroscopeZ'] = filtered_gyroscopeZ

    sed_df = sed_df.dropna()

    # reset index
    sed_df = sed_df.reset_index(drop=True)

    return sed_df

In [22]:
%%capture --no-display
process_data(data[-1])

Unnamed: 0,ledGreen,bpm,rmssd,pnn20,pnn50,hr_mad,sd1,sd2,accelerometerX,accelerometerY,accelerometerZ,gyroscopeX,gyroscopeY,gyroscopeZ,sleep_state
0,55.030469,75.688073,114.142768,0.857143,0.571429,60.0,75.592895,69.634615,3329.75,137.00,-2497.75,-1.25,-153.50,84.50,0
1,57.804007,75.688073,114.142768,0.857143,0.571429,60.0,75.592895,69.634615,3328.25,120.25,-2444.75,-8.75,-211.00,79.25,0
2,60.321702,75.688073,114.142768,0.857143,0.571429,60.0,75.592895,69.634615,3294.50,109.25,-2432.50,25.75,-344.50,35.00,0
3,76.061060,75.688073,114.142768,0.857143,0.571429,60.0,75.592895,69.634615,3298.50,128.50,-2511.50,54.25,-187.50,82.25,0
4,63.886682,75.688073,114.142768,0.857143,0.571429,60.0,75.592895,69.634615,3271.00,121.50,-2497.50,46.75,-214.50,38.25,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409635,21.001679,60.945274,272.524802,0.953488,0.837209,80.0,191.943745,141.908494,-2197.50,166.25,3400.50,15.25,-204.00,56.50,1
409636,21.064223,60.945274,272.524802,0.953488,0.837209,80.0,191.943745,141.908494,-2195.75,166.25,3409.75,19.00,-219.00,67.75,1
409637,20.980304,60.945274,272.524802,0.953488,0.837209,80.0,191.943745,141.908494,-2191.75,163.75,3404.75,10.25,-226.00,53.75,1
409638,20.680896,60.945274,272.524802,0.953488,0.837209,80.0,191.943745,141.908494,-2191.00,158.50,3399.00,15.25,-247.00,46.50,1
