In [1]:
# Imports
import neurokit2 as nk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from tqdm import tqdm

# Import custom functions

In [2]:
from src import feature_extraction as fe

In [None]:
def json_to_pandas(json_data, meta=None):
    """
    Convert a JSON data object into a Pandas DataFrame.

    The records under the top-level ``samples`` key are normalized into rows, and the
    top-level fields ``supervisor``, ``record_date`` and ``configs.frequency`` are attached
    to every row as metadata columns. The list-valued columns ``timestamp_idx``, ``ecg``
    and ``label`` are then exploded together, so each resulting row holds one element of
    those lists.

    :param json_data: JSON data object to convert to a Pandas DataFrame.
    :type json_data: dict
    :param meta: Additional metadata to include in the DataFrame, defaults to None
        (no additional metadata). Each entry is a top-level key (string) or a path to a
        nested key given as a list of strings, e.g. ``['configs', 'window_size']``.
    :type meta: list, optional

    :return: Pandas DataFrame containing the JSON data.
    :rtype: pandas.DataFrame
    """
    # Metadata that is always carried over from the top level of the JSON object.
    base_meta = ['supervisor', 'record_date', ['configs', 'frequency']]
    # The default ``meta=None`` must not be concatenated to a list directly
    # (``list + None`` raises TypeError), so treat it as "nothing extra".
    extra_meta = [] if meta is None else list(meta)

    df = pd.json_normalize(json_data, record_path=['samples'], meta=base_meta + extra_meta)
    # Explode the three list-valued columns in lockstep so they stay aligned row by row.
    df_exploded = df.explode(['timestamp_idx', 'ecg', 'label'])

    return df_exploded

# Load data
This uses example 1 (see notebook '1_Data_Formatting_and_transformation' for how it was built). It consists of a single ECG sample with 60 000 data points, which corresponds to a time frame of 60 seconds.

In [4]:
# Read the example batch; the context manager closes the file handle as soon as parsing is done.
with open('../data/example1_input.json', 'r') as input_file:
    ecg_batch = json.load(input_file)

# Test code of main file | Request 'process_ecg_features'

In [18]:
# Split the loaded batch into its configuration block and its list of samples.
configs, samples = ecg_batch['configs'], ecg_batch['samples']
# Windowing parameters from the configuration: window size and slicing method.
window_size, window_slicing_method = configs['window_size'], configs['window_slicing_method']

In [25]:
# Collect one single-row DataFrame per window and concatenate them once after the loop,
# instead of re-copying a growing ``features_df`` with ``pd.concat`` on every iteration.
window_feature_frames = []
# iterate over samples of ecg batch
for sample in tqdm(samples):
    # convert to pandas
    sample_df = pd.DataFrame.from_dict(sample)
    print('Successfully converted dict to pandas and got single sample...')
    display(sample_df)
    # preprocess ecg
    sample_df['ecg'] = nk.ecg_clean(sample_df['ecg'], sampling_rate=configs['frequency'], method="pantompkins1985")
    # slice in windows (window_size and window_slicing_method)
    # Materialize the windows once: counting them via ``len(list(windows))`` would
    # consume the iterable if ``create_windows`` returns a generator, leaving the
    # feature loop below with nothing to process.
    windows = list(fe.create_windows(sample_df, 'timestamp_idx', window_size, window_slicing_method))
    print(f'Number of windows: {len(windows)}')
    # compute ecg features for each window https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8203359/
    for i, window in enumerate(windows):
        # compute features
        features = fe.hrv_features(window['ecg'].values, configs['frequency'])
        # Create a single-row DataFrame for the features of this window
        window_features = pd.DataFrame(features, index=[0])
        # Add identifying columns (column names kept exactly as before, including the
        # capitalized 'W_end_time', so the output schema is unchanged)
        window_features['subject_id'] = sample['subject_id']
        window_features['sample_id'] = sample['sample_id']
        window_features['window_id'] = i
        window_features['w_start_time'] = window['timestamp_idx'].min()
        window_features['W_end_time'] = window['timestamp_idx'].max()
        # remember this window's features for the final concatenation
        window_feature_frames.append(window_features)

# ``pd.concat`` raises on an empty list, so fall back to an empty frame when no
# window was produced; ``ignore_index=True`` yields a fresh 0..n-1 row index.
if window_feature_frames:
    features_df = pd.concat(window_feature_frames, axis=0, ignore_index=True)
else:
    features_df = pd.DataFrame()

  0%|          | 0/1 [00:00<?, ?it/s]

Successfully converted dict to pandas and got single sample...


Unnamed: 0,sample_id,subject_id,timestamp_idx,ecg,label
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.335,1500.0,undefined
1,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.336,4405.0,undefined
2,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.337,3072.0,undefined
3,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.338,1405.0,undefined
4,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.339,838.0,undefined
...,...,...,...,...,...
59995,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.330,2771.0,undefined
59996,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.331,2040.0,undefined
59997,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.332,1960.0,undefined
59998,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.333,1861.0,undefined


  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
  lf_hf_ratio = lf_band / hf_band
100%|██████████| 1/1 [00:00<00:00, 10.61it/s]

Number of windows: 13





In [26]:
# Display the collected feature table (one row per processed window).
features_df

Unnamed: 0,mean_rr,sdnn,rmssd,nn50,pnn50,lf_band,hf_band,lf_hf_ratio,subject_id,sample_id,window_id,w_start_time,W_end_time
0,0.6426,0.186896,0.336774,3,0.6,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,0,2023-05-23 23:19:41.335,2023-05-23 23:19:44.999
1,0.34825,0.153873,0.270766,7,0.583333,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,1,2023-05-23 23:19:45.000,2023-05-23 23:19:49.999
2,0.392545,0.117491,0.169679,8,0.727273,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,2,2023-05-23 23:19:50.000,2023-05-23 23:19:54.999
3,0.4494,0.163658,0.184729,8,0.8,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,3,2023-05-23 23:19:55.000,2023-05-23 23:19:59.999
4,0.36775,0.165213,0.271478,11,0.916667,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,4,2023-05-23 23:20:00.000,2023-05-23 23:20:04.999
5,0.401909,0.134468,0.219575,8,0.727273,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,5,2023-05-23 23:20:05.000,2023-05-23 23:20:09.999
6,0.294875,0.131194,0.200083,12,0.75,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,6,2023-05-23 23:20:10.000,2023-05-23 23:20:14.999
7,0.398182,0.183843,0.247847,9,0.818182,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,7,2023-05-23 23:20:15.000,2023-05-23 23:20:19.999
8,0.372462,0.149817,0.188498,8,0.615385,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,8,2023-05-23 23:20:20.000,2023-05-23 23:20:24.999
9,0.4393,0.13927,0.188644,9,0.9,0.0,0.0,,participant_1,f70c1033-36ae-4b8b-8b89-099a96dccca5,9,2023-05-23 23:20:25.000,2023-05-23 23:20:29.999
