In [1]:
# Imports
import neurokit2 as nk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from tqdm import tqdm

# Import custom functions

In [2]:
from src import feature_extraction as fe

In [None]:
def json_to_pandas(json_data, meta=None):
    """
    Convert JSON data to a Pandas DataFrame.

    This function takes a JSON data object and performs transformations to convert it into a structured Pandas DataFrame. The input JSON data should have a nested structure containing a list of samples, along with additional metadata such as supervisor, record_date and configs.frequency. More metadata can be added by extending the meta parameter.

    :json_data: JSON data object to convert to Pandas DataFrame.
    :type json_data: dict
    :meta: Additional metadata to include in the DataFrame, defaults to None. The meta parameter should be a list of strings, where each string is a path to a nested key in the JSON data object.
    :type meta: list, optional

    :return: Pandas DataFrame containing the JSON data.
    :rtype: pandas.DataFrame
    """
    df = pd.json_normalize(json_data, record_path=['samples'], meta=['supervisor', 'record_date', ['configs', 'frequency']] + meta)
    df_exploded = df.explode(['timestamp_idx', 'ecg', 'label'])

    return df_exploded

# Load data
Using the example 1 (see notebook '1_Data_Formatting_and_transformation' how it was built), this consists of only one ecg sample with a size of 60 000, thus a time frame of 60 seconds.

In [4]:
ecg_batch = json.load(open('../data/example1_input.json', 'r'))

# Test code of main file | Request 'process_ecg_features'

In [7]:
# get configs
configs = ecg_batch['configs']
# get single sample
samples = ecg_batch['samples']
# get window size
window_size = configs['window_size']
# get window slicing method
window_slicing_method = configs['window_slicing_method']

In [12]:
features_df = pd.DataFrame()
# iterate over samples of ecg batch
for sample in tqdm(ecg_batch['samples']):
    # convert to pandas
    sample_df = pd.json_normalize(sample).explode(['timestamp_idx', 'ecg', 'label'])
    display(sample_df)
    # preprocess ecg
    sample_df['ecg'] = nk.ecg_clean(sample_df['ecg'], sampling_rate=configs['frequency'], method="pantompkins1985")
    # slice in windows (window_size and window_slicing_method)
    windows = fe.create_windows(sample_df, 'timestamp_idx', window_size, window_slicing_method)
    print(f'Number of windows: {len(list(windows))}')
    # compute ecg features vor each window https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8203359/
    for i, window in enumerate(windows):
        # compute features
        features = fe.hrv_features(window['ecg'].values, configs['frequency'])
        # Create a DataFrame for the features
        tmp = pd.DataFrame(features, index=[0])
        # Add additional columns
        tmp['sample_id'] = sample['sample_id'].unique()
        tmp['subject_id'] = sample['subject_id'].unique()
        tmp['window_id'] = i
        tmp['w_start_time'] = window['timestamp_idx'].min()
        tmp['W_end_time'] = window['timestamp_idx'].max()
        # add new window features to df of all
        features_df = pd.concat([features_df, tmp], axis=0)

features_df.reset_index(drop=True, inplace=True)

  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,sample_id,subject_id,timestamp_idx,ecg,label
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.335,1500.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.336,4405.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.337,3072.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.338,1405.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:19:41.339,838.0,undefined
...,...,...,...,...,...
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.330,2771.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.331,2040.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.332,1960.0,undefined
0,f70c1033-36ae-4b8b-8b89-099a96dccca5,participant_1,2023-05-23 23:20:41.333,1861.0,undefined


  0%|          | 0/1 [00:00<?, ?it/s]


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''