In [1]:
import pandas as pd
# Load a single participant's recording to use as a worked example.
# According to the original notes this is raw impedance data from the VU-AMS 5fs.
# NOTE(review): the file name and variable name suggest accelerometer z-axis
# data rather than impedance -- confirm which description is correct.
# The Task_Label_2 column holds our task labels for the procedure.
#
# Any other file format works as well, provided the result ends up as a pandas DataFrame.
#
# The DataFrame must NOT be datetime-indexed. If it is, drop the index before
# running the cells that follow.
df_acc = pd.read_hdf('df_acc_z.h5', key='df_acc_z')
df_acc


Unnamed: 0,MZR,Task_Label_2
0,0.791016,
1,0.790039,
2,0.787598,
3,0.790527,
4,0.787109,
...,...,...
3749996,0.629883,94.0
3749997,0.628418,
3749998,0.626953,
3749999,0.624512,


In [2]:
import numpy as np
import pandas as pd
import librosa
from scipy.signal import butter, filtfilt
from scipy.stats import kurtosis, skew, entropy
import math

def butterworth_highpass(data, cutoff_frequency=75, sampling_rate=1000, order=2):
    """Apply a zero-phase digital Butterworth high-pass filter to ``data``.

    The filter coefficients are designed with ``scipy.signal.butter`` and
    applied forwards and backwards with ``scipy.signal.filtfilt``.

    Parameters
    ----------
    data : array_like
        Signal to filter.
    cutoff_frequency : float, optional
        Critical frequency of the filter, in the same units as ``sampling_rate``.
    sampling_rate : float, optional
        Sampling frequency of ``data``.
    order : int, optional
        Order of the Butterworth design passed to ``butter``.

    Returns
    -------
    numpy.ndarray
        The filtered signal.
    """
    numerator, denominator = butter(
        order,
        cutoff_frequency,
        btype='high',
        analog=False,
        fs=sampling_rate,
    )
    return filtfilt(numerator, denominator, data)

# Task labels to look up in Task_Label_2, in the order the feature rows are produced.
# Labels with a letter suffix are strings; the others are floats. Both kinds are
# compared unchanged against the column values.
tasks = [
    '12.0a', '12.0b', '12.0c', '12.0d', '12.0e', '12.0f',
    '14.0a', '14.0b', '14.0c', '14.0d', '14.0e', '14.0f',
    '16.0a', '16.0b', '16.0c', '16.0d', '16.0e', '16.0f',
    '18.0a', '18.0b', '18.0c',
    '20.0a', '20.0b', '20.0c',
    '23.0a', '23.0b', '23.0c',
    '25.0a', '25.0b', '25.0c',
    '28.0a', '28.0b', '28.0c',
    '30.0a', '30.0b', '30.0c',
    34.0, 36.0, 38.0, 40.0, 42.0, 44.0,
    47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
    60.0, 62.0, 64.0, 66.0, 68.0, 70.0,
    '75.0a', '75.0b', '75.0c',
    '78.0a', '78.0b', '78.0c',
    83.0, 85.0, 87.0, 89.0, 91.0, 93.0,
]

# Settings shared by every task window.
SAMPLING_RATE = 1000     # Hz; same value as butterworth_highpass's default sampling_rate
WINDOW_SAMPLES = 30000   # samples analysed per task (30 s at 1000 Hz)
N_FFT = 2048             # frame length used for the framed librosa features
HOP_LENGTH = 512         # hop between consecutive frames

# Column order of the resulting feature table.
feature_names = ['AccZ_ZCR', 'AccZ_Spectral_Centroid', 'AccZ_Spectral_Bandwidth', 'AccZ_Spectral_Flatness',
                 'AccZ_Spectral_Rolloff_25', 'AccZ_Spectral_Rolloff_50', 'AccZ_Spectral_Rolloff_85', 'AccZ_RMS',
                 'AccZ_Spectral_Kurtosis', 'AccZ_Spectral_Skewness', 'AccZ_Spectral_Entropy',
                 'AccZ_Spectral_Variance', 'AccZ_Spectral_Mean', 'AccZ_Spectral_Crest', 'Task_Label']


def spectral_crest(magnitude):
    """Return the largest spectral magnitude divided by the sum of all magnitudes."""
    return np.max(magnitude) / np.sum(magnitude)


def extract_acc_z_features(window, sr=SAMPLING_RATE):
    """Compute the AccZ feature set for one window of raw samples.

    The window is high-pass filtered with ``butterworth_highpass`` first. The
    framed librosa features are averaged over all frames; the remaining
    spectral statistics are taken from the magnitude of a single real FFT of
    the whole filtered window.

    Parameters
    ----------
    window : numpy.ndarray
        One-dimensional array of raw samples for a single task.
    sr : int, optional
        Sampling rate in Hz, used for the filter and the frequency-based features.

    Returns
    -------
    dict
        Feature name -> value, keyed by the ``AccZ_*`` names in ``feature_names``
        (``Task_Label`` is added by the caller).
    """
    y = butterworth_highpass(window, sampling_rate=sr)
    # Only the zero-crossing rate uses the de-meaned signal; every other
    # feature is computed from the filtered signal directly.
    y_demeaned = y - np.mean(y)

    def rolloff(percent):
        return np.mean(librosa.feature.spectral_rolloff(
            y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, roll_percent=percent))

    # Magnitude spectrum of the whole window, computed once and reused below.
    magnitude = np.abs(np.fft.rfft(y))

    return {
        'AccZ_ZCR': np.mean(librosa.feature.zero_crossing_rate(y=y_demeaned)),
        'AccZ_Spectral_Centroid': np.mean(
            librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH)),
        'AccZ_Spectral_Bandwidth': np.mean(
            librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH)),
        'AccZ_Spectral_Flatness': np.mean(
            librosa.feature.spectral_flatness(y=y, n_fft=N_FFT, hop_length=HOP_LENGTH)),
        'AccZ_Spectral_Rolloff_25': rolloff(0.25),
        'AccZ_Spectral_Rolloff_50': rolloff(0.50),
        'AccZ_Spectral_Rolloff_85': rolloff(0.85),
        'AccZ_RMS': np.mean(librosa.feature.rms(y=y, frame_length=N_FFT, hop_length=HOP_LENGTH)),
        'AccZ_Spectral_Kurtosis': kurtosis(magnitude),
        'AccZ_Spectral_Skewness': skew(magnitude),
        'AccZ_Spectral_Entropy': entropy(magnitude, base=2),
        'AccZ_Spectral_Variance': np.var(magnitude),
        'AccZ_Spectral_Mean': np.sum(magnitude) / len(magnitude),
        'AccZ_Spectral_Crest': spectral_crest(magnitude),
    }


# One record (dict) per task that occurs in the recording.
all_features = []

for task in tasks:
    # Locate the task by ROW POSITION rather than by index label, so the window
    # arithmetic below is valid whatever index df_acc carries.
    match_positions = np.flatnonzero((df_acc['Task_Label_2'] == task).to_numpy())
    if match_positions.size == 0:
        continue  # this label never occurs in Task_Label_2

    start = match_positions[0]

    # Take WINDOW_SAMPLES samples starting at the first occurrence of the task.
    # NOTE: if the task starts fewer than WINDOW_SAMPLES rows before the end of
    # the recording, the window is shorter and its features are computed on
    # the truncated signal.
    window = df_acc['MZR'].iloc[start:start + WINDOW_SAMPLES].to_numpy()

    record = extract_acc_z_features(window)
    record['Task_Label'] = task
    all_features.append(record)

# Build the feature table; `columns` fixes the column order (and keeps the
# columns present even when no task was found).
df_features_acc_z = pd.DataFrame(all_features, columns=feature_names)
df_features_acc_z

Unnamed: 0,AccZ_ZCR,AccZ_Spectral_Centroid,AccZ_Spectral_Bandwidth,AccZ_Spectral_Flatness,AccZ_Spectral_Rolloff_25,AccZ_Spectral_Rolloff_50,AccZ_Spectral_Rolloff_85,AccZ_RMS,AccZ_Spectral_Kurtosis,AccZ_Spectral_Skewness,AccZ_Spectral_Entropy,AccZ_Spectral_Variance,AccZ_Spectral_Mean,AccZ_Spectral_Crest,Task_Label
0,0.527633,274.727180,128.534182,0.337860,167.463917,273.586467,430.597855,0.001072,0.170741,0.588283,13.578071,0.009681,0.159977,0.000299,12.0a
1,0.525531,274.597251,129.106254,0.338308,166.545286,273.164394,430.895789,0.001068,0.174957,0.621838,13.576859,0.009722,0.158932,0.000255,12.0b
2,0.526177,273.977324,129.035885,0.334855,166.263904,272.899563,430.225437,0.001077,0.247167,0.638577,13.573237,0.009942,0.160176,0.000282,12.0c
3,0.524563,273.622483,128.842518,0.338053,166.297007,271.980932,429.952331,0.001079,0.156208,0.601899,13.582317,0.009780,0.161233,0.000253,12.0d
4,0.527493,274.138658,128.768573,0.335129,167.017015,272.345074,430.589579,0.001069,0.202919,0.625433,13.582289,0.009656,0.159663,0.000263,12.0e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,0.528395,277.671053,127.468728,0.324975,171.345339,276.491327,431.541314,0.001069,0.149487,0.631830,13.574318,0.009871,0.158959,0.000251,85.0
62,0.512629,275.584017,126.067317,0.284131,178.206104,268.041578,429.455773,0.001349,3.472823,1.267517,13.536042,0.018164,0.196598,0.000446,87.0
63,0.525895,275.261696,128.867879,0.331946,168.308064,273.445776,431.450278,0.001069,0.193832,0.631540,13.579310,0.009729,0.159341,0.000249,89.0
64,0.496946,270.582627,124.956946,0.278865,176.095736,260.154595,423.430879,0.001371,2.558122,1.238397,13.528758,0.019167,0.198828,0.000394,91.0


In [3]:
#df_features_accgyro['Participant'] = 58682
import pandas as pd

# Export the feature table to an Excel workbook.
# The writer is used as a context manager so the file is saved and closed when
# the block exits. ExcelWriter.save() is deprecated (it emits a FutureWarning)
# and was removed in pandas 2.0, so it must not be called explicitly.
with pd.ExcelWriter('df_features_acc_z_71402.xlsx', engine='xlsxwriter') as writer:
    # Write the DataFrame as a single sheet, without the index column and with
    # floats formatted to seven decimal places.
    df_features_acc_z.to_excel(writer, index=False, float_format='%.7f', sheet_name='Sheet1')

  writer.save()
