In [192]:
import numpy as np
import pandas as pd
import os
import sys
import re
import wfdb
import scipy.stats as stats
from scipy.fft import fft, fftfreq

In [193]:
def extract_unique_values_from_folder(folder:str):
    """
    Function to extract just the unique headers of the dat and hea files (removing the file name) 
    """
    unique = set()
    for f in os.listdir(folder):
        unique.add(extract_basename(os.path.join(folder, f)))
        
    return list(unique)

In [194]:
def extract_basename(filename):
    """
    Function to extract just the unique headers of the dat and hea files (removing the file spec) 
    """
    return os.path.splitext(filename)[0]

In [195]:
def extract_info_from_name(filename):
    parsed = {}
    basename = os.path.basename(filename).split(".")[0]
    pattern = r"session(\d+)_participant(\d+)_gesture(\d+)_trial(\d+)"
    match = re.match(pattern, basename)
    parsed['session'] = match.group(1) 
    parsed['participant'] = match.group(2)
    parsed['gesture'] = match.group(3) 
    parsed['trial'] = match.group(4)
    parsed['filename'] = filename
    return parsed

In [298]:
session1 = os.path.abspath("/Users/rahul/Downloads/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session1")
session2 = os.path.abspath("/Users/rahul/Downloads/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session2")
session3 = os.path.abspath("/Users/rahul/Downloads/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session3")
unique_values = extract_unique_values_from_folder(session1)

parsed_data_1 = []
parsed_data_2 = []
parsed_data_3 = []

for participant_dir in os.listdir(session1):
    participant_path = os.path.join(session1, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_1.append(extract_info_from_name(file_path))

for participant_dir in os.listdir(session2):
    participant_path = os.path.join(session2, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_2.append(extract_info_from_name(file_path))

for participant_dir in os.listdir(session3):
    participant_path = os.path.join(session3, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_3.append(extract_info_from_name(file_path))

#parsed_data
df1 = pd.DataFrame(parsed_data_1)
df2 = pd.DataFrame(parsed_data_2)
df3 = pd.DataFrame(parsed_data_3)

df = pd.concat([df1, df2, df3], axis=0)
df['filename'] = df['filename'].str.replace('.hea', '')
df

  df['filename'] = df['filename'].str.replace('.hea', '')


Unnamed: 0,session,participant,gesture,trial,filename
0,1,13,9,3,/Users/rahul/Downloads/gesture-recognition-and...
1,1,13,1,5,/Users/rahul/Downloads/gesture-recognition-and...
2,1,13,17,5,/Users/rahul/Downloads/gesture-recognition-and...
3,1,13,4,1,/Users/rahul/Downloads/gesture-recognition-and...
4,1,13,17,4,/Users/rahul/Downloads/gesture-recognition-and...
...,...,...,...,...,...
5112,3,14,14,1,/Users/rahul/Downloads/gesture-recognition-and...
5113,3,14,7,3,/Users/rahul/Downloads/gesture-recognition-and...
5114,3,14,7,2,/Users/rahul/Downloads/gesture-recognition-and...
5115,3,14,11,5,/Users/rahul/Downloads/gesture-recognition-and...


In [299]:
# df['trial'].unique()

In [300]:
# df['participant'].unique()

In [301]:

# def integrated_EMG(signal):
#     return np.sum(np.abs(signal))

# def mean_absolute_value(signal):
#     return np.mean(np.abs(signal))

# def simple_square_integral(signal):
#     return np.sum(signal**2)

# def root_mean_square(signal):
#     return np.sqrt(np.mean(signal**2))

# def variance(signal):
#     return np.var(signal)

# def myopulse_percentage_rate(signal):
#     return len(np.where(np.diff(np.sign(signal)))[0]) / len(signal)

# def waveform_length(signal):
#     return np.sum(np.abs(np.diff(signal)))

# def difference_variance(signal):
#     return np.var(np.diff(signal))

# def difference_absolute_standard_deviation(signal):
#     return np.std(np.diff(np.abs(signal)))

# def willison_amplitude(signal, threshold=0.1):
#     return np.sum(np.abs(np.diff(signal)) > threshold)

In [302]:
def extract_features(df):
    """
    Extracting the following features:
    'MIN','MAX','MEAN','RMS','VAR','STD','POWER','PEAK','P2P','CREST FACTOR','SKEW','KURTOSIS',
            'MAX_f','SUM_f','MEAN_f','VAR_f','PEAK_f','SKEW_f','KURTOSIS_f'
    These will be an array of values for each of the 32 sensors
    """
    #time data
    features = {}
    #time domain features
    time_data = df.values
    features['iemg'] = np.sum(np.abs(time_data))
    features['mav'] = np.mean(np.abs(time_data))
    features['ssi'] = np.sum(time_data**2)
    features['myopulse'] = len(np.where(np.diff(np.sign(time_data)))[0]) / len(time_data)
    features['wflen'] = np.sum(np.abs(np.diff(time_data)))
    features['diffvar'] = np.var(np.diff(time_data))
    features['dasd'] = np.std(np.diff(np.abs(time_data)))
    features['willison'] = np.sum(np.abs(np.diff(time_data)) > 0.1)
    
    features['mean'] = np.mean(time_data, axis = 0)
    features['min'] = np.min(time_data, axis = 0)
    features['max'] = np.max(time_data, axis = 0)
    features['rms'] = np.sqrt(np.mean(time_data**2, axis = 0))
    features['max'] = np.std(time_data, axis = 0)
    features['power'] = np.mean(time_data**2, axis = 0)
    features['peak'] = np.max(np.abs(time_data), axis = 0)
    features['p2p'] = np.ptp(time_data, axis = 0)
    features['crest_factor'] = np.max(np.abs(time_data), axis = 0)/np.sqrt(np.mean(time_data**2, axis = 0))
    features['skew'] = stats.skew(time_data, axis = 0)
    features['kurtosis'] = stats.kurtosis(time_data, axis = 0)
    features['form_factor'] =np.sqrt(np.mean(time_data**2, axis = 0)) / np.mean(time_data, axis = 0)
    features['pulse_indicator'] = np.max(np.abs(time_data), axis = 0)/np.mean(time_data, axis = 0) 
    
    #Convert to frequency domain
    freq_data = fft(time_data)
    S_f = np.abs(freq_data**2)/len(df)
    features['max_f'] = np.max(S_f, axis = 0)
    features['sum_f'] = np.sum(S_f, axis = 0)
    features['mean_f'] = np.mean(S_f, axis = 0)
    features['var_f'] = np.var(S_f, axis = 0)
    features['peak_f'] = np.max(np.abs(S_f), axis = 0)
    features['skew_f'] = stats.skew(S_f, axis = 0)
    features['kurtosis_f'] = stats.kurtosis(S_f, axis = 0)
    
    return features
    

In [361]:
new_df = pd.DataFrame()
for i in range(len(df)):
    wave = wfdb.rdrecord(df['filename'].iloc[i])
    wave_df = wave.to_dataframe()
    wave_df = wave_df.drop(['U1','U2', 'U3','U4'], axis = 1)
    f = extract_features(wave_df)
    f['gesture'] = df['gesture'].iloc[i]
    f['participant'] = df['participant'].iloc[i]

    e = pd.DataFrame.from_dict(f, orient='index').T
    new_df = new_df.append(e)

new_df

Unnamed: 0,iemg,mav,ssi,myopulse,wflen,diffvar,dasd,willison,mean,min,...,pulse_indicator,max_f,sum_f,mean_f,var_f,peak_f,skew_f,kurtosis_f,gesture,participant
0,12527.173912,0.043691,1114.276499,4.272852,6654.991878,0.001284,0.032037,6221,"[-3.3865626032371843e-06, -1.8127820411250496e...","[-0.12668372176361933, -0.17513763845865235, -...",...,"[-37407.76019983317, -9928.510290468545, -1250...","[0.0032374708895371988, 0.0024257368417489157,...","[1.1552661529428505, 0.52725591139991, 0.05370...","[0.00011281896024832525, 5.148983509764746e-05...","[4.0759490469740834e-08, 1.009706456877444e-08...","[0.0032374708895371988, 0.0024257368417489157,...","[4.891384248345544, 8.37885999035441, 4.928104...","[40.60522664077152, 129.95181496083688, 39.485...",9,13
0,4995.909239,0.017424,155.117851,5.352344,3172.573858,0.000262,0.013734,66,"[-2.4296390143113294e-05, -2.8655309194855e-05...","[-0.08135848817856524, -0.08836079729179075, -...",...,"[-3348.583377997243, -3083.5750782146765, -517...","[0.0003400682853082485, 8.998830278752999e-05,...","[0.15571759620376374, 0.05185293690611252, 0.0...","[1.5206796504273803e-05, 5.063763369737551e-06...","[5.697483095282278e-10, 5.300152686934246e-11,...","[0.0003400682853082485, 8.998830278752999e-05,...","[3.923348971840542, 3.463509813561349, 2.57487...","[25.723773862588562, 18.487573299564048, 10.25...",1,13
0,2761.574404,0.009632,49.810684,3.706152,1145.086276,0.000043,0.00566,2,"[-1.3949779847863498e-05, -2.4062174569915226e...","[-0.03537256588958768, -0.03968770999791185, -...",...,"[-3634.505130751411, -2337.1056508721085, -392...","[0.00011443323193111958, 0.0001161171355056197...","[0.058170294893887545, 0.023942600676462675, 0...","[5.680692860731206e-06, 2.338144597310808e-06,...","[8.583329426825933e-11, 2.3226795149542194e-11...","[0.00011443323193111958, 0.0001161171355056197...","[3.851142382199922, 8.430328229535146, 6.83667...","[22.740194361564065, 124.17278000709265, 84.37...",17,13
0,15737.169767,0.054887,1582.133054,3.608496,7019.840947,0.001321,0.032525,5706,"[1.1641840060145082e-05, -4.215142767499504e-0...","[-0.37012997533964415, -0.39193950726548765, -...",...,"[31793.081972218017, -92983.68498630782, 20225...","[0.008602481339282963, 0.0017454534757986016, ...","[2.0481391254998913, 0.6776872349640545, 0.051...","[0.00020001358647459875, 6.618039403945844e-05...","[1.2981258482694252e-07, 1.2619632283574979e-0...","[0.008602481339282963, 0.0017454534757986016, ...","[7.161415744031498, 5.1704888382965395, 3.3833...","[101.75185540934247, 45.73876249877211, 17.204...",4,13
0,5779.696596,0.020158,234.385913,2.704883,2196.760459,0.000184,0.012485,363,"[-3.33143730828046e-05, -1.780150324510748e-05...","[-0.14179820646167074, -0.13910536655192493, -...",...,"[-4256.367247530787, -7814.248304572609, -7381...","[0.0011822720162164696, 0.00028295531801698513...","[0.3000423879830985, 0.10784719651565663, 0.00...","[2.9301014451474464e-05, 1.0531952784732094e-0...","[3.1250315537447087e-09, 3.8389478840925754e-1...","[0.0011822720162164696, 0.00028295531801698513...","[6.0074439493810825, 5.048279129084738, 5.7275...","[65.94258739608256, 39.86532288268642, 56.2416...",17,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,12265.030151,0.042777,1397.402511,5.405273,8530.122413,0.002867,0.045659,15295,"[1.6498772082619616e-05, 3.2853538128259262e-0...","[-0.17831702695746263, -0.19743424240377513, -...",...,"[10807.896858294565, 60095.275471700355, 16404...","[0.005726731252394037, 0.0032628507498287016, ...","[1.0192742929388048, 0.5427118342247342, 0.053...","[9.953850516980515e-05, 5.2999202561009204e-05...","[5.417596246373904e-08, 1.6793682832055938e-08...","[0.005726731252394037, 0.0032628507498287016, ...","[7.960141908982926, 8.87020938273655, 7.020650...","[112.56002343587748, 134.71770997426515, 80.89...",14,14
0,8162.702779,0.028469,537.170042,5.141504,4462.195065,0.000733,0.022413,3204,"[-1.2198009758019286e-06, 6.583611698426713e-0...","[-0.2989502882928676, -0.3772123599285712, -0....",...,"[-245081.20113310288, 57295.65733937707, -1134...","[0.003300095685778511, 0.001529226366693726, 4...","[0.6565733455829638, 0.20552095687746663, 0.01...","[6.411849077958631e-05, 2.0070405945065102e-05...","[1.8845802453629284e-08, 2.242111625635349e-09...","[0.003300095685778511, 0.001529226366693726, 4...","[6.3778202105817465, 11.442067560861847, 5.204...","[80.71858114562274, 269.6237905360568, 47.5588...",7,14
0,16413.495111,0.057246,2270.720352,5.24209,9207.583257,0.00289,0.04761,16795,"[-6.700379186944414e-05, -0.000101744622133766...","[-0.4101112036955902, -0.8348013243519293, -0....",...,"[-6489.406370384305, -8204.869278047843, -2114...","[0.006344236526696033, 0.002330297622180038, 0...","[2.823167341988231, 0.8333331155509592, 0.0687...","[0.00027569993574103817, 8.138018706552336e-05...","[2.6377383126822925e-07, 2.6390543803883207e-0...","[0.006344236526696033, 0.002330297622180038, 0...","[4.287772994611254, 5.055463117273265, 5.33748...","[26.535609555439695, 36.77452073869384, 45.008...",7,14
0,18656.975851,0.06507,3294.742742,4.512012,9743.69967,0.00481,0.061085,25812,"[-0.00012418053231595642, -0.00011279260799871...","[-0.5126378429641242, -0.513328923958835, -0.5...",...,"[-4128.165932320242, -4551.086574438503, -4868...","[0.012855727171059806, 0.004497041382583255, 0...","[3.9356197765025294, 1.2096993318841085, 0.128...","[0.0003843378687990751, 0.00011813470037930747...","[6.136888221745904e-07, 5.69172453327819e-08, ...","[0.012855727171059806, 0.004497041382583255, 0...","[5.585876137081029, 5.863037427592904, 6.04276...","[46.598255572304986, 54.88959837847488, 60.447...",11,14


In [368]:
new_df.reset_index()

Unnamed: 0,index,iemg,mav,ssi,myopulse,wflen,diffvar,dasd,willison,mean,...,pulse_indicator,max_f,sum_f,mean_f,var_f,peak_f,skew_f,kurtosis_f,gesture,participant
0,0,12527.173912,0.043691,1114.276499,4.272852,6654.991878,0.001284,0.032037,6221,"[-3.3865626032371843e-06, -1.8127820411250496e...",...,"[-37407.76019983317, -9928.510290468545, -1250...","[0.0032374708895371988, 0.0024257368417489157,...","[1.1552661529428505, 0.52725591139991, 0.05370...","[0.00011281896024832525, 5.148983509764746e-05...","[4.0759490469740834e-08, 1.009706456877444e-08...","[0.0032374708895371988, 0.0024257368417489157,...","[4.891384248345544, 8.37885999035441, 4.928104...","[40.60522664077152, 129.95181496083688, 39.485...",9,13
1,0,4995.909239,0.017424,155.117851,5.352344,3172.573858,0.000262,0.013734,66,"[-2.4296390143113294e-05, -2.8655309194855e-05...",...,"[-3348.583377997243, -3083.5750782146765, -517...","[0.0003400682853082485, 8.998830278752999e-05,...","[0.15571759620376374, 0.05185293690611252, 0.0...","[1.5206796504273803e-05, 5.063763369737551e-06...","[5.697483095282278e-10, 5.300152686934246e-11,...","[0.0003400682853082485, 8.998830278752999e-05,...","[3.923348971840542, 3.463509813561349, 2.57487...","[25.723773862588562, 18.487573299564048, 10.25...",1,13
2,0,2761.574404,0.009632,49.810684,3.706152,1145.086276,0.000043,0.00566,2,"[-1.3949779847863498e-05, -2.4062174569915226e...",...,"[-3634.505130751411, -2337.1056508721085, -392...","[0.00011443323193111958, 0.0001161171355056197...","[0.058170294893887545, 0.023942600676462675, 0...","[5.680692860731206e-06, 2.338144597310808e-06,...","[8.583329426825933e-11, 2.3226795149542194e-11...","[0.00011443323193111958, 0.0001161171355056197...","[3.851142382199922, 8.430328229535146, 6.83667...","[22.740194361564065, 124.17278000709265, 84.37...",17,13
3,0,15737.169767,0.054887,1582.133054,3.608496,7019.840947,0.001321,0.032525,5706,"[1.1641840060145082e-05, -4.215142767499504e-0...",...,"[31793.081972218017, -92983.68498630782, 20225...","[0.008602481339282963, 0.0017454534757986016, ...","[2.0481391254998913, 0.6776872349640545, 0.051...","[0.00020001358647459875, 6.618039403945844e-05...","[1.2981258482694252e-07, 1.2619632283574979e-0...","[0.008602481339282963, 0.0017454534757986016, ...","[7.161415744031498, 5.1704888382965395, 3.3833...","[101.75185540934247, 45.73876249877211, 17.204...",4,13
4,0,5779.696596,0.020158,234.385913,2.704883,2196.760459,0.000184,0.012485,363,"[-3.33143730828046e-05, -1.780150324510748e-05...",...,"[-4256.367247530787, -7814.248304572609, -7381...","[0.0011822720162164696, 0.00028295531801698513...","[0.3000423879830985, 0.10784719651565663, 0.00...","[2.9301014451474464e-05, 1.0531952784732094e-0...","[3.1250315537447087e-09, 3.8389478840925754e-1...","[0.0011822720162164696, 0.00028295531801698513...","[6.0074439493810825, 5.048279129084738, 5.7275...","[65.94258739608256, 39.86532288268642, 56.2416...",17,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15346,0,12265.030151,0.042777,1397.402511,5.405273,8530.122413,0.002867,0.045659,15295,"[1.6498772082619616e-05, 3.2853538128259262e-0...",...,"[10807.896858294565, 60095.275471700355, 16404...","[0.005726731252394037, 0.0032628507498287016, ...","[1.0192742929388048, 0.5427118342247342, 0.053...","[9.953850516980515e-05, 5.2999202561009204e-05...","[5.417596246373904e-08, 1.6793682832055938e-08...","[0.005726731252394037, 0.0032628507498287016, ...","[7.960141908982926, 8.87020938273655, 7.020650...","[112.56002343587748, 134.71770997426515, 80.89...",14,14
15347,0,8162.702779,0.028469,537.170042,5.141504,4462.195065,0.000733,0.022413,3204,"[-1.2198009758019286e-06, 6.583611698426713e-0...",...,"[-245081.20113310288, 57295.65733937707, -1134...","[0.003300095685778511, 0.001529226366693726, 4...","[0.6565733455829638, 0.20552095687746663, 0.01...","[6.411849077958631e-05, 2.0070405945065102e-05...","[1.8845802453629284e-08, 2.242111625635349e-09...","[0.003300095685778511, 0.001529226366693726, 4...","[6.3778202105817465, 11.442067560861847, 5.204...","[80.71858114562274, 269.6237905360568, 47.5588...",7,14
15348,0,16413.495111,0.057246,2270.720352,5.24209,9207.583257,0.00289,0.04761,16795,"[-6.700379186944414e-05, -0.000101744622133766...",...,"[-6489.406370384305, -8204.869278047843, -2114...","[0.006344236526696033, 0.002330297622180038, 0...","[2.823167341988231, 0.8333331155509592, 0.0687...","[0.00027569993574103817, 8.138018706552336e-05...","[2.6377383126822925e-07, 2.6390543803883207e-0...","[0.006344236526696033, 0.002330297622180038, 0...","[4.287772994611254, 5.055463117273265, 5.33748...","[26.535609555439695, 36.77452073869384, 45.008...",7,14
15349,0,18656.975851,0.06507,3294.742742,4.512012,9743.69967,0.00481,0.061085,25812,"[-0.00012418053231595642, -0.00011279260799871...",...,"[-4128.165932320242, -4551.086574438503, -4868...","[0.012855727171059806, 0.004497041382583255, 0...","[3.9356197765025294, 1.2096993318841085, 0.128...","[0.0003843378687990751, 0.00011813470037930747...","[6.136888221745904e-07, 5.69172453327819e-08, ...","[0.012855727171059806, 0.004497041382583255, 0...","[5.585876137081029, 5.863037427592904, 6.04276...","[46.598255572304986, 54.88959837847488, 60.447...",11,14


In [370]:
# new_df1 = pd.DataFrame()
# for i in range(10):
#     wave = wfdb.rdrecord(df['filename'].iloc[i])
#     wave_df = wave.to_dataframe()
#     wave_df = wave_df.drop(['U1','U2', 'U3','U4'], axis = 1)
#     f = extract_features(wave_df)
#     f['gesture'] = df['gesture'].iloc[i]
#     f['participant'] = df['participant'].iloc[i]

#     e = pd.DataFrame.from_dict(f, orient='index').T
#     new_df1 = new_df1.append(e, ignore_index = False)

# new_df1.reset_index()

In [369]:
new_df.reset_index().to_csv('/Users/rahul/Downloads/features.csv')

In [371]:
# i = 53
# wave = wfdb.rdrecord(df['filename'].iloc[i])
# wave_df = wave.to_dataframe()
# wave_df = wave_df.drop(['U1','U2', 'U3','U4'], axis = 1)
# f = extract_features(wave_df)
# f['gesture'] = df['gesture'].iloc[i]
# f['participant'] = df['participant'].iloc[i]

# g = pd.DataFrame.from_dict(f, orient='index').T

# # new_df = pd.DataFrame(columns = ['iemg', 'mav', 'ssi', 'myopulse', 'wflen', 'diffvar', 'dasd',
# #        'willison', 'mean', 'min', 'max', 'rms', 'power', 'peak', 'p2p',
# #        'crest_factor', 'skew', 'kurtosis', 'form_factor', 'pulse_indicator',
# #        'max_f', 'sum_f', 'mean_f', 'var_f', 'peak_f', 'skew_f', 'kurtosis_f',
# #        'gesture', 'participant'])
# # new_df.append(e, ignore_index=True)
# # new_df
# e = e.append(g)
# e