In [None]:
import numpy as np
import pandas as pd
import os
import sys
import re
import wfdb
import scipy.stats as stats
from scipy.fft import fft, fftfreq

In [None]:
def extract_unique_values_from_folder(folder:str):
    """
    Function to extract just the unique headers of the dat and hea files (removing the file name) 
    """
    unique = set()
    for f in os.listdir(folder):
        unique.add(extract_basename(os.path.join(folder, f)))
        
    return list(unique)

In [None]:
def extract_basename(filename):
    """
    Function to extract just the unique headers of the dat and hea files (removing the file spec) 
    """
    return os.path.splitext(filename)[0]

In [None]:
def extract_info_from_name(filename):
    parsed = {}
    basename = os.path.basename(filename).split(".")[0]
    pattern = r"session(\d+)_participant(\d+)_gesture(\d+)_trial(\d+)"
    match = re.match(pattern, basename)
    parsed['session'] = match.group(1) 
    parsed['participant'] = match.group(2)
    parsed['gesture'] = match.group(3) 
    parsed['trial'] = match.group(4)
    parsed['filename'] = filename
    return parsed

In [None]:
session1 = os.path.abspath("E:/data/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session1")
session2 = os.path.abspath("E:/data/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session2")
session3 = os.path.abspath("E:/data/gesture-recognition-and-biometrics-electromyogram-grabmyo-1.0.2/session3")
unique_values = extract_unique_values_from_folder(session1)

parsed_data_1 = []
parsed_data_2 = []
parsed_data_3 = []

for participant_dir in os.listdir(session1):
    participant_path = os.path.join(session1, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_1.append(extract_info_from_name(file_path))

for participant_dir in os.listdir(session2):
    participant_path = os.path.join(session2, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_2.append(extract_info_from_name(file_path))

for participant_dir in os.listdir(session3):
    participant_path = os.path.join(session3, participant_dir)
    if os.path.isdir(participant_path):
        # Loop through files in participant directory
        for file_name in os.listdir(participant_path):
            if file_name.endswith(".hea"):  # Process only .hea files
                file_path = os.path.join(participant_path, file_name)
                if os.path.isfile(file_path):
                    parsed_data_3.append(extract_info_from_name(file_path))

#parsed_data
df1 = pd.DataFrame(parsed_data_1)
df2 = pd.DataFrame(parsed_data_2)
df3 = pd.DataFrame(parsed_data_3)

df = pd.concat([df1, df2, df3], axis=0)
df['filename'] = df['filename'].str.replace('.hea', '')
df

In [None]:
df1['filename'] = df1['filename'].str.replace('.hea', '')
df1

In [None]:
def extract_features(df):
    """
    Extracting the following features:
    'MIN','MAX','MEAN','RMS','VAR','STD','POWER','PEAK','P2P','CREST FACTOR','SKEW','KURTOSIS',
            'MAX_f','SUM_f','MEAN_f','VAR_f','PEAK_f','SKEW_f','KURTOSIS_f'
    These will be an array of values for each of the 32 sensors
    """
    #time data
    features = {}
    #time domain features
    time_data = df.values
    features['iemg'] = np.sum(np.abs(time_data))
    features['mav'] = np.mean(np.abs(time_data))
    features['ssi'] = np.sum(time_data**2)
    features['myopulse'] = len(np.where(np.diff(np.sign(time_data)))[0]) / len(time_data)
    features['wflen'] = np.sum(np.abs(np.diff(time_data)))
    features['diffvar'] = np.var(np.diff(time_data))
    features['dasd'] = np.std(np.diff(np.abs(time_data)))
    features['willison'] = np.sum(np.abs(np.diff(time_data)) > 0.1)
    
    features['mean'] = np.mean(time_data, axis = 0)
    features['min'] = np.min(time_data, axis = 0)
    features['max'] = np.max(time_data, axis = 0)
    features['rms'] = np.sqrt(np.mean(time_data**2, axis = 0))
    features['max'] = np.std(time_data, axis = 0)
    features['power'] = np.mean(time_data**2, axis = 0)
    features['peak'] = np.max(np.abs(time_data), axis = 0)
    features['p2p'] = np.ptp(time_data, axis = 0)
    features['crest_factor'] = np.max(np.abs(time_data), axis = 0)/np.sqrt(np.mean(time_data**2, axis = 0))
    features['skew'] = stats.skew(time_data, axis = 0)
    features['kurtosis'] = stats.kurtosis(time_data, axis = 0)
    features['form_factor'] =np.sqrt(np.mean(time_data**2, axis = 0)) / np.mean(time_data, axis = 0)
    features['pulse_indicator'] = np.max(np.abs(time_data), axis = 0)/np.mean(time_data, axis = 0) 
    
    #Convert to frequency domain
    freq_data = fft(time_data)
    S_f = np.abs(freq_data**2)/len(df)
    features['max_f'] = np.max(S_f, axis = 0)
    features['sum_f'] = np.sum(S_f, axis = 0)
    features['mean_f'] = np.mean(S_f, axis = 0)
    features['var_f'] = np.var(S_f, axis = 0)
    features['peak_f'] = np.max(np.abs(S_f), axis = 0)
    features['skew_f'] = stats.skew(S_f, axis = 0)
    features['kurtosis_f'] = stats.kurtosis(S_f, axis = 0)
    
    return features
    

In [None]:
chunk_size = 1000
total_size = 5000
num_chunks = total_size//chunk_size
num_chunks

In [32]:
new_df1 = pd.DataFrame()
for i in range(10):
    wave = wfdb.rdrecord(df['filename'].iloc[i])
    wave_df = wave.to_dataframe()
    wave_df = wave_df.drop(['U1','U2', 'U3','U4'], axis = 1)
    timestamp = list(wave_df.index)
    # print(timestamp)
    print(len(timestamp))
    # print(timestamp[-1])
    # wave_df['gesture'] = df['gesture'].iloc[i]
    # wave_df['participant'] = df['participant'].iloc[i]
    # wave_df['session'] = df['session'].iloc[i]
    df_split = np.array_split(wave_df, num_chunks)
    # new_df = pd.concat(df_split, keys=[i for i in range(num_chunks)]).reset_index()
    for j in range(len(df_split)):
        # print(j)
        f = extract_features(df_split[j])
        f['session'] = df['session'].iloc[i]
        f['participant'] = df['participant'].iloc[i]
        f['gesture'] = df['gesture'].iloc[i]
        f['chunk'] = j
        e = pd.DataFrame.from_dict(f, orient='index').T
        new_df1 = new_df1.append(e)

# new_df = new_df.drop(columns = "level_1")
# new_df1
# print(df_split[-1])

10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 

10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)


10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)


10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)


10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)


10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 

10240
10240


  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 = new_df1.append(e)
  new_df1 

10240
10240


  new_df1 = new_df1.append(e)


In [None]:
new_df1.reset_index()

In [None]:
new_df1

In [None]:
new_df.reset_index().to_csv('E:/features_bysession_chunks.csv')