In [None]:
!pip install emteqai --extra-index-url https://pypi-emteq.emteq-stage.net/

In [None]:
!pip install biosppy

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
import biosppy.signals.tools as sig_tool
from emteqai.utils.plots.plots import plot_data
from pathlib import Path
import glob
from emteqai.utils.processing.data.segmentation import slide_data
from emteqai.utils.features.statistical_features import stat_driver

In [None]:
def generate_labels(labels):
    """Collapse every window of labels to its most frequent value.

    Parameters
    ----------
    labels : iterable of array-like
        One entry per window; each entry holds the labels of that window.

    Returns
    -------
    numpy.ndarray
        One label per window: the value occurring most often in it. On a tie
        the smallest value in sort order wins, because ``np.unique`` returns
        its values sorted and ``np.argmax`` picks the first maximum.
    """
    majority = []
    for row in labels:
        # Values and counts come from a single np.unique call per window
        # (the original evaluated it twice for every row).
        values, counts = np.unique(row, return_counts=True)
        majority.append(values[np.argmax(counts)])
    return np.array(majority)

In [None]:
def filter_signals(data, columns, sampling_freq=50):
    """Low-pass filter and then linearly detrend the selected columns.

    Parameters
    ----------
    data : column-indexable table (e.g. a DataFrame)
        Source of the signals; ``data[name]`` is read for each requested name.
    columns : list of str
        Names of the columns to process, in output order.
    sampling_freq : number, optional
        Sampling rate forwarded to the low-pass filter (default 50).

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``columns`` holding the processed signal.
    """
    processed = [
        detrend_signal_linear(apply_lowpass_filter(data[name], sampling_freq))
        for name in columns
    ]
    # Each processed Series becomes one row here, so transpose to get one
    # column per signal before attaching the requested names.
    table = pd.DataFrame(processed).T
    table.columns = columns
    return table

def apply_lowpass_filter(signal, sampling_freq, cutoff_freq=1, order=3):
    """Low-pass filter ``signal`` with a BioSPPy FIR filter.

    Parameters
    ----------
    signal : array-like
        Signal to filter.
    sampling_freq : number
        Sampling rate of ``signal``; passed on as ``sampling_rate``.
    cutoff_freq : number, optional
        Cutoff passed on as ``frequency`` (default 1, the value that used to
        be hard-coded). Presumably in the same unit as ``sampling_freq``;
        confirm against the BioSPPy documentation.
    order : int, optional
        Filter order (default 3, the value that used to be hard-coded).

    Returns
    -------
    pandas.Series
        The ``"signal"`` entry of the BioSPPy result, with a default index.
    """
    filtered = sig_tool.filter_signal(
        signal=signal,
        ftype="FIR",
        band="lowpass",
        frequency=cutoff_freq,
        order=order,
        sampling_rate=sampling_freq,
    )
    return pd.Series(filtered["signal"])

def detrend_signal_linear(raw_signal):
    """Subtract the least-squares straight-line fit from ``raw_signal``.

    Returns a new ``pandas.Series`` (default integer index) with the residual.
    """
    residual = sig.detrend(raw_signal, type="linear")
    return pd.Series(residual)

In [None]:
# Names of the raw "Nav" columns: an X and a Y entry for each listed site.
nav_cols = [
    "Nav/Raw.X[RightCheek]",
    "Nav/Raw.Y[RightCheek]",
    "Nav/Raw.X[RightBrow]",
    "Nav/Raw.Y[RightBrow]",
    "Nav/Raw.X[LeftBrow]",
    "Nav/Raw.Y[LeftBrow]",
    "Nav/Raw.X[LeftCheek]",
    "Nav/Raw.Y[LeftCheek]",
    "Nav/Raw.X[RightTemple]",
    "Nav/Raw.Y[RightTemple]",
    "Nav/Raw.X[LeftTemple]",
    "Nav/Raw.Y[LeftTemple]",
]

# Names of the raw "Prox" columns: one entry for each listed site.
prox_cols = [
    "Prox/Raw[RightCheek]",
    "Prox/Raw[RightBrow]",
    "Prox/Raw[CentreBottomBrow]",
    "Prox/Raw[CentreTopBrow]",
    "Prox/Raw[LeftBrow]",
    "Prox/Raw[LeftCheek]",
]

In [None]:
# List every entry directly inside the TaskC_data directory.
folders = glob.glob(
    '/content/drive/MyDrive/OCOGEM_b_c/TaskC_data/*'
)

In [None]:
# Start with an empty list of folder names.
names = list()

In [None]:
# Take the last path component of every entry in `folders` as its name.
# The list is rebuilt from scratch (instead of appended to) so that running
# this cell more than once cannot duplicate entries.
names = [Path(folder).name for folder in folders]

In [None]:
# Create one output directory per name for the filtered data.
# parents=True also creates 'filtered_taskc' itself when it is missing, and
# exist_ok=True keeps the cell re-runnable once the directories exist.
for name in names:
  Path('/content/drive/MyDrive/filtered_taskc/'+name).mkdir(parents=True, exist_ok=True)

In [None]:
# Directory holding the raw data; the trailing slash matters because sub-paths
# are built from it by plain string concatenation.
path = '/content/drive/MyDrive/OCOGEM_b_c/TaskC_data/'
# Reset `folders` to an empty list.
folders = list()

In [None]:
# Path of the 'TaskC' sub-directory for every name.
# The list is rebuilt from scratch (instead of appended to) so that running
# this cell more than once cannot list - and later process - a folder twice.
folders = [path+name+'/TaskC' for name in names]

In [None]:
# Filter every CSV found in each folder and write the result under
# filtered_taskc/<folder name>/<file name>.
for folder in folders:
  # Second-to-last path component: used to save the filtered data in the
  # matching output directory.
  folder_name = folder.rsplit('/', 2)[-2]
  for file in glob.glob(folder+'/*.csv'):
    file_name = file.rsplit('/', 1)[-1]
    # Drop the first 20 rows and renumber the remaining ones from 0.
    data = pd.read_csv(file).iloc[20:, :].reset_index(drop=True)
    # Replace each Nav column by its running sum before filtering.
    data[nav_cols] = data[nav_cols].cumsum()
    signal_cols = nav_cols + prox_cols
    data[signal_cols] = filter_signals(data, signal_cols)
    data.to_csv('/content/drive/MyDrive/filtered_taskc/'+folder_name+'/'+file_name)

In [None]:
# List every entry directly inside the filtered_taskc directory.
filtered_folders = glob.glob(
    '/content/drive/MyDrive/filtered_taskc'+'/*'
)

In [None]:
# Sanity check: number of entries found (the recorded run printed 65).
len(filtered_folders)

65

In [None]:
# Create one output directory per name for the extracted features.
# parents=True also creates 'features_taskc' itself when it is missing, and
# exist_ok=True keeps the cell re-runnable once the directories exist.
for name in names:
  Path('/content/drive/MyDrive/features_taskc/'+name).mkdir(parents=True, exist_ok=True)

In [None]:
# Numeric codes found in the "Label" column and the names they are replaced by.
label_names = {
    1.0: "smile",
    2.0: "frown",
    3.0: "eyebrow_raise",
    4.0: "squeezed_eyes",
    0.0: "neutral",
}

# For every filtered CSV: name the labels, cut each Nav/Prox column and the
# label column into windows, compute per-window features and the majority
# label, and write the table to features_taskc/<folder name>/<file name>.
for folder in filtered_folders:
  name = Path(folder).name
  # Sorted so that files are processed in a reproducible order.
  for file in sorted(glob.glob(folder+'/*.csv')):
    file_name = Path(file).name
    data = pd.read_csv(file)
    # Missing labels become "neutral". The column is assigned back explicitly:
    # `data["Label"].replace(..., inplace=True)` acts on a temporary Series
    # under pandas Copy-on-Write and would leave the frame unchanged.
    data["Label"] = data["Label"].fillna("neutral").replace(label_names)
    # Local names on purpose: the notebook-level `nav_cols` / `prox_cols`
    # lists used by the filtering cell must not be overwritten here.
    nav_feature_cols = [col for col in data.columns if "Nav" in col]
    prox_feature_cols = [col for col in data.columns if "Prox" in col]
    # No cumulative sum here: the filtering cell already applied it to the
    # Nav columns before these files were written.
    data = data[nav_feature_cols + prox_feature_cols + ["Label"]]
    per_sensor_features = []
    for sensor in data.columns:
      # 25 and 5 are presumably the window length and the step in samples
      # (5 samples of 20 ms = 100 ms); confirm against slide_data.
      slided_data = slide_data(data[sensor], 25, 5)
      if sensor == "Label":
        labels = generate_labels(slided_data)
      else:
        per_sensor_features.append(stat_driver(slided_data, sensor))
    # One concat for all sensors instead of growing the frame inside the loop;
    # the column names come straight from the stat_driver outputs.
    if per_sensor_features:
      features = pd.concat(per_sensor_features, axis=1)
    else:
      features = pd.DataFrame()
    features["Label"] = labels
    # The index is written on purpose: a later cell drops the 'Unnamed: 0.1'
    # column that results from saving and re-reading it.
    features.to_csv('/content/drive/MyDrive/features_taskc/'+name+'/'+file_name)

In [None]:
# Paths of all CSV files one level below features_taskc.
calculated_features = glob.glob(
    '/content/drive/MyDrive/features_taskc'+'/*/*.csv'
)

In [None]:
# List every entry directly inside the features_taskc directory.
folders = glob.glob(
    '/content/drive/MyDrive/features_taskc'+'/*'
)

In [None]:
# Stack all feature CSVs of a folder into <folder>/calculated_features.csv.
for folder in folders:
  # Skip plain files sitting next to the folders (a result.csv from an
  # earlier run, for instance): nothing can be written inside them.
  if not Path(folder).is_dir():
    continue
  # Sorted for a reproducible row order. A calculated_features.csv left by a
  # previous run is excluded so its rows are not merged in a second time.
  files = [
      item for item in sorted(glob.glob(folder+'/*.csv'))
      if Path(item).name != 'calculated_features.csv'
  ]
  frames = [pd.read_csv(item) for item in files]
  # One concat per folder instead of re-copying a growing frame per file.
  # An empty folder still yields an (empty) output file.
  if frames:
    features = pd.concat(frames, axis=0, ignore_index=True)
  else:
    features = pd.DataFrame()
  # The index is written on purpose: a later cell drops the 'Unnamed: 0.1'
  # column that results from saving and re-reading it.
  features.to_csv(folder+'/calculated_features.csv')

In [None]:
# Paths of the per-folder calculated_features.csv files. glob returns matches
# in arbitrary order, and FileId values are later assigned by position in this
# list, so sort to keep that numbering reproducible between runs.
calculated = sorted(glob.glob('/content/drive/MyDrive/features_taskc/'+'*/calculated_features.csv'))

In [None]:
# Counter used to number the merged files, starting at 0.
fileId = 0

In [None]:
# Empty frame that will hold the combined feature table.
result = pd.DataFrame()

In [None]:
# Read every file listed in `calculated`, tag its rows with a FileId
# (0, 1, 2, ... in list order) and stack everything into `result`.
# `result` and `fileId` are rebuilt from scratch here, so re-running the cell
# cannot append the same files a second time, and a single concat avoids
# re-copying the growing table on every iteration.
tagged_frames = [
    pd.read_csv(file).assign(FileId=file_id)
    for file_id, file in enumerate(calculated)
]
if tagged_frames:
  result = pd.concat(tagged_frames, axis=0, ignore_index=True)
else:
  result = pd.DataFrame()
# As before, `fileId` ends up holding the next unused id.
fileId = len(tagged_frames)

In [None]:
# Remove the duplicated index column created by saving and re-reading the
# CSVs. errors='ignore' makes the cell safe to run again after the column is
# already gone (a plain drop would raise KeyError).
result = result.drop(columns='Unnamed: 0.1', errors='ignore')

In [None]:
# Show the combined feature table (pandas abbreviates the middle rows and columns).
result

Unnamed: 0,Nav/Raw.X[RightCheek]_mean,Nav/Raw.X[RightCheek]_std,Nav/Raw.X[RightCheek]_min_,Nav/Raw.X[RightCheek]_max_,Nav/Raw.X[RightCheek]_range_,Nav/Raw.X[RightCheek]_iqr,Nav/Raw.X[RightCheek]_kurtosis,Nav/Raw.X[RightCheek]_skewness,Nav/Raw.X[RightCheek]_rms,Nav/Raw.Y[RightCheek]_mean,...,Prox/Raw[LeftCheek]_std,Prox/Raw[LeftCheek]_min_,Prox/Raw[LeftCheek]_max_,Prox/Raw[LeftCheek]_range_,Prox/Raw[LeftCheek]_iqr,Prox/Raw[LeftCheek]_kurtosis,Prox/Raw[LeftCheek]_skewness,Prox/Raw[LeftCheek]_rms,Label,FileId
0,0.699789,0.003589,0.686201,0.704771,0.018569,0.003215,5.897146,-2.197705,0.699799,1.032500,...,0.011299,0.398617,0.452917,0.054300,0.013570,4.355450,1.813114,0.412090,neutral,0
1,0.694152,0.010741,0.665693,0.701605,0.035912,0.007754,0.556639,-1.419756,0.694235,1.034001,...,0.022791,0.398617,0.469911,0.071294,0.013570,-0.385264,1.132580,0.420264,neutral,0
2,0.685042,0.017948,0.652996,0.701605,0.048609,0.028637,-1.137321,-0.682148,0.685277,1.036168,...,0.043255,0.398617,0.548658,0.150041,0.057905,0.367003,1.157911,0.438417,neutral,0
3,0.675738,0.019088,0.649795,0.701183,0.051389,0.040815,-1.675486,0.076587,0.676008,1.040126,...,0.075468,0.398617,0.632710,0.234093,0.129966,-0.645807,0.787641,0.481008,neutral,0
4,0.663102,0.021207,0.621052,0.701087,0.080035,0.024523,-0.577185,0.130209,0.663441,1.044938,...,0.104858,0.398617,0.723509,0.324891,0.168318,-1.110150,0.423906,0.542384,neutral,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213145,-2.996736,0.051023,-3.058574,-2.885388,0.173185,0.076641,-0.672668,0.736812,2.997170,2.748502,...,0.033122,-0.380764,-0.279766,0.100998,0.065844,-1.333831,0.129903,0.338790,neutral,64
213146,-3.025354,0.032941,-3.059839,-2.938335,0.121504,0.040861,0.300536,1.018227,3.025533,2.783542,...,0.025958,-0.380885,-0.304085,0.076800,0.046308,-1.197679,0.554518,0.355793,neutral,64
213147,-3.046876,0.022171,-3.084463,-3.008746,0.075717,0.030412,-1.078552,0.070894,3.046956,2.810649,...,0.018671,-0.393012,-0.328772,0.064239,0.021842,-0.416227,0.944372,0.370095,neutral,64
213148,-3.061466,0.020264,-3.093469,-3.027133,0.066335,0.034460,-1.121806,0.121328,3.061533,2.832684,...,0.012005,-0.395707,-0.348437,0.047270,0.015141,1.309377,1.159719,0.380694,neutral,64


In [None]:
# Save the combined feature table as result.csv inside features_taskc.
result.to_csv(
    '/content/drive/MyDrive/features_taskc/'+'result.csv'
)