# ASA preprocessing module

This preprocessing module labels each patient recording by means of a peak analysis.

## Required Imports

In [1]:
import re

import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder

from src.settings import ROOT_DIR
from src.utils import data_split
from src.preprocessing.asa_functions import get_asa_laterality

## Load Data

In [2]:
# Read the pickled time-series frame stored under <ROOT_DIR>/data/processed.
# NOTE: unpickling can execute arbitrary code -- only load files produced by this project.
data: pd.DataFrame = pd.read_pickle(
    ROOT_DIR.joinpath('data', 'processed', 'anon_imu_data_time_series_cleaned.pkl')
)

In [3]:
# Only the session key and the left/right angle-Y signals are needed for labeling.
columns_for_labeling = [
    'date_measure',
    'imu_angleY_left',
    'imu_angleY_right',
]
labeling_data = data.loc[:, columns_for_labeling]
labeling_data.head()

Unnamed: 0,date_measure,imu_angleY_left,imu_angleY_right
0,2023-9-29-11-53-21,-41.945678,346.797958
1,2023-9-29-11-53-21,-16.777883,435.826445
2,2023-9-29-11-53-21,-17.254295,461.75842
3,2023-9-29-11-53-21,-62.680428,407.701072
4,2023-9-29-11-53-21,-158.679678,317.129831


## Labeling Data
We assign a laterality to every recording (one per unique `date_measure`), based on a peak analysis of its left and right `imu_angleY` signals.

In [4]:
# Label every recording (one per unique `date_measure`) with its laterality.
#
# A single groupby pass replaces the previous per-date boolean filter, which
# rescanned the whole frame once for every date. `sort=False` yields the groups
# in order of first appearance -- the same order `Series.unique()` produced --
# and rows keep their original order inside each group.
# `dates` is now a plain list built alongside `laterality`, so the two are
# aligned by construction.
# NOTE(review): groupby skips rows whose `date_measure` is missing, whereas the
# old loop passed empty arrays to the helper for such a key -- confirm the
# cleaned data has no missing dates.
laterality = []
dates = []

for date, sample in labeling_data.groupby('date_measure', sort=False):
    # get_asa_laterality is a project helper; it receives the recording's
    # left and right angle-Y signals as NumPy arrays.
    sample_lat = get_asa_laterality(
        sample['imu_angleY_left'].to_numpy(),
        sample['imu_angleY_right'].to_numpy(),
    )
    dates.append(date)
    laterality.append(sample_lat)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [5]:
# One row per recording: its date key and the laterality computed for it.
Y = pd.DataFrame({'date_measure': dates, 'laterality': laterality})
Y.head()

Unnamed: 0,date_measure,laterality
0,2023-9-29-11-53-21,der
1,2023-6-22-10-28-8,der
2,2023-6-16-14-45-43,der
3,2023-9-19-10-12-28,der
4,2023-3-23-13-14-7,der


In [6]:
# Learn the laterality classes first, then map every row to its integer code.
label_encoder = LabelEncoder().fit(Y['laterality'])
encoded_labels = label_encoder.transform(Y['laterality'])

In [7]:
# Peek at the first four encoded labels.
encoded_labels[:4]

array([0, 0, 0, 0])

In [8]:
# Classes learned by the encoder; a class's position in this array is its integer code.
label_encoder.classes_

array(['der', 'izq'], dtype=object)

In [9]:
# Attach the integer-encoded laterality as the `label` column.
Y = Y.assign(label=encoded_labels)

In [10]:
# Preview Y, which now carries the integer `label` column next to `laterality`.
Y.head()

Unnamed: 0,date_measure,laterality,label
0,2023-9-29-11-53-21,der,0
1,2023-6-22-10-28-8,der,0
2,2023-6-16-14-45-43,der,0
3,2023-9-19-10-12-28,der,0
4,2023-3-23-13-14-7,der,0


## Features
The final laterality model focuses on the left and right limbs, not on the spine, because the spine does not affect laterality. For that reason, the `spine_base` columns are removed in this module.

In [11]:
# Candidate features are taken positionally: everything except the first three
# columns and the last one.
# NOTE(review): the [3:-1] slice depends on the column order of the pickle -- confirm.
# NOTE(review): imu_angleY_left / imu_angleY_right stay in the feature list even
# though the labels are derived from those same signals -- confirm this is intended.
spine_patter = re.compile("^imu_.*_spine")
feature_columns = [
    col
    for col in data.columns[3:-1]
    if spine_patter.match(col) is None  # drop the spine sensor columns
]
feature_columns

['imu_gyroX_right',
 'imu_gyroY_right',
 'imu_gyroZ_right',
 'imu_accX_right',
 'imu_accY_right',
 'imu_accZ_right',
 'imu_gyroX_left',
 'imu_gyroY_left',
 'imu_gyroZ_left',
 'imu_accX_left',
 'imu_accY_left',
 'imu_accZ_left',
 'imu_angleX_right',
 'imu_angleY_right',
 'imu_angleZ_right',
 'imu_angleX_left',
 'imu_angleY_left',
 'imu_angleZ_left',
 'imu_angularX_left',
 'imu_angularY_left',
 'imu_angularZ_left',
 'imu_angularX_right',
 'imu_angularY_right',
 'imu_angularZ_right']

In [12]:
# Build one (features, label) pair per recording: the feature columns of the
# group, paired with the label of the first row of Y that has the same date.
sequences = [
    (
        group[feature_columns],
        Y.loc[Y['date_measure'] == date].iloc[0]['label'],
    )
    for date, group in data.groupby('date_measure')
]

In [13]:
# Inspect the first (features, label) pair.
sequences[0]

(        imu_gyroX_right  imu_gyroY_right  imu_gyroZ_right  imu_accX_right  \
 294278       485.520635       245.096017       -85.665511        8.179869   
 294279      -289.996008       116.847725       403.042105        8.002617   
 294280      -652.201860       192.986548       573.127655        7.845273   
 294281      -410.009175       523.572963       313.160114        7.757391   
 294282       298.202984       952.803148      -205.963959        7.808478   
 ...                 ...              ...              ...             ...   
 299262       510.712858       701.567964      -684.354694        7.972659   
 299263       116.709461       750.644451      -508.918751        7.811803   
 299264      -181.756000       817.445367      -333.828648        7.677498   
 299265      -236.325631       897.089628      -158.256160        7.606327   
 299266       -34.224348       974.031477        31.559602        7.633360   
 
         imu_accY_right  imu_accZ_right  imu_gyroX_left  imu_g

In [14]:
# Number of sequences built: one per `date_measure` group.
len(sequences)

166

## Save Data
Now we save the sequences, which hold one 2-tuple per recording. The first element of the tuple is the DataFrame with the recording's time-series features, and the second is the label (0 or 1), which encodes the laterality of the affectation (0 = `der`, 1 = `izq`).

In [16]:
# Hand the (features, label) sequences to the project helper under the name 'asa'.
# NOTE(review): split ratios and output location are decided inside
# data_split.save_and_split_sequences and are not visible here -- confirm.
data_split.save_and_split_sequences(sequences, 'asa')