# Preprocessing Module with Features Data
This module adds the `tremor` target column to the features data, derived from
each patient's MDS-UPDRS test results.

## Required Imports

In [ ]:
import re
import pickle
import pandas as pd

from src.settings import ROOT_DIR
from src.preprocessing import get_anon_id
from src.utils.data_split import save_and_split

## Load Data
Loads the features data and the MDS-UPDRS results.

In [ ]:
# Load the features DataFrame.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
data: pd.DataFrame = pd.read_pickle(ROOT_DIR / 'data' / 'processed' / 'anon_imu_data_features.pkl')

# Load the MDS-UPDRS results and keep only the ID column plus the postural
# and action tremor scores for each hand.
patients_mds: pd.DataFrame = pd.read_excel(ROOT_DIR / 'data' / 'raw' / 'patients_mdsupdrs_results.xlsx')
patients_mds_used_columns = [
    'Cédula',
    'Temblor postural – Mano derecha',
    'Temblor postural – Mano izquierda',
    'Temblor de acción – Mano derecha',
    'Temblor de acción – Mano izquierda',
]
# Take an explicit copy so that adding columns to the subset later does not
# raise pandas' SettingWithCopyWarning.
patients_mds = patients_mds[patients_mds_used_columns].copy()

## MDS-UPDRS Target extraction

It is necessary to split patients and controls so that patient IDs can be cross-referenced with the MDS-UPDRS test results:

In [ ]:
# Keep only the score columns. list.pop() takes an index, not a value, so the
# ID column is filtered out by name instead (this is also safe to re-run).
patients_mds_used_columns = [col for col in patients_mds_used_columns if col != 'Cédula']

# assign() returns a new frame, so no column is written into a possible slice.
patients_mds = patients_mds.assign(
    # get_anon_id is applied to every 'Cédula' value to produce the 'anon_id' key.
    anon_id=patients_mds['Cédula'].apply(get_anon_id),
    # Row-wise label: 1 when a patient's tremor scores add up to more than 0,
    # otherwise 0. sum(axis=1) skips missing scores, so they count as 0.
    tremor=(patients_mds[patients_mds_used_columns].sum(axis=1) > 0).astype(int),
)
patients_mds = patients_mds[['anon_id', 'tremor']]

In [ ]:
# Select the rows flagged as PD. The comparison must be applied to the column:
# data['PD' == 1] evaluates to data[False] and raises KeyError.
patients = data[data['PD'] == 1]
# Default inner merge: only patients with a matching MDS-UPDRS row are kept.
patients = pd.merge(patients, patients_mds, on='anon_id')
# Discard rows that still contain missing values in any column.
patients = patients.dropna()

In [ ]:
# Select the rows flagged as non-PD. The comparison must be applied to the
# column: data['PD' == 0] evaluates to data[False] and raises KeyError.
# assign() builds a new frame, so the constant tremor=0 label is not written
# into a slice of `data`.
controls = data[data['PD'] == 0].assign(tremor=0)

In [ ]:
# Stack the patient rows on top of the control rows into one DataFrame.
tremor_data = pd.concat(objs=[patients, controls], axis=0)

In [ ]:
# Remove the spine columns, which are not needed.
# NOTE(review): assumes every spine column name contains the substring
# 'spine' - confirm against the actual feature names.
pattern = re.compile(r'spine')
# Use the compiled pattern's own search(): re.match(col, pattern) had its
# arguments swapped and raised TypeError. Collect the columns that DO match,
# since those are the ones to drop.
dropable_columns = [col for col in tremor_data.columns if pattern.search(col)]
# drop() returns a new DataFrame, so the result has to be assigned back.
tremor_data = tremor_data.drop(columns=dropable_columns)

## Save data

In [ ]:
# Pass the labelled DataFrame to the project's save_and_split helper with the
# name 'tremor'. NOTE(review): presumably this splits the data and writes the
# parts to disk - confirm in src.utils.data_split.
save_and_split(tremor_data, 'tremor')