#### code: utils/preprocessor.py 

## 1. Raw data info

In [1]:
import os
import glob

# Root folder of the raw data; each group has its own sub-folder below it.
RAWDATAPATH = "./FW&BW_Rawdata/"
RAW_CONTROL, RAW_PD = (
    os.path.join(RAWDATAPATH, group) for group in ("Controls", "PD")
)

In [2]:
# RAW data check
#
# Count the distinct file-name prefixes (the text before the first '_') in
# each raw-data folder -- presumably one prefix per subject; confirm against
# the raw file naming scheme.
#
# The prefix is taken from the file's basename. Splitting the full path on
# '_' and taking index 1 only produced this prefix because "FW&BW_Rawdata"
# contains exactly one underscore; any other root folder name would silently
# select a different token.
for label, folder in (("Controls:", RAW_CONTROL), ("      PD:", RAW_PD)):
    files = glob.glob(os.path.join(folder, "*.csv"))
    fnames = {os.path.basename(f).split('_')[0] for f in files}
    print(label, len(fnames))

Controls: 22
      PD: 83


## 2. Preprocessing

In [3]:
import os
import glob
import pandas as pd

from utils.preprocessor import Preprocessor

In [4]:
# Create the project's Preprocessor and set its dataset path to ./dataset_/.
p = Preprocessor()
p.set_datasetpath("./dataset_/")

In [5]:
# Take both root folders from the preprocessor and derive the per-group
# sub-folders ("Controls" and "PD") beneath each of them.
RAWDATAPATH = p.get_rawdatapath()
RAW_CONTROL, RAW_PD = (
    os.path.join(RAWDATAPATH, group) for group in ("Controls", "PD")
)

DATASETPATH = p.get_datasetpath()
DATA_CONTROL, DATA_PD = (
    os.path.join(DATASETPATH, group) for group in ("Controls", "PD")
)

# Echo the resolved roots so the configuration is visible in the notebook.
print("RAW data path:", RAWDATAPATH)
print("DATASET path:", DATASETPATH)

RAW data path: ./FW&BW_Rawdata/
DATASET path: ./dataset_/


In [6]:
# Run the preprocessing step once per raw-data group: controls first, then PD.
# The folder arguments are attributes of the preprocessor itself (p.RAW_*),
# not the notebook-level RAW_CONTROL / RAW_PD variables.
# NOTE(review): presumably this writes the prepared CSVs under the dataset
# path inspected in the following cell -- confirm in Preprocessor.
p.generate_prep_data(p.RAW_CONTROL)
p.generate_prep_data(p.RAW_PD)

In [7]:
# DATASET check
#
# Count the distinct values of the second '_'-separated token of each CSV
# file name, per dataset folder -- presumably one value per subject; confirm
# against the file naming used by Preprocessor.generate_prep_data.
#
# os.path.basename is used instead of splitting on a hard-coded '/', so the
# file name is isolated correctly whatever path separator the platform uses.
#
# After the loop, `files` holds the PD listing; the next cell reads files[0].
for label, folder in (("Controls:", DATA_CONTROL), ("      PD:", DATA_PD)):
    files = glob.glob(os.path.join(folder, "*.csv"))
    fnames = {os.path.basename(f).split('_')[1] for f in files}
    print(label, len(fnames))

Controls: 22
      PD: 83


In [8]:
# DATASET file format
# Load one prepared CSV and preview its first rows.
# `files` is the listing left over from the preceding cell's last glob (the PD
# folder). glob does not guarantee any ordering, so files[0] is an arbitrary
# sample rather than a specific recording.
# NOTE(review): the "Unnamed: 0" column in the preview looks like a saved
# DataFrame index read back as data -- confirm; if so, index_col=0 hides it.
df_prep = pd.read_csv(files[0])
df_prep.head()

Unnamed: 0,Frame,Sub Frame,LFHD_X,LFHD_Y,LFHD_Z,RFHD_X,RFHD_Y,RFHD_Z,LBHD_X,LBHD_Y,...,RHEE_Z,RTOE_X,RTOE_Y,RTOE_Z,CentreOfMass_X,CentreOfMass_Y,CentreOfMass_Z,CentreOfMassFloor_X,CentreOfMassFloor_Y,CentreOfMassFloor_Z
0,268,0,29.265129,-1798.91333,1544.079468,154.573624,-1816.26001,1540.643677,3.690291,-1956.970581,...,42.554436,175.393265,-1647.394531,36.231594,57.35585,-1894.350098,895.83606,57.35585,-1894.350098,0.0
1,269,0,29.352283,-1798.661255,1544.129395,154.664612,-1815.953369,1540.686768,3.790516,-1956.681763,...,42.554855,175.376556,-1647.390991,36.228561,57.396896,-1894.069824,895.889099,57.396896,-1894.069824,0.0
2,270,0,29.750246,-1797.463257,1544.362427,155.074432,-1814.608032,1540.865479,4.22919,-1955.381592,...,42.556519,175.305527,-1647.374878,36.215965,57.574863,-1892.818726,896.114502,57.574863,-1892.818726,0.0
3,271,0,30.678474,-1794.646606,1544.890381,156.004288,-1811.641968,1541.246338,5.198111,-1952.459229,...,42.559532,175.158356,-1647.339233,36.189949,57.970444,-1890.030029,896.604065,57.970444,-1890.030029,0.0
4,272,0,32.203342,-1789.978149,1545.755371,157.507278,-1806.884277,1541.856567,6.739291,-1947.737183,...,42.560993,174.946518,-1647.286743,36.152313,58.622211,-1885.537109,897.389099,58.622211,-1885.537109,0.0


## 3. Per-patient FW & BW data counts

In [9]:
# Build the per-patient FW/BW summary table.
# NOTE(review): presumably this writes patients.csv under the dataset path,
# which the following cell then reads -- confirm in Preprocessor.
p.generate_patients_fwbw_info_table()

In [10]:
# patients.csv format
# Read the summary table from the dataset folder and preview its first 5 rows.
# Per the preview below, the columns are Patient, Category, cntFW and cntBW.
# NOTE(review): the extra "Unnamed: 0" column looks like a saved DataFrame
# index read back as data -- confirm; if so, index_col=0 hides it.
patients = pd.read_csv(os.path.join(DATASETPATH, "patients.csv"))
patients.head(5)

Unnamed: 0,Patient,Category,cntFW,cntBW
0,BHY,Controls,3,3
1,CHH,Controls,3,3
2,HSH,Controls,3,3
3,JHY,Controls,3,3
4,JJG,Controls,3,3
