# Notebook para leer los txt

## 1. Raw Data Folder

* 30 participantes (14 sanos y 16 pacientes)
* 9 ejercicios
* x6 repeticiones
* 30 fps
* 2598 videos

In [1]:
# importar librerias necesarias
import pandas as pd # para manejar dataframes
import os # para interactuar con el sistema operativo

### 1.1 Datos de los nombres de los archivos

SubjectID_DateID_GestureLabel_RepetitionNumber_CorrectLabel_Position.txt

* SubjectID: id uniquely identifying the person performing the exercise
* DateID: id identifying the session in which the person was recorded
* GestureLabel: Label identifying the gesture; possible values are from 0 to 8
* RepetitionNumber: Each gesture was repeated several times and this shows the repetition number
* CorrectLabel: A value of 1 represents a gesture labeled as being correctly executed, while a value of 2 is for a gesture labeled as incorrect
* Position: Some participants performed the gestures sitting on a chair or wheelchair, while others performed them standing

In [2]:
def leer_nombre_archivo(archivo):
    """Split a raw-data file name into its underscore-separated fields.

    The expected file-name pattern is
    ``SubjectID_DateID_GestureLabel_RepetitionNumber_CorrectLabel_Position.txt``.

    Args:
        archivo: File name (or path) of a raw data file.

    Returns:
        list[str]: The fields in file-name order, all as strings.
    """
    # basename drops any directory component and splitext removes only the
    # final extension, so dots elsewhere in the path or name (e.g. '../')
    # no longer truncate the result.
    nombre, _extension = os.path.splitext(os.path.basename(archivo))
    return nombre.split('_')

### 1.2 Datos de los archivos
Each raw data file contains, per line: timestamp, XX, XX, followed by 25 tuples of (JointName, TrackedStatus, 3D coordinate X, 3D coordinate Y, 3D coordinate Z, 2D coordinate X, 2D coordinate Y)

In [3]:
directory = '../dataset/SkeletonData/RawData'
# os.listdir returns entries in arbitrary order and may include non-data
# entries (hidden files, checkpoint folders). Keep only the .txt files and
# sort them so every run processes the files in the same order.
file_list = sorted(
    name for name in os.listdir(directory) if name.lower().endswith('.txt')
)

In [4]:
# Layout of one line of a raw data file: a few leading values (the timestamp
# plus two more) followed by one fixed-size record per joint.
LEADING_FIELDS = 3     # values skipped at the start of every line
VALUES_PER_JOINT = 7   # JointName, TrackedStatus, 3D X/Y/Z, 2D X/Y

columnas = ['SubjectID', 'DateID', 'GestureLabel', 'RepetitionNumber', 'CorrectLabel', 'Position',
            'JointName', 'TrackedStatus', '3D_X', '3D_Y', '3D_Z', '2D_X', '2D_Y']

# One entry per joint record: file-name fields followed by the joint values.
list_data = []

# Iterate through each raw data file
for file_name in file_list:
    campos = leer_nombre_archivo(file_name)

    with open(os.path.join(directory, file_name), 'r') as file:
        for line in file:
            # Split the line by comma and drop the leading non-joint values
            line_data = line.strip().split(',')[LEADING_FIELDS:]
            # Joint records are wrapped in parentheses; remove them
            cleaned_data = [item.replace('(', '').replace(')', '') for item in line_data]
            for i in range(0, len(cleaned_data), VALUES_PER_JOINT):
                joint = cleaned_data[i:i + VALUES_PER_JOINT]
                # A trailing delimiter or truncated line leaves a short final
                # slice; pandas would silently pad it with missing values and
                # produce a bogus row, so skip incomplete records.
                if len(joint) != VALUES_PER_JOINT:
                    continue
                list_data.append(campos + joint)

# All values are kept as the strings read from the files.
df_data = pd.DataFrame(list_data, columns=columnas)


In [5]:
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing.
os.makedirs('../csvFiles', exist_ok=True)
df_data.to_csv('../csvFiles/raw_pacientes.csv', index=False)

In [6]:
# Preview the first five rows of the assembled frame.
df_data.head(n=5)

Unnamed: 0,SubjectID,DateID,GestureLabel,RepetitionNumber,CorrectLabel,Position,JointName,TrackedStatus,3D_X,3D_Y,3D_Z,2D_X,2D_Y
0,101,18,0,1,1,stand,SpineBase,Tracked,-0.1028086,0.06965441,2.464606,243.133,196.5854
1,101,18,0,1,1,stand,SpineMid,Tracked,-0.1026228,0.3837799,2.438919,242.971,149.1077
2,101,18,0,1,1,stand,Neck,Tracked,-0.1025293,0.6877351,2.40196,242.6967,101.3569
3,101,18,0,1,1,stand,Head,Tracked,-0.1190992,0.8358598,2.373549,239.9086,76.90204
4,101,18,0,1,1,stand,ShoulderLeft,Tracked,-0.2826451,0.5525576,2.38421,214.7496,121.544
