In [1]:
import pandas as pd
import os
from tcxreader.tcxreader import TCXReader, TCXExercise
import tcxparser
import warnings

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore') 


In [2]:
# Folder whose files are iterated over to build the activity features.
# NOTE(review): the leading '...' looks like a placeholder for a local prefix —
# confirm the path resolves on the machine running the notebook.
folder_path = ".../running_inactivity_prediction/athletes_activities/athlete6"

#### Criando as features com os dados extraídos de cada arquivo

In [3]:
# Dictionary of feature columns; each list receives exactly one value per activity file
df_athlete_dict = {
    'activity_date': [],
    'activity_type': [],
    'total_distance (km)': [], 
    'total_time (min)': [],
    'pace (min/km)': [],
    'speed (km/h)': []    
}


def _pace_min_per_km(total_min, total_dist):
    """Return the pace in min/km rounded to 2 decimals, or 0 when no distance was recorded."""
    if total_dist == 0:
        return 0
    return round(total_min / total_dist, 2)


def _features_from_tcxreader(path):
    """Build the feature row for one activity file using tcxreader.

    Nothing is appended to ``df_athlete_dict`` here: the row is returned only
    once every value has been computed, so a failure at any step leaves the
    shared lists untouched and the caller can fall back to tcxparser safely.

    Raises whatever tcxreader (or the arithmetic on missing fields) raises.
    """
    data: TCXExercise = TCXReader().read(path)

    # Total distance in km
    total_dist = data.distance / 1000

    # Total time in whole minutes
    total_min = (data.end_time - data.start_time).total_seconds() // 60

    return {
        # NOTE(review): the date is taken from start_time as parsed by tcxreader, while
        # the tcxparser fallback converts to America/Sao_Paulo first — confirm both
        # branches yield the same calendar day for late-evening activities.
        'activity_date': pd.Timestamp(data.start_time.date()),
        'activity_type': data.activity_type,
        'total_distance (km)': round(total_dist, 2),
        'total_time (min)': total_min,
        'pace (min/km)': _pace_min_per_km(total_min, total_dist),
        'speed (km/h)': round(data.avg_speed, 2),
    }


def _features_from_tcxparser(path):
    """Build the feature row for one activity file using tcxparser (fallback reader).

    Errors raised here are not caught: a file neither library can read stops the run.
    """
    parsed = tcxparser.TCXParser(path)

    # Total distance in km
    total_dist = parsed.distance / 1000

    # Total time in whole minutes
    try:
        total_min = parsed.duration // 60
    except Exception:
        # Fallback when the tcx file has no "<TotalTimeSeconds>": use the start/end timestamps
        total_min = (pd.Timestamp(parsed.completed_at) - pd.Timestamp(parsed.started_at)).total_seconds() // 60

    # Speed in km/h; an activity shorter than one minute has total_min == 0, which
    # would divide by zero, so it is reported as 0 (same convention as the pace column)
    if total_min == 0:
        speed = 0
    else:
        speed = round(total_dist / (total_min / 60), 2)

    return {
        # Activity start converted to Sao Paulo local time, made naive, truncated to the day
        'activity_date': pd.Timestamp(parsed.started_at).tz_convert('America/Sao_Paulo').tz_localize(None).normalize(),
        'activity_type': parsed.activity_type,
        'total_distance (km)': round(total_dist, 2),
        'total_time (min)': total_min,
        'pace (min/km)': _pace_min_per_km(total_min, total_dist),
        'speed (km/h)': speed,
    }


# Iterate over every file in the folder
for filename in os.listdir(folder_path):
    # Join the file name with the folder path
    full_path = os.path.join(folder_path, filename)

    # Try tcxreader first; if it fails for any reason, fall back to tcxparser.
    # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the loop.
    try:
        features = _features_from_tcxreader(full_path)
    except Exception:
        features = _features_from_tcxparser(full_path)

    # Append the complete row in one go so every column list keeps the same length
    for column, value in features.items():
        df_athlete_dict[column].append(value)

#### Criando um DataFrame novo a partir do dicionário

In [4]:
# Build the activities table: one column per key of the feature dictionary
df_athlete_activities = pd.DataFrame(data=df_athlete_dict)

#### Ordenando a data da menor pra maior e resetando index

In [5]:
# Order the activities from oldest to newest and replace the index with a fresh 0..n-1 range
df_athlete_activities = (
    df_athlete_activities
    .sort_values('activity_date', ascending=True)
    .reset_index(drop=True)
)

In [6]:
# Show the assembled activities table as the cell output
df_athlete_activities

Unnamed: 0,activity_date,activity_type,total_distance (km),total_time (min),pace (min/km),speed (km/h)
0,2023-07-24,other,0.00,62.0,0.00,0.00
1,2023-07-25,other,0.00,56.0,0.00,0.00
2,2023-07-26,workout,0.00,59.0,0.00,0.00
3,2023-07-26,workout,0.00,43.0,0.00,0.00
4,2023-07-27,run,5.25,45.0,8.58,6.86
...,...,...,...,...,...,...
85,2024-02-07,workout,0.00,66.0,0.00,0.00
86,2024-02-08,workout,0.00,124.0,0.00,0.00
87,2024-02-16,workout,0.00,53.0,0.00,0.00
88,2024-02-17,workout,0.00,77.0,0.00,0.00
