In [1]:
import pandas as pd
import gpxo
import os
import random
import warnings
import gpxpy 

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore')

In [2]:
# Path of the folder whose files will be iterated over
folder_path = '.../running_inactivity_prediction/athletes_activities/athlete1' 

In [3]:
# Pick a random activity file from the folder.
# os.listdir() returns every directory entry, so hidden entries (names starting
# with '.') and sub-directories are filtered out before choosing. The candidates
# are sorted so that a seeded `random` yields a reproducible pick.
activity_files = sorted(
    entry for entry in os.listdir(folder_path)
    if not entry.startswith('.')
    and os.path.isfile(os.path.join(folder_path, entry))
)
random_activity_file = random.choice(activity_files)

# Full path of the chosen file
path_example = os.path.join(folder_path, random_activity_file)

#### Demonstração dos valores após leitura dos arquivos com diferentes bibliotecas

In [4]:
# Load the sample activity with gpxo.Track and move the index of its
# `data` table into a regular column via reset_index()
example_track = gpxo.Track(path_example)
gpxo_example = example_track.data.reset_index()
gpxo_example

Unnamed: 0,time,latitude (°),longitude (°),distance (km),compass (°),duration (s),velocity (km/h),elevation (m)
0,2020-08-11 08:47:53,-22.939891,-43.178952,0.000000,54.098538,0.0,1.137600,12.5
1,2020-08-11 08:47:59,-22.939881,-43.178937,0.001896,136.731793,6.0,1.215150,12.5
2,2020-08-11 08:49:10,-22.940177,-43.179194,0.043960,163.459852,77.0,3.856523,12.7
3,2020-08-11 08:49:11,-22.940180,-43.179184,0.045038,161.485477,78.0,2.910600,12.7
4,2020-08-11 08:49:12,-22.940184,-43.179187,0.045577,287.419409,79.0,2.565000,12.7
...,...,...,...,...,...,...,...,...
6614,2020-08-11 10:47:33,-22.939764,-43.178299,25.586356,40.159329,7180.0,9.414000,12.2
6615,2020-08-11 10:47:34,-22.939746,-43.178282,25.589004,39.404271,7181.0,9.568800,12.2
6616,2020-08-11 10:47:35,-22.939727,-43.178266,25.591672,38.548546,7182.0,9.450000,12.2
6617,2020-08-11 10:47:36,-22.939709,-43.178250,25.594254,40.159340,7183.0,9.414000,12.2


In [5]:
# Parse the same sample file with gpxpy.
# The context manager closes the file handle as soon as parsing finishes
# (a bare open() would leave it open for the lifetime of the kernel).
with open(path_example, 'r') as gpx:
    gpxpy_example = gpxpy.parse(gpx)

gpxpy_example

GPX(tracks=[GPXTrack(name='Pedalada matinal', segments=[GPXTrackSegment(points=[...])])])

#### Criando as features com os dados extraídos de cada arquivo

In [6]:
# Dictionary of per-activity features: each key maps to a list that receives
# one value per activity file, so all lists stay the same length.
df_athlete_dict = {
    'activity_date': [],
    'activity_type': [],
    'total_distance (km)': [],
    'total_time (min)': [],
    'pace (min/km)': [],
    'speed (km/h)': []
}

# Iterate over every entry in the folder
for filename in os.listdir(folder_path):
    # Join the file name with the folder path
    full_path = os.path.join(folder_path, filename)

    # os.listdir() also returns hidden entries and sub-directories; skip them
    # so they are not handed to the GPX parsers.
    if filename.startswith('.') or not os.path.isfile(full_path):
        continue

    # Read the .gpx file with both gpxo and gpxpy.
    # The file handle is closed as soon as gpxpy has parsed it.
    gpx1 = gpxo.Track(full_path).data.reset_index()
    with open(full_path, 'r') as gpx_file:
        gpx2 = gpxpy.parse(gpx_file)

    # Activity date, taken from the first recorded point
    df_athlete_dict['activity_date'].append(gpx1['time'].iloc[0].normalize())

    # Activity type, read from the first track (None when the file has no tracks)
    activity_type = gpx2.tracks[0].type if gpx2.tracks else None
    df_athlete_dict['activity_type'].append(activity_type)

    # Total distance in km
    total_dist = gpx1['distance (km)'].max()
    df_athlete_dict['total_distance (km)'].append(round(total_dist, 2))

    # Total time in minutes.
    # NOTE: `//` floors to whole minutes, so the pace and speed below are
    # derived from the truncated duration.
    total_min = (gpx1['time'].max() - gpx1['time'].min()).total_seconds() // 60
    df_athlete_dict['total_time (min)'].append(total_min)

    # Pace (min/km); 0 when no distance was covered
    if total_dist == 0:
        df_athlete_dict['pace (min/km)'].append(0)
    else:
        df_athlete_dict['pace (min/km)'].append(round(total_min / total_dist, 2))

    # Speed in km/h; 0 when the duration floors to zero minutes, mirroring the
    # pace guard above and avoiding a division by zero.
    if total_min == 0:
        df_athlete_dict['speed (km/h)'].append(0)
    else:
        df_athlete_dict['speed (km/h)'].append(round(total_dist / (total_min / 60), 2))

#### Criando um Dataframe novo a partir do dicionário

In [7]:
# Build the activities table: one column per dictionary key, one row per activity
df_athlete_activities = pd.DataFrame(data=df_athlete_dict)

#### Ordenando pela data, da menor para a maior, e resetando o índice

In [8]:
# Sort the activities chronologically and renumber the rows from 0.
# The result is rebound to the same name instead of using inplace=True,
# which keeps the step a single chainable expression.
df_athlete_activities = (
    df_athlete_activities
    .sort_values('activity_date', ascending=True)
    .reset_index(drop=True)
)

In [9]:
# Display the date-sorted per-activity summary table
df_athlete_activities

Unnamed: 0,activity_date,activity_type,total_distance (km),total_time (min),pace (min/km),speed (km/h)
0,2020-07-09,running,6.03,39.0,6.47,9.27
1,2020-07-10,cycling,25.35,140.0,5.52,10.87
2,2020-07-12,running,7.57,55.0,7.27,8.25
3,2020-07-16,running,3.61,23.0,6.38,9.40
4,2020-07-20,cycling,25.66,127.0,4.95,12.12
...,...,...,...,...,...,...
109,2022-06-02,running,6.25,41.0,6.56,9.15
110,2022-06-07,running,8.51,54.0,6.35,9.45
111,2022-06-09,running,8.12,55.0,6.77,8.86
112,2022-06-13,running,11.34,73.0,6.44,9.32
