In [4]:
import pandas as pd
from gpx_converter import Converter
from haversine import haversine, Unit


In [5]:
# GPX track to analyse (relative path).
filepath = '../data/track-2723-40744pm.gpx'

# Parse the GPX file into a DataFrame (time, latitude, longitude, altitude).
raw = Converter(input_file=filepath).gpx_to_dataframe()

# Show column dtypes / non-null counts, then preview the first rows.
raw.info()
raw.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 464 entries, 0 to 463
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype                        
---  ------     --------------  -----                        
 0   time       464 non-null    datetime64[ns, SimpleTZ("Z")]
 1   latitude   464 non-null    float64                      
 2   longitude  464 non-null    float64                      
 3   altitude   464 non-null    float64                      
dtypes: datetime64[ns, SimpleTZ("Z")](1), float64(3)
memory usage: 14.6 KB


Unnamed: 0,time,latitude,longitude,altitude
0,2023-02-08 00:07:44+00:00,45.505908,-122.609142,62.2
1,2023-02-08 00:07:47+00:00,45.505896,-122.609106,62.7
2,2023-02-08 00:07:52+00:00,45.505955,-122.609113,58.9
3,2023-02-08 00:07:56+00:00,45.505992,-122.609132,58.0
4,2023-02-08 00:07:59+00:00,45.50601,-122.609162,59.0


In [8]:
def transform_gpx_data(filename, timezone='US/Pacific'):
    """Load a GPX track and derive per-segment time, distance, speed and climb.

    Every row describes one track point together with the segment leading
    from it to the *next* point. The final row therefore has no segment:
    its ``distance_feet``, ``speed_mph`` and ``altitude_change`` are NaN and
    its ``seconds_delta`` is 0. A track with a single point yields one such
    row instead of raising.

    Parameters
    ----------
    filename : str
        Path of the GPX file, passed to ``Converter(input_file=...)``.
    timezone : str, default 'US/Pacific'
        Time zone the ``time`` column is converted to before the
        ``human_date`` / ``human_time`` strings are derived.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``human_date``, ``human_time``, ``seconds_delta``,
        ``latitude``, ``longitude``, ``altitude_feet``, ``distance_feet``,
        ``speed_mph`` and ``altitude_change``.
    """
    feet_per_meter = 3.280839895
    mph_per_fps = 3600 / 5280  # feet per second -> miles per hour

    # Load the GPX data from the file into a dataframe
    df = Converter(input_file=filename).gpx_to_dataframe()

    # Convert the whole 'time' column to the requested timezone at once
    df['time'] = df['time'].dt.tz_convert(timezone)

    # Whole seconds until the next point (0 for the last row).
    # total_seconds() is independent of the column's datetime resolution,
    # unlike casting the timedelta to an integer and dividing by 1e9.
    df['seconds_delta'] = ((df['time'].shift(-1) - df['time'])
                           .dt.total_seconds()
                           .fillna(0)
                           .astype(int))

    # Extract human-readable date and time from the 'time' column
    df['human_date'] = df['time'].dt.strftime('%Y-%m-%d')
    df['human_time'] = df['time'].dt.strftime('%I:%M:%S %p')

    # Convert altitude from meters to feet and round to an integer
    df['altitude_feet'] = (df['altitude'] * feet_per_meter).round().astype('int')

    # Great-circle distance from each point to the next one, in feet.
    # The list is padded with NaN so the column always exists and lines up
    # with the frame, even when the track has fewer than two points.
    points = list(zip(df['latitude'], df['longitude']))
    segment_feet = [round(haversine(start, end, unit=Unit.FEET), 1)
                    for start, end in zip(points, points[1:])]
    padding = [float('nan')] * (len(df) - len(segment_feet))
    df['distance_feet'] = pd.Series(segment_feet + padding,
                                    index=df.index, dtype='float64')

    # Altitude gained (+) or lost (-) on the way to the next point
    df['altitude_change'] = df['altitude_feet'].shift(-1) - df['altitude_feet']

    # Calculate speed in mph; a zero-second interval gives NaN, not inf
    elapsed = df['seconds_delta'].where(df['seconds_delta'] != 0)
    df['speed_mph'] = ((df['distance_feet'] / elapsed) * mph_per_fps).round(1)

    # Select columns to return in the final dataframe
    df = df[['time', 'human_date', 'human_time', 'seconds_delta',
             'latitude', 'longitude', 'altitude_feet',
             'distance_feet', 'speed_mph',
             'altitude_change']].copy()
    return df


In [9]:
# Run the transformation on the GPX file at `filepath` and display the result.
transform_gpx_data(filename=filepath)

Unnamed: 0,time,human_date,human_time,seconds_delta,latitude,longitude,altitude_feet,distance_feet,speed_mph,altitude_change
0,2023-02-07 16:07:44-08:00,2023-02-07,04:07:44 PM,3,45.505908,-122.609142,204,10.2,2.3,2.0
1,2023-02-07 16:07:47-08:00,2023-02-07,04:07:47 PM,5,45.505896,-122.609106,206,21.6,2.9,-13.0
2,2023-02-07 16:07:52-08:00,2023-02-07,04:07:52 PM,4,45.505955,-122.609113,193,14.3,2.4,-3.0
3,2023-02-07 16:07:56-08:00,2023-02-07,04:07:56 PM,3,45.505992,-122.609132,190,10.1,2.3,4.0
4,2023-02-07 16:07:59-08:00,2023-02-07,04:07:59 PM,12,45.506010,-122.609162,194,48.5,2.8,3.0
...,...,...,...,...,...,...,...,...,...,...
459,2023-02-07 17:15:24-08:00,2023-02-07,05:15:24 PM,8,45.506470,-122.609148,202,32.2,2.7,-5.0
460,2023-02-07 17:15:32-08:00,2023-02-07,05:15:32 PM,17,45.506382,-122.609139,197,70.4,2.8,0.0
461,2023-02-07 17:15:49-08:00,2023-02-07,05:15:49 PM,3,45.506189,-122.609145,197,10.2,2.3,6.0
462,2023-02-07 17:15:52-08:00,2023-02-07,05:15:52 PM,20,45.506169,-122.609173,203,93.4,3.2,-1.0
