In [1]:
from glob import glob
from os.path import basename, dirname, join, splitext

import gpxpy
import pandas as pd
from geopy.distance import geodesic

In [2]:
def gpx_to_df(gpx_file):
    """Convert the track points of a GPX file to a pandas dataframe.

    Parameters
    ----------
    gpx_file : str or path-like
        Path of the GPX file to read.

    Returns
    -------
    pandas.DataFrame
        One row per track point with the columns ``latitude`` and
        ``longitude``, indexed by the point timestamp (index name ``time``).
        Track points without a timestamp are skipped, because they cannot be
        placed on the time index. When several points share a timestamp only
        the first one is kept. A file without usable track points yields an
        empty frame with the same columns and an empty ``DatetimeIndex``
        instead of raising ``KeyError``.
    """
    # Read gpx file
    with open(gpx_file) as f:
        gpx = gpxpy.parse(f)
    # Flatten tracks -> segments -> points into one list of records
    track_points = [
        {
            'time': point.time,
            'latitude': point.latitude,
            'longitude': point.longitude
        }
        for track in gpx.tracks
        for segment in track.segments
        for point in segment.points
        if point.time is not None
    ]
    # Without records there is no 'time' column to index on, so build the
    # empty result explicitly.
    if not track_points:
        return pd.DataFrame(
            columns=['latitude', 'longitude'],
            index=pd.DatetimeIndex([], name='time'),
            dtype='float64',
        )
    # Create a dataframe and set track points' timestamp as index
    df = pd.DataFrame.from_records(track_points).set_index('time')
    # Keep only the first row of every duplicated timestamp (a no-op when the
    # index is already unique)
    return df.loc[~df.index.duplicated(keep='first'), :]

In [3]:
def calculate_distances(track_points, receivers, receivers_id_column):
    """Calculate distances between track points and receivers.

    Parameters
    ----------
    track_points : pandas.DataFrame
        Track points indexed by timestamp. The first two columns are used as
        the point coordinates and passed to ``geodesic`` in that order
        (``latitude``, ``longitude`` for frames built by ``gpx_to_df``). Any
        further columns are ignored.
    receivers : pandas.DataFrame
        One row per receiver. The second and third columns are used as the
        receiver coordinates (assumed to be latitude and longitude, in that
        order -- TODO confirm against ``rx.csv``).
    receivers_id_column : str
        Name of the column in ``receivers`` whose values label the distance
        columns of the result.

    Returns
    -------
    pandas.DataFrame
        One row per track point with the columns ``Timestamp`` (the track
        point index value), the two coordinate columns, and one column per
        receiver holding the geodesic distance in meters.
    """
    # Only the first two columns are copied into the result, so the header
    # must name exactly those two (using every column of track_points broke
    # as soon as the frame carried an extra column).
    coord_columns = track_points.columns[:2].to_list()
    # Receiver coordinates do not depend on the track point: read them once
    receiver_coords = [(r[2], r[3]) for r in receivers.itertuples()]
    rows = []
    # Loop each track point
    for row in track_points.itertuples():
        point = (row[1], row[2])
        rows.append(
            [row[0], row[1], row[2]]
            + [geodesic(point, rc).meters for rc in receiver_coords]
        )
    col_names = ['Timestamp'] + coord_columns + receivers[receivers_id_column].to_list()
    return pd.DataFrame(rows, columns=col_names)


In [4]:
def get_gpx_list(folder):
    """Get a sorted list of the GPX files in a folder.

    Parameters
    ----------
    folder : str
        Folder to search. Only files directly inside it whose name matches
        ``*.gpx`` are returned; sub-folders are not searched.

    Returns
    -------
    list of str
        Paths of the matching files (``folder`` joined with the file name),
        sorted alphabetically. Empty when nothing matches.
    """
    # glob() returns matches in arbitrary, OS-dependent order; sorting makes
    # the processing order reproducible between runs.
    return sorted(glob(join(folder, "*.gpx")))


In [5]:
# Loop through all GPX files and calculate distances
# from track points to receivers
# NOTE(review): machine-specific absolute path; consider moving it to a
# configuration cell at the top of the notebook.
gpx_folder = r'C:\Users\julio\Desktop\daver_gpx'
# Import receivers once: the file is the same for every GPX track
rx = pd.read_csv('../data/rx.csv')
for gpx in get_gpx_list(gpx_folder):
    # Import GPX track points
    df = gpx_to_df(gpx)
    # Interpolate coordinates every second ('1s' replaces the deprecated
    # upper-case 'S' alias)
    df_dense = df.resample('1s').interpolate()
    # Calculate distances
    dist = calculate_distances(df_dense, rx, 'id')
    # Export results to CSV next to the GPX file. splitext() strips only the
    # final extension, so 'track.2021.gpx' becomes 'track.2021.csv' rather
    # than colliding with 'track.csv'.
    output_filename = splitext(basename(gpx))[0] + '.csv'
    dist.to_csv(join(dirname(gpx), output_filename))


# Another approach using df.apply()

In [199]:
def get_distance(df, point):
    """Geodesic distance in meters from each ``coords`` entry to ``point``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Either a frame with a ``coords`` column, or a single row (a Series
        with a ``coords`` entry) as passed by ``DataFrame.apply(axis=1)``.
        Each ``coords`` value is handed to ``geodesic`` as the first point.
    point : tuple
        Reference point, handed to ``geodesic`` as the second point.

    Returns
    -------
    pandas.Series
        For a frame: one distance per row, indexed like the frame. For a
        single row: a one-element Series indexed by ``'coords'``.
    """
    if isinstance(df, pd.Series):
        # Row input: keep the one-element Series labelled 'coords'
        coords = df[['coords']]
    else:
        # Frame input: work on the column so the function is applied to each
        # coordinate tuple rather than to the whole column at once
        coords = df['coords']
    return coords.apply(lambda c: geodesic(c, point).meters)

In [200]:
def calculate_distances_vec(track_points, receivers, receivers_id_column=None):
    """Calculate distances between track points and receivers.

    Adds one column per receiver to ``track_points`` holding the geodesic
    distance in meters from each row's ``coords`` value to that receiver.

    Parameters
    ----------
    track_points : pandas.DataFrame
        Must contain a ``coords`` column whose values are passed to
        ``geodesic`` as the first point. The frame is modified in place;
        pass a copy if the original must stay untouched.
    receivers : pandas.DataFrame
        One row per receiver. The second and third columns are used as the
        receiver coordinates (assumed to be latitude and longitude, in that
        order -- TODO confirm against ``rx.csv``).
    receivers_id_column : str, optional
        Column of ``receivers`` whose values name the new distance columns.
        Defaults to the first column of ``receivers``.

    Returns
    -------
    pandas.DataFrame
        The same ``track_points`` object, with the distance columns added.
    """
    if receivers_id_column is None:
        receiver_ids = receivers.iloc[:, 0]
    else:
        receiver_ids = receivers[receivers_id_column]
    # Loop through each receiver
    for receiver_id, r in zip(receiver_ids, receivers.itertuples()):
        receiver_coords = (r[2], r[3])
        # A single element-wise apply over the coords column yields a plain
        # Series of distances, avoiding a nested apply per row.
        track_points[receiver_id] = track_points['coords'].apply(
            lambda c, rc=receiver_coords: geodesic(c, rc).meters
        )
    return track_points

In [220]:
# Loop through all GPX files and calculate distances
# from track points to receivers
# NOTE(review): machine-specific absolute path; consider moving it to a
# configuration cell at the top of the notebook.
gpx_folder = r'C:\Users\julio\Desktop\daver_gpx'
# Import receivers once: the file is the same for every GPX track
rx = pd.read_csv('../data/rx.csv')
for gpx in get_gpx_list(gpx_folder):
    # Import GPX track points
    df = gpx_to_df(gpx)
    # Interpolate coordinates every second ('1s' replaces the deprecated
    # upper-case 'S' alias)
    df_dense = df.resample('1s').interpolate()
    # Add a column with coordinates in a tuple
    df_dense['coords'] = list(zip(df_dense['latitude'], df_dense['longitude']))
    # Calculate distances
    dist = calculate_distances_vec(df_dense, rx)
    # Export results to CSV next to the GPX file. splitext() strips only the
    # final extension, so 'track.2021.gpx' becomes 'track.2021.csv' rather
    # than colliding with 'track.csv'.
    # NOTE(review): this writes to the same path as the itertuples-based loop
    # earlier in the notebook and overwrites its output.
    output_filename = splitext(basename(gpx))[0] + '.csv'
    dist.to_csv(join(dirname(gpx), output_filename))
    

KeyboardInterrupt: 

In [218]:
# Check whether the timestamps in `df` are unique.
# NOTE(review): `df` is not defined in this cell -- presumably it is the
# frame left in the kernel by the last iteration of a loop cell above.
df.index.is_unique
# Scratch checks used while investigating duplicated timestamps:
#df.index.duplicated()
#df.loc[~df.index.duplicated(), :]

False

In [204]:
# Write `dist` to a CSV in the folder of `gpx`.
# NOTE(review): `dist`, `gpx` and `output_filename` are not defined in this
# cell -- presumably left in the kernel by a loop cell above; this cell
# fails on a fresh kernel unless one of those loops has run.
dist.to_csv(join(dirname(gpx), output_filename))

In [56]:
# Small toy frame with a few coordinates, used below to experiment with
# geodesic() on dataframe rows
x = pd.DataFrame(
    {
        'latitude': [1, 2, 0.1, 0.1],
        'longitude': [1, 2, 0.7, 0.2],
        'column 3': [1, 2, 3, 4],
    }
)

In [155]:
# Pair latitude and longitude into one tuple per row
x['coords'] = list(zip(x['latitude'], x['longitude']))
# Inspect the column dtypes (the tuple column is stored as object).
# NOTE(review): the recorded output also lists a `tup` column that no cell
# in this notebook creates -- leftover kernel state.
x.dtypes

latitude     float64
longitude    float64
column 3       int64
tup           object
coords        object
dtype: object

In [189]:
# Distance from every row to two reference points, passing the
# (latitude, longitude) row straight to geodesic().
# NOTE: the columns store the geodesic() result objects themselves (shown as
# '... km' in the recorded output), not plain numbers; append `.meters` as
# in calculate_distances to get floats.
x['dist1'] = x[['latitude', 'longitude']].apply(lambda row: geodesic(row, (0,0)), axis=1)
x['dist2'] = x[['latitude', 'longitude']].apply(lambda row: geodesic(row, (1,-1)), axis=1)
x

In [206]:
# Same distance as dist1, but starting from the tuple column: each row is a
# one-element Series holding the coordinate tuple.
x['dist3'] = x[['coords']].apply(
    lambda row: geodesic(row, (0,0)),
    axis=1,
)
x

Unnamed: 0,latitude,longitude,column 3,tup,coords,dist,dist1,dist2,dist3
0,1.0,1.0,1,"(1.0, 1.0)","(1.0, 1.0)",,156.89956829134027 km,222.60529609673543 km,156.89956829134027 km
1,2.0,2.0,2,"(2.0, 2.0)","(2.0, 2.0)",,313.7757094291842 km,351.67650043935595 km,313.7757094291842 km
2,0.1,0.7,3,"(0.1, 0.7)","(0.1, 0.7)",,78.70422357040137 km,213.80500391582302 km,78.70422357040137 km
3,0.1,0.2,4,"(0.1, 0.2)","(0.1, 0.2)",,24.858547192170853 km,166.5717546725291 km,24.858547192170853 km


In [17]:
# NOTE(review): this import belongs in the first cell with the others.
import pytz
# Take rows 1-9 of `df` as a small sample for the time-zone test below
# (`df` is not defined in this cell -- presumably left over from a loop
# cell above).
df2 = df.iloc[1:10]
df2

Unnamed: 0_level_0,latitude,longitude
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-30 16:42:27+00:00,55.987213,-121.98265
2021-09-30 16:42:30+00:00,55.987255,-121.982586
2021-09-30 16:43:01+00:00,55.987233,-121.982515
2021-09-30 16:44:00+00:00,55.987282,-121.982529
2021-09-30 16:44:04+00:00,55.987315,-121.982602
2021-09-30 16:44:08+00:00,55.987353,-121.982646
2021-09-30 16:44:12+00:00,55.987391,-121.982715
2021-09-30 16:44:16+00:00,55.987425,-121.982787
2021-09-30 16:44:21+00:00,55.987454,-121.982861


In [18]:
# Preview the sample timestamps converted to the America/Vancouver time
# zone. The converted index is only displayed, not assigned, so df2 keeps
# its original index.
df2.index.tz_convert(pytz.timezone('America/Vancouver'))

DatetimeIndex(['2021-09-30 09:42:27-07:00', '2021-09-30 09:42:30-07:00',
               '2021-09-30 09:43:01-07:00', '2021-09-30 09:44:00-07:00',
               '2021-09-30 09:44:04-07:00', '2021-09-30 09:44:08-07:00',
               '2021-09-30 09:44:12-07:00', '2021-09-30 09:44:16-07:00',
               '2021-09-30 09:44:21-07:00'],
              dtype='datetime64[ns, America/Vancouver]', name='time', freq=None)

In [19]:
# Display the index of df2 as it currently is
df2.index

DatetimeIndex(['2021-09-30 16:42:27+00:00', '2021-09-30 16:42:30+00:00',
               '2021-09-30 16:43:01+00:00', '2021-09-30 16:44:00+00:00',
               '2021-09-30 16:44:04+00:00', '2021-09-30 16:44:08+00:00',
               '2021-09-30 16:44:12+00:00', '2021-09-30 16:44:16+00:00',
               '2021-09-30 16:44:21+00:00'],
              dtype='datetime64[ns, SimpleTZ("Z")]', name='time', freq=None)