In [None]:
from glob import glob
from os.path import basename, dirname, join, splitext

import gpxpy
import pandas as pd
from geopy.distance import geodesic

In [None]:
def gpx_to_df(gpx_file):
    """Convert the track points of a GPX file to a pandas dataframe.

    Parameters
    ----------
    gpx_file : str or path-like
        Path of the GPX file to read.

    Returns
    -------
    pandas.DataFrame
        One row per track point with the columns ``latitude`` and
        ``longitude``, indexed by the point's ``time``. When several
        points share the same timestamp only the first one is kept.
        A file without any track point yields an empty dataframe with
        the same columns and index name (instead of raising ``KeyError``).
    """
    # Read gpx file
    with open(gpx_file) as f:
        gpx = gpxpy.parse(f)
    # Flatten tracks -> segments -> points into one record per point
    track_points = [
        {
            'time': point.time,
            'latitude': point.latitude,
            'longitude': point.longitude
        }
        for track in gpx.tracks
        for segment in track.segments
        for point in segment.points
    ]
    # Name the columns explicitly so that 'time' exists (and can become
    # the index) even when the file holds no track points at all
    df = pd.DataFrame.from_records(
        track_points, columns=['time', 'latitude', 'longitude']
    ).set_index('time')
    # Keep only the first point of every duplicated timestamp
    if not df.index.is_unique:
        df = df.loc[~df.index.duplicated(), :]
    return df

In [None]:
def calculate_distances(track_points, receivers, receivers_id_column):
    """Calculate distances between track points and receivers.

    Parameters
    ----------
    track_points : pandas.DataFrame
        Track points, one per row. The first two columns are read by
        position as the point's coordinates and passed to ``geodesic``
        (presumably latitude then longitude, as produced by
        ``gpx_to_df`` -- verify for other inputs).
    receivers : pandas.DataFrame
        Receivers, one per row. The second and third columns are read by
        position as the receiver's coordinates (presumably latitude and
        longitude -- TODO confirm against the receivers file).
    receivers_id_column : str
        Column of ``receivers`` whose values name the distance columns.

    Returns
    -------
    pandas.DataFrame
        A ``Timestamp`` column (the index of ``track_points``), every
        column of ``track_points``, and one column per receiver holding
        the ``geodesic(...).meters`` distance to that receiver.
    """
    # Receiver coordinates are the same for every track point: read them once
    receiver_coords = [(r[2], r[3]) for r in receivers.itertuples()]
    rows = []
    # Loop each track point
    for row in track_points.itertuples():
        point = (row[1], row[2])
        distances = [geodesic(point, rc).meters for rc in receiver_coords]
        # Copy the index and *all* columns so the row always matches the
        # header built below, even when track_points has extra columns
        rows.append(list(row) + distances)
    col_names = ['Timestamp'] + track_points.columns.to_list() + receivers[receivers_id_column].to_list()
    return pd.DataFrame(rows, columns=col_names)


In [None]:
def get_gpx_list(folder):
    """Return the paths of the ``*.gpx`` files located directly in ``folder``.

    The paths are returned in the order ``glob`` produces them; the
    search does not descend into sub-folders.
    """
    pattern = join(folder, "*.gpx")
    return glob(pattern)


In [None]:
# Loop through all GPX files and calculate distances
# from track points to receivers
# Import receivers once: the file is the same for every GPX track
rx = pd.read_csv('../data/rx.csv')
for gpx in get_gpx_list(r'C:\Users\julio\Desktop\daver_gpx'):
    # Import GPX track points
    df = gpx_to_df(gpx)
    # Interpolate coordinates every second ('1s' is the non-deprecated
    # spelling of the one-second frequency alias)
    df_dense = df.resample('1s').interpolate()
    # Calculate distances
    dist = calculate_distances(df_dense, rx, 'id')
    # Export results to a CSV next to the GPX file; splitext only strips
    # the extension, so dots inside the file name are preserved
    output_filename = splitext(basename(gpx))[0] + '.csv'
    dist.to_csv(join(dirname(gpx), output_filename))


# Another approach using df.apply()

In [None]:
def get_distance(df, point):
    """Apply ``geodesic(..., point).meters`` to the ``coords`` selection of ``df``.

    ``df[['coords']]`` is selected and ``.apply`` is called on it without
    an ``axis`` argument; the result of that ``apply`` is returned as is.

    NOTE(review): what the lambda receives depends on the type of ``df``.
    For a single row (a Series holding one ``coords`` tuple) it receives
    that tuple and a one-element Series of meters comes back; behaviour
    for a whole DataFrame has not been verified.
    """
    coords_only = df[['coords']]
    return coords_only.apply(lambda position: geodesic(position, point).meters)

In [None]:
def calculate_distances_vec(track_points, receivers):
    """Calculate distances between track points and receivers.

    Parameters
    ----------
    track_points : pandas.DataFrame
        Track points with a ``coords`` column holding one coordinate
        tuple per row. The frame is modified in place: one column per
        receiver is added to it.
    receivers : pandas.DataFrame
        Receivers, one per row. Read by position: the first column names
        the new distance column, the second and third columns are the
        receiver's coordinates (presumably latitude and longitude --
        TODO confirm against the receivers file).

    Returns
    -------
    pandas.DataFrame
        The same ``track_points`` object, with one extra column per
        receiver holding the ``geodesic(...).meters`` distance.
    """
    # Loop through each receiver
    for r in receivers.itertuples():
        receiver_id = r[1]
        receiver_coords = (r[2], r[3])
        # One pass over the coords tuples; no per-row intermediate
        # Series/DataFrame is built
        track_points[receiver_id] = track_points['coords'].apply(
            lambda coords: geodesic(coords, receiver_coords).meters
        )
    return track_points

In [None]:
# Loop through all GPX files and calculate distances
# from track points to receivers
# Import receivers once: the file is the same for every GPX track
rx = pd.read_csv('../data/rx.csv')
for gpx in get_gpx_list(r'C:\Users\julio\Desktop\daver_gpx'):
    # Import GPX track points
    df = gpx_to_df(gpx)
    # Interpolate coordinates every second ('1s' is the non-deprecated
    # spelling of the one-second frequency alias)
    df_dense = df.resample('1s').interpolate()
    # Add a column with coordinates in a tuple
    df_dense['coords'] = list(zip(df_dense['latitude'], df_dense['longitude']))
    # Calculate distances
    dist = calculate_distances_vec(df_dense, rx)
    # Export results to a CSV next to the GPX file (an existing file with
    # that name is overwritten); splitext only strips the extension, so
    # dots inside the file name are preserved
    output_filename = splitext(basename(gpx))[0] + '.csv'
    dist.to_csv(join(dirname(gpx), output_filename))
    

# Random tests

In [None]:
# Check whether the timestamps in the index of `df` are unique.
# `df` is not defined in this cell: it is whatever an earlier cell left
# in the kernel.
df.index.is_unique
# Alternatives tried while looking at duplicated timestamps:
#df.index.duplicated()
#df.loc[~df.index.duplicated(), :]

In [None]:
# Re-run the CSV export by hand. `dist`, `gpx` and `output_filename` are
# not defined in this cell, so it uses the values left by an earlier cell.
dist.to_csv(join(dirname(gpx), output_filename))

In [None]:
# Small hand-made frame used by the following scratch cells
x = pd.DataFrame(
    {
        'latitude': [1, 2, 0.1, 0.1],
        'longitude': [1, 2, 0.7, 0.2],
        'column 3': [1, 2, 3, 4],
    }
)

In [None]:
# Pair latitude and longitude into one tuple per row, then display the
# dtype of every column
x['coords'] = [
    (lat, lon) for lat, lon in zip(x['latitude'], x['longitude'])
]
x.dtypes

In [None]:
# Row-wise geodesic() from each (latitude, longitude) row to two fixed
# points. The value stored is whatever geodesic() returns: no `.meters`
# conversion is applied here.
x['dist1'] = x[['latitude', 'longitude']].apply(
    lambda latlon: geodesic(latlon, (0,0)),
    axis=1,
)
x['dist2'] = x[['latitude', 'longitude']].apply(
    lambda latlon: geodesic(latlon, (1,-1)),
    axis=1,
)
x

In [None]:
# Same experiment, but applying row-wise over the one-column 'coords'
# selection instead of the separate latitude/longitude columns
x['dist3'] = x[['coords']].apply(
    lambda coords_row: geodesic(coords_row, (0,0)),
    axis=1,
)
x

# Convert between time zones

In [None]:
# pytz provides the timezone object used in the tz_convert cell below
import pytz
# Take the rows at positions 1 to 9 of `df`; `df` is not defined in this
# cell and must already exist in the kernel
df2 = df.iloc[1:10]
df2

In [None]:
# Display the index of df2 converted to the America/Vancouver time zone.
# The converted index is only shown, it is not assigned back to df2.
# NOTE(review): tz_convert needs a timezone-aware index -- confirm the
# GPX timestamps carry a time zone.
df2.index.tz_convert(pytz.timezone('America/Vancouver'))

In [None]:
# Display the index of df2 as currently stored
df2.index