In [1]:
import pandas as pd
import folium

import branca
import branca.colormap as cm

#import gpx reading library to format the strava data 
import gpxpy
import gpxpy.gpx

#import geopy.distance

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np

#library to calculate distances/bearings
from geographiclib.geodesic import Geodesic
import math
geod = Geodesic.WGS84  # define the WGS84 ellipsoid

In [2]:
#https://betterdatascience.com/data-science-for-cycling-how-to-read-gpx-strava-routes-with-python/

In [2]:
#open the strava data file with gpxpy library tool
with open('data/6972736276.gpx') as gpx_file:
    gpx = gpxpy.parse(gpx_file)

### unpack proprietary format

In [3]:
#unpacking the gpx item into a list to be put into a geodataframe
route_info = []
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            route_info.append({
                'datetime': point.time,
                'latitude': point.latitude,
                'longitude': point.longitude,
                'elevation': point.elevation
            })

In [4]:
#create dataframe from gpx information
df_points = pd.DataFrame(data=route_info)

In [5]:
#create lat/long list for fitting map
lat_long_list = list(zip(df_points.latitude, df_points.longitude))

In [6]:
#bounds for the start of the folium map
fit = ((max(df_points.latitude), max(df_points.longitude)),
       (min(df_points.latitude), min(df_points.longitude)))

In [7]:
df_points.head()

Unnamed: 0,datetime,latitude,longitude,elevation
0,2022-04-12 22:19:47+00:00,40.682581,-73.956795,20.5
1,2022-04-12 22:19:48+00:00,40.682574,-73.956806,20.5
2,2022-04-12 22:19:49+00:00,40.682573,-73.956834,20.5
3,2022-04-12 22:19:50+00:00,40.682581,-73.956841,20.5
4,2022-04-12 22:19:51+00:00,40.682576,-73.956852,20.5


In [8]:
#loop through dataframe
#calculate the distance between sequential points with geopy package
for i in range(len(df_points)):
    if i >0: 
        last_pnt = df_points.loc[i-1, "latitude"], df_points.loc[i-1, "longitude"]
        pnt = df_points.loc[i, "latitude"], df_points.loc[i, "longitude"]
        df_points.at[i, "distance_delta"] = geod.Inverse(pnt[0], pnt[1], last_pnt[0], last_pnt[1])['s12']

In [9]:
#add change in elevation between points
df_points['elevation_delta'] = df_points.elevation.diff()

#add cumulative distance to point
df_points['distance_cum'] = df_points.distance_delta.cumsum()
# df_points.distance_cum.fillna(0, inplace=True)

#add cumulative elevation change, maybe superflous with sea level elevation column
df_points['elevation_cum'] = df_points.elevation_delta.cumsum()
# df_points.elevation_cum.fillna(0, inplace=True)

In [10]:
#fill na values in first rows where no delta exists 
df_points.fillna(0, inplace=True)

In [11]:
#min/max normalize velocity values edits the given dictionary 
def get_normalized_pace(split_dic):
    pace_list = []
    for i in split_dic.values(): 
        pace_list.append(i['hr_pace'])
    mini = min(pace_list)
    maxi = max(pace_list)
    for key, value in split_dic.items():
        split_dic[key]['pace_norm'] = (split_dic[key]['hr_pace'] - mini) / (maxi-mini)

In [12]:
def get_velocity(dist_one, dist_two, time_one, time_two): 
    diff_seconds = (time_two-time_one).seconds
    dist = dist_two - dist_one
    hr_pace = (3600/diff_seconds) * (dist/1000) #hourly pace

    return hr_pace

In [21]:
def get_all_lines(route): #takes the dataframe and returns all lines between sequential points

    paths_all = {}

    for i in range(len(route)-1):   #there is x-1 number of paths from indexes
        path_data = {}

        time_one = route.iloc[i].datetime #get start time of split path 
        time_two = route.iloc[i+1].datetime #get end time of split path 

        dist_one = route.loc[i].distance_cum #get start position
        dist_two = route.loc[i+1].distance_cum #get end position
        dist = dist_two-dist_one #calculate split distance 

        el_one = route.loc[i].elevation #get start elevation
        el_two = route.loc[i+1].elevation #get end elevation
        el_delta = el_two-el_one #calculate split elevation change

        grade = round((el_delta/dist)*100, 2)

        hr_pace = get_velocity(dist_one, dist_two, time_one, time_two)
        
        yes = route.loc[i : i+1].latitude.to_list() #grab all the y's ie latitude
        xes = route.loc[i : i+1].longitude.to_list() #grab all the x's ie longitude
        points = tuple(zip(yes, xes)) #the list of lat/long for creating polyline 

        path_data["start_distance"] = dist_one
        path_data["end_distance"] = dist_two
        path_data["time_one"] = time_one
        path_data["time_two"] = time_two        
        path_data["hr_pace"] = hr_pace
        path_data["dist"] = dist      
        path_data["el_delta"] = el_delta
        path_data["start_el"] = el_one
        path_data["end_el"] = el_two        
        path_data["grade"] = grade
        path_data["points"] = points
        paths_all[i] = path_data
    
    return paths_all

In [161]:
#create an object with split paths 
#input the length of the split and all paths
def get_splits(all_paths, split_len): 
    #loop through the dictionary to find where the distance exceeds the split 
    
    df = pd.DataFrame(all_paths).transpose()
    
    split_dist = split_len 
    split_indexes = [0] #start with the first path 
    
    #go through data frame and find which paths cross the threshold
    
    for i, r in df.iterrows():
        if r['start_distance'] <= split_dist and r['end_distance'] >= split_dist: 
            split_dist += split_len
            split_indexes.append(i) #positions of the df need to be offset
            
    split_indexes.append(len(df)-1) #add the final segment index
    
    info = {}
    for i in range(len(split_indexes)-1): 
        split_info = {}       
        split_info['start_time'] = df.iloc[split_indexes[i]].time_one
        split_info['end_time'] = df.iloc[split_indexes[i+1]].time_two     
        split_info['start_distance'] = df.iloc[split_indexes[i]].start_distance
        split_info['end_distance'] = df.iloc[split_indexes[i+1]].end_distance
        split_info['dist'] = split_info['end_distance'] - split_info['start_distance']
        split_info['split_length'] = split_len
        split_info['elevation_change'] = df.iloc[split_indexes[i]].start_el - df.iloc[split_indexes[i+1]].end_el
        split_info['hr_pace'] = get_velocity(split_info['start_distance'], split_info['end_distance'],
                                            split_info['start_time'], split_info['end_time'])
        split_info['points'] = df.iloc[split_indexes[i]: split_indexes[i+1]+1].points.tolist()
        
        info[i] = split_info
    
    get_normalized_pace(info)
    
    return info 

In [92]:
#https://stackoverflow.com/questions/56876620/unsure-how-to-use-colormap-with-folium-marker-plot
#need to figure out how to label independently of v_min/vmax
#colormap = cm.LinearColormap(colors=['blue', 'red'], index=[0, 30])
colormap = cm.LinearColormap(colors=['blue', 'red']) 

In [163]:
paths = get_all_lines(df_points) 

  grade = round((el_delta/dist)*100, 2)


In [164]:
segment = get_splits(paths, 1000)

In [165]:
#add popups at the split delineation--perpendicular to the line...bwahhaha

x = folium.Map()
x.fit_bounds(fit) #should change fit to a proper called function 

#adds the path segments to map 
for key, value in segment.items():
        folium.PolyLine(value['points'], color=colormap(value['pace_norm']),
                    smooth_factor= 0, popup= (str(value['hr_pace']) + " km/hr")).add_to(x)

In [166]:
x

In [167]:
df = pd.DataFrame(data= paths).transpose()

In [185]:
    #add the split markers--parallel lines
#        print(value['points'][-1]) #get the last point in the split 
    #find that point in the dataframe get -30/+30 points away
for key, value in segment.items():
        index_of_split = df.loc[df.points == value['points'][-1]].index[0]

        
        if (index_of_split+1 == len(df)): #don't draw a split at the end 
            break
             
        split_center = value['points'][-1][1] #gets the point at the split
        split_back = df.iloc[index_of_split -25].points[1] #selects a point 25 behind the split         
        split_forward = df.iloc[index_of_split+25].points[1] #selects a point 25 after the split
       
        #math for getting the parralel line to the path 
        path_az = geod.Inverse(split_back[0], split_back[1], split_forward[0], split_forward[1])['azi1']
        spl1 = geod.Direct(split_center[0], split_center[1], (path_az- 90), 100) 
        spl2 = geod.Direct(split_center[0], split_center[1], (path_az+ 90), 100)
        #plot the parralel line
        folium.PolyLine(((spl1['lat2'], spl1['lon2']), (spl2['lat2'], spl2['lon2'])), color = "black",
                       popup= ("split number" + " "+ str(key))).add_to(x)

done


In [186]:
x

## FASTEST KILOMETER

In [352]:
df_mini = df[['dist', 'end_distance', 'points', "time_one","time_two"]]

In [353]:
df_mini["time_diff"] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mini["time_diff"] = 0


In [354]:
df_mini['index_of_split'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mini['index_of_split'] = 0


In [355]:
rolling_target = 1000

In [356]:
df_mini.iloc[345:355]

Unnamed: 0,dist,end_distance,points,time_one,time_two,time_diff,index_of_split
345,1.494397,991.692409,"((40.675745, -73.959782), (40.675733, -73.95979))",2022-04-12 22:25:35+00:00,2022-04-12 22:25:36+00:00,0,0
346,3.782544,995.474953,"((40.675733, -73.95979), (40.67571, -73.959823))",2022-04-12 22:25:36+00:00,2022-04-12 22:25:37+00:00,0,0
347,3.165945,998.640898,"((40.67571, -73.959823), (40.675685, -73.959841))",2022-04-12 22:25:37+00:00,2022-04-12 22:25:38+00:00,0,0
348,1.322668,999.963567,"((40.675685, -73.959841), (40.675674, -73.9598...",2022-04-12 22:25:38+00:00,2022-04-12 22:25:39+00:00,0,0
349,3.564651,1003.528218,"((40.675674, -73.959847), (40.675644, -73.9598...",2022-04-12 22:25:39+00:00,2022-04-12 22:25:40+00:00,0,0
350,1.031111,1004.559329,"((40.675644, -73.959862), (40.675635, -73.9598...",2022-04-12 22:25:40+00:00,2022-04-12 22:25:41+00:00,0,0
351,4.555077,1009.114406,"((40.675635, -73.959865), (40.675602, -73.9598...",2022-04-12 22:25:41+00:00,2022-04-12 22:25:42+00:00,0,0
352,1.587906,1010.702311,"((40.675602, -73.959897), (40.675591, -73.9599...",2022-04-12 22:25:42+00:00,2022-04-12 22:25:43+00:00,0,0
353,3.431018,1014.133329,"((40.675591, -73.959909), (40.675562, -73.9599...",2022-04-12 22:25:43+00:00,2022-04-12 22:25:44+00:00,0,0
354,3.6214,1017.754729,"((40.675562, -73.959923), (40.675538, -73.9599...",2022-04-12 22:25:44+00:00,2022-04-12 22:25:45+00:00,0,0


In [373]:
for i in range(len(df_mini)+1):
    # print(df_mini.iloc[i].end_distance)

    rev_df = df_mini.iloc[0:i].iloc[::-1].reset_index(drop=True).end_distance
    # print(rev_df)
    # print("")
    # print(rev_df[0] - rolling_target) 


    
    for index, item in rev_df.iteritems():
        # print(rev_df[0]-item)
        if (rev_df[0]-item) > rolling_target: 
            beg_index = (i-index-1)
            df_mini.at[(i-1),"index_of_split"] = beg_index
            time_delta = (df_mini.at[i-1,"time_two"] -df_mini.at[beg_index, "time_one"]).seconds
            df_mini.at[(i-1), "time_diff"] = time_delta
            # print(beg_index)
            # print(time_delta)
            break
        
        

    
#     if rev_df[0] - item < rolling_target:
#             print(index)
#             print(rev_df[0] -item)
#         break

In [376]:
fastest_index =df_mini.loc[df_mini.time_diff >0].time_diff.idxmin()

In [379]:
start_index = df_mini.iloc[fastest_index].index_of_split

In [384]:
fastest_points = df_mini[start_index: fastest_index].points.tolist()

In [387]:
fastest_points[0]

((40.661578, -73.962741), (40.661547, -73.962734))

In [390]:
y = folium.Map()
y.fit_bounds(fit) #should change fit to a proper called function 

#adds the path segments to map 
for seg in fastest_points:
    folium.PolyLine(seg, color="red", smooth_factor=1).add_to(y)

for key, value in segment.items():
        folium.PolyLine(value['points'], color=colormap(value['pace_norm']),
                    smooth_factor= 0, popup= (str(value['hr_pace']) + " km/hr")).add_to(x)

In [391]:
y

In [None]:
df_mini.head()

In [None]:
rolling_targ = 10
for i in range(16, 17):
    # print(df_mini.loc[10, 'dist'])
    # print(df_mini.loc[1:13, 'dist'].iloc[::-1].reset_index(drop=True))
    # print("")
    # print(df_mini.loc[1:13, 'dist'].iloc[::-1].reset_index(drop=True).cumsum())
    
    #reverse the distance list and do a cumsum() to find interval period
    
    running_sum = df_mini.loc[1:i, 'dist'].iloc[::-1].reset_index(drop=True).cumsum() 
    
    #loop through and find the first index that crosses the target distance
    splt_steps = 0
    for index, item in running_sum.iteritems():
        try:
            if item > rolling_targ:
                split_steps = index+1 #go back this number of steps and grab the time
                break
        except:
            break
            
    split_speed = get_velocity(0, rolling_targ, df_mini.loc[i-split_steps, 'time_two'],df_mini.loc[i, 'time_two'])
    rows_ago = split_steps #grabs the number of rows ago for mapping
    
    #Add a rows_ago and split_speed column to dataframe 
    
    #map from that dataframe
    
    print(split_speed)
    print(i)        
            
    break
    
    
    

#     running_sum = df_mini.loc[1:i, 'dist'].iloc[::-1].cumsum()
#     print(running_sum)
        
#     if running_sum[1] > rolling_targ: 
#         print(running_sum)
#         print(running_sum[1])
    #         break

In [None]:
running_sum

In [None]:
df_mini.loc[1:16]

## colorcode the polyline based on speed


In [None]:
#lineplot of the speed 


In [None]:
#make the intervals smaller
#normalize the data to exhagerate differences --challenge will be to ignore street crossings 
#popups to tell the stat
#turn into functions
Turn to 

# Charting

In [16]:
bar1 = get_chart_data(splits)

NameError: name 'splits' is not defined

In [15]:
def get_chart_data(splits):
    stats = {}
    pace = []
    location = []
    width = []
    splitLen = []

    for key, value in splits.items():
        pace.append(splits[key]["hr_pace"])
        location.append(splits[key]["start_distance"])
        width.append(splits[key]['dist'])
        splitLen.append(splits[key]['split_length'])
    
    stats['pace'] = pace
    stats['location'] = location
    stats['width'] = width
    stats['splitLen'] = np.cumsum(splitLen).astype(int)
    stats['splitLen_labels'] = stats['splitLen']/1000
    
    return stats

In [17]:
bar1

NameError: name 'bar1' is not defined

In [18]:
#logic for labeling the x axis 
splitLen = np.cumsum(splitLen).astype(int)
splitLen_labels = splitLen/1000
end_label = str(round(splitLen_labels[-1]%1, 2))[1:4]
splitLen_labels = list(splitLen_labels.astype(int))
splitLen_labels[-1] = end_label

NameError: name 'splitLen' is not defined

In [19]:
#should be a line plot as well, bars to show split 
#vary the colors with normalization function 
figure(figsize=(9, 5), dpi=80)

plt.bar(bar1['location'], bar1['pace'], width = bar1['width'], align='edge', edgecolor="red")
plt.title("velocity over prospect park run")
plt.xlabel('distance into run (kms)')
plt.xticks(ticks = bar1['splitLen'], labels= bar1['splitLen_labels'])
plt.ylabel('km/hr')

# ax2 = plt.twinx()
#plt.plot(e_location,e_pace, color="black")

NameError: name 'bar1' is not defined

<Figure size 720x400 with 0 Axes>