<h1>Shaping Walking Data</h1>
This notebook takes .csv data and calculates walking statistics, exporting results in a separate folder as csv

<h1>Set Parameters</h1>

In [24]:
#run each of these sequentially

#path_input = '../data/commute_riverside_bridge_136.csv'
#path_output = '../results/commute_riverside_bridge_136_stats.csv'

#path_input = '../data/commute_riverside.csv'
#path_output = '../results/commute_riverside_stats.csv'

path_input = '../data/commute_broadway.csv'
path_output = '../results/commute_broadway_stats.csv'

<h1>Import Needed Libraries</h1>

In [25]:
import csv
import pandas as pd

#for reading GPX files

import gpxpy 

#for calculating distance and slope
import geopy.distance 
import numpy as np

#for calculating degrees
#import math

<h1>Define Functions for Calculations</h1>

In [26]:
# "vectorized haversine function" used for calculating distance between GPS coordinates
# found at https://stackoverflow.com/questions/29545704/fast-haversine-approximation-python-pandas/29546836#29546836
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
    """
    slightly modified version: of http://stackoverflow.com/a/29546836/2901002

    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees or in radians)

    All (lat, lon) coordinates must have numeric dtypes and be of equal length.

    """
    
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2

    c = 2 * np.arcsin(np.sqrt(a))
    km = 6367 * c
    return km

In [27]:
#calculation source based on https://www.verywellfit.com/how-many-more-calories-do-you-burn-walking-uphill-3975557
def get_calories(distance, slope_angle):
    
    #1609 meters per mile, 80 calories per mile
    calories_raw = (distance/1609) * 80

    if slope_angle < -1: #fewer calories burned downhill
        calories_adj = calories_raw * .934
    elif slope_angle > 1: #more calories burned uphill
        calories_adj = calories_raw + slope_angle * (distance/1609) * 10
    else:
        calories_adj = calories_raw
    
    return calories_adj

In [28]:
#calculation source based on https://www.verywellfit.com/how-many-more-calories-do-you-burn-walking-uphill-3975557
def get_effort(slope_angle):
    
    if slope_angle < 1: #fewer calories burned downhill
        effort = .934
    elif slope_angle > 1: #more calories burned uphill
        effort = 1 + slope_angle * (1/8)
    else:
        effort = 1
    
    return effort

<h1>Calculate Stats</h1>

In [29]:
df_walk = pd.read_csv(path_input)

#calculate distance and change in elevation, units in meters
df_walk['d_distance'] = \
    1000* haversine(df_walk.lat.shift(), df_walk.long.shift(),
                 df_walk.loc[1:, 'lat'], df_walk.loc[1:, 'long'])
df_walk["d_height"]= df_walk.elev.diff()
df_walk["slope"]=df_walk["d_height"]/df_walk["d_distance"]
df_walk["slope_angle"]=np.rad2deg(np.arctan2(df_walk["d_height"],df_walk["d_distance"]))
df_walk = df_walk.fillna(0)
df_walk

Unnamed: 0,lat,long,elev,d_distance,d_height,slope,slope_angle,d_calories,d_calories_sum
0,40.83477,-73.94735,25.16,0.000000,0.00,0.000000,0.000000,0.000000,0.000000
1,40.83479,-73.94737,24.97,2.786951,-0.19,-0.068175,-3.900097,0.129423,0.129423
2,40.83472,-73.94747,26.01,11.454199,1.04,0.090796,5.188025,0.938833,1.068256
3,40.83444,-73.94766,26.27,34.976220,0.26,0.007434,0.425907,1.739029,2.807285
4,40.83353,-73.94553,32.00,205.664625,5.73,0.027861,1.595899,12.265612,15.072896
...,...,...,...,...,...,...,...,...,...
121,40.80930,-73.96311,41.00,2.787341,0.00,0.000000,0.000000,0.138587,181.761842
122,40.80895,-73.96336,41.71,44.214007,0.71,0.016058,0.919991,2.198335,183.960176
123,40.80890,-73.96340,41.28,6.495469,-0.43,-0.066200,-3.787454,0.301642,184.261818
124,40.80816,-73.96393,41.00,93.538437,-0.28,-0.002993,-0.171510,4.650761,188.912579


In [30]:
# count calories and get cumulative sums
df_walk['d_calories'] = df_walk.apply(lambda x: get_calories(x['d_distance'], x['slope_angle']), axis=1)
df_walk['calories_sum'] = df_walk['d_calories'].cumsum()
df_walk['distance_sum'] = df_walk['d_distance'].cumsum()
df_walk

Unnamed: 0,lat,long,elev,d_distance,d_height,slope,slope_angle,d_calories,d_calories_sum,calories_sum,distance_sum
0,40.83477,-73.94735,25.16,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,40.83479,-73.94737,24.97,2.786951,-0.19,-0.068175,-3.900097,0.129423,0.129423,0.129423,2.786951
2,40.83472,-73.94747,26.01,11.454199,1.04,0.090796,5.188025,0.938833,1.068256,1.068256,14.241150
3,40.83444,-73.94766,26.27,34.976220,0.26,0.007434,0.425907,1.739029,2.807285,2.807285,49.217370
4,40.83353,-73.94553,32.00,205.664625,5.73,0.027861,1.595899,12.265612,15.072896,15.072896,254.881995
...,...,...,...,...,...,...,...,...,...,...,...
121,40.80930,-73.96311,41.00,2.787341,0.00,0.000000,0.000000,0.138587,181.761842,181.761842,3339.462209
122,40.80895,-73.96336,41.71,44.214007,0.71,0.016058,0.919991,2.198335,183.960176,183.960176,3383.676215
123,40.80890,-73.96340,41.28,6.495469,-0.43,-0.066200,-3.787454,0.301642,184.261818,184.261818,3390.171685
124,40.80816,-73.96393,41.00,93.538437,-0.28,-0.002993,-0.171510,4.650761,188.912579,188.912579,3483.710122


<h1>Export It</h1>

In [31]:
df_walk.to_csv(path_output)