<h1>Shaping Walking Data</h1>
This notebook takes .csv data and calculates walking statistics, exporting results in a separate folder as csv

<h1>Set Parameters</h1>

In [23]:
#repeat for each of these sequentially

path_input = '../data/commute_riverside.csv'
path_output = '../results/commute_riverside_stats.csv'

#path_input = '../data/commute_broadway.csv'
#path_output = '../results/commute_broadway_stats.csv'

<h1>Import Needed Libraries</h1>

In [24]:
import csv
import pandas as pd

#for reading GPX files

import gpxpy 

#for calculating distance and slope
import geopy.distance 
import numpy as np

#for calculating degrees
#import math

<h1>Define Functions for Calculations</h1>

In [25]:
# "vectorized haversine function" used for calculating distance between GPS coordinates
# found at https://stackoverflow.com/questions/29545704/fast-haversine-approximation-python-pandas/29546836#29546836
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
    """
    slightly modified version: of http://stackoverflow.com/a/29546836/2901002

    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees or in radians)

    All (lat, lon) coordinates must have numeric dtypes and be of equal length.

    """
    
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2

    c = 2 * np.arcsin(np.sqrt(a))
    km = 6367 * c
    return km

In [26]:
#calculation source based on https://www.verywellfit.com/how-many-more-calories-do-you-burn-walking-uphill-3975557
def get_calories(distance, slope_angle):
    
    #1609 meters per mile, 80 calories per mile
    calories_raw = (distance/1609) * 80

    if slope_angle < -1: #fewer calories burned downhill
        calories_adj = calories_raw * .934
    elif slope_angle > 1: #more calories burned uphill
        calories_adj = calories_raw + slope_angle * (distance/1609) * 10
    else:
        calories_adj = calories_raw
    
    return calories_adj

In [27]:
#calculation source based on https://www.verywellfit.com/how-many-more-calories-do-you-burn-walking-uphill-3975557
def get_effort(slope_angle):
    
    if slope_angle < 1: #fewer calories burned downhill
        effort = .934
    elif slope_angle > 1: #more calories burned uphill
        effort = 1 + slope_angle * (1/8)
    else:
        effort = 1
    
    return effort

<h1>Calculate Stats</h1>

In [28]:
df_walk = pd.read_csv(path_input)

#calculate distance and change in elevation, units in meters
df_walk['d_distance'] = \
    1000* haversine(df_walk.lat.shift(), df_walk.long.shift(),
                 df_walk.loc[1:, 'lat'], df_walk.loc[1:, 'long'])
df_walk["d_height"]= df_walk.elev.diff()
df_walk["slope"]=df_walk["d_height"]/df_walk["d_distance"]
df_walk["slope_angle"]=np.rad2deg(np.arctan2(df_walk["d_height"],df_walk["d_distance"]))
df_walk = df_walk.fillna(0)
df_walk = df_walk.drop('Unnamed: 0', axis=1) #not sure where this column comes from
df_walk

Unnamed: 0,lat,long,elev,d_distance,d_height,slope,slope_angle
0,40.83471,-73.94743,26.10,0.000000,0.00,0.000000,0.000000
1,40.83472,-73.94747,26.01,3.541924,-0.09,-0.025410,-1.455568
2,40.83444,-73.94766,26.27,34.976220,0.26,0.007434,0.425907
3,40.83432,-73.94775,26.00,15.332378,-0.27,-0.017610,-1.008863
4,40.83413,-73.94808,23.63,34.865646,-2.37,-0.067975,-3.888711
...,...,...,...,...,...,...,...
150,40.80850,-73.96473,36.96,7.509398,2.94,0.391509,21.380811
151,40.80820,-73.96403,41.00,67.660476,4.04,0.059710,3.417068
152,40.80818,-73.96404,41.00,2.376337,0.00,0.000000,0.000000
153,40.80812,-73.96409,40.58,7.883034,-0.42,-0.053279,-3.049777


In [29]:
# count calories and get cumulative sums
df_walk['d_calories'] = df_walk.apply(lambda x: get_calories(x['d_distance'], x['slope_angle']), axis=1)
df_walk['calories_sum'] = df_walk['d_calories'].cumsum()
df_walk['distance_sum'] = df_walk['d_distance'].cumsum()
df_walk

Unnamed: 0,lat,long,elev,d_distance,d_height,slope,slope_angle,d_calories,calories_sum,distance_sum
0,40.83471,-73.94743,26.10,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
1,40.83472,-73.94747,26.01,3.541924,-0.09,-0.025410,-1.455568,0.164483,0.164483,3.541924
2,40.83444,-73.94766,26.27,34.976220,0.26,0.007434,0.425907,1.739029,1.903512,38.518144
3,40.83432,-73.94775,26.00,15.332378,-0.27,-0.017610,-1.008863,0.712017,2.615529,53.850522
4,40.83413,-73.94808,23.63,34.865646,-2.37,-0.067975,-3.888711,1.619118,4.234647,88.716168
...,...,...,...,...,...,...,...,...,...,...
150,40.80850,-73.96473,36.96,7.509398,2.94,0.391509,21.380811,1.371238,197.710877,3557.438632
151,40.80820,-73.96403,41.00,67.660476,4.04,0.059710,3.417068,4.801021,202.511898,3625.099107
152,40.80818,-73.96404,41.00,2.376337,0.00,0.000000,0.000000,0.118152,202.630050,3627.475445
153,40.80812,-73.96409,40.58,7.883034,-0.42,-0.053279,-3.049777,0.366079,202.996128,3635.358479


<h1>Export It</h1>

In [30]:
df_walk.to_csv(path_output)