In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy.interpolate

In [2]:
# --- Configuration -----------------------------------------------------------
data_dir = 'Data/Activity Logs'    # folder holding the activity export, relative to the notebook's parent directory
datafile = 'Activities_Andrew.csv'
pson = 'Andrew'                    # person label used in figure titles and output file names
fig_dir = 'Figures'                # NOTE(review): not referenced by any cell shown here — confirm intended use
ignore_noHR = True                 # when True, the cleaning cell drops activities without heart-rate data

# "Activity Type" values that count as a run / a walk.
run_identifiers = ["Running", "Track Running", "Trail Running"]
walk_identifiers = ["Walking"]

# --- Load --------------------------------------------------------------------
# Build the CSV path instead of chdir-ing into the data folder and back: if the
# read fails after a chdir, the kernel is left in the wrong working directory
# and every re-run of this cell climbs one directory further up.
pwd = os.getcwd()
all_activities = pd.read_csv(os.path.join(pwd, os.pardir, data_dir, datafile))

FileNotFoundError: [Errno 2] No such file or directory: 'Data/Activities'

In [None]:
# Preview the first ten rows of the freshly loaded table before any cleaning.
all_activities.head(n=10)

In [None]:
# Columns that are cast to float below; thousands separators are stripped only here
# so that free-text columns keep their commas.
NUMERIC_COLS = ['Calories', 'Distance', 'Avg HR', 'Max HR']
METERS_PER_MILE = 1609.34


def _clock_to_minutes(clock):
    """Convert an 'H:MM:SS' string to minutes as a float.

    Missing values become NaN instead of raising, and values that are already
    numeric (e.g. when this cell is re-run) are passed through unchanged.
    """
    if isinstance(clock, str):
        parts = clock.split(':')
        return float(parts[0]) * 60 + float(parts[1]) + float(parts[2]) / 60
    return np.nan if pd.isna(clock) else float(clock)


# Turn the '--' placeholder into NaN. The pattern is anchored so only cells that
# consist solely of '--' are affected, and np.nan is passed explicitly because
# value=None has version-dependent meaning in DataFrame.replace.
cleaned = all_activities.replace(r'^\s*--\s*$', np.nan, regex=True)

# Strip thousands separators from the numeric columns, then cast to float.
numeric = cleaned[NUMERIC_COLS].replace(',', '', regex=True).astype(float)

# assign() returns a new frame, so nothing is written into a possibly shared slice.
all_activities = cleaned.assign(
    **{col: numeric[col] for col in NUMERIC_COLS},
    Time=cleaned['Time'].map(_clock_to_minutes),  # duration in minutes
)

if ignore_noHR:
    # `!= None` is always True element-wise, so missing heart rates must be
    # detected with notna(); zero is also treated as "no HR recorded".
    avg_hr = all_activities["Avg HR"]
    all_activities = all_activities[avg_hr.notna() & (avg_hr != 0)]

print('Total Activities Loaded: ' + str(all_activities.shape[0]))

# Independent copies so the unit fix below never writes into a view of all_activities.
runs = all_activities.loc[all_activities["Activity Type"].isin(run_identifiers)].copy()
walks = all_activities.loc[all_activities["Activity Type"].isin(walk_identifiers)].copy()

#handle runs not logged in Miles...but Garmin not providing Units.
#Assume any Track Running activity needs to be converted from meters to miles.
is_track = runs["Activity Type"] == "Track Running"  # mask built from runs itself, so indexes match
runs.loc[is_track, "Distance"] = runs.loc[is_track, "Distance"] / METERS_PER_MILE

print('Total Runs Loaded: ' + str(runs.shape[0]))
print('Total Walks Loaded: ' + str(walks.shape[0]))

runs.head(20)


In [None]:
%matplotlib inline

ax1 = plt.subplot(1,2,1)
plt.scatter(x = runs["Distance"], y = runs["Calories"], c = runs["Calories"], cmap=plt.get_cmap('Reds_r'),alpha = .5)
plt.scatter(x = walks["Distance"], y = walks["Calories"], c = walks["Calories"], cmap=plt.get_cmap('Blues_r'),alpha = .5)
plt.xlabel('Distance')
plt.ylabel('Calories (Garmin Est.)');


ax2 = plt.subplot(1,2,2, sharey = ax1)
plt.scatter(x = runs["Time"], y = runs["Calories"], c = runs["Calories"], cmap=plt.get_cmap('Reds_r'),alpha = .5)
plt.scatter(x = walks["Time"], y = walks["Calories"], c = walks["Calories"], cmap=plt.get_cmap('Blues_r'),alpha = .5)
plt.text(x=5, y=1750, s="N_runs = " + str(runs.shape[0]),c = 'firebrick') #hardcoded can fix this later
plt.text(x=105, y=1000, s="N_walks = " + str(walks.shape[0]), c = 'cornflowerblue') #hardcoded can fix this later
plt.xlim([0,300])
box = ax2.get_position()
box.x0 = box.x0 + 0.03
box.x1 = box.x1 + 0.03
ax2.set_position(box)
plt.xlabel('Time in Activity')
plt.suptitle('Walking vs Running - ' + pson)
plt.ylim([0,2000]);
plt.savefig(pson+'_walkrun.jpg',dpi = 500)


In [None]:
#Average Calories/Mile burned by Pace

def _scatter_with_mean(ax, pace, values, mean_value, cmap_name, color, label_dy):
    """Scatter `values` against `pace` on `ax` and mark their mean.

    Draws the points coloured by value with the named colormap, a horizontal
    line at `mean_value` spanning x = 0..60, and an "Avg = ..." label at x = 50
    placed `label_dy` above the line (data coordinates, hardcoded for now).
    """
    ax.scatter(pace, values, c=values, cmap=plt.get_cmap(cmap_name), alpha=.5)
    ax.plot(np.arange(0, 61), np.ones(61) * mean_value, color=color, linewidth=1.5, alpha=.5)
    ax.text(x=50, y=mean_value + label_dy, s="Avg = " + str(np.round(mean_value)), color=color)


# Per-activity metrics. Time is in minutes, so pace is min per distance unit.
pace_r = runs["Time"] / runs["Distance"]
calspmi_r = runs["Calories"] / runs["Distance"]
calspmin_r = runs["Calories"] / runs["Time"]
avg_hr_r = runs["Avg HR"]

pace_w = walks["Time"] / walks["Distance"]
calspmi_w = walks["Calories"] / walks["Distance"]
calspmin_w = walks["Calories"] / walks["Time"]
avg_hr_w = walks["Avg HR"]

# Means over valid entries only: masked_invalid hides NaN/inf (e.g. zero distance or time).
r_meancalspmi = np.ma.masked_invalid(calspmi_r).mean()
w_meancalspmi = np.ma.masked_invalid(calspmi_w).mean()

r_meancalspmin = np.ma.masked_invalid(calspmin_r).mean()
w_meancalspmin = np.ma.masked_invalid(calspmin_w).mean()

r_avg_hr = np.ma.masked_invalid(avg_hr_r).mean()
w_avg_hr = np.ma.masked_invalid(avg_hr_w).mean()

# --- Figure 1: calories per mile / per minute against pace --------------------
# Created explicitly so the panels never land in a leftover current figure.
fig_calories = plt.figure()

ax1 = fig_calories.add_subplot(2, 1, 1)
_scatter_with_mean(ax1, pace_r, calspmi_r, r_meancalspmi, 'Reds_r', 'firebrick', -15)
_scatter_with_mean(ax1, pace_w, calspmi_w, w_meancalspmi, 'Blues_r', 'cornflowerblue', 8)
ax1.set_xlabel('Avg Pace (min/mi)')
ax1.set_ylabel('Calories/Mile')
ax1.set_ylim([0, 400])

ax2 = fig_calories.add_subplot(2, 1, 2, sharex=ax1)
_scatter_with_mean(ax2, pace_r, calspmin_r, r_meancalspmin, 'Reds', 'firebrick', 0)
_scatter_with_mean(ax2, pace_w, calspmin_w, w_meancalspmin, 'Blues', 'cornflowerblue', 0)
ax2.set_xlabel('Avg Pace (min/mi)')
ax2.set_ylabel('Calories/Min')
# The x axis is shared, so this single limit applies to both panels.
ax2.set_xlim([0, 60])
fig_calories.suptitle('Calories Burned as Pace Drops - ' + pson)
fig_calories.savefig(pson + '_caloriepace.jpg', dpi=500)

# --- Figure 2: average heart rate against pace --------------------------------
fig = plt.figure()
ax_hr = fig.add_subplot(1, 1, 1)
_scatter_with_mean(ax_hr, pace_r, avg_hr_r, r_avg_hr, 'Reds', 'firebrick', 3)
_scatter_with_mean(ax_hr, pace_w, avg_hr_w, w_avg_hr, 'Blues', 'cornflowerblue', 3)
ax_hr.set_xlabel('Avg Pace (min/mi)')
ax_hr.set_ylabel('Avg HR')  # this panel plots heart rate, not calories per mile
ax_hr.set_xlim([0, 60])
ax_hr.set_title('Avg HR as Pace Drops - ' + pson)
fig.savefig(pson + '_hrpace.jpg', dpi=500)


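# NOTE(review): the commented-out plots below are leftover scratch code. They reference
# names (pace, calspmi, avg_hr) that are not defined in the cells shown above.
# ax = plt.axes()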
# plt.scatter(pace, calspmi, c = calspmi, alpha = .5)
# plt.plot(np.arange(0,20),np.ones(20)*np.ma.masked_invalid(r_avg_hr).mean(),'firebrick')
# plt.xlabel('Avg Pace (min/mi)');
# plt.ylabel('Calories/Mile');
# # plt.xlim([4,13])
# # plt.ylim([0,200])
# ax.invert_xaxis();
# plt.show()

# plt.figure()
# plt.scatter(pace, avg_hr, c = avg_hr, alpha = .5)
# plt.xlabel('Avg Pace (min/mi)');
# plt.ylabel('Heart Rate');
# # plt.xlim([4,13])
# plt.show()
