## Load Libraries

In [1]:
import pandas as pd
import numpy as np

## Load total CSV after downloading from STRAVA API

In [2]:
# Load it into a Dataframe using pandas
path = 'RideData_20170508211642.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long
0,2017-03-27 06:52:47+00:00,2017-03-27 06:52:47+00:00,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714
1,2017-03-27 06:52:47+00:00,2017-03-27 06:52:57+00:00,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685
2,2017-03-27 06:52:47+00:00,2017-03-27 06:52:59+00:00,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625
3,2017-03-27 06:52:47+00:00,2017-03-27 06:53:01+00:00,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555
4,2017-03-27 06:52:47+00:00,2017-03-27 06:53:02+00:00,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519


## Useful Functions

In [3]:
# Trim data per specific time threshold
def f(s, thresh):
    cur = None
    for i, v in s.iteritems():
        if (cur is None) or (v - cur >= thresh):
            yield i
            cur = v

## Modify the data

In [10]:
# Convert to act_StartDate to datetime
df['act_startDate'] = pd.to_datetime(df['act_startDate'])
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Keep only date as a period in a seperate column
df['day'] = pd.DatetimeIndex(df['act_startDate']).to_period('D')

# Sort df by ascending day and timestamp
df = df.sort_values(by=['day','timestamp'],ascending=[True,True])

# Check main df
df.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day,day_no,iter_no
0,2017-03-27 06:52:47,2017-03-27 06:52:47,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27,1,1
1,2017-03-27 06:52:47,2017-03-27 06:52:57,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685,2017-03-27,1,1
2,2017-03-27 06:52:47,2017-03-27 06:52:59,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625,2017-03-27,1,1
3,2017-03-27 06:52:47,2017-03-27 06:53:01,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555,2017-03-27,1,1
4,2017-03-27 06:52:47,2017-03-27 06:53:02,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519,2017-03-27,1,1


## Create a day number flag for keeping track of the trip days    
## Create an iteration number flag for keeping track of activities per day

In [11]:
# Create helper dataframe with unique days
df_helper = df.groupby(by=['day','act_id']).count().iloc[:,0].reset_index().filter(items=['day','act_id'])

# Create columns for iter_no(per activity) and day_no(per day)
days = list(set(df_helper['day']))
days.sort()
day_no = list()
iter_no = list()
for index,day in enumerate(days):
    counter=1
    for dfday in df_helper['day']:
        if dfday == day:
            iter_no.append(counter)
            day_no.append(index+1)
            counter+=1

df_helper['day_no'] = pd.Series(day_no).values
df_helper['iter_no'] = pd.Series(iter_no).values      

df_helper

Unnamed: 0,day,act_id,day_no,iter_no
0,2017-03-27,916298883,1,1
1,2017-03-27,916806776,1,2
2,2017-04-01,923496071,2,1
3,2017-04-03,926539428,3,1
4,2017-04-06,930641527,4,1
5,2017-04-07,931935227,5,1
6,2017-04-11,937765185,6,1
7,2017-05-01,966163233,7,1
8,2017-05-01,966417205,7,2


In [12]:
# Merge flagger with the main dataframe
df = pd.merge(df,df_helper,on=['day','act_id'])

# Check main df
df.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day,day_no_x,iter_no_x,day_no_y,iter_no_y
0,2017-03-27 06:52:47,2017-03-27 06:52:47,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27,1,1,1,1
1,2017-03-27 06:52:47,2017-03-27 06:52:57,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685,2017-03-27,1,1,1,1
2,2017-03-27 06:52:47,2017-03-27 06:52:59,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625,2017-03-27,1,1,1,1
3,2017-03-27 06:52:47,2017-03-27 06:53:01,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555,2017-03-27,1,1,1,1
4,2017-03-27 06:52:47,2017-03-27 06:53:02,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519,2017-03-27,1,1,1,1


## Trim data points if they are too many to be handled by D3

In [13]:
df_trim = df.loc[list(f(df['timestamp'], pd.to_timedelta(30, 's')))]

In [14]:
# Check if the trimming is correct
df_trim.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day,day_no_x,iter_no_x,day_no_y,iter_no_y
0,2017-03-27 06:52:47,2017-03-27 06:52:47,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27,1,1,1,1
18,2017-03-27 06:52:47,2017-03-27 06:53:17,916298883,Morning Ride,7.9,88.9,0.0,,True,30,4.1,55.675395,12.592052,2017-03-27,1,1,1,1
48,2017-03-27 06:52:47,2017-03-27 06:53:47,916298883,Morning Ride,3.9,283.2,0.9,,True,60,7.0,55.676493,12.594387,2017-03-27,1,1,1,1
65,2017-03-27 06:52:47,2017-03-27 06:54:17,916298883,Morning Ride,1.0,500.0,1.5,,True,90,6.7,55.677439,12.597199,2017-03-27,1,1,1,1
90,2017-03-27 06:52:47,2017-03-27 06:54:47,916298883,Morning Ride,6.7,650.3,4.2,,True,120,6.6,55.67828,12.596714,2017-03-27,1,1,1,1


## Show statistics per day

Create cumulative distance when changing activity
- Keep the last row of each iteration of the same day and add its number to all the rest of the next day and do the same for the next activity

Create total elevation per day by summing the absolute difference between each consecutive point way before trimming.


In [8]:
df

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day,day_no,iter_no
0,2017-03-27 06:52:47,2017-03-27 06:52:47+00:00,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27,1,1
1,2017-03-27 06:52:47,2017-03-27 06:52:57+00:00,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.674910,12.592685,2017-03-27,1,1
2,2017-03-27 06:52:47,2017-03-27 06:52:59+00:00,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625,2017-03-27,1,1
3,2017-03-27 06:52:47,2017-03-27 06:53:01+00:00,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555,2017-03-27,1,1
4,2017-03-27 06:52:47,2017-03-27 06:53:02+00:00,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519,2017-03-27,1,1
5,2017-03-27 06:52:47,2017-03-27 06:53:03+00:00,916298883,Morning Ride,9.1,21.8,3.4,,True,16,2.9,55.675019,12.592488,2017-03-27,1,1
6,2017-03-27 06:52:47,2017-03-27 06:53:04+00:00,916298883,Morning Ride,9.3,26.1,1.7,,True,17,3.4,55.675047,12.592440,2017-03-27,1,1
7,2017-03-27 06:52:47,2017-03-27 06:53:05+00:00,916298883,Morning Ride,9.3,30.1,0.5,,True,18,3.5,55.675072,12.592395,2017-03-27,1,1
8,2017-03-27 06:52:47,2017-03-27 06:53:06+00:00,916298883,Morning Ride,9.3,36.5,-1.9,,True,19,4.2,55.675122,12.592342,2017-03-27,1,1
9,2017-03-27 06:52:47,2017-03-27 06:53:07+00:00,916298883,Morning Ride,9.2,41.3,-3.0,,True,20,4.5,55.675138,12.592270,2017-03-27,1,1
