## Load Libraries

In [4]:
import pandas as pd
import numpy as np

## Load the full CSV downloaded from the Strava API

In [9]:
# Read the ride data exported from the Strava API into a pandas DataFrame
path = 'RideData_20170508211642.csv'
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long
0,2017-03-27 06:52:47+00:00,2017-03-27 06:52:47+00:00,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714
1,2017-03-27 06:52:47+00:00,2017-03-27 06:52:57+00:00,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685
2,2017-03-27 06:52:47+00:00,2017-03-27 06:52:59+00:00,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625
3,2017-03-27 06:52:47+00:00,2017-03-27 06:53:01+00:00,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555
4,2017-03-27 06:52:47+00:00,2017-03-27 06:53:02+00:00,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519


## Modify the data

In [10]:
# Convert act_startDate to datetime, keep only its date as a daily period in
# a separate 'day' column, and sort the rows by ascending day and timestamp.
# NOTE: 'timestamp' is not converted here, so within a day the rows are
# ordered by the raw column values as loaded from the CSV.
df = (
    df
    .assign(act_startDate=lambda frame: pd.to_datetime(frame['act_startDate']))
    .assign(day=lambda frame: pd.DatetimeIndex(frame['act_startDate']).to_period('D'))
    .sort_values(by=['day', 'timestamp'], ascending=True)
)

# Check main df
df.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day
0,2017-03-27 06:52:47,2017-03-27 06:52:47+00:00,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27
1,2017-03-27 06:52:47,2017-03-27 06:52:57+00:00,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685,2017-03-27
2,2017-03-27 06:52:47,2017-03-27 06:52:59+00:00,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625,2017-03-27
3,2017-03-27 06:52:47,2017-03-27 06:53:01+00:00,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555,2017-03-27
4,2017-03-27 06:52:47,2017-03-27 06:53:02+00:00,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519,2017-03-27


## Create a day number flag for keeping track of the trip days

In [11]:
# Create helper dataframe with one row per unique day, sorted ascending.
# Taking the distinct values of the 'day' column directly avoids counting
# every other column per group only to discard the counts, and does not
# require df to have at least one column besides 'day'.
df_helper = (
    df[['day']]
    .dropna()                 # the groupby version skipped missing days too
    .drop_duplicates()
    .sort_values(by='day')    # the groupby version returned days in sorted order
    .reset_index(drop=True)
)

# Helper that hands out consecutive index numbers for a new column.
# `counter` holds the number the next call to giveFlag will return.
counter = 1


def giveFlag(x):
    """Return the current value of the global `counter`, then advance it by one.

    The argument `x` is accepted but its value is not used; each call simply
    yields the next number in the sequence 1, 2, 3, ...
    """
    global counter
    issued, counter = counter, counter + 1
    return issued

# Create day flagger column: number the rows of df_helper 1..N in row order.
# Assigning the sequence directly (instead of applying a function that bumps
# a global counter once per row) yields the same numbers on every run and
# does not depend on the counter's current value.
df_helper['day_no'] = range(1, len(df_helper) + 1)

# Check df_helper
df_helper

Unnamed: 0,day,day_no
0,2017-03-27,1
1,2017-04-01,2
2,2017-04-03,3
3,2017-04-06,4
4,2017-04-07,5
5,2017-04-11,6
6,2017-05-01,7


In [12]:
# Merge flagger with the main dataframe (pd.merge defaults to an inner join,
# here on the shared 'day' column).
# Any 'day_no' column left over from an earlier run of this cell is dropped
# first; otherwise a second run would produce 'day_no_x' / 'day_no_y'
# instead of a single 'day_no' column.
df = pd.merge(df.drop('day_no', axis=1, errors='ignore'), df_helper, on='day')

# Check main df
df.head()

Unnamed: 0,act_startDate,timestamp,act_id,act_name,altitude,distance,grade_smooth,heartrate,moving,time,velocity_smooth,lat,long,day,day_no
0,2017-03-27 06:52:47,2017-03-27 06:52:47+00:00,916298883,Morning Ride,8.0,0.0,3.2,,False,0,0.0,55.674874,12.592714,2017-03-27,1
1,2017-03-27 06:52:47,2017-03-27 06:52:57+00:00,916298883,Morning Ride,8.0,4.4,5.1,,True,10,0.4,55.67491,12.592685,2017-03-27,1
2,2017-03-27 06:52:47,2017-03-27 06:52:59+00:00,916298883,Morning Ride,8.3,9.3,5.4,,True,12,0.8,55.674938,12.592625,2017-03-27,1
3,2017-03-27 06:52:47,2017-03-27 06:53:01+00:00,916298883,Morning Ride,8.8,15.6,6.3,,True,14,1.1,55.674979,12.592555,2017-03-27,1
4,2017-03-27 06:52:47,2017-03-27 06:53:02+00:00,916298883,Morning Ride,9.0,18.6,6.0,,True,15,2.8,55.674997,12.592519,2017-03-27,1


## Show statistics per day