## Load Libraries

In [63]:
import pandas as pd
import numpy as np
from pandas import Timestamp
import os
from datetime import datetime, timedelta

### Keep only the days of the trip inside the directory

In [64]:
# Read the directories with the data and save the file names in two lists
path_to_places = '../data/moves/moves_export/csv/daily/places/'
path_to_summary = '../data/moves/moves_export/csv/daily/summary/'

# os.listdir returns entries in arbitrary order; sort them so the daily files
# (named <kind>_YYYYMMDD.csv) are always processed in chronological order
csv_files_places = sorted(
    name for name in os.listdir(path_to_places) if name.endswith('.csv'))
csv_files_summary = sorted(
    name for name in os.listdir(path_to_summary) if name.endswith('.csv'))

In [65]:
# Check if filenames are parsed correctly (show the first five of each list).
# A single parenthesised argument prints identically on Python 2 and Python 3.
print(csv_files_places[:5])
print(csv_files_summary[:5])

['places_20170321.csv', 'places_20170322.csv', 'places_20170323.csv', 'places_20170324.csv', 'places_20170325.csv']
['summary_20170321.csv', 'summary_20170322.csv', 'summary_20170323.csv', 'summary_20170324.csv', 'summary_20170325.csv']


## Useful Functions

In [66]:
# Turns a zero-based index into a one-based counter
def iterNo(d):
    """Return ``d`` increased by one."""
    one_based = d + 1
    return one_based

In [82]:
# Transforms seconds to an hours:minutes:seconds string
def secToHours(d):
    """Format a duration in seconds as an ``HH:MM:SS`` string.

    Parameters
    ----------
    d : int or float
        Duration in seconds. Fractional values are truncated to whole
        seconds, so a float total (for example a sum that pandas upcast to
        float) no longer leaks fragments such as ``'21.0'`` into the result.

    Returns
    -------
    str
        Hours, minutes and seconds separated by ``:``, each zero-padded to
        at least two digits. Hours are not wrapped at 24, so long totals
        come out as e.g. ``'100:00:00'``.

    Raises
    ------
    ValueError, OverflowError
        If ``d`` is NaN or infinite and therefore cannot be converted to int.
    """
    # Work on whole seconds so every component formats as an integer
    total_seconds = int(d)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)

    # %02d pads single-digit components with a leading zero
    return '%02d:%02d:%02d' % (hours, minutes, seconds)

In [90]:
# Converts a distance in km and a duration in seconds to an average speed in km/h
def avgSpeedConverter(f,d):
    """Return the average speed in km/h.

    Parameters
    ----------
    f : number
        Distance in kilometres.
    d : number
        Duration in seconds.

    Returns
    -------
    float
        ``(metres / seconds) * 3.6``, i.e. metres per second converted to
        kilometres per hour.

    Notes
    -----
    A zero duration is not guarded against: with plain Python numbers it
    raises ``ZeroDivisionError``.
    """
    # Multiply by a float so the division below is never integer division
    # (under Python 2, int / int would silently truncate the speed)
    meters = f * 1000.0

    # m/s -> km/h
    avg_speed = (meters / d) * 3.6
    return avg_speed

In [67]:
# Fix year to Date column
def yearFixer(s):
    """Expand the two-digit year of a ``dd/mm/yy`` string to four digits.

    Parameters
    ----------
    s : str
        Date string whose third ``/``-separated field is the year.

    Returns
    -------
    str
        The same date with ``'20'`` prefixed to the year, e.g.
        ``'21/03/17'`` -> ``'21/03/2017'``. A string whose year already has
        four digits is returned unchanged, so applying the function twice is
        harmless.

    Raises
    ------
    IndexError
        If ``s`` contains fewer than two ``/`` separators.
    """
    year = s.split('/')[2]

    # Already expanded: prefixing again would produce e.g. '21/03/20202017'
    if len(year) == 4:
        return s

    # Replace the trailing two characters with the century-prefixed year
    return s[:-2] + '20' + year

In [68]:
# Parse the Date column strings into datetime objects
def dateConverter(s):
    """Parse a ``dd/mm/YYYY`` string and return the matching ``datetime``."""
    return datetime.strptime(s, "%d/%m/%Y")

## Create a single CSV for each segment

### Places

In [69]:
# Places df: read every daily file once and stack them in file-name order.
# Concatenating a list in one call avoids re-copying the growing frame on
# every iteration.
places_frames = [pd.read_csv(path_to_places + file_name)
                 for file_name in sorted(csv_files_places)]
df_places = pd.concat(places_frames)

# Drop last two columns; reset_index() keeps each row's position inside its
# daily file as a new 'index' column
df_places = df_places.drop(['Category', 'Link'], axis=1).reset_index()

# Change start and end time to datetime type
df_places['Start'] = pd.to_datetime(df_places['Start'])
df_places['End'] = pd.to_datetime(df_places['End'])
# Expand the two-digit year, then parse the dd/mm/YYYY string
df_places['Date'] = df_places['Date'].apply(yearFixer).apply(dateConverter)

# Rename index column to iter_no like STRAVA and make it one-based
df_places = df_places.rename(columns={'index': 'iter_no'})
df_places['iter_no'] = df_places['iter_no'].apply(iterNo)

# Create day_no like STRAVA: number the distinct dates 1..N in chronological
# order and look the number up per row, so the value always belongs to the
# row's own date regardless of the order in which the rows were loaded
day_numbers = {day: number
               for number, day in enumerate(sorted(set(df_places['Date'])), 1)}
df_places['day_no'] = df_places['Date'].map(day_numbers)

In [70]:
# Check if columns are correct: display the first 10 rows of the places frame
df_places.head(10)

Unnamed: 0,iter_no,Date,Name,Start,End,Duration,Latitude,Longitude,day_no
0,1,2017-03-21,DTU Bibliotek,2017-03-21 13:02:45,2017-03-21 16:16:43,11638,55.78699,12.52329,1
1,2,2017-03-21,Lyngby Svømmehal,2017-03-21 16:23:42,2017-03-21 17:42:07,4705,55.781602,12.507931,1
2,3,2017-03-21,Place in Kongens Lyngby,2017-03-21 17:49:24,2017-03-21 18:21:05,1901,55.78699,12.52329,1
3,4,2017-03-21,Place in Kongens Lyngby,2017-03-21 18:21:05,2017-03-21 18:31:17,612,55.787481,12.526896,1
4,5,2017-03-21,Home,2017-03-21 19:07:06,2017-03-21 23:00:00,13974,55.67439,12.59182,1
5,1,2017-03-22,Home,2017-03-21 23:00:00,2017-03-22 07:42:31,31351,55.67439,12.59182,2
6,2,2017-03-22,Dtu 324,2017-03-22 08:29:52,2017-03-22 20:07:48,41876,55.783588,12.518219,2
7,3,2017-03-22,Place in Kongens Lyngby,2017-03-22 20:20:32,2017-03-22 20:30:49,617,55.786867,12.528193,2
8,4,2017-03-22,"Place in Christianshavn, København",2017-03-22 21:08:28,2017-03-22 23:00:00,6692,55.67439,12.59182,2
9,1,2017-03-23,"Place in Christianshavn, København",2017-03-22 23:00:00,2017-03-23 07:12:15,29535,55.67439,12.59182,3


### Summary

In [71]:
# Summary df: read every daily file once and stack them in file-name order.
# Concatenating a list in one call avoids re-copying the growing frame on
# every iteration.
summary_frames = [pd.read_csv(path_to_summary + file_name)
                  for file_name in sorted(csv_files_summary)]
df_summary = pd.concat(summary_frames)

# Drop group column; reset_index() keeps each row's position inside its
# daily file as a new 'index' column
df_summary = df_summary.drop('Group', axis=1).reset_index()

# Change the Date strings to datetime type: expand the two-digit year, then
# parse the dd/mm/YYYY string
df_summary['Date'] = df_summary['Date'].apply(yearFixer).apply(dateConverter)

# Rename index column to iter_no like STRAVA and make it one-based
df_summary = df_summary.rename(columns={'index': 'iter_no'})
df_summary['iter_no'] = df_summary['iter_no'].apply(iterNo)

df_summary.head(10)

Unnamed: 0,iter_no,Date,Activity,Duration,Distance,Steps,Calories
0,1,2017-03-21,walking,329,0.263,527,17
1,2,2017-03-21,cycling,1263,4.377,0,126
2,3,2017-03-21,transport,1740,14.627,0,0
3,1,2017-03-22,walking,1725,2.035,2814,128
4,2,2017-03-22,transport,4590,32.249,0,0
5,1,2017-03-23,walking,1476,1.537,2584,97
6,2,2017-03-23,cycling,2727,7.744,0,229
7,1,2017-03-24,walking,1861,1.633,2892,103
8,2,2017-03-24,cycling,1020,3.937,0,112
9,3,2017-03-24,transport,2731,15.763,0,0


#### Cycling Distance per day

In [96]:
# Total cycling distance (km) and duration (seconds) per day, oldest day first.
# Only the two needed columns are summed, then renamed to their final names.
cycling_df = (
    df_summary[df_summary['Activity'] == 'cycling']
    .groupby('Date')[['Distance', 'Duration']]
    .sum()
    .reset_index()
    .sort_values(by='Date', ascending=True)
    .rename(columns={'Distance': 'ttl_cyc_km', 'Duration': 'ttl_cyc_seconds'})
)

# Daily duration as an hours:minutes:seconds string
cycling_df['ttl_cyc_duration'] = cycling_df['ttl_cyc_seconds'].apply(secToHours)

# Daily average speed, computed row by row from the distance and the duration
cycling_df['avg_day_speed'] = cycling_df[['ttl_cyc_km', 'ttl_cyc_seconds']].apply(
    lambda row: avgSpeedConverter(row['ttl_cyc_km'], row['ttl_cyc_seconds']), axis=1)

In [97]:
# Display the first 10 rows of the per-day cycling totals
cycling_df.head(10)

Unnamed: 0,Date,ttl_cyc_km,ttl_cyc_seconds,ttl_cyc_duration,avg_day_speed
0,2017-03-21,4.377,1263,00:21:03,12.47601
1,2017-03-23,7.744,2727,00:45:27,10.223102
2,2017-03-24,3.937,1020,00:17:00,13.895294
3,2017-03-25,1.885,468,00:07:48,14.5
4,2017-03-26,6.744,1460,00:24:20,16.629041
5,2017-03-27,3.916,1217,00:20:17,11.583895
6,2017-03-28,2.197,673,00:11:13,11.752155
7,2017-03-29,3.924,1423,00:23:43,9.927196
8,2017-03-30,2.105,629,00:10:29,12.047695
9,2017-04-03,4.054,1433,00:23:53,10.184508


In [105]:
# TTL: total cycling distance and time over all days.
# The whole formatted string is a single parenthesised argument, so the line
# prints identically on Python 2 and Python 3.
print('Total cycling distance of the whole trip: \t%.2f km \nTotal time cycled: \t\t\t\t%s h|m|s' % (
    sum(cycling_df['ttl_cyc_km']), secToHours(sum(cycling_df['ttl_cyc_seconds']))))

Total cycling distance of the whole trip: 	302.17 km 
Total time cycled: 				18:14:10 h|m|s


#### Walking Distance and steps per day


In [31]:
# Total walking distance and steps per day, oldest day first.
# Only the two needed columns are summed, then renamed to their final names.
walking_df = (
    df_summary[df_summary['Activity'] == 'walking']
    .groupby('Date')[['Distance', 'Steps']]
    .sum()
    .reset_index()
    .sort_values(by='Date', ascending=True)
    .rename(columns={'Distance': 'ttl_wal_distance', 'Steps': 'ttl_steps'})
)

In [32]:
# Display the first rows of the per-day walking totals
walking_df.head()

Unnamed: 0,Date,ttl_wal_distance,ttl_steps
0,2017-03-21,0.263,527
1,2017-03-22,2.035,2814
2,2017-03-23,1.537,2584
3,2017-03-24,1.633,2892
4,2017-03-25,1.049,1584


In [58]:
# TTL: total walking distance and steps over all days.
# The whole formatted string is a single parenthesised argument, so the line
# prints identically on Python 2 and Python 3.
print('Total walking distance covered during the trip: %.2f km \nTotal steps covered during the trip: \t\t%d steps' % (
    sum(walking_df['ttl_wal_distance']), sum(walking_df['ttl_steps'])))

Total walking distance covered during the trip: 164.77 km 
Total steps covered during the trip: 		241391 steps


#### Calories burnt per day


In [60]:
# Total calories burnt per day across all activities, oldest day first.
# Only the Calories column is summed, then renamed to its final name.
calories_df = (
    df_summary
    .groupby('Date')[['Calories']]
    .sum()
    .reset_index()
    .sort_values(by='Date', ascending=True)
    .rename(columns={'Calories': 'ttl_cal_burnt'})
)


In [61]:
# Display the first rows of the per-day calorie totals
calories_df.head()

Unnamed: 0,Date,ttl_cal_burnt
0,2017-03-21,143
1,2017-03-22,128
2,2017-03-23,326
3,2017-03-24,215
4,2017-03-25,120


In [62]:
# TTL: total calories over all days.
# The whole formatted string is a single parenthesised argument, so the line
# prints identically on Python 2 and Python 3.
print('Total calories burnt during the trip: %d cal' % sum(calories_df['ttl_cal_burnt']))

Total calories burnt during the trip: 22777 cal
