## Load Libraries

In [1]:
import pandas as pd
import numpy as np
from pandas import Timestamp
import os
from datetime import datetime, timedelta

In [2]:
# List the data directories and keep the CSV file names in two lists
path_to_places = '../data/moves/moves_export/csv/daily/places/'
path_to_summary = '../data/moves/moves_export/csv/daily/summary/'

# os.listdir() returns entries in arbitrary order; sort them so the files are
# always processed in the same (date-stamped filename) order on every machine.
csv_files_places = sorted(name for name in os.listdir(path_to_places) if name.endswith('.csv'))
csv_files_summary = sorted(name for name in os.listdir(path_to_summary) if name.endswith('.csv'))

In [3]:
# Check if filenames are parsed correctly.
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(csv_files_places[:5])
print(csv_files_summary[:5])

['places_20170321.csv', 'places_20170322.csv', 'places_20170323.csv', 'places_20170324.csv', 'places_20170325.csv']
['summary_20170321.csv', 'summary_20170322.csv', 'summary_20170323.csv', 'summary_20170324.csv', 'summary_20170325.csv']


## Useful Functions

In [4]:
# Shift an index value up by one
def iterNo(s):
    """Return ``s`` incremented by one."""
    step = 1
    return s + step

In [5]:
# Fix year to Date column
def yearFixer(s):
    """Expand a two-digit year in a '/'-separated date string to 20YY.

    ``'21/03/17'`` becomes ``'21/03/2017'``. A string whose third field is
    not exactly two characters long (e.g. it already holds a four-digit year)
    is returned with that field untouched, so applying the function twice is
    harmless. Leading/trailing whitespace is stripped before parsing.

    Raises IndexError if ``s`` has fewer than three '/'-separated fields.
    """
    parts = s.strip().split('/')
    year = parts[2]
    # Only prefix the century when the year really is in short form;
    # slicing off the last two characters unconditionally would corrupt
    # dates that already carry a full year.
    if len(year) == 2:
        parts[2] = '20' + year
    return '/'.join(parts)

In [6]:
# Turn a Date string into a datetime object
def dateConverter(s):
    """Parse ``s``, written as day/month/four-digit-year, into a datetime."""
    return datetime.strptime(s, "%d/%m/%Y")

## Create a single CSV for each segment

### Places

In [7]:
# Places df: read every daily export in filename order and concatenate once
# (growing a DataFrame with concat inside the loop copies all rows each time).
daily_frames = [pd.read_csv(os.path.join(path_to_places, file_name))
                for file_name in sorted(csv_files_places)]
df_places = pd.concat(daily_frames)

# Drop the Category and Link columns; reset_index() moves each row's position
# within its source file into a regular 'index' column.
df_places = df_places.drop(['Category', 'Link'], axis=1).reset_index()

# Change start and end time to datetime type
df_places['Start'] = pd.to_datetime(df_places['Start'])
df_places['End'] = pd.to_datetime(df_places['End'])
df_places['Date'] = df_places['Date'].apply(yearFixer).apply(dateConverter)

# Rename index column to iter_no like STRAVA (one-based position within the day file)
df_places = df_places.rename(columns={'index': 'iter_no'})
df_places['iter_no'] = df_places['iter_no'].apply(iterNo)

# Create day_no like STRAVA: 1 for the earliest date, 2 for the next, ...
# factorize(sort=True) yields, per row, the rank of its date among the sorted
# unique dates, so each row gets the right number even when the rows are not
# already in date order (the previous nested loop emitted values in date
# order and relied on the rows being sorted the same way).
df_places['day_no'] = pd.factorize(df_places['Date'], sort=True)[0] + 1

In [8]:
# Check if columns are correct: show the first 10 rows of the combined places frame
df_places.head(10)

Unnamed: 0,iter_no,Date,Name,Start,End,Duration,Latitude,Longitude,day_no
0,1,2017-03-21,DTU Bibliotek,2017-03-21 13:02:45,2017-03-21 16:16:43,11638,55.78699,12.52329,1
1,2,2017-03-21,Lyngby Svømmehal,2017-03-21 16:23:42,2017-03-21 17:42:07,4705,55.781602,12.507931,1
2,3,2017-03-21,Place in Kongens Lyngby,2017-03-21 17:49:24,2017-03-21 18:21:05,1901,55.78699,12.52329,1
3,4,2017-03-21,Place in Kongens Lyngby,2017-03-21 18:21:05,2017-03-21 18:31:17,612,55.787481,12.526896,1
4,5,2017-03-21,Home,2017-03-21 19:07:06,2017-03-21 23:00:00,13974,55.67439,12.59182,1
5,1,2017-03-22,Home,2017-03-21 23:00:00,2017-03-22 07:42:31,31351,55.67439,12.59182,2
6,2,2017-03-22,Dtu 324,2017-03-22 08:29:52,2017-03-22 20:07:48,41876,55.783588,12.518219,2
7,3,2017-03-22,Place in Kongens Lyngby,2017-03-22 20:20:32,2017-03-22 20:30:49,617,55.786867,12.528193,2
8,4,2017-03-22,"Place in Christianshavn, København",2017-03-22 21:08:28,2017-03-22 23:00:00,6692,55.67439,12.59182,2
9,1,2017-03-23,"Place in Christianshavn, København",2017-03-22 23:00:00,2017-03-23 07:12:15,29535,55.67439,12.59182,3


### Summary