## Load Libraries

In [1]:
import pandas as pd
import numpy as np

# Load sleep CSV

In [2]:
# Read the Fitbit sleep export into a pandas DataFrame and preview its first rows
path = 'fitbit_sleep.csv'
df = pd.read_csv(filepath_or_buffer=path)
df.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Sleep
Date,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed
15-05-2017,0,0,0,0
16-05-2017,0,0,0,0
17-05-2017,391,20,1,412
18-05-2017,334,23,1,357


## Useful functions

In [3]:
# Formats a duration given in minutes as an 'hh:mm' string
def minToHours(s):
    """Convert a number of minutes into a zero-padded 'hh:mm' string.

    Parameters
    ----------
    s : anything accepted by int()
        Duration in minutes (e.g. 412 or '412'). It is truncated to an
        integer with int() before being split.

    Returns
    -------
    str
        Hours and remaining minutes joined by ':', each padded to at least
        two characters (e.g. 412 -> '06:52').
    """
    # Split the total into whole hours and the leftover minutes
    whole_hours, leftover = divmod(int(s), 60)

    # A minimum width of two with zero fill pads single-digit values only
    return '{:02d}:{:02d}'.format(whole_hours, leftover)

In [4]:
# Reorder a DD-MM-YYYY date string so it matches the YYYY-MM-DD format used elsewhere
def dayTransformer(s):
    """Turn a 'DD-MM-YYYY' string into 'YYYY-MM-DD'.

    Parameters
    ----------
    s : str
        Date string whose first three '-'-separated fields are day, month
        and year. Any further fields are ignored.

    Returns
    -------
    str
        The same three fields joined as year-month-day.

    Raises
    ------
    IndexError
        If the string has fewer than three '-'-separated fields.
    """
    fields = s.split('-')
    day, month, year = fields[0], fields[1], fields[2]
    return '-'.join([year, month, day])

## Modify sleep data

In [5]:
# Build the tidy sleep table in one chain:
#   1. skip the first row and move the index into regular columns
#   2. rename the two columns of interest
#   3. keep only those two columns
#   4. add the hh:mm duration and rewrite the day as YYYY-MM-DD
# NOTE(review): in the head() output above, the 'Sleep' column lines up with the
# 'Time in Bed' label (e.g. 412 for 17-05-2017, where 'Minutes Asleep' is 391) —
# confirm that time in bed is the intended measure for sleep_min.
df = (
    df.iloc[1:]
    .reset_index()
    .rename(columns={'level_0': 'day', 'Sleep': 'sleep_min'})
    .filter(items=['day', 'sleep_min'])
    .assign(
        # Sleep duration in hh:mm format, derived from sleep_min
        sleep_duration=lambda frame: frame['sleep_min'].apply(minToHours),
        # Day rewritten from DD-MM-YYYY to YYYY-MM-DD
        day=lambda frame: frame['day'].apply(dayTransformer),
    )
)

In [6]:
# Display the transformed sleep table (day, sleep_min, sleep_duration)
df

Unnamed: 0,day,sleep_min,sleep_duration
0,2017-05-15,0,00:00
1,2017-05-16,0,00:00
2,2017-05-17,412,06:52
3,2017-05-18,357,05:57
4,2017-05-19,0,00:00
5,2017-05-20,0,00:00


# Load heart rate (HR) CSV

In [8]:
# Read the Fitbit heart-rate export into a pandas DataFrame and preview its first rows
path_2 = 'fitbit_HR.csv'
df_2 = pd.read_csv(filepath_or_buffer=path_2)
df_2.head()

Unnamed: 0,Date,Resting Heart Rate,Normal Min. Heart Rate,Normal Max. Heart Rate,Normal Calories Burned,Normal Minutes,Fat Burn Min. Heart Rate,Fat Burn Max. Heart Rate,Fat Burn Calories Burned,Fat Burn Minutes,Cardio Min. Heart Rate,Cardio Max. Heart Rate,Cardio Calories Burned,Cardio Minutes,Peak Min. Heart Rate,Peak Max. Heart Rate,Peak Calories Burned,Peak Minutes
0,2017-05-01,,30,95,,,95,133,,,133,161,,,161,220,,
1,2017-05-02,,30,95,,,95,133,,,133,161,,,161,220,,
2,2017-05-03,,30,95,,,95,133,,,133,161,,,161,220,,
3,2017-05-04,,30,95,,,95,133,,,133,161,,,161,220,,
4,2017-05-05,,30,95,,,95,133,,,133,161,,,161,220,,


In [9]:
# Reduce the heart-rate table to the date and resting heart rate,
# then give both columns short names
df_2 = (
    df_2
    .filter(items=['Date', 'Resting Heart Rate'])
    .rename(columns={'Date': 'day', 'Resting Heart Rate': 'rest_HR'})
)

In [10]:
# Display the heart-rate table reduced to the day and rest_HR columns
df_2

Unnamed: 0,day,rest_HR
0,2017-05-01,
1,2017-05-02,
2,2017-05-03,
3,2017-05-04,
4,2017-05-05,
5,2017-05-06,
6,2017-05-07,
7,2017-05-08,
8,2017-05-09,
9,2017-05-10,
