### Python documentation for the MIDS program
#### Due November 20, 2015 to Courtney

What I want to do:
- Import all data from Actigraph, Azumio, and Fitbit
- Make graphs comparing all three data sources for each user
- For each data source individually:
    - Average steps over a 7-day period
- With the Actigraph data, get MVPA (moderate-to-vigorous physical activity)

In [78]:
import pandas as pd
import numpy as np
import os.path
import glob

In [79]:
# Threshold for moderate-to-vigorous physical activity (MVPA); accelerometer
# rows whose Axis1 value is above this number are flagged as MVPA
mvpa_threshold = 1951

# Empty frame that will hold the combined per-user accelerometer results
accel_total = pd.DataFrame()

# Paths of every per-user accelerometer .csv export
accel_file = glob.glob(
    "/Users/megan/Desktop/MIDS project/Accel CSV data/User*_ActigraphMay.csv"
)

# Per-user date-range table: the first column (user id) becomes the index, and
# the 'Initial' and 'Final' columns are parsed as dates
dates = pd.read_csv(
    '/Users/megan/Desktop/MIDS project/Dates.csv',
    parse_dates=['Initial', 'Final'],
    index_col=0,
)

In [80]:
# Building one daily-totals frame per accelerometer file. The frames are
# collected in a list and concatenated once after the loop: DataFrame.append was
# removed in pandas 2.0, and rebuilding the list on every run keeps this cell
# idempotent (re-running it no longer stacks duplicate rows onto accel_total).
daily_frames = []

for file in accel_file:
    # Reading the accelerometer file and storing it as a dataframe
    accel = pd.read_csv(file, skipinitialspace=True, parse_dates=['Date'])

    # Storing the user ID from the file name ("User<ID>_..." -> <ID>)
    user_id = int(os.path.basename(file).split("_")[0].replace('User', ''))

    # Limiting dates to the days of the study: only rows strictly after the
    # user's Initial date and strictly before the Final date are kept
    date_range = dates.loc[user_id]
    wear_dates = (accel.Date > date_range.Initial) & (accel.Date < date_range.Final)
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a slice of the original (avoids SettingWithCopyWarning)
    accel = accel[wear_dates].copy()

    # Flagging rows whose Axis1 value exceeds the MVPA threshold; summing the
    # flags per day below gives the number of flagged rows for that day
    # (presumably one row per minute, i.e. MVPA minutes - TODO confirm epoch length)
    accel['MVPA'] = accel['Axis1'] > mvpa_threshold

    # Grouping by study day and totalling only the columns that are kept, so
    # any other columns in the file cannot break or pollute the sum
    daily_totals = accel.groupby("Date")[['Steps', 'MVPA']].sum().reset_index()
    daily_totals['User'] = user_id
    daily_totals['Type'] = 'Actigraph'

    daily_frames.append(daily_totals)

if not daily_frames:
    raise ValueError("No accelerometer files were found; accel_file is empty")

# Combining every user's daily totals and fixing the column order
accel_total = pd.concat(daily_frames, ignore_index=True)
accel_total = accel_total[['User', 'Type', 'Date', 'Steps', 'MVPA']]

In [81]:
# Reading in fitbit data and editing columns to match the Actigraph data
fitbit = pd.read_csv('/Users/megan/Desktop/MIDS project/Fitbit .txt files/meas_fitbit_intraday_1wk_4.txt', sep='|', parse_dates=['date'])
fitbit.rename(columns={'user_id':'User', 'date':'Date', 'steps':'Steps'}, inplace=True)
fitbit = fitbit.drop('minute', axis=1)

# Dropping dates outside of the date range for each user
fitbit = fitbit.join(dates, on='User')
date_bool1 = np.array(fitbit.Date > fitbit.Initial)
date_bool2 = np.array(fitbit.Date < fitbit.Final)
wear_dates = np.logical_and(date_bool1, date_bool2)
fitbit = fitbit[wear_dates]


<class 'numpy.datetime64'>
<class 'numpy.datetime64'>


In [101]:
# Grouping by study day, calculating total steps per day, and saving to a new dataframe
by_date = fitbit.groupby(["User", "Date"])
daily_totals = by_date.sum().reset_index()
daily_totals['Type'] = 'Fitbit'
fitbit_total = daily_totals[['User', 'Type', 'Date', 'Steps']]

In [110]:
# Stacking the Actigraph and Fitbit daily totals. The Actigraph frame built
# above is accel_total (the previous name `total` is not defined anywhere in
# this notebook). fitbit_total has no MVPA column, so its rows get NaN there.
both = pd.concat([accel_total, fitbit_total])

# Ordering rows so each user's days are adjacent across both sources;
# sort_values replaces DataFrame.sort, which was removed from pandas
both = both.sort_values(['User', 'Date'])
both = both[['User', 'Type', 'Date', 'Steps', 'MVPA']]
both

Unnamed: 0,User,Type,Date,Steps,MVPA
160,701,Actigraph,2015-05-18,9656,66
0,701,Fitbit,2015-05-18,12208,
161,701,Actigraph,2015-05-19,6649,41
1,701,Fitbit,2015-05-19,11458,
162,701,Actigraph,2015-05-20,7936,56
2,701,Fitbit,2015-05-20,11521,
163,701,Actigraph,2015-05-21,8030,51
3,701,Fitbit,2015-05-21,9595,
164,701,Actigraph,2015-05-22,11172,75
4,701,Fitbit,2015-05-22,14864,
