#### Here we load the protocol data and then linearly interpolate the heartrate data for each subject

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
import time

In [28]:
#Functions to load data 
#Slightly adapted from Chiu-Yun's code

def exportColName():
    
    '''
    Create columns names
    '''
    
    handColName=['hand_temp', 'hand_acc16g_x', 'hand_acc16g_y', 'hand_acc16g_z', 'hand_acc6g_x', 'hand_acc6g_y', 'hand_acc6g_z', 
             'hand_gyro_x', 'hand_gyro_y', 'hand_gyro_z', 'hand_mag_x', 'hand_mag_y', 'hand_mag_z', 'hand_ori_0', 'hand_ori_1', 
             'hand_ori_2', 'hand_ori_3']
    chestColName=['chest_temp', 'chest_acc16g_x', 'chest_acc16g_y', 'chest_acc16g_z', 'chest_acc6g_x', 'chest_acc6g_y', 
                  'chest_acc6g_z', 'chest_gyro_x', 'chest_gyro_y', 'chest_gyro_z', 'chest_mag_x', 'chest_mag_y', 'chest_mag_z', 
                  'chest_ori_0', 'chest_ori_1', 'chest_ori_2', 'chest_ori_3']
    ankleColName=['ankle_temp', 'ankle_acc16g_x', 'ankle_acc16g_y', 'ankle_acc16g_z', 'ankle_acc6g_x', 'ankle_acc6g_y', 
                  'ankle_acc6g_z', 'ankle_gyro_x', 'ankle_gyro_y', 'ankle_gyro_z', 'ankle_mag_x', 'ankle_mag_y', 'ankle_mag_z', 
                  'ankle_ori_0', 'ankle_ori_1', 'ankle_ori_2', 'ankle_ori_3']
    return ['timestamp', 'activityID', 'heart_rate']+handColName+chestColName+ankleColName


def loadSubject(filename):
    
    '''
    Load a single subject from file and return a dataframe
    '''
    
    col=exportColName()
    index = int(filename.split('.')[0][-1])
    tempData = pd.read_csv(filename, sep=' ', names=col)
    tempData['subject'] = (index)*np.ones(len(tempData))
    interpData = interpolate_heartrate(tempData)
    return interpData

def interpolate_heartrate(df):
    
    '''
    Interpolate values in the heartrate column of a dataframe so that they have the
    same sampling rate as the other columns
    '''
    
    df['heart_rate'].interpolate(inplace=True)
    
    return df


def loadAllSubjects(dirname):
    
    '''
    Load all subject files & return a dataframe
    '''
    
    if os.path.exists(dirname):
        dfiles = list(sorted(glob.glob("%s/*.dat" %dirname)))
    else:
        print ("Given dirname %s not found" %dirname)
        
    allData=loadSubject(dfiles[0])
    col=exportColName()
    
    for i in range(1,len(dfiles)):
        filename=dfiles[i]
        index = int(filename.split('.')[0][-1])
        tempData=pd.read_csv(filename, sep=' ', names=col)
        tempData['subject'] = (index)*np.ones(len(tempData))
        interpData = interpolate_heartrate(tempData)
        allData=allData.append(interpData)
        
    allData.reset_index(drop=True,inplace=True)
    
    return allData

def loadAllSubjectslist(dirname):
    
    '''
    Load all subject files & return a dataframe
    '''
    
    if os.path.exists(dirname):
        dfiles = list(sorted(glob.glob("%s/*.dat" %dirname)))
    else:
        print ("Given dirname %s not found" %dirname)
        
    col=exportColName()
    dfs = []
    
    for i in range(len(dfiles)):
        filename=dfiles[i]
        index = int(filename.split('.')[0][-1])
        tempData=pd.read_csv(filename, sep=' ', names=col)
        tempData['subject'] = (index)*np.ones(len(tempData))
        interpData = interpolate_heartrate(tempData)
        dfs.append(interpData)
    
    allData = pd.concat(dfs)
    allData.reset_index(drop=True,inplace=True)
    
    return allData


In [29]:
protocol_datadir = "/Users/rmartinshort/Documents/Berkeley/GDSO/PAMAP2_data/PAMAP2_Dataset/Protocol"

In [30]:
st= time.time()
all_data = loadAllSubjects(protocol_datadir)
ed = time.time()

In [31]:
print(ed-st)

75.17105603218079


In [32]:
st= time.time()
all_data = loadAllSubjectslist(protocol_datadir)
ed = time.time()

In [33]:
print(ed-st)

49.83340620994568


Using a list and pd.concat makes the process of loading the data slightly faster it seems

In [35]:
all_data = all_data[all_data['activityID'] != 0]

In [None]:
all_data_s1 = all_data[all_data]

In [20]:
s1 = loadSubject("/Users/rmartinshort/Documents/Berkeley/GDSO/PAMAP2_data/PAMAP2_Dataset/Protocol/subject101.dat")

In [23]:
s1.head(100)

Unnamed: 0,timestamp,activityID,heart_rate,hand_temp,hand_acc16g_x,hand_acc16g_y,hand_acc16g_z,hand_acc6g_x,hand_acc6g_y,hand_acc6g_z,...,ankle_gyro_y,ankle_gyro_z,ankle_mag_x,ankle_mag_y,ankle_mag_z,ankle_ori_0,ankle_ori_1,ankle_ori_2,ankle_ori_3,subject
0,8.38,0,104.0,30.0,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,0.009250,-0.017580,-61.1888,-38.9599,-58.1438,1.0,0.0,0.0,0.0,1.0
1,8.39,0,,30.0,2.18837,8.56560,3.66179,2.39494,8.55081,3.64207,...,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1.0,0.0,0.0,0.0,1.0
2,8.40,0,,30.0,2.37357,8.60107,3.54898,2.30514,8.53644,3.73280,...,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1.0,0.0,0.0,0.0,1.0
3,8.41,0,,30.0,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1.0,0.0,0.0,0.0,1.0
4,8.42,0,,30.0,2.22936,8.83122,3.70000,2.23055,8.59741,3.76295,...,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1.0,0.0,0.0,0.0,1.0
5,8.43,0,,30.0,2.29959,8.82929,3.54710,2.26132,8.65762,3.77788,...,-0.016024,0.001050,-60.2954,-38.8778,-58.3977,1.0,0.0,0.0,0.0,1.0
6,8.44,0,,30.0,2.33738,8.82900,3.54767,2.27703,8.77828,3.73230,...,-0.053934,0.015594,-60.6307,-38.8676,-58.2711,1.0,0.0,0.0,0.0,1.0
7,8.45,0,,30.0,2.37142,9.05500,3.39347,2.39786,8.89814,3.64131,...,-0.039937,-0.000785,-60.5171,-38.9819,-58.2733,1.0,0.0,0.0,0.0,1.0
8,8.46,0,,30.0,2.33951,9.13251,3.54668,2.44371,8.98841,3.62596,...,-0.010042,0.017701,-61.2916,-39.6182,-58.1499,1.0,0.0,0.0,0.0,1.0
9,8.47,0,,30.0,2.25966,9.09415,3.43015,2.42877,9.01871,3.61081,...,-0.013923,0.014498,-60.8509,-39.0821,-58.1478,1.0,0.0,0.0,0.0,1.0


In [24]:
s1['heart_rate'].interpolate(inplace=True)

In [26]:
s1.head(100)

Unnamed: 0,timestamp,activityID,heart_rate,hand_temp,hand_acc16g_x,hand_acc16g_y,hand_acc16g_z,hand_acc6g_x,hand_acc6g_y,hand_acc6g_z,...,ankle_gyro_y,ankle_gyro_z,ankle_mag_x,ankle_mag_y,ankle_mag_z,ankle_ori_0,ankle_ori_1,ankle_ori_2,ankle_ori_3,subject
0,8.38,0,104.000000,30.0,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,0.009250,-0.017580,-61.1888,-38.9599,-58.1438,1.0,0.0,0.0,0.0,1.0
1,8.39,0,104.000000,30.0,2.18837,8.56560,3.66179,2.39494,8.55081,3.64207,...,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1.0,0.0,0.0,0.0,1.0
2,8.40,0,104.000000,30.0,2.37357,8.60107,3.54898,2.30514,8.53644,3.73280,...,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1.0,0.0,0.0,0.0,1.0
3,8.41,0,104.000000,30.0,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1.0,0.0,0.0,0.0,1.0
4,8.42,0,104.000000,30.0,2.22936,8.83122,3.70000,2.23055,8.59741,3.76295,...,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1.0,0.0,0.0,0.0,1.0
5,8.43,0,104.000000,30.0,2.29959,8.82929,3.54710,2.26132,8.65762,3.77788,...,-0.016024,0.001050,-60.2954,-38.8778,-58.3977,1.0,0.0,0.0,0.0,1.0
6,8.44,0,104.000000,30.0,2.33738,8.82900,3.54767,2.27703,8.77828,3.73230,...,-0.053934,0.015594,-60.6307,-38.8676,-58.2711,1.0,0.0,0.0,0.0,1.0
7,8.45,0,104.000000,30.0,2.37142,9.05500,3.39347,2.39786,8.89814,3.64131,...,-0.039937,-0.000785,-60.5171,-38.9819,-58.2733,1.0,0.0,0.0,0.0,1.0
8,8.46,0,104.000000,30.0,2.33951,9.13251,3.54668,2.44371,8.98841,3.62596,...,-0.010042,0.017701,-61.2916,-39.6182,-58.1499,1.0,0.0,0.0,0.0,1.0
9,8.47,0,104.000000,30.0,2.25966,9.09415,3.43015,2.42877,9.01871,3.61081,...,-0.013923,0.014498,-60.8509,-39.0821,-58.1478,1.0,0.0,0.0,0.0,1.0
