In [36]:
import glob
import os
import numpy as np
import pandas as pd
from itertools import repeat
import json
from scipy import signal
from matplotlib import pyplot as plt
import pickle
%matplotlib inline

In [2]:
# Length of one analysis window in seconds; multiplied by the sampling rate
# (samples per second) to get the number of samples per window.
input_len = 16

In [3]:
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# the processing order -- and therefore the resulting dataset -- is reproducible.
files = sorted(glob.glob('wrist/*.csv'))

In [4]:
# Filled by the preprocessing loop: participant ID -> [stacked window array,
# list of activity-class labels].
dataset = {}

In [5]:
def get_samplingrate(df):
    """Estimate the sampling rate of a recording in samples per second.

    The second row of ``df`` is used as the reference: the value at column
    position 1 is taken as the task name and the values at positions 3 and 4
    are parsed as that task's start and end timestamps. The rate is the
    number of rows whose ``Task`` equals the reference task divided by the
    task duration in seconds.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with a ``Task`` column and at least two rows.

    Returns
    -------
    int
        Sampling rate rounded to the nearest integer. Rounding (rather than
        truncating) keeps e.g. 29.9 samples/s from being reported as 29.

    Raises
    ------
    ValueError
        If the reference task's duration is not a positive number of seconds.
    """
    reference = df.iloc[1]
    task = reference.iloc[1]
    start = pd.to_datetime(reference.iloc[3])
    end = pd.to_datetime(reference.iloc[4])

    secs = (end - start).total_seconds()
    # `not secs > 0` also catches NaN durations produced by missing timestamps.
    if not secs > 0:
        raise ValueError(
            "cannot estimate sampling rate: task duration is %r seconds" % (secs,)
        )

    n_samples = int((df.Task == task).sum())
    return int(round(n_samples / secs))

In [6]:
def subsample(data,in_freq,new_freq = 30):
    """Resample windows along axis 1 from ``in_freq`` to ``new_freq``.

    The number of output samples is derived from the actual window length
    (``data.shape[1] / in_freq`` seconds) instead of assuming 16-second
    windows, so a 16 s window still yields ``16 * new_freq`` samples while
    other window lengths keep their duration.

    Parameters
    ----------
    data : array_like
        Windows with time along axis 1.
    in_freq : int or float
        Sampling rate of ``data`` in samples per second; must be positive.
    new_freq : int or float, optional
        Target sampling rate in samples per second (default 30).

    Returns
    -------
    numpy.ndarray
        ``data`` resampled along axis 1.

    Raises
    ------
    ValueError
        If ``in_freq`` is not positive.
    """
    if not in_freq > 0:
        raise ValueError("in_freq must be positive, got %r" % (in_freq,))

    data = np.asarray(data)
    n_out = int(round(data.shape[1] * new_freq / in_freq))
    # Fourier resampling only needs the target sample count; the time vector
    # the original built was discarded and did not affect the result.
    return signal.resample(data, n_out, axis=1)

In [7]:
#filter only gravity acceleration out of the data
def butter_lowpass(cutoff, fs, order=5):
    """Design a Butterworth low-pass filter.

    Parameters
    ----------
    cutoff : number
        Cutoff frequency, in the same units as ``fs``.
    fs : number
        Sampling rate; half of it is used to normalise ``cutoff``.
    order : int, optional
        Filter order (default 5).

    Returns
    -------
    tuple
        ``(b, a)`` numerator and denominator coefficients from
        ``scipy.signal.butter``.
    """
    half_rate = 0.5 * fs
    numerator, denominator = signal.butter(
        order,
        float(cutoff) / half_rate,
        btype='lowpass',
    )
    return numerator, denominator

def butter_lowpass_filter(data, cutoff_freq, nyq_freq, order=4):
    """Low-pass filter ``data`` along axis 1 with ``scipy.signal.filtfilt``.

    Parameters
    ----------
    data : array_like
        Signal with time along axis 1.
    cutoff_freq : number
        Cutoff frequency passed to ``butter_lowpass``.
    nyq_freq : number
        NOTE(review): despite its name, this value is forwarded as the
        sampling rate ``fs`` of ``butter_lowpass``, which halves it again
        before normalising the cutoff -- confirm what callers pass.
    order : int, optional
        Filter order (default 4).

    Returns
    -------
    numpy.ndarray
        The filtered signal.
    """
    coeffs = butter_lowpass(cutoff_freq, nyq_freq, order=order)
    return signal.filtfilt(coeffs[0], coeffs[1], data, axis=1)

In [8]:
#band pass filter
def butter_bandpass(lowcut, highcut, fs, order=6):
    """Design a Butterworth band-pass filter.

    Parameters
    ----------
    lowcut, highcut : number
        Lower and upper band edges, in the same units as ``fs``.
    fs : number
        Sampling rate; half of it is used to normalise both edges.
    order : int, optional
        Order passed to ``scipy.signal.butter`` (default 6).

    Returns
    -------
    tuple
        ``(b, a)`` numerator and denominator coefficients.
    """
    nyquist = 0.5 * fs
    band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = signal.butter(order, band, btype='band')
    return numerator, denominator


def butter_bandpass_filter(data, lowcut, highcut, fs, order=6):
    """Band-pass filter ``data`` along axis 1 with ``scipy.signal.filtfilt``.

    Parameters
    ----------
    data : array_like
        Signal with time along axis 1.
    lowcut, highcut : number
        Band edges passed to ``butter_bandpass``.
    fs : number
        Sampling rate of ``data``.
    order : int, optional
        Order passed to ``butter_bandpass`` (default 6).

    Returns
    -------
    numpy.ndarray
        The filtered signal.
    """
    coeffs = butter_bandpass(lowcut, highcut, fs, order=order)
    return signal.filtfilt(coeffs[0], coeffs[1], data, axis=1)

In [9]:
# Task names grouped into the three activity classes used as labels.
sedentary = [
    "TV WATCHING",
    "STANDING STILL",
    "COMPUTER WORK",
]

locomotion = [
    "LEISURE WALK",
    "RAPID WALK",
    "STAIR DESCENT",
    "STAIR ASCENT",
    "WALKING",
]

lifestyle = [
    "WASHING DISHES",
    "IRONING",
    "PERSONAL CARE",
    "SHOPPING",
    "UNLOADING STORING DISHES",
    "DRESSING",
    "PREPARE SERVE MEAL",
    "LAUNDRY WASHING",
    "WASHING WINDOWS",
    "LIGHT GARDENING",
    "SWEEPING",
    "LIGHT HOME MAINTENANCE",
    "STRAIGHTENING UP DUSTING",
    "REPLACING SHEETS ON A BED",
    "MOPPING",
    "VACUUMING",
    "TRASH REMOVAL",
    "DIGGING",
    "YARD WORK",
    "HEAVY LIFTING",
]

In [22]:
# Running counts of tasks seen per activity class, across all files.
i = 0  # sedentary
j = 0  # locomotion
k = 0  # lifestyle

# Every window is brought to this rate before filtering, so the band-pass
# filter below (designed for this rate) and the window size are valid for
# all recordings.
target_freq = 30
# Band-pass edges, in the same units as target_freq.
low = 0.6
high = 2.5

for file in files:
    # Participant ID is the file name minus its 14-character suffix.
    # os.path.basename copes with the platform's path separator, unlike
    # splitting on '/'.
    # NOTE(review): two files that yield the same ID overwrite each other in
    # `dataset` (see the assignment at the end of this loop).
    participant = os.path.basename(file)[:-14]
    print (participant)
    data = []
    label = []
    df = pd.read_csv(file,index_col=False)
    freq = get_samplingrate(df)
    tasks = np.unique(df.Task)
    for task in tasks:
        # Map the task to its activity class; tasks outside the three
        # classes are skipped.
        if task in sedentary:
            task1 = 'sedentary'
            i+=1
        elif task in locomotion:
            task1 = 'locomotion'
            j+=1
        elif task in lifestyle:
            task1 = 'lifestyle'
            k+=1
        else:
            continue

        # Cut the task's X/Y/Z samples into whole windows of input_len
        # seconds, dropping the incomplete tail.
        task_df = df.loc[df.Task == task]
        task_df = task_df[['X','Y','Z']].to_numpy()
        samples_per_window = input_len*freq
        size = len(task_df)//samples_per_window
        task_df = task_df[:size*samples_per_window]
        task_df = np.reshape(task_df,(-1,samples_per_window,3))

        # Resample whenever the recording is not already at the target rate.
        # Previously only faster recordings were resampled, so slower ones
        # were filtered with the wrong sampling rate and kept a different
        # window length.
        if freq != target_freq:
            reduced = subsample(task_df,freq)
        else:
            reduced = task_df

        #band pass filter
        filtered = butter_bandpass_filter(reduced,low,high,target_freq)

        data.append(filtered)
        # One label per window (task_df's first axis is the window index).
        label.extend(repeat(task1,len(task_df)))
    if len(data)==0:
        continue
    data = np.vstack(data)
    dataset[participant]= [data,label]
    
    
    
    

ZOEY
ABBY
ADAD
ADMC
ALSH
ALWE
ANAD
ANCR
ASIA
AUBR
BAEL
BAVA
BELA
BEMC


  interactivity=interactivity, compiler=compiler, result=result)


BESM
BHTR
BRHO
CABO
CACL
CACO
CAFI
CAHA
CAHO
CAHU
CEKE
CHDO
CHTH
CLFE
CYSA
DAGO
DAGO
DANI
DARA
DAVA
DEFE
DEMO
DIDI
DIWI
DOCA
DOGI
DOKI
DOMI
DOSI
ECHO
ELLA
ELNA
ELRE
ELSU
ERPU
EVDO
FABU
FLTO
FRHU
FRTA
GADE
GAIA
GATR
GRMC
HEAH
HEPA
HERH
HUMC
JABU
JAHA
JAJE
JAJO
JAME
JASA
JATA
JECA
JEDI
JEFF
JEGA
JERO
JESW
JICH
JIGA
JOBR
JOCH
JOGA
JOHE
JOHN
JOLE
JOLO
JOLO
JULE
JUSA
KAFO
KAIA
KALA
KAPO
KARA
KASI
KAWI
KECH
KEMC
KEMI
KENO
KIWI
KRRO
LIRE
LIWI
LORE
LURH
LYFA
MACA
MAGA
MAHA
MALE
MALI
MAMA
MAME
MAPE
MAPL
MARK
MARY
MASC
MEBA
MENA
MESA
MIAT
NABR
NALA
NOWI
OSWA
PABE
PAMA
PAZE
PECA
PEED
PEPE
PINK
PRTR
PRTU
REJO
RILY
RIMI
ROCA
ROGI
ROHO
ROOW
ROWE
SALE
SAPH
SHBR
SHEA
SHWI
STBA
STCO
SUFA
SUPE
TALI
TAPA
TESS
TIKI
TOBR
VIBU
VIDO
VIJE
VIMI
VIPU
VIRE
WEHA
WIAD
WIPE
WOBL
ZARA


In [37]:
# Write the per-participant dataset to 'data.p' using the newest pickle protocol.
with open('data.p', 'wb') as out_file:
    pickle.dump(dataset, out_file, protocol=pickle.HIGHEST_PROTOCOL)

376 530 2590


In [30]:
# Participants kept in the dataset vs. input files found.
len(dataset), len(files)

(145, 167)

['sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'sedentary',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'lifestyle',
 'life