In [53]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import warnings
#warnings.simplefilter("ignore", category=DeprecationWarning)
#warnings.simplefilter("ignore", category=PendingDeprecationWarning)
#warnings.simplefilter("ignore", category=ImportWarning)
warnings.filterwarnings("ignore")


# Standard library imports.
import os
from datetime import timedelta, datetime
import pprint
import pickle
import json
from typing import List, Dict

# Related third party imports.
import matplotlib.pyplot as plt
import numpy as np

# Local application/library specific imports.
from cerebralcortex.cerebralcortex import CerebralCortex
from cerebralcortex.core.datatypes.datastream import DataPoint
import utils
import validation


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [48]:
# Directories holding the configuration files and the input/output data,
# relative to the notebook's working directory.
config_path = '../config/'
data_path = '../data/'


# Target sensor streams and the filenames they are saved under.
# One row per stream: (stream label, filename prefix, validation function).
# The three parallel lists used by later cells are derived from this table so
# that they always stay aligned.
_target_streams = [
    ('ACTIVITY_TYPE--org.md2k.phonesensor--PHONE', 'act_type', validation.validate_activity_type),
    ('STEP_COUNT--org.md2k.phonesensor--PHONE', 'step_cnt', validation.validate_step_count),
    ('ACCELEROMETER--org.md2k.phonesensor--PHONE', 'accel', validation.validate_accelerometer),
    ('GYROSCOPE--org.md2k.phonesensor--PHONE', 'gyro', validation.validate_gyroscope),
]
target_labels = [row[0] for row in _target_streams]
target_filenames = [row[1] for row in _target_streams]
valid_func = [row[2] for row in _target_streams]

In [21]:
# Load all user IDs
with open(config_path+'users.json', 'r') as f:
    USR_IDS = json.load(f)

# Load CerebralCortex Configurations
# NOTE(review): absolute, machine-specific path; the notebook will not run
# elsewhere unless this file exists at the same location.
CC = CerebralCortex("/home/mperf/sandeep/CerebralCortex-DockerCompose/cc_config_file/cc_vagrant_configuration.yml")

# Load GroundTruth
# The file is opened in a with-block so the handle is closed once loading is
# done. pickle can execute arbitrary code while loading, so this file must
# come from a trusted source.
with open(data_path+'at_desk_groundtruth.pkl', 'rb') as f:
    at_desk = pickle.load(f)

pp = pprint.PrettyPrinter(indent=2)

# Create directories

In [49]:
# Create one output directory per user under data_path.
# Only an already-existing path is reported as "Already Existed"; any other
# failure (e.g. permissions, missing parent directory) is printed with its
# actual reason instead of being mislabelled.

for usr_id in USR_IDS:
    path = os.path.join(data_path, usr_id)
    try:
        os.mkdir(path)
    except FileExistsError:
        print ("Creation of the directory %s failed. Already Existed." % path)
    except OSError as err:
        print ("Creation of the directory %s failed: %s" % (path, err))
    else:
        print ("Successfully created the directory %s " % path)

Successfully created the directory ../data/00162d05-3248-4b7d-b4f6-8593b4faaa63 
Successfully created the directory ../data/03996723-2411-4167-b14b-eb11dfc33124 
Successfully created the directory ../data/00ab666c-afb8-476e-9872-6472b4e66b68 
Successfully created the directory ../data/03c26210-7c9f-4bf2-b1c2-59d0bd64ffac 
Successfully created the directory ../data/022e4ff8-e1af-43dc-b747-862ac83518d2 
Successfully created the directory ../data/03ec3750-641a-4039-8b5d-74b485bde1ea 
Successfully created the directory ../data/02e82ef0-acb8-4366-8c83-4c3f2f69f7ea 
Successfully created the directory ../data/0457f007-211c-4dc3-844e-47d724fece51 


# Filter out useless stream days

In [34]:
def get_dates(streams):
    """Return the distinct days covered by the endpoints of each entry.

    Args:
        streams: iterable of indexable items whose elements ``[0]`` and ``[1]``
            both provide a ``.date()`` method (e.g. ``datetime`` objects).

    Returns:
        Sorted list of unique day strings formatted as ``'%Y%m%d'``, built
        from both endpoints of every entry. Empty list for empty input.
    """
    days = {
        endpoint.date().strftime('%Y%m%d')
        for stream in streams
        for endpoint in (stream[0], stream[1])
    }
    return sorted(days)

def remove_useless_days(at_desk_days, stream_days):
    """Keep only the days of ``at_desk_days`` that also occur in ``stream_days``.

    Args:
        at_desk_days: sequence of day values (the callers in this notebook
            pass ``'%Y%m%d'`` strings); their order is preserved in the result.
        stream_days: iterable of hashable day values to test against.

    Returns:
        New list with the entries of ``at_desk_days`` found in ``stream_days``.
        Returns an empty list when either argument is empty or ``None``.

    Side effects:
        Prints ``'remove <day>'`` for every day that is dropped.
    """
    results = list()
    if not at_desk_days:
        return results
    if not stream_days:
        return results

    # Build the lookup once: membership is then O(1) per day instead of a
    # linear scan, and a one-shot iterable is not exhausted by the first test.
    available_days = set(stream_days)
    for at_desk_day in at_desk_days:
        if at_desk_day in available_days:
            results.append(at_desk_day)
        else:
            print('remove ' + at_desk_day)
    return results
    

In [54]:
# For every user, start from the days present in the at-desk ground truth and
# drop each day on which one of the target streams has no data. The result is
# stored per user in usr_work_days and pickled to data_path.
usr_work_days = dict()
for usr_id in USR_IDS:
    print('User '+usr_id)
    usr_streams = CC.get_user_streams(usr_id)
    at_desk_days = get_dates(at_desk[usr_id])
    for lbl in target_labels:
        print('\t'+lbl)
        # Keep the try narrow: only a missing stream label is reported as
        # "does not have stream". A KeyError raised further down is a
        # different problem and must not be hidden behind that message.
        try:
            target_stream = usr_streams[lbl]
        except KeyError:
            # NOTE(review): a missing stream leaves at_desk_days unfiltered
            # for this label -- confirm that this is intended.
            print(usr_id + " does not have stream " + lbl)
            continue

        # Collect the days of every stream id registered under this label.
        stream_days = list()
        for stream_id in target_stream['stream_ids']:
            stream_days.extend(CC.get_stream_days(stream_id))
        at_desk_days = remove_useless_days(at_desk_days, stream_days)
    usr_work_days[usr_id] = at_desk_days

with open(data_path+'usr_work_days.pkl', 'wb') as f:
    pickle.dump(usr_work_days, f)

User 00162d05-3248-4b7d-b4f6-8593b4faaa63
	ACTIVITY_TYPE--org.md2k.phonesensor--PHONE
	STEP_COUNT--org.md2k.phonesensor--PHONE
00162d05-3248-4b7d-b4f6-8593b4faaa63 does not have stream STEP_COUNT--org.md2k.phonesensor--PHONE
	ACCELEROMETER--org.md2k.phonesensor--PHONE
	GYROSCOPE--org.md2k.phonesensor--PHONE
User 03996723-2411-4167-b14b-eb11dfc33124
	ACTIVITY_TYPE--org.md2k.phonesensor--PHONE
	STEP_COUNT--org.md2k.phonesensor--PHONE
	ACCELEROMETER--org.md2k.phonesensor--PHONE
	GYROSCOPE--org.md2k.phonesensor--PHONE
User 00ab666c-afb8-476e-9872-6472b4e66b68
	ACTIVITY_TYPE--org.md2k.phonesensor--PHONE
remove 20171211
	STEP_COUNT--org.md2k.phonesensor--PHONE
remove 20171120
remove 20171121
remove 20171122
remove 20171127
remove 20171128
remove 20171129
remove 20171130
remove 20171201
remove 20171204
remove 20171205
remove 20171206
remove 20171207
remove 20171208
remove 20171209
remove 20171210
	ACCELEROMETER--org.md2k.phonesensor--PHONE
	GYROSCOPE--org.md2k.phonesensor--PHONE
User 03c26210

In [44]:
# Show how many usable days were kept for each user.
for usr, kept_days in usr_work_days.items():
    print(usr, len(kept_days))

00162d05-3248-4b7d-b4f6-8593b4faaa63 0
03996723-2411-4167-b14b-eb11dfc33124 34
00ab666c-afb8-476e-9872-6472b4e66b68 25
03c26210-7c9f-4bf2-b1c2-59d0bd64ffac 26
022e4ff8-e1af-43dc-b747-862ac83518d2 25
03ec3750-641a-4039-8b5d-74b485bde1ea 14
02e82ef0-acb8-4366-8c83-4c3f2f69f7ea 42
0457f007-211c-4dc3-844e-47d724fece51 9


In [None]:
# Export each target stream of each user: for every stream day that is also
# in usr_work_days, fetch the stream, run its validation function, sort the
# result and save it as <data_path>/<usr_id>/<fname><stream_day>.npz.
for usr_id in USR_IDS:
    print('User '+usr_id)
    usr_streams = CC.get_user_streams(usr_id)
    # Looked up once per user and outside the try below, so that a user
    # missing from usr_work_days is not misreported as a missing stream.
    work_days = set(usr_work_days[usr_id])
    for lbl, fname, func in zip(target_labels, target_filenames, valid_func):
        print('\t'+lbl)
        # Only the label lookup is guarded; errors raised while fetching or
        # saving data are real failures and are allowed to surface.
        try:
            target_stream = usr_streams[lbl]
        except KeyError:
            print(usr_id + " does not have stream " + lbl)
            continue

        # Enumerate every stream id registered under this label.
        for stream_id in target_stream['stream_ids']:
            for stream_day in CC.get_stream_days(stream_id):
                if stream_day not in work_days:
                    continue
                ds = CC.get_stream(stream_id, usr_id, stream_day)
                data = func(ds.data)
                data.sort()
                # NOTE(review): the filename does not include stream_id, so two
                # stream ids of one label sharing a day write to the same file
                # and the later one wins -- confirm this is acceptable.
                out_path = os.path.join(data_path, usr_id, fname + stream_day)
                np.savez(out_path, utils.to_numpy_array(data))
     

    #left_accel[usr_id] = utils.extract_all_data(CC, usr_id, 'ACCELEROMETER--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST')
    #left_gyro[usr_id] = utils.extract_all_data(CC, usr_id, 'GYROSCOPE--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST')

User 00162d05-3248-4b7d-b4f6-8593b4faaa63
	ACTIVITY_TYPE--org.md2k.phonesensor--PHONE
	STEP_COUNT--org.md2k.phonesensor--PHONE
00162d05-3248-4b7d-b4f6-8593b4faaa63 does not have stream STEP_COUNT--org.md2k.phonesensor--PHONE
	ACCELEROMETER--org.md2k.phonesensor--PHONE
	GYROSCOPE--org.md2k.phonesensor--PHONE
User 03996723-2411-4167-b14b-eb11dfc33124
	ACTIVITY_TYPE--org.md2k.phonesensor--PHONE
	STEP_COUNT--org.md2k.phonesensor--PHONE
	ACCELEROMETER--org.md2k.phonesensor--PHONE
