In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import warnings
warnings.simplefilter("ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=PendingDeprecationWarning)
warnings.simplefilter("ignore", category=ImportWarning)
warnings.filterwarnings("ignore")


# Standard library imports.
from datetime import timedelta, datetime
import pprint
import pickle
import json
from typing import List

# Related third party imports.
import matplotlib.pyplot as plt
import numpy as np

# Local application/library specific imports.
from cerebralcortex.cerebralcortex import CerebralCortex
import utils
import validation

In [2]:
# Read the list of user IDs from users.json in the config directory.
config_path = '../config/'
users_json = config_path+'users.json'
with open(users_json, 'r') as f:
    USR_IDS = json.load(f)

# Pretty-printer kept around for inspecting nested structures interactively.
pp = pprint.PrettyPrinter(indent=2)

# Build the CerebralCortex handle from its YAML configuration file.
# NOTE(review): this is an absolute path under one user's home directory, so
# the notebook only runs as-is on that machine.
CC = CerebralCortex("/home/mperf/sandeep/CerebralCortex-DockerCompose/cc_config_file/cc_vagrant_configuration.yml")

# Extract working beacon data 

In [4]:
# Maps usr_id -> list of every work-beacon datapoint fetched for that user.
id_to_work_bcn = {}

for usr_id in USR_IDS:
    # Register the user up front so users without a work-beacon stream still
    # end up with an (empty) entry.
    user_bcns = []
    id_to_work_bcn[usr_id] = user_bcns

    usr_streams = CC.get_user_streams(usr_id)

    # Stream labels selected by the 'BEACON' / 'WORK' keywords.
    matched_labels = utils.extract_matched_labels(list(usr_streams.keys()), ['BEACON', 'WORK'])

    if matched_labels:
        # Only the first matching label is used.
        for stream_id in usr_streams[matched_labels[0]]['stream_ids']:
            # Pull the datapoints of every day available for this stream id.
            for stream_day in CC.get_stream_days(stream_id):
                day_stream = CC.get_stream(stream_id, usr_id, stream_day)
                user_bcns.extend(day_stream.data)

    # In-place sort using the datapoints' own ordering -- presumably
    # chronological; confirm against the datapoint class.
    user_bcns.sort()




# Check beacon data validity

In [5]:
# Pass each user's beacon list through validation.validate_beacon, store the
# result back in id_to_work_bcn, and report the list length before and after.
for usr_id in USR_IDS:
    raw_bcns = id_to_work_bcn[usr_id]
    n_before = len(raw_bcns)  # taken before validation runs

    valid_bcns = validation.validate_beacon(raw_bcns)
    n_after = len(valid_bcns)

    # The validated list replaces the raw one for all later cells.
    id_to_work_bcn[usr_id] = valid_bcns

    print('---- User {} ----'.format(usr_id))
    print('Beacon stream length before', n_before)
    print('Beacon stream length after', n_after)
    print('\n')

---- User 00162d05-3248-4b7d-b4f6-8593b4faaa63 ----
Beacon stream length before 0
Beacon stream length after 0


---- User 03996723-2411-4167-b14b-eb11dfc33124 ----
Beacon stream length before 23561
Beacon stream length after 23419


---- User 00ab666c-afb8-476e-9872-6472b4e66b68 ----
Beacon stream length before 13976023
Beacon stream length after 1960532


---- User 03c26210-7c9f-4bf2-b1c2-59d0bd64ffac ----
Beacon stream length before 21970
Beacon stream length after 21758


---- User 022e4ff8-e1af-43dc-b747-862ac83518d2 ----
Beacon stream length before 22168
Beacon stream length after 22039


---- User 03ec3750-641a-4039-8b5d-74b485bde1ea ----
Beacon stream length before 12889
Beacon stream length after 12869


---- User 02e82ef0-acb8-4366-8c83-4c3f2f69f7ea ----
Beacon stream length before 41759
Beacon stream length after 41734


---- User 0457f007-211c-4dc3-844e-47d724fece51 ----
Beacon stream length before 3348
Beacon stream length after 3345




# Extract groundtruth from working beacon data

In [6]:
# Maximum distance (in meters) at which a beacon reading still counts as valid.
dist_th = 2.0

def dist_filter(bcns, dist_th) -> List:
    """Return the beacons whose first sample value does not exceed ``dist_th``.

    Args:
        bcns: sized collection of beacon datapoints; each must expose a
            ``sample`` sequence whose element 0 is treated as the distance.
        dist_th: inclusive upper bound on that distance.

    Returns:
        A new list holding the beacons that pass, in their original order.

    The sizes of the input and of the result are printed as a side effect.
    """
    kept = [point for point in bcns if point.sample[0] <= dist_th]

    print('Before dist filter:', len(bcns))
    print('After dist filter:', len(kept))

    return kept

In [9]:
import pdb

# key: usr_id, value: list of (start time: datetime, end time: datetime)
at_desk = dict()

# time difference threshold value in second for two consecutive beacons to be considered as a stream.
tdf_th = 25.0


def _desk_interval(start_time, end_time, pad_seconds):
    """Build the (start, end) tuple for one run of consecutive beacons.

    A run made of a single beacon has identical start and end times; it is
    widened by ``pad_seconds / 2`` on each side so it does not have zero
    duration. Any other run is returned unchanged.
    """
    if start_time == end_time:
        half_pad = timedelta(seconds=pad_seconds / 2.0)
        return (start_time - half_pad, end_time + half_pad)
    return (start_time, end_time)


for usr_id in USR_IDS:
    work_bcns = id_to_work_bcn[usr_id]
    at_desk[usr_id] = list()

    # Keep only beacons whose distance is within dist_th.
    work_bcns = dist_filter(work_bcns, dist_th)

    start_bcn = None  # first beacon of the run currently being built
    prev_bcn = None   # most recent beacon seen
    for bcn in work_bcns:

        if start_bcn is None:
            start_bcn = bcn
        if prev_bcn is None:
            prev_bcn = bcn

        # Time gap between current bcn and prev bcn
        tdf = bcn.start_time - prev_bcn.start_time

        if tdf.total_seconds() > tdf_th:
            # Current beacon is too far in time from the previous one:
            # close the run that ended at prev_bcn and save it.
            at_desk[usr_id].append(
                _desk_interval(start_bcn.start_time, prev_bcn.start_time, tdf_th))

            # Start a new run at the current beacon.
            start_bcn = bcn

        prev_bcn = bcn

    # Runs are only closed inside the loop when a gap is seen, so the run that
    # is still open when the data ends must be saved here; otherwise the last
    # interval of every user would be silently dropped.
    if start_bcn is not None:
        at_desk[usr_id].append(
            _desk_interval(start_bcn.start_time, prev_bcn.start_time, tdf_th))
     

Before dist filter: 0
After dist filter: 0
Before dist filter: 23419
After dist filter: 19947
Before dist filter: 1960532
After dist filter: 408922
Before dist filter: 21758
After dist filter: 13963
Before dist filter: 22039
After dist filter: 13862
Before dist filter: 12869
After dist filter: 10428
Before dist filter: 41734
After dist filter: 34331
Before dist filter: 3345
After dist filter: 3128


# Save processed raw data and groundtruth

In [12]:
# Persist the validated beacon data and the derived at-desk intervals as
# pickle files in the data directory (beacons first, then groundtruth).
path = '../data/'
for pkl_name, payload in (('work_bcn.pkl', id_to_work_bcn),
                          ('at_desk_groundtruth.pkl', at_desk)):
    with open(path+pkl_name, 'wb') as f:
        pickle.dump(payload, f)

In [11]:
# for i, usr_id in enumerate(at_desk.keys()):
#     print('------------usr %d----------' % i)
#     for bcn_pair in at_desk[usr_id]:
#         #print(bcn_pair)
#         tdf = bcn_pair[1] - bcn_pair[0]
#         print(tdf.total_seconds())