# Imports and Functions

In [184]:
# imports and constants
import json
import pickle

import gmaps
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import multiprocessing
import numpy as np
import pandas as pd
import pingouin as pg
import seaborn as sns

from IPython.display import display, HTML
import ipywidgets as widgets

# user imports
from utils.process_data import *
from utils.stats import *
from utils.lifesense_utils import *

%matplotlib inline

# Extract REDCap Surveys

In [201]:
baseline_redcap_df = pd.read_excel("data_pull/LS_Wave1_SC_BL_WK1_Data_081419-revised.xlsx",
                            sheet_name=0)
wk_4_7_10_df = pd.read_excel("data_pull/LS_Wave1_REDCap_wk1_4_7_10_110719.xlsx",
                            sheet_name=0)
wk_13_16_df = pd.read_excel("data_pull/LS_Wave1_REDCap_wk13_16_120519.xlsx",
                            sheet_name=0)
mapping_df = pd.read_csv("data_pull/Wave1LifeSenseEnroll_DATA_LABELS_2019-08-15_0929.csv")

baseline_redcap_dict = dict(zip(mapping_df['Case#:'], mapping_df['LifeSense Study App ID:    ']))
wk_redcap_dict = dict(zip(mapping_df['ID:'], mapping_df['LifeSense Study App ID:    ']))

In [202]:
with open("data_pull/ids/wave1_ids.txt", "r") as internal_f:
    wave1_ids = [line.strip() for  line in internal_f.readlines()]

In [208]:
wk_4_7_10_df['pid'] = wk_4_7_10_df['record_id'].map(wk_redcap_dict)
wk_13_16_df['pid'] = wk_13_16_df['record_id'].map(wk_redcap_dict)

baseline_redcap_df['pid'] = baseline_redcap_df['study_id'].map(baseline_redcap_dict)

def pad_pid(row):
    if len(row['pid']) < 8:
        return ('0' * (8-len(row['pid']))) + row['pid']
    else:
        return row['pid']

wk_4_7_10_df['pid'] = wk_4_7_10_df['pid'].astype(str)
wk_4_7_10_df['pid'] = wk_4_7_10_df.apply(pad_pid, axis=1)

wk_13_16_df['pid'] = wk_13_16_df['pid'].astype(str)
wk_13_16_df['pid'] = wk_13_16_df.apply(pad_pid, axis=1)

baseline_redcap_df['pid'] = baseline_redcap_df['pid'].astype(str)
baseline_redcap_df['pid'] = baseline_redcap_df.apply(pad_pid, axis=1)

### Investigate psychological treatment indicators

In [220]:
psych_cols = wk_4_7_10_df.columns[wk_4_7_10_df.columns.str.contains('txhx')]
wk_4_7_10_df['psytxhx_psychother_3wk'].isna().sum()

2344

In [228]:
psych_cols

Index(['txhx_start_fu', 'psytxhx_psychother_3wk', 'psytxhx_curr_meds_fu'], dtype='object')

In [221]:
psych_df = wk_4_7_10_df[['pid'] + list(psych_cols)]

In [222]:
psych_df.head()

Unnamed: 0,pid,txhx_start_fu,psytxhx_psychother_3wk,psytxhx_curr_meds_fu
0,91048552,,,
1,91048552,2019-08-23 10:18:53,0.0,0.0
2,91048552,2019-09-13 10:37:41,0.0,0.0
3,91048552,2019-10-04 10:38:41,0.0,0.0
4,91048552,,,


In [226]:
baseline_redcap_df.columns[baseline_redcap_df.columns.str.contains('txhx')]

Index(['psytxhx_psychother_4wk', 'psytxhx_curr_meds'], dtype='object')

## Build State DataFrame

In [236]:
base_rc_cols = baseline_redcap_df.columns[baseline_redcap_df.columns.str.startswith('shaps') |
                                           baseline_redcap_df.columns.str.startswith('phq') | 
                                           baseline_redcap_df.columns.str.startswith('gad') |
                                           baseline_redcap_df.columns.str.startswith('spin') |
                                           baseline_redcap_df.columns.str.startswith('psqi')]

wk_rc_cols = wk_4_7_10_df.columns[wk_4_7_10_df.columns.str.startswith('shaps') |
                                  wk_4_7_10_df.columns.str.startswith('phq') | 
                                  wk_4_7_10_df.columns.str.startswith('gad') |
                                  wk_4_7_10_df.columns.str.startswith('spin') |
                                  wk_4_7_10_df.columns.str.startswith('psqi') | 
                                  wk_4_7_10_df.columns.str.contains('txhx')]

In [237]:
state_df = wk_4_7_10_df[['pid', 'redcap_event_name'] + list(wk_rc_cols)]
# no surveys conducted in wk1
state_df = state_df[~(state_df['redcap_event_name'] == 'week_1_arm_1')]

# append wks 13 and 16
state_df = state_df.append(wk_13_16_df[['pid', 'redcap_event_name'] + list(wk_rc_cols)])

In [238]:
start_cols = list(state_df.columns[state_df.columns.str.contains('start')])

for col in start_cols:
    baseline_redcap_df[col] = baseline_redcap_df['dt_screen']

# populate baseline columns to match the week columns
baseline_redcap_df['redcap_event_name'] = 'week_0_arm_1'
baseline_redcap_df['psqi_05j_1'] = baseline_redcap_df['psqi_other']
baseline_redcap_df['psqi_05j_2'] = baseline_redcap_df['psqi_05j']
baseline_redcap_df['psytxhx_curr_meds_fu'] = baseline_redcap_df['psytxhx_curr_meds']
baseline_redcap_df['psytxhx_psychother_3wk'] = baseline_redcap_df['psytxhx_psychother_4wk']

In [239]:
state_df = state_df.append(baseline_redcap_df[state_df.columns])

In [242]:
state_df = state_df.sort_values(by=['pid', 'gad_start'])
print(state_df.shape)
# drop rows with no survey responses
print(state_df.dropna(subset=wk_rc_cols, how='all').shape)
state_df = state_df.dropna(subset=wk_rc_cols, how='all')
state_df.head(15)

(1588, 68)
(1588, 68)


Unnamed: 0,pid,redcap_event_name,gad_start,gad01,gad02,gad03,gad04,gad05,gad06,gad07,...,spin_08,spin_09,spin_10,spin_11,spin_12,spin_13,spin_14,spin_15,spin_16,spin_17
39,746649,week_0_arm_1,2019-07-22,1.0,1.0,1.0,1.0,2.0,2.0,2.0,...,4.0,4.0,3.0,4.0,4.0,2.0,4.0,4.0,4.0,4.0
405,746649,week_4_arm_1,2019-08-23 19:04:16,3.0,3.0,3.0,3.0,3.0,3.0,2.0,...,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,2.0,4.0
406,746649,week_7_arm_1,2019-09-14 15:19:45,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,3.0
407,746649,week_10_arm_1,2019-10-04 16:25:27,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
78,746649,week_13_arm_1,2019-10-25 22:38:25,3.0,2.0,2.0,2.0,2.0,3.0,3.0,...,3.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0
79,746649,week_16_arm_1,2019-11-15 10:38:57,1.0,2.0,2.0,3.0,2.0,3.0,2.0,...,3.0,4.0,3.0,4.0,3.0,4.0,4.0,4.0,3.0,4.0
93,1225297,week_0_arm_1,2019-07-24,3.0,3.0,3.0,2.0,1.0,1.0,2.0,...,4.0,4.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0
1005,1225297,week_4_arm_1,2019-08-26 14:53:14,1.0,1.0,1.0,1.0,1.0,1.0,2.0,...,3.0,3.0,3.0,4.0,3.0,2.0,3.0,3.0,2.0,3.0
1006,1225297,week_7_arm_1,2019-09-16 15:52:35,2.0,2.0,2.0,2.0,1.0,1.0,2.0,...,3.0,3.0,3.0,3.0,3.0,1.0,3.0,3.0,2.0,3.0
1007,1225297,week_10_arm_1,2019-10-05 18:47:20,2.0,2.0,2.0,2.0,0.0,1.0,2.0,...,3.0,4.0,3.0,4.0,3.0,1.0,3.0,3.0,2.0,2.0


In [243]:
state_df.columns

Index(['pid', 'redcap_event_name', 'gad_start', 'gad01', 'gad02', 'gad03',
       'gad04', 'gad05', 'gad06', 'gad07', 'gad08', 'txhx_start_fu',
       'psytxhx_psychother_3wk', 'psytxhx_curr_meds_fu', 'shaps_start',
       'shaps_01', 'shaps_02', 'shaps_03', 'shaps_04', 'shaps_05', 'shaps_06',
       'shaps_07', 'shaps_08', 'shaps_09', 'shaps_10', 'shaps_11', 'shaps_12',
       'shaps_13', 'shaps_14', 'psqi_start', 'psqi_01', 'psqi_02', 'psqi_03',
       'psqi_04', 'psqi_05a', 'psqi_05b', 'psqi_05c', 'psqi_05d', 'psqi_05e',
       'psqi_05f', 'psqi_05g', 'psqi_05h', 'psqi_05i', 'psqi_05j',
       'psqi_05j_1', 'psqi_05j_2', 'psqi_06', 'psqi_07', 'psqi_08', 'psqi_09',
       'spin_start', 'spin_01', 'spin_02', 'spin_03', 'spin_04', 'spin_05',
       'spin_06', 'spin_07', 'spin_08', 'spin_09', 'spin_10', 'spin_11',
       'spin_12', 'spin_13', 'spin_14', 'spin_15', 'spin_16', 'spin_17'],
      dtype='object')

In [244]:
state_df.to_pickle("ls_data/state_all.df")

### Process PHQ 

In [156]:
phq_cols = list(baseline_redcap_df.columns[baseline_redcap_df.columns.str.startswith('phq')])
phq_df = baseline_redcap_df[['pid'] + phq_cols]
phq_df['phq_start'] = baseline_redcap_df['dt_screen']
phq_df['redcap_event_name'] = 'week_0_arm_1'
phq_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,pid,phq01_sc,phq02_sc,phq03_sc,phq04_sc,phq05_sc,phq06_sc,phq07_sc,phq08_sc,phq_total_sc,phq10_sc,phq_start,redcap_event_name
0,91048552,0,0,2,1,0,0,0,0,3,1.0,2019-07-22,week_0_arm_1
1,31456993,3,3,1,2,2,1,2,1,15,1.0,2019-07-22,week_0_arm_1
2,51735262,0,1,1,1,0,1,0,0,4,0.0,2019-07-22,week_0_arm_1
3,69452375,1,1,0,1,0,3,1,0,7,2.0,2019-07-22,week_0_arm_1
4,28021601,0,0,0,0,0,1,0,0,1,1.0,2019-07-22,week_0_arm_1


In [157]:
# process PHQ values populated via EMA

even_phq8_df = pd.DataFrame()

for pid in wave1_ids:
    even_df = pd.read_pickle("/data/tliu/all_ema_data/evening_phq8/{}.df".format(pid))
    even_phq8_df = even_phq8_df.append(even_df, sort=True)

In [158]:
morn_phq8_df = pd.DataFrame()

for pid in wave1_ids:
    morn_df = pd.read_pickle("/data/tliu/all_ema_data/morning_phq8/{}.df".format(pid))
    morn_phq8_df = morn_phq8_df.append(morn_df, sort=True)

In [159]:
even_phq8_df = format_time(even_phq8_df)
morn_phq8_df = format_time(morn_phq8_df)

In [160]:
def get_redcap_event_name(row):
    """populates redcap event name based on date."""

    if (row['date'] > pd.to_datetime('2019-07-23')) and (row['date'] < pd.to_datetime('2019-08-12')):
        return 'week_1_arm_1'
    
    if (row['date'] > pd.to_datetime('2019-08-13')) and (row['date'] < pd.to_datetime('2019-09-02')):
        return 'week_4_arm_1'
    
    if (row['date'] > pd.to_datetime('2019-09-03')) and (row['date'] < pd.to_datetime('2019-09-23')):
        return 'week_7_arm_1'
    
    if (row['date'] > pd.to_datetime('2019-09-24')) and (row['date'] < pd.to_datetime('2019-10-14')):
        return 'week_10_arm_1'
    
    if (row['date'] > pd.to_datetime('2019-10-15')) and (row['date'] < pd.to_datetime('2019-11-04')):
        return 'week_13_arm_1'
    
    if (row['date'] > pd.to_datetime('2019-11-05')) and (row['date'] < pd.to_datetime('2019-11-25')):
        return 'week_16_arm_1'
    

even_phq8_df['redcap_event_name'] = even_phq8_df.apply(get_redcap_event_name, axis=1)
morn_phq8_df['redcap_event_name'] = morn_phq8_df.apply(get_redcap_event_name, axis=1)

In [161]:
phq_dict = {
    'pleasure': 'phq01_sc',
    'depression': 'phq02_sc',
    'sleep': 'phq03_sc',
    'energy': 'phq04_sc',
    'appetite': 'phq05_sc',
    'feeling-bad': 'phq06_sc',
    'concentration': 'phq07_sc',
    'movement': 'phq08_sc',
    'difficulty': 'phq10_sc',
    'time': 'phq_start'
}

for k, v in phq_dict.items():
    even_phq8_df[v] = even_phq8_df[k]
    morn_phq8_df[v] = morn_phq8_df[k]
    
even_phq8_df['phq_total_sc'] = even_phq8_df[even_phq8_df.columns[even_phq8_df.columns.str.startswith("phq0")]].astype(int).sum(axis=1)
morn_phq8_df['phq_total_sc'] = morn_phq8_df[morn_phq8_df.columns[morn_phq8_df.columns.str.startswith("phq0")]].astype(int).sum(axis=1)

In [164]:
phq_df = phq_df.append(morn_phq8_df[phq_df.columns])
phq_df = phq_df.append(even_phq8_df[phq_df.columns])
phq_df = phq_df.sort_values(by=["pid", "phq_start"])
phq_df.head(20)

Unnamed: 0,pid,phq01_sc,phq02_sc,phq03_sc,phq04_sc,phq05_sc,phq06_sc,phq07_sc,phq08_sc,phq_total_sc,phq10_sc,phq_start,redcap_event_name
39,746649,2,1,3,1,1,1,1,2,12,3,2019-07-22,week_0_arm_1
0,746649,2,2,2,3,2,1,1,0,13,1,2019-07-30 12:31:50,week_1_arm_1
0,746649,2,2,2,3,2,1,1,0,13,1,2019-07-30 12:31:50,week_1_arm_1
0,746649,1,2,1,2,0,1,0,0,7,2,2019-08-05 23:32:00,week_1_arm_1
0,746649,1,2,1,2,0,1,0,0,7,2,2019-08-05 23:32:00,week_1_arm_1
1,746649,1,2,2,2,0,1,1,0,9,2,2019-08-26 19:04:10,week_4_arm_1
1,746649,1,2,2,2,0,1,1,0,9,2,2019-08-26 19:04:10,week_4_arm_1
1,746649,1,1,1,2,2,3,2,0,12,2,2019-09-10 14:28:31,week_7_arm_1
1,746649,1,1,1,2,2,3,2,0,12,2,2019-09-10 14:28:31,week_7_arm_1
2,746649,1,1,2,1,0,1,1,0,7,2,2019-09-16 19:08:22,week_7_arm_1


In [167]:
phq_df = phq_df.drop_duplicates()

In [168]:
phq_df['redcap_event_name'].value_counts()

week_1_arm_1     477
week_10_arm_1    470
week_13_arm_1    435
week_7_arm_1     411
week_16_arm_1    379
week_4_arm_1     306
week_0_arm_1     282
Name: redcap_event_name, dtype: int64

In [169]:
phq_df.to_pickle("ls_data/phq_0_16.df")

# Pull Gender

In [360]:
baseline_redcap_df.columns.values

array(['study_id', 'dt_screen', 'us_res', 'age', 'us_citizen',
       'smartphone', 'smartphone_os', 'smartphone_android',
       'smartphone_shared', 'smartphone_power', 'services_talk',
       'services_text', 'services_data', 'smartphone_data',
       'smartphone_network', 'smartphone_wifi', 'phq01_sc', 'phq02_sc',
       'phq03_sc', 'phq04_sc', 'phq05_sc', 'phq06_sc', 'phq07_sc',
       'phq08_sc', 'phq_total_sc', 'phq10_sc', 'dx_depression',
       'dx_bipolar', 'dx_ocd', 'dx_ptsd', 'dx_schizo', 'dx_eating',
       'dx_substance', 'elig', 'wk1_ema_start_dt', 'enroll_status',
       'dt_demo', 'demo_gender', 'demo_zip', 'demo_maritalstatus',
       'demo_highest_education', 'demo_household_num', 'demo_fam_income',
       'demo_personal_income', 'demo_health_insurance', 'sds_mobility1',
       'routine_slabels02', 'slabels03', 'slabels03a', 'work_schd',
       'slabels03b', 'slabels03c', 'slabels03d', 'slabels04',
       'routine_slabels05', 'routine_slabels07', 'routine_slabels08',

# Physical Symptoms

In [325]:
all_ema_df = morn_phq8_df.copy()
all_ema_df = all_ema_df.append(even_phq8_df)

In [326]:
all_ema_df.shape

(2478, 150)

In [327]:
physical_symptoms = ['muscle-aches', 'headache', 'fatigue', 'fever', 'indigestion', 'sinus-nasal', 'cough', 'sore-throat']
physical_symptoms = sorted(physical_symptoms)

In [328]:
physical_symptoms

['cough',
 'fatigue',
 'fever',
 'headache',
 'indigestion',
 'muscle-aches',
 'sinus-nasal',
 'sore-throat']

In [338]:
indicators = []
for phys_str in all_ema_df['physical-health'].values:
    print(phys_str)
    indicator_list = [1 if s in phys_str else 0 for s in physical_symptoms]
    indicators.append(indicator_list)
    
phys_df = pd.DataFrame(indicators, columns=physical_symptoms)

[fatigue]
[headache, fatigue]
[fatigue, muscle-aches]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, fatigue]
[muscle-aches, fatigue]
[muscle-aches]
[fatigue, sinus-nasal, cough]
[headache, fatigue]
[fatigue, sinus-nasal]
[none]
[none]
[headache, fatigue, sinus-nasal]
[headache, fatigue]
[headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, sinus-nasal]
[muscle-aches]
[muscle-aches, headache]
[headache, sinus-nasal]
[sinus-nasal]
[headache]
[muscle-aches, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, sinus-nasal]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, fatigue, headache, sinus-nasal]
[muscle-aches, headache, fatigue, indiges

In [341]:
phys_df['fever'].describe()

count    2478.000000
mean        0.033495
std         0.179961
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: fever, dtype: float64

In [350]:
all_ema_df = all_ema_df.reset_index(drop=True)

In [351]:
for t in all_ema_df['physical-health'].values:
    print(t)

[fatigue]
[headache, fatigue]
[fatigue, muscle-aches]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, fatigue]
[muscle-aches, fatigue]
[muscle-aches]
[fatigue, sinus-nasal, cough]
[headache, fatigue]
[fatigue, sinus-nasal]
[none]
[none]
[headache, fatigue, sinus-nasal]
[headache, fatigue]
[headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, sinus-nasal]
[muscle-aches]
[muscle-aches, headache]
[headache, sinus-nasal]
[sinus-nasal]
[headache]
[muscle-aches, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, sinus-nasal]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, headache, fatigue]
[muscle-aches, headache, fatigue, indigestion]
[muscle-aches, fatigue, headache, sinus-nasal]
[muscle-aches, headache, fatigue, indiges

In [353]:
all_ema_df[physical_symptoms] = phys_df

In [354]:
all_ema_df.shape

(2478, 158)

In [355]:
all_ema_df[['physical-health'] + physical_symptoms]

Unnamed: 0,physical-health,cough,fatigue,fever,headache,indigestion,muscle-aches,sinus-nasal,sore-throat
0,[fatigue],0,1,0,0,0,0,0,0
1,"[headache, fatigue]",0,1,0,1,0,0,0,0
2,"[fatigue, muscle-aches]",0,1,0,0,0,1,0,0
3,"[muscle-aches, headache, fatigue]",0,1,0,1,0,1,0,0
4,"[muscle-aches, headache, fatigue]",0,1,0,1,0,1,0,0
5,"[muscle-aches, fatigue]",0,1,0,0,0,1,0,0
6,"[muscle-aches, fatigue]",0,1,0,0,0,1,0,0
7,[muscle-aches],0,0,0,0,0,1,0,0
8,"[fatigue, sinus-nasal, cough]",1,1,0,0,0,0,1,0
9,"[headache, fatigue]",0,1,0,1,0,0,0,0


In [356]:
all_ema_df['fever'].describe()

count    2478.000000
mean        0.033495
std         0.179961
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: fever, dtype: float64

In [357]:
physical_cols = ['pid', 'phq_start', 'redcap_event_name'] + physical_symptoms

In [358]:
all_ema_df[physical_cols].to_pickle("ls_data/phys_0_16.df")

# Extract Wk 4 Features

## Utilities

In [6]:
fus_loc = "/data/tliu/wk{}_ls_data/pdk-location"
fga_loc = "/data/tliu/wk{}_ls_data/pdk-foreground-application"
sms_loc = "/data/tliu/wk{}_ls_data/pdk-text-messages"
cal_loc = "/data/tliu/wk{}_ls_data/pdk-phone-calls"

sensor_locs = [fus_loc, fga_loc, sms_loc, cal_loc]
wks = [7,10]

# load data
with open("data_pull/ids/wave1_ids.txt", "r") as internal_f:
    wave1_ids = [line.strip() for  line in internal_f.readlines()]

    
def process_sensor_data(pids, loc, out_loc, func, n_procs=4):
    """Wrapper function for processing sensor data.
    
    Args:
        pids (list): list of pids to process
        #wk (int): the week of data to process
        loc (str): the file location
        out_loc (str): the output file name and location
        func (function): the processing function to apply
        n_procs (int): the number of processes to spin up
    
    Returns:
        None, but writes to 
    """
    #loc = loc.format(wk)
    func_args = [(pid, loc) for pid in pids]
    with multiprocessing.Pool(n_procs) as pool:
        results = pool.starmap(func, func_args)
        
    df = pd.DataFrame()

    for res in results:
        df = df.append(res)
    
    df.to_pickle(out_loc)
        

fus_str = "ls_data/wk{}/fus_daily.df"
circ_str = "ls_data/wk{}/circ_movt.df"
fga_str = "ls_data/wk{}/fga_hr.df"
cal_str = "ls_data/wk{}/cal_hr.df"
sms_str = "ls_data/wk{}/sms_hr.df"

def process_all_data(pids, wks, n_procs=4):    
    for wk in wks:
        #process_sensor_data(pids, fus_loc.format(wk), fus_str.format(wk), build_fus, n_procs)
        process_sensor_data(pids, fus_loc.format(wk), circ_str.format(wk), build_circadian_stats, n_procs)
        process_sensor_data(pids, fga_loc.format(wk), fga_str.format(wk), build_fga_hr, n_procs)
        process_sensor_data(pids, cal_loc.format(wk), cal_str.format(wk), build_cal_hr, n_procs)
        process_sensor_data(pids, sms_loc.format(wk), sms_str.format(wk), build_sms_hr, n_procs)

In [199]:
%%time

process_all_data(wave1_ids, [13, 16], n_procs=12)

29878406
08343773
27099517
28244292
48367404
61762096
58780031
50550619
21594071
84469352
01254121
44667026
28949890
47363974
46002724
70483015
03327555
59654069
10285142
51419094
56912666
28939704
05261598
33250639
80504454
16777771
18583649
15565415
09489685
91788916
59222410
20206315
62860600
44655272
95556839
90496706
47688944
74575289
06638392
76854891
53435128
97678130
71676393
32718334
86283726
49001726
27761141
34262165
50765631
99050875
44293762
87485171
31456993
52982527
81049144
19410615
98621494
36969413
37168430
77842251
39106805
56184073
69452375
32573840
98250113
71219000
42258080
81729157
45433155
50730294
67900112
01495950
73518938
12807049
74739196
75348018
81720300
55313474
66873010
31477083
20706360
81968737
13250317
89434074
39725031
85752121
69335292
39548248
56596866
38646138
53808826
52581458
75282136
44909649
14753485
62463869
90638927
02970060
74371880
67597747
13051775
65143770
21894119
28540480
56723660
30501084
51456954
19674187
32309079
45761494
02817507
6

50730294
73518938
67900112
74739196
55313474
39548248
75348018
81729157
81720300
45433155
66873010
44909649
89434074
01495950
56596866
31477083
81968737
13250317
52581458
39725031
53808826
74371880
20706360
38646138
75282136
14753485
69335292
62463869
85752121
67597747
02970060
28540480
90638927
68744652
76432041
65143770
21894119
56723660
30501084
51456954
43589028
19674187
13051775
32309079
93627939
54841471
78911129
52064875
38890840
81558830
45761494
02817507
78681731
03939827
74589634
75696701
79316475
79439002
11770862
83085276
58093242
03384972
22498610
23066392
24936642
17328943
70035688
38588231
72038219
53097921
39561926
18156803
55542659
46484562
02144163
55979795
42871706
19663467
43292038
11436422
70027963
50939076
12616311
99127649
31574721
23388083
41606321
93606382
57473014
26080346
83062037
29384065
56910929
90229239
11927637
40932643
75437581
07974290
87400142
54461187
72685265
90763832
81249330
65149091
79510141
87929316
09269616
71189891
84902402
61131074
78352234
3

In [247]:
%%time

import multiprocessing
fus_loc = "/data/tliu/wk16_ls_data/pdk-location"
fus_args = [(pid, fus_loc) for pid in wave1_ids]

with multiprocessing.Pool(12) as pool:
    
    fus_results = pool.starmap(build_fus, fus_args)

44667026
48367404
28244292
(598, 18)
28949890
(1416, 18)
58780031
(2232, 18)
61762096
(2948, 18)
84469352
(2322, 18)
50550619
(3248, 18)
27099517
(3230, 18)
01254121
(3493, 18)
08343773
(4954, 18)
21594071
(3572, 18)
47363974
(4343, 18)
29878406
(4918, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


46002724
(2506, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


59654069
(3108, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


70483015


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


03327555
(3452, 18)
51419094
(3064, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


59222410
(1250, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


44655272
(1650, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


10285142
(1633, 18)
76854891
56912666
(1697, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


53435128
(4831, 18)
09489685
(2432, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


91788916
20206315
(675, 18)
32718334
(2022, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


47688944
(3387, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


28939704
(1179, 18)
97678130
(2361, 18)
36969413
(499, 18)
80504454
(3479, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


87485171
(2926, 18)
90496706
(2827, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


86283726
(3237, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


95556839


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


06638392
(3442, 18)
49001726
(1884, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


62860600
(1124, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


98621494
(3824, 18)
19410615
(3949, 18)
37168430
(4170, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


74575289
(3000, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


16777771
(23, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


15565415
(2069, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


39106805


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


56184073
(3313, 18)
50730294
(3600, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


18583649
(4390, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


98250113
(3070, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


50765631
(1624, 18)
81729157
(3425, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


71676393
(1412, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


44293762
(568, 18)
77842251
(3502, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


71219000
(2085, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


52982527
69452375
(2267, 18)
74739196
(3626, 18)
20706360
(1239, 18)
81049144
(4027, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


56596866
(1743, 18)
73518938
(2646, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


05261598
(308, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


42258080
(3269, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


33250639
(3271, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


89434074
(2042, 18)
74371880
(3501, 18)
85752121
(3185, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


99050875
(1163, 18)
38646138
(2615, 18)
45433155
(3962, 18)
12807049
(3716, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


75348018
(24, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


27761141
53808826
(4200, 18)
81720300
(1587, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


34262165
(1923, 18)
31456993
(3334, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


32573840


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


66873010
(1641, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


65143770
(3364, 18)
13250317
(3284, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


81968737
(3560, 18)
55313474
(3401, 18)
76432041
(2752, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


01495950
(2988, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


39548248
(1455, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


67597747
(2088, 18)
39725031
(3206, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


44909649
(2647, 18)
75282136
(3029, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


13051775
(3443, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


69335292
(1151, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


30501084
(651, 18)
02970060
(4279, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


93627939
(1513, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


14753485
(3345, 18)
38890840
(3267, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


19674187
(3437, 18)
62463869
(3555, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


67900112
(2414, 18)
68744652
(2057, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


45761494
02817507
(2133, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


52581458
(3038, 18)
83085276
(3271, 18)
43589028
(3718, 18)
56723660
(3085, 18)
32309079
(3995, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


31477083
58093242
(3025, 18)
(2508, 18)
79439002
(3629, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


81558830
(4358, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


90638927
(3386, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


21894119
(2988, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


11770862
(624, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


17328943
(616, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


22498610
51456954
(3366, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


23066392
(3672, 18)
18156803
(1129, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


03384972
(2787, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


02144163
(157, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


38588231
(204, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


52064875
(3609, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


46484562
(1508, 18)
55979795
(2804, 18)
53097921
(2200, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


79316475
(3190, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


28540480
(3530, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


78911129
(1758, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


42871706
(68, 18)
55542659
(3033, 18)
43292038


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


11436422
(425, 18)
70035688
(2575, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


70027963
50939076


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


23388083
(3115, 18)
12616311
(3464, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


31574721
(1638, 18)
11927637
(2267, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


99127649
(2924, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


56910929
(570, 18)
93606382
(4300, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


74589634
(3506, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


41606321
(4369, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


78681731
(2984, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


09269616
(1887, 18)
75437581
(4312, 18)
57473014
(4291, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


54841471
(1504, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


90763832
(4569, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


40932643
(315, 18)
83062037
(3506, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


54461187
(1863, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


19663467
72038219
(2177, 18)
(3441, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


03939827
(941, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


75696701
(1838, 18)
07974290
(734, 18)
79510141
(2526, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


81249330
(1120, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


17294720
(2649, 18)
90229239
(1422, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


84902402
(3637, 18)
24936642
(3131, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


87929316
(1249, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


78352234
(3407, 18)
26080346
(1697, 18)
72685265
(3522, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


61131074
(4643, 18)
27762780
(3244, 18)
39561926
(4081, 18)
71189891
(3654, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


36795256
(3305, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


55915099
(1484, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


74805749
(3434, 18)
42215399
(3665, 18)
01225297
(2317, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


13567195
(3500, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


45517860
(1754, 18)
54004910
(3687, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


80206225
(4212, 18)
50707558
(3370, 18)
87400142
(2678, 18)
08103884
(3225, 18)
65381988
(3326, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


80657933
(2569, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


27330785
(2645, 18)
48315222
(1948, 18)
91048552
(2012, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


28021601
(1457, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


73960495
00746649
(4362, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


(3050, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


46222210
(3290, 18)
51735262
(3110, 18)
35576469
(1673, 18)
67615491
(3139, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


64142475
(1376, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


29384065
(2424, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


22656406
(3871, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


08007167
(3289, 18)
62599280
(608, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


28458341
(3853, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


65741560
(2555, 18)
43093019
(2529, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


74626135
(3060, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


46175798
(193, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


65149091
(1429, 18)
83963249
(1414, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


71745031
53874087
(2735, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


99338619
(2822, 18)
03578019
(3836, 18)
14549710
(2827, 18)
06400675
(4675, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


53236058
(1713, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


90934495
(3440, 18)
05565365
(2470, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


73326278


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


14113160
(3296, 18)
84877086
(3226, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


83275234


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


38459884
(2635, 18)
80700486
(3054, 18)
94277599
(3231, 18)
26957252
(3119, 18)
66507502
(4565, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


55463070
(3200, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


35493515
(2713, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


48625414
(4363, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


44933937
(2475, 18)
78327476
(1943, 18)
08007329
(3152, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


76366191
(2831, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


58081753
(3257, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


76562623
(433, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


77579838
(67, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


82727218
(3569, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


32888746
(1222, 18)
47505792
(3334, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


10099555
(3507, 18)
86756971
(1843, 18)
51612397
(1184, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


07854544


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


29149362
(3175, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


71043609
(2216, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


03233601
(3392, 18)
90587846
(1785, 18)
73142171
(4163, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


04918121
(3059, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


57973631
(3101, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


50931782
(2844, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


93519386
(3186, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


89346491
(1672, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


01766910
(3158, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


04133537
(2264, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


79819446
(3588, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


68756107
(3221, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


39854689
(2787, 18)
62375942
(4466, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


59764431
(3021, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


14196469


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


65696941
89057862
(3643, 18)
74133461
(3340, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


09611865
(1236, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


64292248
(2327, 18)
83056303
(2287, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


18740846
(2233, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


22086591
(3333, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


29584096
62808613
(3072, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


73916801
(932, 18)
22352222
(3352, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


58740880
(3971, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


21150752
(3865, 18)


  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)
  label_group['entropy'] = -(np.log(label_group) * label_group).sum(axis=1)


CPU times: user 733 ms, sys: 320 ms, total: 1.05 s
Wall time: 1min 49s


In [248]:
fus_df = pd.DataFrame()

for df in fus_results:
    fus_df = fus_df.append(df)
fus_df.head()
fus_df.to_pickle("ls_data/wk16/fus_daily.df")

## Fused Location

In [None]:
%%time

import multiprocessing
fus_loc = "/data/tliu/wk4_ls_data/pdk-location"
fus_args = [(pid, fus_loc) for pid in wave1_ids]

with multiprocessing.Pool(8) as pool:
    
    fus_results = pool.starmap(build_fus, fus_args)

In [None]:
fus_df = pd.DataFrame()

for df in fus_results:
    fus_df = fus_df.append(df)
fus_df.head()

In [None]:
# Should only be run once
#fus_df.to_pickle("ls_data/wk4/fus_daily.df")

In [None]:
%%time

import multiprocessing
fus_loc = "/data/tliu/wk4_ls_data/pdk-location"
fus_args = [(pid, fus_loc) for pid in wave1_ids]

with multiprocessing.Pool(12) as pool:
    
    circ_results = pool.starmap(build_circadian_stats, fus_args)

In [None]:
circ_df = pd.DataFrame()

for df in circ_results:
    circ_df = circ_df.append(df)
    
circ_df.head()

In [None]:
# only needs to be run once
#circ_df.to_pickle("ls_data/wk4/circ_movt.df")

## Foreground application

In [None]:
%%time

import multiprocessing

fga_loc = "/data/tliu/wk4_ls_data/pdk-foreground-application"
fga_args = [(pid, fga_loc) for pid in wave1_ids]

with multiprocessing.Pool(12) as pool:
    
    fga_results = pool.starmap(build_fga_hr, fga_args)

In [None]:
fga_df = pd.DataFrame()

for df in fga_results:
    fga_df = fga_df.append(df)
    
fga_df.head()

In [None]:
# only needs to be run once
#fga_df.to_pickle("ls_data/wk4/fga_hr.df")

## Calls

In [None]:
%%time

import multiprocessing

cal_loc = "/data/tliu/wk4_ls_data/pdk-phone-calls"
cal_args = [(pid, cal_loc) for pid in wave1_ids]

with multiprocessing.Pool(8) as pool:
    
    cal_results = pool.starmap(build_cal_hr, cal_args)

In [None]:
cal_df = pd.DataFrame()

for df in cal_results:
    cal_df = cal_df.append(df)
    
cal_df.head()

In [None]:
# only needs to be run once
#cal_df.to_pickle("ls_data/wk4/cal_hr.df")

## Texts

In [None]:
%%time

import multiprocessing

sms_loc = "/data/tliu/wk4_ls_data/pdk-text-messages"
sms_args = [(pid, sms_loc) for pid in wave1_ids]

with multiprocessing.Pool(8) as pool:
    
    sms_results = pool.starmap(build_sms_hr, sms_args)

In [None]:
sms_df = pd.DataFrame()

for df in sms_results:
    sms_df = sms_df.append(df)
    
sms_df.head()

In [None]:
# only needs to be run once
#sms_df.to_pickle("ls_data/wk4/sms_hr.df")