<h1 id="tocheading">Table of Contents</h1>
<div id="toc"></div>

# Imports and Functions

In [6]:
# imports and constants
%matplotlib inline
import json
import pickle

import gmaps
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import seaborn as sns

from IPython.display import display, HTML
import ipywidgets as widgets

# user imports
from utils.process_data import *
from utils.stats import *

# rpy 
from rpy2.robjects import r, pandas2ri
import rpy2.robjects as robjects
import rpy2
from rpy2.robjects.packages import importr
utils = importr('utils')
lmtest = importr('lmtest')
Hmisc = importr("Hmisc")
pandas2ri.activate()

In [7]:
%%javascript
// ToC script
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

In [8]:
# Code hider, source: http://chris-said.io/2016/02/13/how-to-make-polished-jupyter-presentations-with-optional-code-visibility/
from IPython.display import HTML

HTML('''
<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }

  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>
''')

# Load Data

In [9]:
baseline_df = pd.read_excel("data_pull/LS_Wave1_SC_BL_WK1_Data_081419-revised.xlsx",
                            sheet_name=0)
wk1_df = pd.read_excel("data_pull/LS_Wave1_SC_BL_WK1_Data_081419.xlsx",
                       sheet_name=2)
mapping_df = pd.read_csv("data_pull/Wave1LifeSenseEnroll_DATA_LABELS_2019-08-15_0929.csv")

In [10]:
redcap_dict = dict(zip(mapping_df['Case#:'], mapping_df['LifeSense Study App ID:    ']))

In [29]:
all_fga = pd.read_pickle("ls_data/all_fga.df")
all_scr = pd.read_pickle("ls_data/all_scr.df")

## Build Demographics DF

In [12]:
demo_df = pd.DataFrame()
demo_df['study_id'] = baseline_df['study_id']
demo_df['pid'] = demo_df['study_id'].map(redcap_dict)
demo_df['age'] = baseline_df['age']
demo_df['gender'] = baseline_df['demo_gender']


In [16]:
demo_df['employment'] = baseline_df['routine_slabels02']

employment_dict = {
    1: 'employed',
    2: 'unemployed',
    3: 'disability',
    4: 'retired',
    88: 'other',
    99: 'pna'
}

demo_df['employment'] = demo_df['employment'].map(employment_dict)

demo_df['employment'].value_counts()

In [18]:
demo_df['marital_status'] = baseline_df['demo_maritalstatus']

marital_dict = {
    0: 'single',
    1: 'live_with_partner',
    2: 'domestic_partnership',
    3: 'married',
    4: 'separated',
    5: 'divorced',
    6: 'dont_know',
    99: 'pna'
}

demo_df['marital_status'] = demo_df['marital_status'].map(marital_dict)

demo_df['marital_status'].value_counts()

single                  101
married                  92
live_with_partner        41
divorced                 33
separated                 6
domestic_partnership      6
pna                       2
dont_know                 1
Name: marital_status, dtype: int64

In [19]:
demo_df['education'] = baseline_df['demo_highest_education']

edu_dict = {
    1: 'never_attended', 
    2: '1st_grade',
    3: '2nd_grade', 
    4: '3rd_grade', 
    5: '4th_grade', 
    6: '5th_grade', 
    7: '6th_grade', 
    8: '7th_grade', 
    9: '8th_grade', 
    10: '9th_grade', 
    11: '10th_grade', 
    12: '11th_grade' , 
    13: '12th_grade_no_diploma', 
    14: 'high_school', 
    15: 'ged', 
    16: 'some_college',
    17: 'associates_vocational',
    18: 'associates_academic',
    19: 'bachelors',
    20: 'masters',
    21: 'professional_degree', 
    22: 'doctoral_degree', 
    23: 'dont_know', 
    99: 'pna'
}

demo_df['education'] = demo_df['education'].map(edu_dict)

demo_df['education'].value_counts()

bachelors                99
masters                  62
some_college             47
associates_vocational    26
associates_academic      13
doctoral_degree          12
professional_degree       7
high_school               7
ged                       6
11th_grade                1
10th_grade                1
12th_grade_no_diploma     1
Name: education, dtype: int64

In [20]:
demo_df['fam_income'] = baseline_df['demo_fam_income']
demo_df['ind_income'] = baseline_df['demo_personal_income']
income_dict = {
    1: '<10000',
    2: '10000-19999',
    3: '20000-39999',
    4: '40000-59999',
    5: '60000-99999',
    6: '>100000',
    7: 'dont_know',
    99: 'pna'
}

demo_df['fam_income'] = demo_df['fam_income'].map(income_dict)
demo_df['ind_income'] = demo_df['ind_income'].map(income_dict)


In [21]:
demo_df['fam_income'].value_counts()

60000-99999    88
40000-59999    59
>100000        49
20000-39999    44
10000-19999    20
<10000         13
pna             5
dont_know       4
Name: fam_income, dtype: int64

In [22]:
demo_df['ind_income'].value_counts()

40000-59999    71
20000-39999    60
60000-99999    54
10000-19999    40
<10000         39
>100000        12
pna             4
dont_know       2
Name: ind_income, dtype: int64

## Build Initial State DF

In [25]:
init_df = pd.DataFrame()
init_df['study_id'] = baseline_df['study_id']
init_df['pid'] = init_df['study_id'].map(redcap_dict)
init_df['pid'] = init_df['pid'].astype(str)
init_df['pid'] = init_df['pid'].str.pad(width=8, side='left', fillchar='0')

# phq
init_df['phq8'] = baseline_df['phq_total_sc']
init_df['phq_q10'] = baseline_df['phq10_sc']

# gad
init_df['gad7'] = baseline_df['gad_total']
init_df['gad_q8'] = baseline_df['gad08']
init_df.loc[97, 'gad7'] = (107-98)
init_df['gad7']

# shaps
init_df['shaps'] = baseline_df['shaps_score']

shaps_cols = baseline_df.columns[baseline_df.columns.str.contains('shaps')][:-1]
shaps_df = baseline_df[shaps_cols]
shaps_df = shaps_df.replace(to_replace=99, value=3)
init_df['shaps_sum'] = shaps_df.sum(axis=1)

# diagnoses
dx_cols = baseline_df.columns[baseline_df.columns.str.startswith('dx')]
init_df[dx_cols] = baseline_df[dx_cols]

In [26]:
# spin
fear = [1,3,5,10,14,15]
avoid = [4,6,8,9,10,11,16]
physio = [2,7,13,17]
mini = [6,9,15]

def proc_spin_cols(qs):
    col_names = []
    for q in qs:
        name = "spin_0{}" if q < 10 else "spin_{}"
        col_names.append(name.format(q))
    return col_names

fear_cols = proc_spin_cols(fear)
avoid_cols = proc_spin_cols(avoid)
phys_cols = proc_spin_cols(avoid)
mini_cols = proc_spin_cols(mini)

spin_cols = baseline_df.columns[baseline_df.columns.str.contains('spin')][:-1]
spin_df = baseline_df[spin_cols]
spin_df = spin_df.replace(to_replace=999, value=1)
spin_df['spin_fear'] = spin_df[fear_cols].sum(axis=1)
spin_df['spin_avoid'] = spin_df[avoid_cols].sum(axis=1)
spin_df['spin_phys'] = spin_df[phys_cols].sum(axis=1)
spin_df['spin_mini'] = spin_df[mini_cols].sum(axis=1)
spin_df['spin_total'] = spin_df[spin_cols].sum(axis=1)
spin_cols = ['spin_total', 'spin_fear', 'spin_avoid', 'spin_phys', 'spin_mini']
init_df[spin_cols] = spin_df[spin_cols]

In [27]:
init_df.head()

Unnamed: 0,study_id,pid,phq8,phq_q10,gad7,gad_q8,shaps,shaps_sum,dx_depression,dx_bipolar,dx_ocd,dx_ptsd,dx_schizo,dx_eating,dx_substance,spin_total,spin_fear,spin_avoid,spin_phys,spin_mini
0,601011,91048552,3,1.0,0,0.0,0,52,0,0,0,0,0,0,0,2,1,1,1,1
1,601012,31456993,15,1.0,15,2.0,5,39,1,0,0,1,0,0,0,30,11,13,13,6
2,601013,51735262,4,0.0,11,1.0,0,51,0,0,0,0,0,0,0,8,4,2,2,1
3,601014,69452375,7,2.0,7,1.0,3,44,0,0,0,0,0,0,0,23,10,17,17,4
4,601015,28021601,1,1.0,0,0.0,0,55,1,0,0,0,0,0,0,2,0,2,2,0


## Build App DF

In [None]:
all_fga[apps] = all_fga[apps].clip(lower=0)
all_fga['date'] = all_fga['hr'].dt.floor('d')

In [None]:
fga_daily = all_fga.groupby(['pid', 'date']).sum()
fga_daily = fga_daily.reset_index()