In [80]:
%matplotlib inline

import pandas as pd
import pickle
import time
import importlib
import numpy as np
import importlib


import sys
sys.path.append('/home/ngr/gdrive/wearables/scripts')
import data as weardata
import utils as wearutils

import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling, applied once for every figure in the notebook.
plt.rcParams.update({
    'font.size': 9,
    'font.family': 'sans serif',
    # fonttype 42 embeds TrueType fonts in PDF/PS output
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'text.usetex': False,
    'legend.frameon': False,
    'axes.grid': False,
    'legend.markerscale': 0.5,
    'savefig.dpi': 600,
})
sns.set_style("ticks")

In [2]:
# Load the wearables data and its metadata, reporting the wall-clock time taken
# (the recorded run needed roughly a minute).
timer = wearutils.timer()
timer.start()
data, md = weardata.load_data_md()
print(f'Data and md loaded in {timer.stop():.0f}-s')

Data and md loaded in 68-s


In [26]:
# Variables of interest (voi): maps a metadata column name to a 2-tuple
#   (filter spec, variable type)
# - filter spec is None, a single code string, or a list of code strings
#   (e.g. 'nan27', '-992nan', 'mean_impute').
#   NOTE(review): the codes are interpreted by code outside this notebook;
#   presumably 'X2Y' means "replace X with Y" (e.g. 'nan27' = NaN -> 7) — TODO confirm.
# - variable type is either 'continuous' or 'categorical'.
voi = {
        # tip: search for sections in code book by "instrument"

            # demographics
            'age_enroll': (['22nan', 'mean_impute'], 'continuous'),
            'marital': ('nan27', 'categorical'),
            'gestage_by': ('nan2-99', 'categorical'),
            'insur': ('nan2-99', 'categorical'),
            'ethnicity': ('nan23', 'categorical'),
            'race': ('nan27', 'categorical'),
            'bmi_1vis': ('mean_impute', 'continuous'),
            'prior_ptb_all': ('nan25', 'categorical'),
            'fullterm_births': ('nan25', 'categorical'),
            'surghx_none': ('nan20', 'categorical'),
            'alcohol': ('nan22', 'categorical'),
            'smoke': ('nan22', 'categorical'),
            'drugs': ('nan22', 'categorical'),
            'hypertension': ('nan22', 'categorical'),
            'pregestational_diabetes': ('nan22', 'categorical'),

            # chronic conditions (?)
            'asthma_yes___1': (None, 'categorical'), # asthma
            'asthma_yes___2': (None, 'categorical'), # diabetes
            'asthma_yes___3': (None, 'categorical'), # gestational hypertension
            'asthma_yes___4': (None, 'categorical'), # CHTN
            'asthma_yes___5': (None, 'categorical'), # anomaly
            'asthma_yes___6': (None, 'categorical'), # lupus
            'asthma_yes___7': (None, 'categorical'), # thyroid disease
            'asthma_yes___8': (None, 'categorical'), # heart disease
            'asthma_yes___9': (None, 'categorical'), # liver disease
            'asthma_yes___10': (None, 'categorical'), # renal disease
            'asthma_yes___13': (None, 'categorical'), # IUGR
            'asthma_yes___14': (None, 'categorical'), # polyhydramnios
            'asthma_yes___15': (None, 'categorical'), # oligohydramnios
            'asthma_yes___18': (None, 'categorical'), # anxiety
            'asthma_yes___19': (None, 'categorical'), # depression
            'asthma_yes___20': (None, 'categorical'), # anemia
            'other_disease': ('nan22', 'categorical'),
            'gestational_diabetes': ('nan22', 'categorical'),
            'ghtn': ('nan22', 'categorical'),
            'preeclampsia': ('nan22', 'categorical'),
            'rh': ('nan22', 'categorical'),
            'corticosteroids': ('nan22', 'categorical'),
            'abuse': ('nan23', 'categorical'),
            'assist_repro': ('nan23', 'categorical'),
            'gyn_infection': ('nan22', 'categorical'),
            'maternal_del_weight': ('-992nan', 'continuous'),
            'ptb_37wks': ('nan22', 'categorical'),

            # vitals and labs @admission
            'cbc_hct': ('-992nan', 'continuous'), # NOTE: some of these shouldn't be negative, need some filtering
            'cbc_wbc': ('-992nan', 'continuous'),
            'cbc_plts': ('-992nan', 'continuous'),
            'cbc_mcv': ('-992nan', 'continuous'),
            'art_ph': ('-992nan', 'continuous'),
            'art_pco2': ('-992nan', 'continuous'),
            'art_po2': ('-992nan', 'continuous'),
            'art_excess': ('-992nan', 'continuous'),
            'art_lactate': ('-992nan', 'continuous'),
            'ven_ph': ('-992nan', 'continuous'),
            'ven_pco2': ('-992nan', 'continuous'),
            'ven_po2': ('-992nan', 'continuous'),
            'ven_excess': ('-992nan', 'continuous'),
            'ven_lactate': ('-992nan', 'continuous'),
            'anes_type': ('-992nan', 'continuous'),
            'epidural': ('nan20', 'categorical'),
            'deliv_mode': ('nan24', 'categorical'),

            # infant things
            'infant_wt': ('-992nan', 'continuous'), # kg
            'infant_length': ('-992nan', 'continuous'),
            'head_circ': ('-992nan', 'continuous'),
            'death_baby': ('nan20', 'categorical'),
            'neonatal_complication': (['22nan', 'nan20'], 'categorical'),

            # postpartum
            'ervisit': ('nan20', 'categorical'),
            'ppvisit_dx': ('nan26', 'categorical'),

            # surveys
            'education1': ('nan2-99', 'categorical'),
            'paidjob1': ('nan20', 'categorical'),
            'work_hrs1': ('nan2-99', 'categorical'),
            'income_annual1': ('nan2-99', 'categorical'),
            'income_support1': ('nan2-99', 'categorical'),
            'regular_period1': ('nan2-88', 'categorical'),
            'period_window1': ('nan2-88', 'categorical'),
            'menstrual_days1': ('nan2-88', 'categorical'),
            'bc_past1': ('nan20', 'categorical'),
            'bc_years1': (['882nan', 'nan2-88'], 'categorical'),
            'months_noprego1': ('nan24', 'categorical'),
            'premature_birth1': ('nan2-88', 'categorical'),
            'stress3_1': ('nan2-99', 'categorical'),
            'workreg_1trim': ('nan20', 'categorical'),

            'choosesleep_1trim': ('nan2-99', 'categorical'),
            'slpwake_1trim': ('nan2-99', 'categorical'),
            'slp30_1trim': ('nan2-99', 'categorical'),
            'sleep_qual1': ('nan2-99', 'categorical'),
            'slpenergy1': ('nan2-99', 'categorical'),
            ## Epworth sleepiness scale (sum of the items below); for interpretation see https://epworthsleepinessscale.com/about-the-ess/ (NOTE: convert 4 to np.nan before summing)
            'sitting1': ('nan20', 'categorical'), ### TODO: add a function to sum these items from the metadata, then convert to a continuous label for regression
            'tv1': ('nan20', 'categorical'),
            'inactive1': ('nan20', 'categorical'),
            'passenger1': ('nan20', 'categorical'),
            'reset1': ('nan20', 'categorical'),
            'talking1': ('nan20', 'categorical'),
            'afterlunch1': ('nan20', 'categorical'),
            'cartraffic1': ('nan20', 'categorical'),
            ## Edinburgh depression scale
            'edinb1_1trim': ('nan2-99', 'categorical'),
            'edinb2_1trim': ('nan2-99', 'categorical'),
            'edinb3_1trim': ('nan2-99', 'categorical'),
            'edinb4_1trim': ('nan2-99', 'categorical'),
            'edinb5_1trim': ('nan2-99', 'categorical'),
            'edinb6_1trim': ('nan2-99', 'categorical'),
            'edinb7_1trim': ('nan2-99', 'categorical'),
            'edinb8_1trim': ('nan2-99', 'categorical'),
            'edinb9_1trim': ('nan2-99', 'categorical'),
            'edinb10_1trim': ('nan2-99', 'categorical'),
            ## difficult life circumstances (TODO: no variables listed yet)
            ## sleep diary (TODO: no variables listed yet)
    }

In [95]:
# Build the train/test split for a single label column ('edinb10_1trim').
# NOTE(review): `get_Xy` is not defined or imported in any visible cell —
# presumably it lives in a deleted cell or in the wearables scripts; confirm
# its origin so this cell survives Restart & Run All.
X_train, y_train, X_test, y_test = get_Xy(data, md, label='edinb10_1trim')

In [None]:
def md_experiment(data, md, model='RFts'):
    '''
    Walk over every variable of interest in `voi` and build the train/test
    split for each categorical label.

    The modelling step is still a stub: nothing is fitted or evaluated yet,
    so `model` is accepted but not consumed.

    Arguments:
      data: forwarded unchanged to `get_Xy`
      md: forwarded unchanged to `get_Xy`
      model (str): intended values are RFts (short for RF Time Series), kNN

    Returns:
      None
    '''
    # Kept for the pending cross-validation step (see TODO below).
    from sklearn.model_selection import KFold

    for label, (_, var_type) in voi.items():
        # clf or reg: rely on the type declared in `voi` rather than on
        # `isinstance(y_train[0], float)`, which does a label-based lookup
        # (KeyError when index label 0 is not in the split) and treats
        # float-coded categorical labels as regression targets.
        if var_type == 'continuous':
            # Regression targets are skipped for now; checking before the
            # `get_Xy` call avoids building a split that would be discarded.
            continue

        X_train, y_train, X_test, y_test = get_Xy(data, md, label=label)
        # TODO: fit and evaluate `model` on this split (e.g. with KFold);
        # the original cell stopped at an empty `else:` branch.

In [98]:
# NOTE(review): this import sits outside the notebook's first (imports) cell;
# consider moving it there so dependencies are visible up front.
from sklearn.preprocessing import LabelEncoder
# Encoder instance; it is not fitted or used in any visible cell.
le = LabelEncoder()

In [96]:
# Stack the train and test labels into one Series for inspection.
# The original expression (`pd.oney_train.append(...)`) was garbled and raises
# AttributeError; `pd.concat` also replaces `Series.append`, which was removed
# in pandas 2.0.
pd.concat([y_train, y_test])

0       1.0
1       1.0
2       1.0
3       1.0
4       1.0
       ... 
2351    1.0
2488    1.0
2556    1.0
2577    1.0
2613    1.0
Name: edinb10_1trim, Length: 2618, dtype: object

In [99]:
# Show the column names available in the metadata frame.
md.columns

Index(['record_id', 'age_enroll', 'marital', 'gestage_by', 'insur',
       'ethnicity', 'race', 'bmi_1vis', 'prior_ptb_all', 'fullterm_births',
       ...
       'edinb1_1trim', 'edinb2_1trim', 'edinb3_1trim', 'edinb4_1trim',
       'edinb5_1trim', 'edinb6_1trim', 'edinb7_1trim', 'edinb8_1trim',
       'edinb9_1trim', 'edinb10_1trim'],
      dtype='object', length=104)