In [1]:
import obs_data_sets
import obs_clinic_migration_preprocessing
import obs_clinic_migration
import pandas as pd
import numpy as np
import re

# this is a known issue https://github.com/pandas-dev/pandas/issues/9784
# need to use older version of pandas due to compatibility with Access;
# on that older version the "no limit" value is spelled -1 instead of None:
# pd.set_option('display.max_colwidth', -1)

# None disables truncation of cell contents when frames are displayed.
pd.set_option('display.max_colwidth', None) # future warning with home computer
# Show at most 10 columns; wider frames are elided with '...' in the output.
pd.set_option('display.max_columns', 10)

In [2]:
# Inclusion Exclusion Criteria

In [3]:
## Inclusion Exclusion Criteria - Preprocessing

In [4]:
# Normalise the informed-consent date to 'YYYY-MM-DD' strings.
obs_data_sets.rave_clinic.loc[:, 'INFORMED_CONSENT_DT'] = pd.to_datetime(
    obs_data_sets.rave_clinic['INFORMED_CONSENT_DT']
).dt.strftime('%Y-%m-%d')

# Join the two previous-OBS-ID columns into one "ID1, ID2" string column.
# Missing values become the literal text 'nan' through astype(str), so the
# replacements below strip a trailing ', nan' (second ID missing).
#
# The result is assigned back to the frame instead of calling
# Series.replace(..., inplace=True) on df[col]: that chained in-place form
# operates on a temporary under pandas Copy-on-Write and leaves the frame
# unchanged.
#
# NOTE(review): a row with only PREV_OBS_ID_1 missing keeps 'nan, <ID2>';
# confirm that combination cannot occur in the source data.
obs_data_sets.rave_clinic['PREV_OBS_ID_COMBINED'] = (
    (
        obs_data_sets.rave_clinic['PREV_OBS_ID_1'].astype(str)
        + ", "
        + obs_data_sets.rave_clinic['PREV_OBS_ID_2'].astype(str)
    )
    .replace(', nan', '', regex=True)
    # Kept for parity with the original cell; the first replacement already
    # turns 'nan, nan' into 'nan'.
    .replace('nan, nan', 'nan', regex=True)
)

In [5]:
## Inclusion Exclusion Criteria - Processing

In [6]:
# Mapping for the inclusion/exclusion form: keys are RAVE column names,
# values are the REDCap field names they are converted to.
inc_excl_dict = {
    'INFORMED_CONSENT_DT': 'main_icf_date', 

    'INC1': 'incl_main_ga', 
    'INC2': 'incl_main_eng', 
    'INC3' : 'incl_main_age', 
    'INC4':'incl_main_icf', 
    'INC5': 'incl_main_del', 
    'INC6' : 'incl_main_ante', 
    'EXC1': 'excl_main_nvf', 
    'EXC2': 'excl_main_noif',
    # NOTE(review): 'EX3' breaks the 'EXC<n>' pattern of the two keys above;
    # confirm this is the real RAVE column name and not a typo for 'EXC3'.
    'EX3': 'excl_main_abn',
    'ENROL_OBS_PREV_PREG_NY': 'main_prev_obs_yn',
    # Derived column built in the preprocessing cell above.
    'PREV_OBS_ID_COMBINED': 'main_prev_obs_id'
 }
# Second argument is 0 here; the repeating sections later in the notebook pass
# the number of numbered column sets instead (e.g. 12), so 0 presumably means
# "not a repeating form" -- TODO confirm in obs_clinic_migration.
# No master_df is passed, so RedcapConv falls back to its default frame.
inc_excl = obs_clinic_migration.RedcapConv(inc_excl_dict, 0)

In [7]:
## Inclusion Exclusion Criteria - Post-processing

In [8]:
# Constant fields added to every converted row.
# NOTE(review): 1 and 2 look like REDCap choice codes (2 is presumably the
# "Complete" form status) -- confirm against the REDCap data dictionary.
inc_excl.data['incl_excl_entry_type'] = 1
inc_excl.data['subject_id_complete'] = 2

In [9]:
## Inclusion Exclusion Criteria - Compare Data

In [10]:
# Compare the converted RAVE data with the REDCap export held in
# obs_data_sets.redcap_clinic. The captured output is only a header row, which
# presumably means no differing rows were found -- confirm against
# compare_conv_dde.
inc_excl.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,subject_id_complete,incl_excl_entry_type,main_icf_date,incl_main_ga,...,excl_main_noif,excl_main_abn,main_prev_obs_yn,main_prev_obs_id,Source


In [11]:
## Inclusion Exclusion Criteria - Save Data

In [12]:
# prep_imp is given the REDCap event name and the form's "<form>_complete"
# field (presumably to add the import bookkeeping columns -- confirm in
# obs_clinic_migration); the converted table is then written to CSV without
# the DataFrame index.
inc_excl.prep_imp('baseline_arm_1', 'inclusion_exclusion_criteria_complete')
inc_excl.data.to_csv('../data/processed/inc_excl.csv', index = False)

In [13]:
# Inclusion Exclusion Criteria

In [14]:
## Inclusion Exclusion Criteria - Preprocessing

In [15]:
# NOTE(review): this cell repeats the Inclusion/Exclusion preprocessing cell
# earlier in the notebook; one of the two copies can probably be removed.

# Normalise the informed-consent date to 'YYYY-MM-DD' strings.
obs_data_sets.rave_clinic.loc[:, 'INFORMED_CONSENT_DT'] = pd.to_datetime(
    obs_data_sets.rave_clinic['INFORMED_CONSENT_DT']
).dt.strftime('%Y-%m-%d')

# Join the two previous-OBS-ID columns into one "ID1, ID2" string column.
# Missing values become the literal text 'nan' through astype(str), so the
# replacements below strip a trailing ', nan' (second ID missing).
#
# The result is assigned back to the frame instead of calling
# Series.replace(..., inplace=True) on df[col]: that chained in-place form
# operates on a temporary under pandas Copy-on-Write and leaves the frame
# unchanged.
obs_data_sets.rave_clinic['PREV_OBS_ID_COMBINED'] = (
    (
        obs_data_sets.rave_clinic['PREV_OBS_ID_1'].astype(str)
        + ", "
        + obs_data_sets.rave_clinic['PREV_OBS_ID_2'].astype(str)
    )
    .replace(', nan', '', regex=True)
    # Kept for parity with the original cell; the first replacement already
    # turns 'nan, nan' into 'nan'.
    .replace('nan, nan', 'nan', regex=True)
)

In [16]:
## Inclusion Exclusion Criteria - Processing

In [17]:
# NOTE(review): same mapping as the inc_excl_dict cell earlier in the notebook
# (duplicated section).
# Keys are RAVE column names, values are the REDCap field names.
inc_excl_dict = {
    'INFORMED_CONSENT_DT': 'main_icf_date', 
    'INC1': 'incl_main_ga', 
    'INC2': 'incl_main_eng', 
    'INC3' : 'incl_main_age', 
    'INC4':'incl_main_icf', 
    'INC5': 'incl_main_del', 
    'INC6' : 'incl_main_ante', 
    'EXC1': 'excl_main_nvf', 
    'EXC2': 'excl_main_noif',
    # NOTE(review): 'EX3' breaks the 'EXC<n>' pattern of the two keys above;
    # confirm this is the real RAVE column name and not a typo for 'EXC3'.
    'EX3': 'excl_main_abn',
    'ENROL_OBS_PREV_PREG_NY': 'main_prev_obs_yn',
    # Derived column built in the preprocessing cell above.
    'PREV_OBS_ID_COMBINED': 'main_prev_obs_id'
 }
# 0 = presumably "not a repeating form" (TODO confirm); no master_df is
# passed, so RedcapConv uses its default frame.
inc_excl = obs_clinic_migration.RedcapConv(inc_excl_dict, 0)

In [18]:
## Inclusion Exclusion Criteria - Post-processing

In [19]:
# Constant fields added to every converted row.
# NOTE(review): 1 and 2 look like REDCap choice codes (2 is presumably the
# "Complete" form status) -- confirm against the REDCap data dictionary.
inc_excl.data['incl_excl_entry_type'] = 1
inc_excl.data['subject_id_complete'] = 2

In [20]:
## Inclusion Exclusion Criteria - Compare Data

In [21]:
# Compare the converted RAVE data with the REDCap export held in
# obs_data_sets.redcap_clinic (same call as in the earlier, duplicated
# section).
inc_excl.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,subject_id_complete,incl_excl_entry_type,main_icf_date,incl_main_ga,...,excl_main_noif,excl_main_abn,main_prev_obs_yn,main_prev_obs_id,Source


In [22]:
## Inclusion Exclusion Criteria - Prepare and Save Data

In [23]:
# prep_imp is given the REDCap event name and the form's "<form>_complete"
# field; the converted table is then written to CSV without the index.
# NOTE(review): this writes the same file as the earlier, duplicated section.
inc_excl.prep_imp('baseline_arm_1', 'inclusion_exclusion_criteria_complete')
inc_excl.data.to_csv('../data/processed/inc_excl.csv', index = False)

In [24]:
# Baseline Assessment: Demographics

In [25]:
## Baseline Assessment: Demographics - Preprocessing

In [26]:
# Dates: rewrite both date columns as 'YYYY-MM-DD' strings.
for date_col in ('BASE_ASSESS_DT', 'DOB'):
    obs_data_sets.rave_clinic.loc[:, date_col] = pd.to_datetime(
        obs_data_sets.rave_clinic[date_col]
    ).dt.strftime('%Y-%m-%d')

# "Unknown" flags: RAVE stores '0'/'1', which are relabelled as
# 'Known'/'Unknown'.
known_unknown_labels = {'0': 'Known', '1': 'Unknown'}
for flag_col in ('HEIGHT_UNK', 'PRE_PREG_WT_UNK'):
    obs_data_sets.rave_clinic[flag_col] = (
        obs_data_sets.rave_clinic[flag_col].replace(known_unknown_labels)
    )

# Unit selectors: RAVE stores '1'/'2', which are relabelled as
# 'Metric'/'Imperial'.
unit_labels = {'1': 'Metric', '2': 'Imperial'}
for unit_col in ('PRE_PREGNANT_WT_STD_UN', 'HEIGHT_STD_UN'):
    obs_data_sets.rave_clinic[unit_col] = (
        obs_data_sets.rave_clinic[unit_col].replace(unit_labels)
    )

In [27]:
## Baseline Assessment: Demographics - Processing

In [28]:
# Mapping for the demographics part of the baseline assessment: keys are RAVE
# column names, values are the REDCap field names.
base_dem_dict = {
    'BASE_ASSESS_DT': 'base_assess_date', 
    'DOB': 'base_dem_dob', 
    'HEIGHT_UNK': 'base_dem_ht_unk',
    'HEIGHT_STD_UN' : 'base_dem_ht_unit',
    'HEIGHT_STD': 'base_dem_ht_met', 

    'PRE_PREG_WT_UNK' : 'base_dem_prepreg_wt_unk',
    'PRE_PREGNANT_WT_STD_UN': 'base_dem_prepreg_wt_unit',
    # Alternative targets that are currently disabled:
    #'PRE_PREGNANT_WT_STD_UN': 'base_dem_ht_wt_unit',
    'PRE_PREGNANT_WT_STD': 'base_dem_prepreg_wt_met',
    #'PRE_PREGNANT_WT_imp': 'base_dem_prepreg_wt_imp',
}
# 0 = presumably "not a repeating form" (TODO confirm). The preprocessed frame
# is passed explicitly as master_df.
base_dem = obs_clinic_migration.RedcapConv(base_dem_dict, 0, master_df = obs_data_sets.rave_clinic)

In [29]:
## Baseline Assessment: Demographics - Post-processing

In [30]:
# Clean up missing values in the converted table; the exact rule is defined
# by RedcapConv.remove_na (not visible here).
base_dem.remove_na()

In [31]:
## Baseline Assessment: Demographics - Compare Data

In [32]:
# Compare the converted demographics with the REDCap export held in
# obs_data_sets.redcap_clinic.
base_dem.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,base_assess_date,base_dem_dob,base_dem_ht_unk,base_dem_ht_unit,base_dem_ht_met,base_dem_prepreg_wt_unk,base_dem_prepreg_wt_unit,base_dem_prepreg_wt_met,Source


In [33]:
## Baseline Assessment: Demographics - Prepare and Save Data

In [34]:
# 'baseline_assessmen_arm_1' is used as the event name by every baseline
# section of this notebook; the spelling is presumably REDCap's own truncated
# event name rather than a typo -- do not "fix" it without checking the
# project.
base_dem.prep_imp('baseline_assessmen_arm_1', 'baseline_assessment_complete')
# Write the converted table to CSV without the DataFrame index.
base_dem.data.to_csv('../data/processed/base_dem.csv', index = False)

In [35]:
# Baseline Assessment: Current Pregnancy

In [36]:
## Baseline Assessment: Current Pregnancy - Preprocessing

In [37]:
# Normalise the final estimated due date to 'YYYY-MM-DD' strings.
obs_data_sets.rave_clinic.loc[:, 'FINAL_EDD_DT'] = pd.to_datetime(
    obs_data_sets.rave_clinic['FINAL_EDD_DT']
).dt.strftime('%Y-%m-%d')

# create dummy columns from RAVE's 'EDD_DETERMINED' to match REDCap
# dtype=int is explicit because pandas >= 2.0 returns boolean dummies by
# default, and the integer 0/1 replacements below do not match booleans.
rave_edd_determined = pd.get_dummies(
    obs_data_sets.rave_clinic['EDD_DETERMINED'], dummy_na=True, dtype=int
)
# A missing answer is treated like an explicit 'Unknown'. .get() tolerates
# data in which nobody answered 'Unknown' (no such dummy column).
rave_edd_determined['Unknown combined'] = (
    rave_edd_determined.get('Unknown', 0)
    + rave_edd_determined[np.nan]
)

# 'Unknown combined' becomes an explicit Yes/No; every other dummy becomes
# 'Yes' or missing. Results are assigned back rather than using
# replace(inplace=True) on a column selection, which is a chained in-place
# call and has no effect on the frame under pandas Copy-on-Write.
rave_edd_determined['Unknown combined'] = (
    rave_edd_determined['Unknown combined'].replace(0, 'No')
)
rave_edd_determined = rave_edd_determined.replace({1: 'Yes', 0: np.nan})

# NOTE: this rebinds obs_data_sets.rave_clinic to a new frame, and re-running
# the cell appends the dummy columns a second time.
obs_data_sets.rave_clinic = pd.concat([obs_data_sets.rave_clinic, rave_edd_determined], axis = 1)


# ASSISTED_UNK: missing unless conception was assisted; 'No' for assisted
# conception; 'Yes' when conception was assisted but every specific method
# was answered 'No'.
# The column starts as object dtype so the string labels can be written into
# it without an incompatible-dtype assignment (np.nan replaces the np.NaN
# alias, which was removed in NumPy 2.0).
assisted_method_cols = [
    'INTRAUTERINE_INSEMN_NY',
    'INTRAUTERIN_INSEMN_OVUL_INDUCTN_',
    'OVULN_INDUCTN_NO_IVF_CLOMID_FSH_',
    'INVITROFERTILIZATION',
    'INTRACYTOPLASMICSPERM',
    'DONORSPERM',
    'DONOREGG',
    'SURROGACY',
]
is_assisted = obs_data_sets.rave_clinic['MODEOFCONCEPTION'] == 'Assisted'
no_method_given = (
    obs_data_sets.rave_clinic[assisted_method_cols].eq('No').all(axis=1)
)

obs_data_sets.rave_clinic['ASSISTED_UNK'] = pd.Series(
    np.nan, index=obs_data_sets.rave_clinic.index, dtype=object
)
obs_data_sets.rave_clinic.loc[is_assisted, 'ASSISTED_UNK'] = 'No'
obs_data_sets.rave_clinic.loc[is_assisted & no_method_given, 'ASSISTED_UNK'] = 'Yes'

In [38]:
## Baseline Assessment: Current Pregnancy - Processing

In [39]:
# Mapping for the current-pregnancy part of the baseline assessment: keys are
# RAVE column names (including the derived columns built in the preprocessing
# cell), values are the REDCap field names.
base_curr_dict = {
    'FOLICACID_PRIOR_PREG_NY': 'base_curr_folic_acid',
    'MODEOFCONCEPTION': 'base_curr_mode_conception',
    'INTRAUTERINE_INSEMN_NY': 'base_curr_iui_alone',
    'INTRAUTERIN_INSEMN_OVUL_INDUCTN_': 'base_curr_iui_ovul_noivf',
    'OVULN_INDUCTN_NO_IVF_CLOMID_FSH_': 'base_curr_ovul_ind_no_ivf',
    'INVITROFERTILIZATION': 'base_curr_ivf',
    'INTRACYTOPLASMICSPERM': 'base_curr_ivf_icsi',
    'DONORSPERM': 'base_curr_donor_sperm',
    'DONOREGG': 'base_curr_donor_egg',    
    'SURROGACY': 'base_curr_surrogate',
    # Derived in preprocessing: assisted conception with no method recorded.
    'ASSISTED_UNK': 'base_curr_unk',
#     base_curr_unk
    # The next six keys are the dummy columns created from EDD_DETERMINED.
    'Last menstrual period (LMP)': 'base_curr_edd_lmp',
    '1st trimester ultrasound': 'base_curr_edd_1st_trim_us',
    '2nd trimester ultrasound': 'base_curr_edd_2nd_trim_us',
    'Assisted reproductive technology': 'base_curr_edd_reprod_tech',
    'Obstetrical clinical estimate': 'base_curr_edd_ob_estim',
    'Unknown combined': 'base_curr_edd_unk',
    'FINAL_EDD_DT': 'base_curr_edd',
    'LMP1_DT_DD': 'base_curr_lmp_day',
    'LMP1_DT_MM': 'base_curr_lmp_month',
    'LMP1_DT_YYYY': 'base_curr_lmp_year',
    'GRAVIDA': 'base_curr_gravida',
    'PARITY': 'base_curr_parity',   
}

# master_df must be passed explicitly: preprocessing rebound
# obs_data_sets.rave_clinic to a new frame via pd.concat.
base_curr = obs_clinic_migration.RedcapConv(base_curr_dict, 0, master_df = obs_data_sets.rave_clinic)

Column 'base_curr_mode_conception' has an issue with the variable 'Unknown'.


In [40]:
## Baseline Assessment: Current Pregnancy - Post-processing

In [41]:
# Resolve the value RedcapConv reported an issue with: REDCap has no 'Unknown'
# choice for mode of conception, so that answer is blanked.
# np.nan is used instead of np.NaN, an alias that was removed in NumPy 2.0.
base_curr.change_str(
    {
        'base_curr_mode_conception': {'Unknown': np.nan} # no 'Unknown' option in REDCap
    }
)

1    2115
2     304
Name: base_curr_mode_conception, dtype: int64


In [42]:
## Baseline Assessment: Current Pregnancy - Compare Data

In [43]:
# Compare the converted current-pregnancy data with the REDCap export held in
# obs_data_sets.redcap_clinic.
base_curr.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,base_curr_folic_acid,base_curr_mode_conception,base_curr_iui_alone,base_curr_iui_ovul_noivf,...,base_curr_lmp_month,base_curr_lmp_year,base_curr_gravida,base_curr_parity,Source


In [44]:
## Baseline Assessment: Current Pregnancy - Prepare and Save Data

In [45]:
# Same event name and completion field as the other baseline-assessment
# sections.
base_curr.prep_imp('baseline_assessmen_arm_1', 'baseline_assessment_complete')
# NOTE(review): the file is named 'base_cur.csv' (single 'r') although the
# object is base_curr -- confirm downstream readers expect this name.
base_curr.data.to_csv('../data/processed/base_cur.csv', index = False)

In [46]:
# Baseline Assessment: Obstetrical History

In [47]:
## Baseline Assessment: Obstetrical History - Preprocessing

In [48]:
# RAVE month abbreviations -> full month names.
_RAVE_MONTH_NAMES = {
    'UNK': 'Unknown',
    'JAN': 'January',
    'FEB': 'February',
    'MAR': 'March',
    'APR': 'April',
    'MAY': 'May',
    'JUN': 'June',
    'JUL': 'July',
    'AUG': 'August',
    'SEP': 'September',
    'OCT': 'October',
    'NOV': 'November',
    'DEC': 'December'
}

# Column prefixes of the individual yes/no complication questions.
_OBHX_COMPLICATION_YN_PREFIXES = [
    'GESTATIONAL_HYPERTENSION_NY_', 'PREECLAMPSIA_NY_',
    'HELLPSYNDROME_NY_', 'IUGR_NY_', 'PRETERMLABOUR_NY_',
    'GESTATN_DIABETES_NY_', 'CHOLESTASIS_NY_', 'PLACENTA_PREVIA_NY_',
    'PLACENTAL_ABRUPTION_NY_', 'FETAL_CONGENITAL_ANOM_NY_',
    'FETAL_CHROM_ABNORM_yn_', 'OTHER_COMPLICATION_NY_'
]


def _preprocess_obhx_instance(rave, i):
    """Reshape the obstetrical-history columns of numbered slot ``i`` in place.

    Parameters
    ----------
    rave : pandas.DataFrame
        The RAVE clinic frame. It is modified in place: existing columns are
        overwritten and derived columns are added.
    i : int
        Numeric suffix of the column set to process (e.g. ``FOETUSES_3``).

    Returns
    -------
    None
    """
    sfx = str(i)

    # RAVE representation is a checkbox where checked ('1') is associated with
    # 'Unknown' and unchecked ('0') represents 'Known'.
    # Assigned back rather than replace(inplace=True) on a column selection,
    # which has no effect on the frame under pandas Copy-on-Write.
    weight_unk_col = 'WEIGHT_UNK1_' + sfx
    rave[weight_unk_col] = rave[weight_unk_col].replace(
        {'0': 'Known', '1': 'Unknown'}
    )

    # in rave, 'type of twin' is required even if answer 'Singleton' to fetus
    fetuses = rave['FOETUSES_' + sfx]
    rave.loc[(fetuses != 'Twins') & fetuses.notna(), 'TYPEOFTWIN_' + sfx] = np.nan

    # Rave delivery date is in one column ("DD MON YYYY"); split it in three.
    day_col = 'DELIVERY_DT_DD_' + sfx
    month_col = 'DELIVERY_DT_MM_' + sfx
    year_col = 'DELIVERY_DT_YYYY_' + sfx
    try:
        rave[[day_col, month_col, year_col]] = (
            rave['DELIVERY_DT_' + sfx].str.split(' ', expand = True)
        )
        rave[day_col] = rave[day_col].replace({'UN': 'Unknown'})
        rave[month_col] = rave[month_col].replace(_RAVE_MONTH_NAMES)
    except (KeyError, AttributeError, ValueError):
        # Column missing, not string-typed, or not splitting into exactly three
        # parts (e.g. a slot nobody filled in): fall back to empty columns.
        # object dtype keeps the .str call below valid; a float NaN column
        # would raise there.
        for part_col in (day_col, month_col, year_col):
            rave[part_col] = pd.Series(np.nan, index=rave.index, dtype=object)

    # Year '1900' is recoded as '99'.
    # NOTE(review): presumably RAVE's placeholder for an unknown year -- confirm.
    rave.loc[rave[year_col] == '1900', year_col] = '99'

    # remove leading zeros from day number
    rave[day_col] = rave[day_col].str.lstrip('0')

    # Gestational age: '99' is the "unknown" code. Record the status in a
    # separate column and blank the code itself. Both masks are computed before
    # the age column is overwritten. The literal string 'nan' (not a real NaN)
    # is what the original notebook writes, so it is kept.
    ga_col = 'GESTATION_AGE_WKS_' + sfx
    ga_status_col = 'GESTATION_AGE_WKS_99_' + sfx
    ga_is_unknown = rave[ga_col] == '99'
    ga_is_known = ~ga_is_unknown & rave[ga_col].notna()
    rave[ga_status_col] = 'nan'
    rave.loc[ga_is_unknown, ga_status_col] = 'Unknown'
    rave.loc[ga_is_known, ga_status_col] = 'Known'
    rave.loc[ga_is_unknown, ga_col] = 'nan'

    # fetal chromosome abnormalities does not have a yes/no question:
    # derive one from the abnormality type.
    chrom_col = 'FETAL_CHROM_ABNORM_' + sfx
    chrom_yn_col = 'FETAL_CHROM_ABNORM_yn_' + sfx
    chrom_is_none = rave[chrom_col] == 'None'
    chrom_is_named = rave[chrom_col].isin(
        ['Trisomy 13', 'Trisomy 18', 'Trisomy 21', 'Other']
    )
    rave.loc[chrom_is_none, chrom_yn_col] = 'No'
    rave.loc[chrom_is_named, chrom_yn_col] = 'Yes'
    # 'None' is not an accepted answer in REDCap dictionary
    rave.loc[chrom_is_none, chrom_col] = np.nan

    # if no answer is given for Complications, Other (i.e. Yes, No),
    # RAVE still allows RA to enter string into Other (specify)
    rave.loc[
        rave['SPECIFY_OTHER_' + sfx].notna(), 'OTHER_COMPLICATION_NY_' + sfx
    ] = 'Yes'

    # Any individual complication answered 'Yes' forces the overall
    # complications question to 'Yes'.
    complications_col = 'COMPLICATIONS_NY_' + sfx
    yn_cols = [prefix + sfx for prefix in _OBHX_COMPLICATION_YN_PREFIXES]
    rave.loc[rave[yn_cols].eq('Yes').any(axis=1), complications_col] = 'Yes'

    # redcap data does not force a yes/no answer if answered no to
    # complications; reformat to remove unnecessary column values
    no_complications = rave[complications_col] == 'No'
    for detail_col in yn_cols + ['SPECIFY_FETAL_COGEN_ANOMAL_' + sfx]:
        rave.loc[no_complications, detail_col] = np.nan

    # 'Specify fetal congenital anomaly' had an answer; however 'fetal
    # congenital anomaly' was empty.
    # NOTE(review): this runs after the overall complications flag was derived,
    # so a 'Yes' set here does not propagate to COMPLICATIONS_NY_ -- confirm
    # that is intended.
    rave.loc[
        rave['SPECIFY_FETAL_COGEN_ANOMAL_' + sfx].notna(),
        'FETAL_CONGENITAL_ANOM_NY_' + sfx
    ] = 'Yes'


# Process each of the 12 numbered obstetrical-history column sets.
for i in range(1, 13):
    _preprocess_obhx_instance(obs_data_sets.rave_clinic, i)

In [49]:
## Baseline Assessment: Obstetrical History - Processing

In [50]:
# Mapping for the repeating obstetrical-history form. Keys are RAVE column
# prefixes (the numeric suffix 1..12 is left off), values are the REDCap field
# names.
# NOTE(review): some keys are prefixes of others (e.g. 'GESTATION_AGE_WKS_' vs
# 'GESTATION_AGE_WKS_99_', 'FETAL_CHROM_ABNORM_' vs 'FETAL_CHROM_ABNORM_yn_');
# confirm RedcapConv matches prefix + number exactly.
obhx_dict = {
    'PREGNANCY_STD_': 'obhx_preg_num',
    'BABYNUM_': 'obhx_babynum',
    'FOETUSES_': 'obhx_fetusnum',
    'TYPEOFTWIN_' : 'obhx_typeoftwin',
    'PREGOUTCOME_': 'obhx_pregoutcome',
    'DELIVERY_DT_DD_': 'obhx_del_day',
    'DELIVERY_DT_MM_': 'obhx_del_month',
    'DELIVERY_DT_YYYY_' : 'obhx_del_year',
    'CHILDCURRENTLYALIVE_': 'obhx_alive',
    'SEX_': 'obhx_sex',
    'GESTATION_AGE_WKS_99_':'obhx_ga_wk_status',
    'GESTATION_AGE_WKS_':'obhx_ga_wk',
    'WEIGHT_UNK1_': 'obhx_brthwt_unk',
    'GEST_WT_': 'obhx_brth_wt',
    'GEST_WT_UN_': 'obhx_brth_wt_unit',
    'TYPE_OF_DELIVERY_': 'obhx_delivery_type',
    'COMPLICATIONS_NY_': 'obhx_comp_yn',
    'GESTATIONAL_HYPERTENSION_NY_':'obhx_htn',
    'PREECLAMPSIA_NY_': 'obhx_preeclampsia',
    'HELLPSYNDROME_NY_': 'obhx_hellp',
    'IUGR_NY_': 'obhx_iugr',
    'PRETERMLABOUR_NY_': 'obhx_preterm_labour',
    'GESTATN_DIABETES_NY_':'obhx_gdm',
    'CHOLESTASIS_NY_': 'obhx_cholestasis',
    'PLACENTA_PREVIA_NY_': 'obhx_previa',
    'PLACENTAL_ABRUPTION_NY_': 'obhx_plac_abrupt',
    'FETAL_CONGENITAL_ANOM_NY_': 'obhx_fet_anom',
    'SPECIFY_FETAL_COGEN_ANOMAL_': 'obhx_fet_anom_spec',
    'FETAL_CHROM_ABNORM_yn_': 'obhx_fet_chromo_abn',
    'FETAL_CHROM_ABNORM_': 'obhx_fet_chromo_abn_spec',
    'SPECIFY_FETAL_CHROM_ABN_': 'obhx_fet_chromo_oth_spec',
    'OTHER_COMPLICATION_NY_': 'obhx_comp_oth',
    'SPECIFY_OTHER_': 'obhx_comp_oth_spec'      
}


# 12 = number of numbered column sets per participant (matches range(1, 13)
# in the preprocessing cell). The frame is passed positionally as master_df.
obhx = obs_clinic_migration.RedcapConv(obhx_dict, 12, obs_data_sets.rave_clinic)

Column 'obhx_fetusnum' has an issue with the variable 'Singleton'.
Column 'obhx_fetusnum' has an issue with the variable 'Twins'.
Column 'obhx_fetusnum' has an issue with the variable 'Triplets'.
Column 'obhx_pregoutcome' has an issue with the variable 'Miscarriage (< 13 weeks)'.
Column 'obhx_pregoutcome' has an issue with the variable 'Miscarriage GA unknown'.
Column 'obhx_pregoutcome' has an issue with the variable 'Still-birth (>= 20 weeks)'.


In [51]:
## Baseline Assessment: Obstetrical History - Post-processing

In [52]:
# Rewrite the RAVE labels that RedcapConv reported an issue with so that they
# match the REDCap choice labels (spacing, brackets and the '≥' sign differ).
obhx.change_str(
    {
        'obhx_fetusnum': {
            'Singleton': 'Singleton (1)',
             'Twins': 'Twins (2)',
             'Triplets': 'Triplets (3)'
         },
        'obhx_pregoutcome':{
            'Miscarriage (< 13 weeks)': 'Miscarriage (<13 weeks)',
            'Miscarriage GA unknown': 'Miscarriage (GA unknown)',
            'Still-birth (>= 20 weeks)': 'Stillbirth (≥20 weeks)'
        }
    }
)
# Clean up missing values; the exact rule is defined by RedcapConv.remove_na
# (not visible here).
obhx.remove_na()

1    1524
5    1200
2      51
3       7
Name: obhx_fetusnum, dtype: int64
1    1349
3     774
6     360
5      98
4      57
8      49
2      44
7      41
9      10
Name: obhx_pregoutcome, dtype: int64


In [53]:
## Baseline Assessment: Obstetrical History - Compare Data

In [54]:
# Compare the converted obstetrical history with the REDCap export held in
# obs_data_sets.redcap_clinic.
obhx.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,obhx_preg_num,obhx_babynum,obhx_fetusnum,...,obhx_fet_anom_spec,obhx_fet_chromo_abn_spec,obhx_fet_chromo_oth_spec,obhx_comp_oth_spec,Source


In [55]:
## Baseline Assessment: Obstetrical History - Prepare and Save Data

In [56]:
# Repeating forms pass a third argument to prep_imp, here
# 'obstetrical_history' (presumably the REDCap repeat-instrument name --
# confirm in obs_clinic_migration).
obhx.prep_imp('baseline_assessmen_arm_1', 'obstetrical_history_complete', 'obstetrical_history')
# Write the converted table to CSV without the DataFrame index.
obhx.data.to_csv('../data/processed/obhx.csv', index = False)

In [57]:
# Baseline Assessment: Pre-existing Medical Conditions

In [58]:
## Baseline Assessment: Pre-existing Medical Conditions - Preprocessing

In [59]:
# One-off correction: resolution year '1901' in slot 3 is rewritten to '1900'.
# NOTE(review): presumably a data-entry variant of the unknown-year
# placeholder -- confirm.
resolution_year_col = 'RESOLUTION_YR_YYYY_3'
is_year_1901 = obs_data_sets.rave_clinic[resolution_year_col] == '1901'
obs_data_sets.rave_clinic.loc[is_year_1901, resolution_year_col] = '1900'

# Run the shared date helper for the onset and resolution dates across the 11
# numbered medical-history slots. Each call receives (yes/no column prefix,
# answer value, date column prefix) and returns the updated frame.
for yn_prefix, yn_answer, date_prefix in (
    ('MEDHX_NY_', 'Yes', 'ONSET_YR_'),
    ('CONTINUE_NY_', 'No', 'RESOLUTION_YR_'),
):
    obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(
        obs_data_sets.rave_clinic, yn_prefix, yn_answer, date_prefix, 11
    )

In [60]:
## Baseline Assessment: Pre-existing Medical Conditions - Processing

In [61]:
# Mapping for the repeating pre-existing-medical-conditions form. Keys are
# RAVE column prefixes (numeric suffix left off), values are the REDCap field
# names.
medhx_dict = {
    'MEDHX_NY_': 'medhx_yn',
    'MEDHX_': 'medhx_condn_type',
    'SPECIFY_MEDICAL_CONDN_': 'medhx_condn_spec',
    'ONSET_YR_DD_': 'medhx_strt_day',
    'ONSET_YR_MM_': 'medhx_strt_month',
    'ONSET_YR_YYYY_': 'medhx_strt_year',
    'CONTINUE_NY_': 'medhx_cont_yn',
    'RESOLUTION_YR_DD_': 'medhx_stop_day',
    'RESOLUTION_YR_MM_': 'medhx_stop_month',
    'RESOLUTION_YR_YYYY_': 'medhx_stop_year',
    'COMMENTS1_': 'medhx_comments',
    # The two '*_yn_date_' columns are presumably produced by rave_date_unknown
    # in the preprocessing cell -- confirm.
    'ONSET_YR_yn_date_': 'medhx_onset_date_yn',
    'RESOLUTION_YR_yn_date_': 'medhx_stop_date_yn',
#    'RAVE_medhx_onset_date_yn_': 'medhx_onset_date_yn',
#    'RAVE_medhx_resol_date_yn_': 'medhx_stop_date_yn',             
}

# if making changes to obs_data_sets.rave_clinic, need to pass it back into class instead
# of relying on default
# 11 = number of numbered medical-history column sets.
medhx = obs_clinic_migration.RedcapConv(medhx_dict, 11, master_df = obs_data_sets.rave_clinic)

Column 'medhx_condn_type' has an issue with the variable 'Mental Health - Other'.
Column 'medhx_condn_type' has an issue with the variable 'Diabetes:  Type Unknown'.
Column 'medhx_condn_type' has an issue with the variable 'Diabetes: Diabetes Type II – Insulin'.
Column 'medhx_condn_type' has an issue with the variable 'Diabetes: Diabetes Type II – No Insulin'.


In [62]:
## Baseline Assessment: Pre-existing Medical Conditions - Post-processing

In [63]:
# Rewrite the RAVE condition labels that RedcapConv reported an issue with:
# en dashes become hyphens, the double space is collapsed, and
# 'Mental Health - Other' gets the 'Category: Other' form.
medhx.change_str(
    {
        'medhx_condn_type': {
            'Diabetes: Diabetes Type II – Insulin': 'Diabetes: Diabetes Type II - Insulin',
             'Diabetes: Diabetes Type II – No Insulin': 'Diabetes: Diabetes Type II - No Insulin',
             'Diabetes:  Type Unknown': 'Diabetes: Type Unknown',
             'Mental Health - Other': 'Mental Health: Other',
             # NOTE(review): key and value are identical, so this entry looks
             # like a no-op -- confirm whether a different source label was
             # intended.
             'Neurology: Other': 'Neurology: Other'
        }
    }
)

23    790
15    348
33    237
31    234
20    215
44    208
22    197
30    196
39    186
40    167
16    158
9     104
36    103
24     94
27     81
35     77
8      31
2      30
14     26
13     24
4      22
6      19
42     16
18     16
17     15
32     13
19     11
21     10
37      9
29      9
5       8
43      7
41      6
11      5
12      5
1       4
38      4
26      2
25      1
10      1
Name: medhx_condn_type, dtype: int64


In [64]:
## Baseline Assessment: Pre-existing Medical Conditions - Compare Data

In [65]:
# Compare the converted medical history with the REDCap export held in
# obs_data_sets.redcap_clinic.
medhx.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,medhx_yn,medhx_condn_type,medhx_condn_spec,...,medhx_stop_day,medhx_stop_month,medhx_stop_year,medhx_comments,Source


In [66]:
## Baseline Assessment: Pre-existing Medical Conditions - Prepare and Save Data

In [67]:
# Repeating form: the third prep_imp argument is presumably the REDCap
# repeat-instrument name -- confirm in obs_clinic_migration.
medhx.prep_imp('baseline_assessmen_arm_1', 'preexisting_medical_conditions_complete','preexisting_medical_conditions')
# Write the converted table to CSV without the DataFrame index.
medhx.data.to_csv('../data/processed/medhx.csv', index = False)

In [68]:
# Baseline Assessment: Allergies

In [69]:
## Baseline Assessment: Allergies - Preprocessing

In [70]:
## Baseline Assessment: Allergies - Processing

In [71]:
# Mapping for the allergies part of the baseline assessment: keys are RAVE
# column names, values are the REDCap field names.
base_all_dict = {
    'ANY_ALLERGIES': 'base_all_yn',
    'FOOD_ALLERGY_NY': 'base_all_food_yn',
    'SHELLFISH_YN': 'base_all_shellfish',
    'PEANUTS_YN': 'base_all_peanuts',
    'TREENUTS_YN': 'base_all_tree_nuts',
    'DAIRY_YN': 'base_all_dairy',
    'SOY_YN': 'base_all_soy',
    'FISH_YN': 'base_all_fish',
    'CITRUSFRUITS_NY': 'base_all_citrus_fruit',
    'EGGS_NY': 'base_all_eggs',
    'GLUTEN_NY': 'base_all_gluten',
    'OTHER_FOOD_ALLERGY': 'base_all_food_oth',
    'SPECIFY_OTHER_FOOD_ALLERGY': 'base_all_food_oth_spec',
    'DRUG_ALLERGY_NY': 'base_all_drug_yn',
    'PENICILLIN_NY': 'base_all_penicillin',
    'ERYTHROMYCIN_NY': 'base_all_erythromycin',
    'SULFADRUGS_NY': 'base_all_sulfa_drugs',
    # NOTE(review): every other target starts with 'base_all_'; this one reads
    # 'cbase_all_ephalosporins', which looks like a misplaced 'c'
    # ('base_all_cephalosporins'). Confirm against the REDCap data dictionary
    # before changing it.
    'CEPHALOSPORINS_NY': 'cbase_all_ephalosporins',
    'NSAID_NY': 'base_all_nsaids',
    'TETRACYCLINE_NY': 'base_all_tetracycline',
    'CODEINE_NY': 'base_all_codeine',
    'CONTRAST_DYE_NY': 'base_all_contrast_dye',
    'OTHER_DRUG_ALLERGY_NY': 'base_all_drug_oth',
    'SPECIFY_OTHER_DRUG_ALLERGY': 'base_all_drug_oth_spec',
    'ENVIRONMENT_ALLERGY_NY': 'base_all_enviro_yn',
    'SEASONAL_NY': 'base_all_seasonal',
    'DUST_NY': 'base_all_dust',
    'MOLD_NY': 'base_all_mold',
    'POLLEN_NY': 'base_all_pollen',
    'ANIMALS_YN': 'base_all_animals',
    'BEE_WASP_STINGS_NY': 'base_all_stings',
    'LATEX_TAPE_NY': 'base_all_latex_tape',
    'OTHER_ENVIRON_ALLERGY_NY': 'base_all_enviro_oth',
    'SPECIFY_OTHER_ENVIRON_ALLERGY': 'base_all_enviro_oth_spec'
}

# NOTE(review): no master_df is passed although earlier cells rebind
# obs_data_sets.rave_clinic; per the comment in the medical-conditions cell
# the default may not see those changes. Only raw RAVE columns are mapped
# here, so this is presumably harmless -- confirm.
base_all = obs_clinic_migration.RedcapConv(base_all_dict, 0)

In [72]:
## Baseline Assessment: Allergies - Post-processing

In [73]:
## Baseline Assessment: Allergies - Compare Data

In [74]:
# Compare the converted allergy data with the REDCap export held in
# obs_data_sets.redcap_clinic.
base_all.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,base_all_yn,base_all_food_yn,base_all_shellfish,base_all_peanuts,...,base_all_stings,base_all_latex_tape,base_all_enviro_oth,base_all_enviro_oth_spec,Source


In [75]:
## Baseline Assessment: Allergies - Prepare and Save Data

In [76]:
# Same event name and completion field as the other baseline-assessment
# sections; the converted table is then written to CSV without the index.
base_all.prep_imp('baseline_assessmen_arm_1', 'baseline_assessment_complete')
base_all.data.to_csv('../data/processed/base_all.csv', index = False)

In [77]:
# Baseline Assessment: Surgical History

In [78]:
## Baseline Assessment: Surgical History - Preprocessing

In [79]:
# Run the shared date helper for the 7 numbered surgery slots, keyed on
# SURGERY_NY_ == 'Yes'. It presumably creates the SURGERY_YEAR_DD_/MM_/YYYY_
# and SURGERY_YEAR_yn_date_ columns that surhx_dict maps below -- confirm in
# obs_clinic_migration_preprocessing.
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(obs_data_sets.rave_clinic, 'SURGERY_NY_', 'Yes','SURGERY_YEAR_', 7)

In [80]:
## Baseline Assessment: Surgical History - Processing

In [81]:
# Mapping for the repeating surgical-history form. Keys are RAVE column
# prefixes (numeric suffix left off), values are the REDCap field names.
surhx_dict = {
    'SURGERY_NY_': 'surhx_yn',
    'SURG_PROCEDURE_': 'surhx_procedure',
    'SPECIFY_SURGICAL_PROCEDURE_': 'surhx_proc_spec',
    
    # Derived "date known?" column is used instead of RAVE's PROCEDUR_DT_UNK_.
    'SURGERY_YEAR_yn_date_': 'surhx_proc_date_yn',
    #'PROCEDUR_DT_UNK_': 'surhx_proc_date_yn',
    
    'SURGERY_YEAR_DD_': 'surhx_surg_day',
    'SURGERY_YEAR_MM_': 'surhx_surg_month',
    'SURGERY_YEAR_YYYY_': 'surhx_surg_year',
    'SURG_REASON_': 'surhx_indication',
    'SURG_COMMEN_': 'surhx_comments'        
}
    
# 7 = number of numbered surgery column sets; the preprocessed frame is passed
# explicitly as master_df.
surhx = obs_clinic_migration.RedcapConv(surhx_dict, 7, master_df = obs_data_sets.rave_clinic)

Column 'surhx_procedure' has an issue with the variable 'Other, specify'.


In [82]:
## Baseline Assessment: Surgical History - Post-processing

In [83]:
# Rewrite the one RAVE label RedcapConv reported an issue with:
# 'Other, specify' becomes 'Other'.
surhx.change_str(
        {'surhx_procedure': 
            {'Other, specify': 'Other'},
        }
)

5    1143
2     720
4     294
6     201
7     116
8     115
3      13
1       6
Name: surhx_procedure, dtype: int64


In [84]:
## Baseline Assessment: Surgical History - Compare Data

In [85]:
# Compare the converted surgical history with the REDCap export held in
# obs_data_sets.redcap_clinic.
surhx.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,surhx_yn,surhx_procedure,surhx_proc_spec,...,surhx_surg_month,surhx_surg_year,surhx_indication,surhx_comments,Source


In [86]:
## Baseline Assessment: Surgical History - Prepare and Save Data

In [87]:
# Repeating form: the third prep_imp argument is presumably the REDCap
# repeat-instrument name -- confirm in obs_clinic_migration.
surhx.prep_imp('baseline_assessmen_arm_1', 'surgical_history_complete', 'surgical_history')
# Write the converted table to CSV without the DataFrame index.
surhx.data.to_csv('../data/processed/surhx.csv', index = False)

In [88]:
# Baseline Assessment: Medications taken since the beginning of pregnancy

In [89]:
## Baseline Assessment: Medications taken since the beginning of pregnancy - Preprocessing

In [90]:
## Baseline Assessment: Medications taken since the beginning of pregnancy - Processing

In [91]:
# Mapping for the repeating "medications taken since the beginning of
# pregnancy" form. Keys are RAVE column prefixes (numeric suffix left off),
# values are the REDCap field names.
rxhx_preg_dict = {
    'MEDS_PREG_': 'rxhx_preg_yn', 
    'MED_CATEGORY_': 'rxhx_preg_med_class', 
    'MEDICATION_': 'rxhx_preg_med_spec', 
    'MED_CONTINUE_NY_': 'rxhx_preg_med_cont',    
}
# 8 = number of numbered medication column sets.
# NOTE(review): no master_df is passed although earlier cells rebind
# obs_data_sets.rave_clinic; only raw RAVE columns are mapped here, so the
# default frame is presumably sufficient -- confirm.
rxhx_preg = obs_clinic_migration.RedcapConv(rxhx_preg_dict, 8) 

Column 'rxhx_preg_med_class' has an issue with the variable 'Selective Serotonin reuptake inhibitors (SSRI)'.
Column 'rxhx_preg_med_class' has an issue with the variable 'Antidepressants: other'.


In [92]:
## Baseline Assessment: Medications taken since the beginning of pregnancy - Post-processing

In [93]:
# Rewrite the two RAVE medication-class labels RedcapConv reported an issue
# with; only the capitalisation differs.
rxhx_preg.change_str(
        {'rxhx_preg_med_class': 
            {'Selective Serotonin reuptake inhibitors (SSRI)': 'Selective Serotonin Reuptake Inhibitors (SSRI)',
             'Antidepressants: other' : 'Antidepressants: Other'},
        }
)

8     417
25    323
26    136
2     101
22     83
24     74
5      66
17     64
4      51
7      33
14     24
10     23
3      18
9      13
6      11
19     11
20      8
13      8
12      7
21      5
23      5
1       3
16      1
Name: rxhx_preg_med_class, dtype: int64


In [94]:
## Baseline Assessment: Medications taken since the beginning of pregnancy - Compare Data

In [95]:
# Compare the converted medication data with the REDCap export held in
# obs_data_sets.redcap_clinic.
rxhx_preg.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,rxhx_preg_yn,rxhx_preg_med_class,rxhx_preg_med_spec,rxhx_preg_med_cont,Source


In [96]:
## Baseline Assessment: Medications taken since the beginning of pregnancy - Prepare and Save Data

In [97]:
# Repeating form: the third prep_imp argument is presumably the REDCap
# repeat-instrument name -- confirm in obs_clinic_migration.
rxhx_preg.prep_imp('baseline_assessmen_arm_1', 'medications_taken_since_beginning_of_pregnancy_complete', 'medications_taken_since_beginning_of_pregnancy')
# Write the converted table to CSV without the DataFrame index.
rxhx_preg.data.to_csv('../data/processed/rxhx_preg.csv', index = False)

In [98]:
# Baseline Assessment: Supplements taken since the beginning of pregnancy (1)

In [99]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (1) - Preprocessing

In [100]:
# Supplement codes for which an indicator column must always exist, whether
# or not the code occurs in the data.
_SUPPLEMENT_CODES = (
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '90', '99'
)

for i in range(1, 9):
    # One 0/1 indicator column per supplement code found in this slot, named
    # 'LABELLED_SUPPLEMENT_<code>_<slot>'.
    # dtype=int is explicit because pandas >= 2.0 returns boolean dummies by
    # default, which would mix with the integer 0 fill used below.
    rave_supplement = pd.get_dummies(
        obs_data_sets.rave_clinic['SUPPLEMENT_STD_' + str(i)],
        prefix = 'LABELLED_SUPPLEMENT', dummy_na = False, dtype = int
    )
    rave_supplement = rave_supplement.add_suffix('_' + str(i))

    # Add an all-zero column for every expected code that did not occur.
    for supplement_code in _SUPPLEMENT_CODES:
        col_check = 'LABELLED_SUPPLEMENT_' + supplement_code + '_' + str(i)
        if col_check not in rave_supplement.columns:
            rave_supplement[col_check] = 0

    # Drop indicator columns left over from a previous run of this cell so
    # that re-running does not create duplicate column names (a no-op on a
    # fresh kernel).
    obs_data_sets.rave_clinic = pd.concat(
        [
            obs_data_sets.rave_clinic.drop(
                columns = rave_supplement.columns, errors = 'ignore'
            ),
            rave_supplement
        ],
        axis = 1
    )

    # Free-text "specify" columns: keep the supplement text only where the
    # coded answer is '90' or '99'. Going by the commented-out helper calls
    # this block replaces, '90' is 'Other prenatal vitamin' and '99' is
    # 'Other'.
    supplement_code_col = obs_data_sets.rave_clinic['SUPPLEMENT_STD_' + str(i)]
    supplement_text_col = obs_data_sets.rave_clinic['SUPPLEMENT_' + str(i)]
    obs_data_sets.rave_clinic['LABELLED_SUPPLEMENT_PRENAT_SPEC_' + str(i)] = (
        supplement_text_col.where(supplement_code_col == '90')
    )
    obs_data_sets.rave_clinic['LABELLED_SUPPLEMENT_SPEC_' + str(i)] = (
        supplement_text_col.where(supplement_code_col == '99')
    )



## remove rows which contain all zeros

In [101]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (1) - Processing

In [102]:
# RAVE supplement code -> label; the position in this list (1-based) is the
# REDCap checkbox number used in supp_preg_type___<n>.
_supplement_choices = [
    ('1', 'Preg-vit'),
    ('2', 'Materna'),
    ('3', 'Vitamin D'),
    ('4', 'Omega 3 and/or 6'),
    ('5', 'Calcium'),
    ('6', 'Vitamin B12'),
    ('7', 'Iron'),
    ('8', 'Folic acid'),
    ('9', 'Jamieson Prenatal vitamin'),
    ('10', 'Vitamin C'),
    ('11', 'Probiotics'),
    ('90', 'Other Prenatal Vitamin, specify'),
    ('99', 'Other, specify'),
]

# Indicator column stub (LABELLED_SUPPLEMENT_<code>_) -> REDCap checkbox field.
supp_ant_dict = {
    'LABELLED_SUPPLEMENT_' + code + '_': 'supp_preg_type___' + str(position)
    for position, (code, _label) in enumerate(_supplement_choices, start = 1)
}

supp_ant = obs_clinic_migration.RedcapConv(
    supp_ant_dict,
    8,
    master_df = obs_data_sets.rave_clinic,
    recode_long = False,
)

In [103]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (1) - Post-processing

In [104]:
# NOTE(review): remove_na is defined in obs_clinic_migration; presumably it drops
# converted rows that carry no data (e.g. unused supplement slots) - confirm.
supp_ant.remove_na()

In [105]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (1) - Compare Data

In [106]:
# Compare the converted supplement checkboxes with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
supp_ant.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,supp_preg_type___1,supp_preg_type___2,supp_preg_type___3,...,supp_preg_type___10,supp_preg_type___11,supp_preg_type___12,supp_preg_type___13,Source


In [107]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (1) - Prepare and Save Data

In [108]:
# prep_imp is given the event name, the '<instrument>_complete' field and the
# instrument name; the result is then written to CSV without the index.
# NOTE(review): what prep_imp adds to .data is defined in obs_clinic_migration - confirm there.
supp_ant.prep_imp('baseline_arm_1', 'supplements_taken_since_beginning_of_pregnancy_complete', 'supplements_taken_since_beginning_of_pregnancy')
supp_ant.data.to_csv('../data/processed/supp_ant.csv', index = False)

In [109]:
# Baseline Assessment: Supplements taken since the beginning of pregnancy (2)

In [110]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (2) - Preprocessing

In [111]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (2) - Processing

In [112]:
# Free-text "specify" column stubs built in the supplement preprocessing cell
# -> REDCap text fields.
supp_ant_spec_dict = dict([
    ('LABELLED_SUPPLEMENT_PRENAT_SPEC_', 'supp_preg_oth_previt'),
    ('LABELLED_SUPPLEMENT_SPEC_', 'supp_preg_oth_spec'),
])

# Same 8 repeating slots as the supplement checkboxes.
supp_ant_spec = obs_clinic_migration.RedcapConv(
    supp_ant_spec_dict,
    8,
    master_df = obs_data_sets.rave_clinic,
)

max redcap_repeat_instance = 7; stub_repeat = 8


In [113]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (2) - Post-processing

In [114]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (2) - Compare Data

In [115]:
# Compare the converted "specify" text fields with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
supp_ant_spec.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,supp_preg_oth_previt,supp_preg_oth_spec,Source


In [116]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (2) - Prepare and Save Data

In [117]:
# Same event / complete field / instrument arguments as the supplement checkbox
# export; the "specify" text fields are written to their own CSV without the index.
supp_ant_spec.prep_imp('baseline_arm_1', 'supplements_taken_since_beginning_of_pregnancy_complete', 'supplements_taken_since_beginning_of_pregnancy')
supp_ant_spec.data.to_csv('../data/processed/supp_ant_spec.csv', index = False)

In [118]:
# Baseline Assessment: Supplements taken since the beginning of pregnancy (3)

In [119]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (3) - Preprocessing

In [120]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (3) - Processing

In [121]:
# Single (non-repeating) RAVE column -> REDCap yes/no field.
supp_ant_yn_dict = dict(SUPPLEMENT_PREG_1 = 'supp_preg_yn')

# A repeat count of 0 is used here, as for the other non-repeating conversions.
supp_ant_yn = obs_clinic_migration.RedcapConv(
    supp_ant_yn_dict,
    0,
    master_df = obs_data_sets.rave_clinic,
)

In [122]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (3) - Post-processing

In [123]:
# NOTE(review): remove_na is defined in obs_clinic_migration; presumably it drops
# participants with no supp_preg_yn answer - confirm.
supp_ant_yn.remove_na()

In [124]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (3) - Compare Data

In [125]:
# Compare the converted supp_preg_yn values with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
supp_ant_yn.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,supp_preg_yn,Source


In [126]:
## Baseline Assessment: Supplements taken since the beginning of pregnancy (3) - Prepare and Save Data

In [127]:
# Same event / complete field / instrument arguments as the other two supplement
# exports; the yes/no field is written to its own CSV without the index.
supp_ant_yn.prep_imp('baseline_arm_1', 'supplements_taken_since_beginning_of_pregnancy_complete', 'supplements_taken_since_beginning_of_pregnancy')
supp_ant_yn.data.to_csv('../data/processed/supp_ant_yn.csv', index = False)

In [128]:
# Antenatal Visits: Visit Data

In [129]:
## Antenatal Visits: Visit Data - Preprocessing

In [130]:
# Visit 2 holds an implausible weight of 2770.6 pounds; treat it as missing.
obs_data_sets.rave_clinic.loc[
    obs_data_sets.rave_clinic['ANTENATAL_WEIGHT_2'] == '2770.60',
    'ANTENATAL_WEIGHT_2'
] = np.nan

# Coded values -> labels, keyed by column stub.
# NOTE(review): '0' -> 'Yes' is kept from the original; presumably WT_NR / BP_ND
# are "not recorded / not done" flags, so 0 means the value was taken - confirm.
_visit_recodes = {
    'WT_NR_': {'0': 'Yes', '1': 'No'},
    'ANTENATAL_WEIGHT_UN_': {'1': 'kg', '2': 'lbs'},
    'BP_ND_': {'0': 'Yes', '1': 'No'},
}

for i in range(1, 17):
    visit = str(i)
    date_col = 'ANTENAT_VISIT_DT_' + visit
    weight_col = 'ANTENATAL_WEIGHT_' + visit

    # Plain column assignment (not .loc[:, col] = ...) so the formatted strings
    # replace the column instead of being cast back to its previous dtype.
    obs_data_sets.rave_clinic[date_col] = pd.to_datetime(
        obs_data_sets.rave_clinic[date_col]
    ).dt.strftime('%Y-%m-%d')

    # Format recorded weights with two decimals; missing weights stay missing.
    # NOTE(review): '{:,.2f}' also inserts a thousands separator for values
    # >= 1000; the original kept '{0:g}' as a commented-out alternative.
    has_weight = obs_data_sets.rave_clinic[weight_col].notna()
    obs_data_sets.rave_clinic.loc[has_weight, weight_col] = (
        obs_data_sets.rave_clinic.loc[has_weight, weight_col]
        .astype(float)
        .map('{:,.2f}'.format)
    )

    # Assign the result back instead of calling replace(..., inplace = True) on
    # df[col]: the chained inplace form does not update the frame under pandas
    # Copy-on-Write.
    for stub, labels in _visit_recodes.items():
        obs_data_sets.rave_clinic[stub + visit] = (
            obs_data_sets.rave_clinic[stub + visit].replace(labels)
        )

     
        

In [131]:
## Antenatal Visits: Visit Data - Processing

In [132]:
# RAVE per-visit column stub -> REDCap field, in import order.
_ante_visit_pairs = [
    ('ANTENAT_VISIT_DT_', 'ante_visit_date'),
    ('WT_NR_', 'ante_visit_wt_yn'),
    ('ANTENATAL_WEIGHT_', 'ante_visit_wt_kg'),
    ('ANTENATAL_WEIGHT_UN_', 'ante_visit_wt_unit'),
    ('BP_ND_', 'ante_visit_bp_yn'),
    ('ANTENAT_SYSTOLIC_', 'ante_visit_sbp'),
    ('ANTENATAL_DIASTOLIC_', 'ante_visit_dbp'),
    ('URINE_DIPSTICK_', 'ante_visit_urine_prot'),
]
ante_visit_dict = dict(_ante_visit_pairs)

# 16 repeating antenatal visits, matching the preprocessing loop.
ante_visit = obs_clinic_migration.RedcapConv(
    ante_visit_dict,
    16,
    master_df = obs_data_sets.rave_clinic,
)

In [133]:
## Antenatal Visits: Visit Data - Post-processing

In [134]:
## Antenatal Visits: Visit Data - Compare Data

In [135]:
# Compare the converted antenatal visit records with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
ante_visit.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,ante_visit_date,ante_visit_wt_yn,ante_visit_wt_kg,...,ante_visit_bp_yn,ante_visit_sbp,ante_visit_dbp,ante_visit_urine_prot,Source


In [136]:
## Antenatal Visits: Visit Data - Prepare and Save Data

In [137]:
# prep_imp is given the event name, the '<instrument>_complete' field and the
# instrument name; the result is then written to CSV without the index.
ante_visit.prep_imp('antenatal_arm_1', 'antenatal_visits_complete', 'antenatal_visits')
ante_visit.data.to_csv('../data/processed/ante_visit.csv', index = False)

In [138]:
# Antenatal Visits: Antenatal Health Care Provider

In [139]:
## Antenatal Visits: Antenatal Health Care Provider - Preprocessing

In [140]:
# rave requests health care provider for each individual antenatal visit while
# there is only one overall health care provider

hcps = [
    'OBSTETRICIAN', 'FAM_PHYSICIAN', 'MIDWIFE', 'NURSE_PRACTITIONER', 'OTHER1'
]
visit_numbers = range(1, 17)

# Join the non-missing "other, specify" texts of all 16 visits with '; '
# (empty string when none was entered). Missing values are skipped up front
# rather than stringified to 'nan' and stripped with regexes afterwards: the old
# replace('nan', '') also removed "nan" inside real answers such as "pregnancy".
other_spec = obs_data_sets.rave_clinic[
    ['SPECIFY1_OTHER_' + str(i) for i in visit_numbers]
]
obs_data_sets.rave_clinic['SPECIFY1_OTHER_HCP'] = [
    '; '.join(str(value) for value in visit_values if pd.notna(value))
    for visit_values in other_spec.itertuples(index = False, name = None)
]

# TODO (from original): need to transfer 'SPECIFY' column

# A provider type is '1' when any of the 16 visits answered 'Yes' for it, else '0'.
for hcp in hcps:
    seen_at_any_visit = (
        obs_data_sets.rave_clinic[[hcp + '_NY_' + str(i) for i in visit_numbers]]
        .eq('Yes')
        .any(axis = 1)
    )
    obs_data_sets.rave_clinic[hcp] = seen_at_any_visit.map({True: '1', False: '0'})
    

In [141]:
## Antenatal Visits: Antenatal Health Care Provider - Processing

In [142]:
# Provider flag columns created in the preprocessing cell, in checkbox order:
# the n-th column maps to ante_hcp___<n>.
_provider_columns = (
    'OBSTETRICIAN',
    'FAM_PHYSICIAN',
    'MIDWIFE',
    'NURSE_PRACTITIONER',
    'OTHER1',
)
ante_hcp_dict = {
    column: 'ante_hcp___' + str(choice)
    for choice, column in enumerate(_provider_columns, start = 1)
}
# Combined "other, specify" text.
ante_hcp_dict['SPECIFY1_OTHER_HCP'] = 'ante_hcp_oth_spec'

ante_hcp = obs_clinic_migration.RedcapConv(
    ante_hcp_dict,
    0,
    master_df = obs_data_sets.rave_clinic,
    recode_long = False,
)

In [143]:
## Antenatal Visits: Antenatal Health Care Provider - Post-processing

In [144]:
# NOTE(review): remove_na is defined in obs_clinic_migration; presumably it drops
# rows without any provider data - confirm.
ante_hcp.remove_na()

In [145]:
## Antenatal Visits: Antenatal Health Care Provider - Compare Data

In [146]:
# Compare the converted provider flags with obs_data_sets.redcap_clinic.
# The saved output lists rows from both sources (Source = REDCapDDE / RaveConverted).
# NOTE(review): the listed pairs look identical in the visible columns; check
# whether ante_hcp_oth_spec differs only as '' versus missing.
ante_hcp.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,ante_hcp___1,ante_hcp___2,ante_hcp___3,ante_hcp___4,ante_hcp___5,ante_hcp_oth_spec,Source
0,91200010,1,0,0,0,0,,REDCapDDE
39,91200010,1,0,0,0,0,,RaveConverted
1,91200080,1,0,0,0,0,,REDCapDDE
40,91200080,1,0,0,0,0,,RaveConverted
2,91200119,1,0,0,0,0,,REDCapDDE
...,...,...,...,...,...,...,...,...
76,91202144,1,0,0,0,0,,RaveConverted
37,91202234,1,0,0,0,0,,REDCapDDE
77,91202234,1,0,0,0,0,,RaveConverted
38,91202343,1,0,0,0,0,,REDCapDDE


In [147]:
## Antenatal Visits: Antenatal Health Care Provider - Prepare and Save Data

In [148]:
# Unlike the repeating instruments, prep_imp is called here with only the event
# name and the '<instrument>_complete' field (no instrument name); the result is
# then written to CSV without the index.
ante_hcp.prep_imp('antenatal_arm_1', 'antenatal_health_care_provider_complete')
ante_hcp.data.to_csv('../data/processed/ante_hcp.csv', index = False)

In [149]:
# Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy

In [150]:
## Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy - Preprocessing

In [151]:
# need to use rave_date_unknown
# Arguments: frame, trigger column stub, trigger answer, date column stub, number
# of repeats (7 here).
# NOTE(review): presumably creates the '<date stub>yn_date_', 'DD_', 'MM_' and
# 'YYYY_' columns that ante_cond_dict reads - confirm in
# obs_clinic_migration_preprocessing.
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(obs_data_sets.rave_clinic, 'DX_MED_CONDTN_SIN_BASELIN_NY_', 'Yes','ONSET_DT_', 7)
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(obs_data_sets.rave_clinic, 'CONTINUING_YN_', 'No','RESOLUTION_DT_', 7)     

In [152]:
## Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy - Processing

In [153]:
# RAVE condition column stub -> REDCap field, in import order.
_ante_cond_pairs = [
    ('DX_MED_CONDTN_SIN_BASELIN_NY_', 'ante_cond_dx_yn'),
    ('MED_CONDN_DX_PREG_', 'ante_cond_class'),
    ('SPECIFYABNCOND_', 'ante_cond_spec'),
    # onset date parts
    ('ONSET_DT_yn_date_', 'ante_cond_onset_date_yn'),
    ('ONSET_DT_DD_', 'ante_cond_onset_day'),
    ('ONSET_DT_MM_', 'ante_cond_onset_month'),
    ('ONSET_DT_YYYY_', 'ante_cond_onset_year'),
    ('CONTINUING_YN_', 'ante_cond_ongoing'),
    # resolution date parts
    ('RESOLUTION_DT_yn_date_', 'ante_cond_stop_yn'),
    ('RESOLUTION_DT_DD_', 'ante_cond_res_day'),
    ('RESOLUTION_DT_MM_', 'ante_cond_res_month'),
    ('RESOLUTION_DT_YYYY_', 'ante_cond_res_year'),
    ('COMMENTS_', 'ante_cond_comment'),
]
ante_cond_dict = dict(_ante_cond_pairs)

# 7 repeating condition slots, matching the rave_date_unknown calls above.
ante_cond = obs_clinic_migration.RedcapConv(
    ante_cond_dict,
    7,
    master_df = obs_data_sets.rave_clinic,
)

Column 'ante_cond_class' has an issue with the variable 'Diabetes:  Type Unknown'.
Column 'ante_cond_class' has an issue with the variable 'Diabetes: Diabetes Type II – No Insulin'.
Column 'ante_cond_class' has an issue with the variable 'Diabetes: Diabetes Type II – Insulin'.


In [154]:
## Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy - Post-processing

In [155]:
# Labels that RedcapConv reported as having an issue in 'ante_cond_class',
# mapped to their corrected spelling (en dash -> hyphen, double space -> single).
_ante_cond_class_fixes = {
    'Diabetes: Diabetes Type II – Insulin': 'Diabetes: Diabetes Type II - Insulin',
    'Diabetes: Diabetes Type II – No Insulin': 'Diabetes: Diabetes Type II - No Insulin',
    'Diabetes:  Type Unknown': 'Diabetes: Type Unknown',
}
ante_cond.change_str({'ante_cond_class': _ante_cond_class_fixes})

12    142
8     126
23    125
19    103
45     74
32     58
14     44
38     43
22     31
44     24
29     23
21     22
41     19
15     16
24     10
33      7
35      4
37      4
28      3
13      2
20      2
11      2
10      1
2       1
9       1
7       1
42      1
3       1
Name: ante_cond_class, dtype: int64


In [156]:
## Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy - Compare Data

In [157]:
# Compare the converted condition records with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
ante_cond.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,ante_cond_dx_yn,ante_cond_class,ante_cond_spec,...,ante_cond_res_day,ante_cond_res_month,ante_cond_res_year,ante_cond_comment,Source


In [158]:
## Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy - Prepare and Save Data

In [159]:
# prep_imp is given the event name, the '<instrument>_complete' field and the
# instrument name; the result is then written to CSV without the index.
ante_cond.prep_imp('antenatal_arm_1', 'conditions_diagnosed_during_pregnancy_complete', 'conditions_diagnosed_during_pregnancy')
ante_cond.data.to_csv('../data/processed/ante_cond.csv', index = False)

In [160]:
# Antenatal Visits: Surgical procedures performed during pregnancy

In [161]:
## Antenatal Visits: Surgical procedures performed during pregnancy - Preprocessing

In [162]:
# Arguments: frame, trigger column stub, trigger answer, date column stub, number
# of repeats (2 here).
# NOTE(review): presumably creates the 'PROCEDURE_DT_yn_date_', 'DD_', 'MM_' and
# 'YYYY_' columns that ante_sur_dict reads - confirm in
# obs_clinic_migration_preprocessing.
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(obs_data_sets.rave_clinic, 'SURG_PROCS_AFTER_BASLIN_NY_', 'Yes', 'PROCEDURE_DT_', 2) 
    



In [163]:
## Antenatal Visits: Surgical procedures performed during pregnancy - Processing

In [164]:
# RAVE surgical-procedure column stub -> REDCap field, in import order.
_ante_sur_pairs = [
    ('SURG_PROCS_AFTER_BASLIN_NY_', 'ante_sx_yn'),
    ('SURG_PROC_PERF_DURN_PREG_', 'ante_sx_class'),
    ('SPECIFY_PROCEDURE_', 'ante_sx_spec'),
    ('PROCEDURE_DT_DD_', 'ante_sx_day'),
    ('PROCEDURE_DT_MM_', 'ante_sx_month'),
    ('PROCEDURE_DT_YYYY_', 'ante_sx_year'),
    ('INDICATION_', 'ante_sx_indication'),
    ('COMMENT1_', 'ante_sx_comment'),
    ('PROCEDURE_DT_yn_date_', 'ante_sx_date_yn'),
]
ante_sur_dict = dict(_ante_sur_pairs)

# 2 repeating procedure slots, matching the rave_date_unknown call above.
ante_sur = obs_clinic_migration.RedcapConv(
    ante_sur_dict,
    2,
    master_df = obs_data_sets.rave_clinic,
)

Column 'ante_sx_class' has an issue with the variable 'Other, specify'.


In [165]:
## Antenatal Visits: Surgical procedures performed during pregnancy - Post-processing

In [166]:
# 'Other, specify' was reported by RedcapConv as having an issue in
# 'ante_sx_class'; map it to 'Other'.
_ante_sx_class_fixes = {'Other, specify': 'Other'}
ante_sur.change_str({'ante_sx_class': _ante_sx_class_fixes})

5    66
4    10
8     3
2     2
6     1
Name: ante_sx_class, dtype: int64


In [167]:
## Antenatal Visits: Surgical procedures performed during pregnancy - Compare Data

In [168]:
# Compare the converted surgical-procedure records with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
ante_sur.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,ante_sx_yn,ante_sx_class,ante_sx_spec,...,ante_sx_month,ante_sx_year,ante_sx_indication,ante_sx_comment,Source


In [169]:
## Antenatal Visits: Surgical procedures performed during pregnancy - Prepare and Save Data

In [170]:
# prep_imp is given the event name, the '<instrument>_complete' field and the
# instrument name; the result is then written to CSV without the index.
ante_sur.prep_imp('antenatal_arm_1', 'surgical_procedures_performed_during_pregnancy_complete', 'surgical_procedures_performed_during_pregnancy')    
ante_sur.data.to_csv('../data/processed/ante_sur.csv', index = False)

In [171]:
# Antenatal Visits: Medications

In [172]:
## Antenatal Visits: Medications - Preprocessing

In [173]:
# Stop and onset dates for the 12 medication slots.
# NOTE(review): rave_date_unknown presumably derives the '<stub>yn_date_' / DD /
# MM / YYYY columns - confirm in obs_clinic_migration_preprocessing.
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(
    obs_data_sets.rave_clinic, 'MED_ONGOING_NY_', 'No', 'STOP_DT_', 12
)
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.rave_date_unknown(
    obs_data_sets.rave_clinic, 'MEDS_ANTENATAL_NY_', 'Yes', 'MED_ONSET_DT_', 12
)

for i in range(1, 13):
    slot = str(i)
    med_reported = obs_data_sets.rave_clinic['MEDS_ANTENATAL_NY_' + slot] == 'Yes'

    # A reported medication with a missing onset day or month gets 'Unknown'.
    for date_part in ('DD', 'MM'):
        part_col = 'MED_ONSET_DT_' + date_part + '_' + slot
        obs_data_sets.rave_clinic.loc[
            med_reported & obs_data_sets.rave_clinic[part_col].isna(),
            part_col
        ] = 'Unknown'

    # Onset-date flag: missing when no medication was reported, 'No' when the
    # onset year is the '1900' placeholder, otherwise 'Yes'.
    # Built as an object Series so the labels are not written into a float
    # (all-NaN) column, and with np.nan because np.NaN was removed in NumPy 2.0.
    onset_year_is_1900 = obs_data_sets.rave_clinic['MED_ONSET_DT_YYYY_' + slot] == '1900'
    onset_flag = pd.Series(np.nan, index = obs_data_sets.rave_clinic.index, dtype = object)
    onset_flag[med_reported] = 'Yes'
    onset_flag[med_reported & onset_year_is_1900] = 'No'
    obs_data_sets.rave_clinic['MEDS_ONSET_date_ny_' + slot] = onset_flag
    

In [174]:
## Antenatal Visits: Medications - Processing

In [175]:
# RAVE medication column stub -> REDCap field, in import order.
ante_meds_dict = {
    'MEDS_ANTENATAL_NY_': 'ante_med_yn',
    'MEDS_ANTENATAL_': 'ante_med_class',
    'SPECIFY_ANTENAT_MED_': 'ante_med_spe',

    # onset date (flag built in the preprocessing cell as MEDS_ONSET_date_ny_<i>)
    'MEDS_ONSET_date_ny_': 'ante_med_start_date_yn',
    'MED_ONSET_DT_DD_': 'ante_med_start_day',
    'MED_ONSET_DT_MM_': 'ante_med_start_month',
    'MED_ONSET_DT_YYYY_': 'ante_med_start_year',

    # ongoing / stop date
    'MED_ONGOING_NY_': 'ante_med_cont',
    'STOP_DT_yn_date_': 'ante_med_stop_date_yn',
    'STOP_DT_DD_': 'ante_med_stop_day',
    'STOP_DT_MM_': 'ante_med_stop_month',
    'STOP_DT_YYYY_': 'ante_med_stop_year',
}

# Pass the frame by keyword, like every other RedcapConv call in this notebook,
# so the call does not depend on the position of master_df in the signature.
# NOTE(review): assumes the third positional parameter of RedcapConv is
# master_df - confirm in obs_clinic_migration.
ante_meds = obs_clinic_migration.RedcapConv(
    ante_meds_dict, 12, master_df = obs_data_sets.rave_clinic
)

Column 'ante_med_class' has an issue with the variable 'Antidepressants: other'.
Column 'ante_med_class' has an issue with the variable 'Selective Serotonin reuptake inhibitors (SSRI)'.


In [176]:
## Antenatal Visits: Medications - Post-processing

In [177]:
# Labels that RedcapConv reported as having an issue in 'ante_med_class',
# mapped to their corrected capitalisation.
_ante_med_class_fixes = {
    'Selective Serotonin reuptake inhibitors (SSRI)': 'Selective Serotonin Reuptake Inhibitors (SSRI)',
    'Antidepressants: other': 'Antidepressants: Other',
}
ante_meds.change_str({'ante_med_class': _ante_med_class_fixes})

4     200
26    169
19    105
17    102
20     88
25     61
13     43
8      41
22     39
5      37
14     29
2      17
21     12
9      10
3       8
10      6
7       4
15      4
24      3
11      2
6       1
1       1
Name: ante_med_class, dtype: int64


In [178]:
## Antenatal Visits: Medications - Compare Data

In [179]:
# Compare the converted antenatal medication records with obs_data_sets.redcap_clinic.
# NOTE(review): the saved output shows only the column header, which presumably
# means no differing rows were found - confirm.
ante_meds.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,ante_med_yn,ante_med_class,ante_med_spe,...,ante_med_stop_date_yn,ante_med_stop_day,ante_med_stop_month,ante_med_stop_year,Source


In [180]:
## Antenatal Visits: Medications - Prepare and Save Data

In [181]:
# prep_imp is given the event name, the '<instrument>_complete' field and the
# instrument name; the result is then written to CSV without the index.
ante_meds.prep_imp('antenatal_arm_1', 'medications_complete', 'medications')
ante_meds.data.to_csv('../data/processed/ante_meds.csv', index = False)

In [182]:
# Delivery Data: Antepartum (1)

In [183]:
## Delivery Data: Antepartum (1) - Preprocessing

In [184]:
# Strip the run of digits/hyphens that precedes 'B0' in BABYNUMBER.
# Raw string: '\-' in a normal literal is an invalid escape sequence (warns on
# current Python versions); the pattern itself is unchanged.
# The result is assigned back rather than using replace(..., inplace = True) on
# df[col], which does not update the frame under pandas Copy-on-Write.
obs_data_sets.rave_clinic['BABYNUMBER'] = obs_data_sets.rave_clinic['BABYNUMBER'].replace(
    r'[0-9\-]*(?=B0)', '', regex = True
)

# Plain column assignment so the formatted strings replace the column.
obs_data_sets.rave_clinic['ANTEPARTUMBLEED_ONSET_DT'] = pd.to_datetime(
    obs_data_sets.rave_clinic['ANTEPARTUMBLEED_ONSET_DT']
).dt.strftime('%Y-%m-%d')

# RAVE database does not have a 'STD' column for 'UNKNOWN_PAIN_NY'
obs_data_sets.rave_clinic['UNKNOWN_PAIN_NY'] = (
    obs_data_sets.rave_clinic['UNKNOWN_PAIN_NY'].replace({'1': 'Yes', '0': 'No'})
)

# Date columns whose text before the first space is parsed as DDMonYYYY and
# rewritten as YYYY-MM-DD.
_rave_date_columns = (
    'DISCHARG_HOME_DT',
    'HOSP_TRANSFER_DT',
    'TRANSFER_ICU_DT',
    'DATE_MATERNAL_DEATH_UNREL_PREG',
    'MATERNAL_DEATH_RELAT_PREG_DT',
)

for reformat_date_col in _rave_date_columns:
    date_text = obs_data_sets.rave_clinic[reformat_date_col].str.split(' ', expand = True)[0]
    parsed_dates = pd.to_datetime(date_text, format = '%d%b%Y')
    obs_data_sets.rave_clinic[reformat_date_col] = parsed_dates.dt.strftime('%Y-%m-%d')

# TODO (04APR2021): confirm whether this call is needed. It duplicates the create_specify_col call at the end of this cell, so it is commented out until there is a reason to keep it.
# obs_clinic_migration_preprocessing.create_specify_col(
#         create_col = 'NAMEOFHOSPITAL1_SPEC', 
#         coded_col = 'NAMEOFHOSPITAL1_STD', 
#         label_col = 'NAMEOFHOSPITAL1', 
#         label_code = '3', 
#         label_ans = 'Other',
#         obs_data_sets.rave_clinic
#     )

# 4. Admission Data

# Coded "unknown / not applicable" flags -> labels.
_admission_flag_labels = {
    'CERV_DILAT_UNK': {'1': 'Unknown', '0': 'Known'},
    'RUP_MEM_DTTM_UNK': {'1': 'Unknown', '0': 'Known'},
    'FULL_DILATN_TM_NA': {'1': 'Not applicable or unknown', '0': 'Known'},
    'PUSHSTART_DTTM_NA_UNK': {'1': 'Not applicable or unknown', '0': 'Known'},
}

# Assign the recoded column back instead of calling replace(..., inplace = True)
# on df[col]: the chained inplace form does not update the frame under pandas
# Copy-on-Write.
for flag_col, labels in _admission_flag_labels.items():
    obs_data_sets.rave_clinic[flag_col] = obs_data_sets.rave_clinic[flag_col].replace(labels)

# Rows that have an onset-of-labour timestamp are marked 'Recorded'; this must
# run before ONSET_LAB_DTTM is reformatted further down in this cell.
obs_data_sets.rave_clinic.loc[
    obs_data_sets.rave_clinic['ONSET_LAB_DTTM'].notna(),
    'ONSET_LAB_NR_NA'
] = 'Recorded'


def _rave_stamp_to_minutes(raw_stamp):
    """Reformat a '<DDMonYYYY> <H:MM:SS.fff>' string Series as 'YYYY-MM-DD HH:MM'.

    The value is split on spaces; the first piece is the date and the second the
    time. The time is left-padded with zeros to 12 characters because some
    values carry a leading zero in the hour and some do not.
    """
    pieces = raw_stamp.str.split(' ', expand = True)
    date_and_time = pieces[0] + pieces[1].str.zfill(12)
    parsed = pd.to_datetime(date_and_time, format = '%d%b%Y%H:%M:%S.%f')
    return parsed.dt.strftime('%Y-%m-%d %H:%M')


# Admission date and time live in two columns; the time is H:MM or HH:MM, so it
# is padded to 5 characters before parsing.
_admit_date = obs_data_sets.rave_clinic['ADMIT_DT'].str.split(' ', expand = True)[0]
_admit_time = obs_data_sets.rave_clinic['ADMIT_TM'].str.zfill(5)
obs_data_sets.rave_clinic['ADMIT_DT_TM'] = pd.to_datetime(
    _admit_date + _admit_time,
    format = '%d%b%Y%H:%M'
).dt.strftime('%Y-%m-%d %H:%M')

# The remaining timestamps each hold date and time in a single column.
for _stamp_col in ('ONSET_LAB_DTTM', 'RUP_MEM_DTTM', 'FULLDILATN_DTTM', 'PUSHSTART_DTTM'):
    obs_data_sets.rave_clinic[_stamp_col] = _rave_stamp_to_minutes(
        obs_data_sets.rave_clinic[_stamp_col]
    )




# Assign recoded columns back instead of calling replace(..., inplace = True) on
# df[col]: the chained inplace form does not update the frame under pandas
# Copy-on-Write.
obs_data_sets.rave_clinic['BISHOPSCOR_UNK'] = (
    obs_data_sets.rave_clinic['BISHOPSCOR_UNK'].replace({'1': 'Unknown', '0': 'Known'})
)

# Drop a trailing '.00' (e.g. '5.00' -> '5'). The dot is escaped and anchored,
# and regex is set explicitly: a bare '.00' means "any character followed by 00"
# as a regex, and the default of str.replace's regex argument differs between
# pandas versions.
obs_data_sets.rave_clinic['BISHOPSCOR'] = (
    obs_data_sets.rave_clinic['BISHOPSCOR'].str.replace(r'\.00$', '', regex = True)
)

obs_data_sets.rave_clinic['CERVDILAT_CSECT_UNK'] = (
    obs_data_sets.rave_clinic['CERVDILAT_CSECT_UNK'].replace({'1': 'Unknown', '0': 'Known'})
)


# Positional arguments, per the commented-out keyword form earlier in this cell:
# create_col, coded_col, label_col, label_code, label_ans, then the frame.
# NOTE(review): presumably NAMEOFHOSPITAL1_SPEC receives the free text where
# NAMEOFHOSPITAL1_STD == '3' and NAMEOFHOSPITAL1 is relabelled 'Other' - confirm
# in obs_clinic_migration_preprocessing.
obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.create_specify_col(
    'NAMEOFHOSPITAL1_SPEC', 'NAMEOFHOSPITAL1_STD', 'NAMEOFHOSPITAL1', '3', 'Other', obs_data_sets.rave_clinic
)

In [185]:
## Delivery Data: Antepartum (1) - Processing

In [186]:
del_ant_dict = {
    # 1. Number of fetuses
    'NUMBERFETUSES': 'del_num_fetus',
    'SPECIFY_CHRORION_AMNION': 'del_chor_amnio',
    'BABYNUMBER': 'del_baby_obsid',
    # 2. Prior to labour and delivery, did any complications occur?
    'COMPLICATIONS_OCCUR_NY': 'del_ant_comp_yn',
    'FETAL1_NY': 'del_ant_comp_fet',
    'ANOMALY_NY': 'del_ant_fet_anom',
    'ISO_ALLO_IMMUNIZATION_NY': 'del_ant_fet_allo',
    'IUGR1_NY': 'del_ant_fet_iugr',
    'LARGE_GESTN_AGE_NY': 'del_ant_fet_lga',
    'OLIGOHYDRAMNIOS_NY': 'del_ant_fet_oligo',
    'POLYHYDRAMNIOS_NY': 'del_ant_fet_poly',
    'OTHER_FETAL_NY': 'del_ant_fet_oth',
    'ANOMALY_SPECIFY': 'del_ant_fetanom_spec',
    'IUGR_PERCENT': 'del_ant_iugr_per',
    'SPECIFY_OTHER_FETAL_COMPLICATION': 'del_ant_fet_oth_spec',
    'PLACENTAL1_NY': 'del_ant_plac_yn',
    'PLACENTAL_ABRUPTION1_NY': 'del_ant_plac_abrpt',
    'PLACENTA_ACCRETA_NY': 'del_ant_plac_accreta',
    'PLACENTA_INCRETA_NY': 'del_ant_plac_increta',
    'PLACENTA_PERCRETA_NY': 'del_ant_plac_percreta',
    'PLACENTA_PREVIA1_NY': 'del_ant_plac_previa',
    'OTHERPLACENTALCOMPLN_NY': 'del_ant_plac_oth',
    'SEVERITY_PLACENT_ABRUPTION': 'del_ant_abrpt_spec',
    'PLACENTPREVIA_CM': 'del_ant_prev_spec',
    'SPECIFY_OTHER_PLACEN_COMPLN': 'del_ant_plac_oth_spec',
    'MATERNAL1_NY': 'del_ant_mat_yn',
    'ANEMIA_UNRESPON_THERAPY_NY': 'del_ant_mat_anemia',
    'ANTEPARTUM_BLEED_NY': 'del_ant_mat_aph',
    'GESTATIONALDIABETES': 'del_ant_mat_gdm',
    'HYPEREMESIS_GRAVIDARUM_NY': 'del_ant_mat_hyperemesis',
    'HYPERTENDISORDER_PREG_NY': 'del_ant_mat_htn_gen',
    'ECLAMPSIA1_NY': 'del_ant_mat_eclampsia',
    'GESTATN_HYPERTENSION': 'del_ant_mat_ghtn',
    'HELLP1_NY': 'del_ant_mat_hellp',
    'PREECLAMPSIA1_NY': 'del_ant_mat_pet',
    'PREEXIST_HYPERTEN_SUP_PREECAL_NY': 'del_ant_mat_htn_pet',
    'PRETERMLABOUR1_NY': 'del_ant_mat_ptl',
    'PRETERMPREMRUPMEMBRAN_NY': 'del_ant_mat_pprom',
    'OTHER_MATERN_COMPLICATN_NY': 'del_ant_mat_oth',
    'ANTEPARTUMBLEED_ONSET_DT': 'del_ant_aph_onset',
    'TRANSFUSION1_NY': 'del_ant_aph_trans',
    'ESTIMATEDBLOODLOSS1': 'del_ant_aph_ebl',
    'SPECIFY_OTHER_MATN_COMPLICATN': 'del_ant_mat_oth_spec',
    # 3. Antenatal Glucocorticoids
    'ANTENAT_GLUCOCORTICOIDS_NY': 'del_acs_yn',
    'CELESTONE_DEXAMETHASONE': 'del_acs_type',
    'CELESTONE_DOSE': 'del_acs_cel_dose',
    'DEXAMETHASONE_DOSE': 'del_acs_dex_dose',
    # 4. Admission Data
    'ADMIT_DT_TM': 'del_lnd_adm_dt_time',
    'CERV_DILAT_UNK': 'del_lnd_adm_cx_unk',
    'CERV_DILATN_ADMIT': 'del_lnd_adm_cx',
    'ONSET_LAB_NR_NA': 'del_lnd_lab_onset_unk',
    'ONSET_LAB_DTTM': 'del_lnd_lab_onset',
    'RUP_MEM_DTTM_UNK': 'del_lnd_rom_dttm_unk',
    'RUP_MEM_DTTM': 'del_lnd_rom_dttm',
    'RUPT_MEMBRAN_TYPE': 'del_lnd_rom_type',
    'GRP_B_STREP_RESULTS':'del_lnd_gbs',
    'FULL_DILATN_TM_NA': 'del_lnd_fully_unk',
    'FULLDILATN_DTTM': 'del_lnd_fully_dttm',
    'PUSHSTART_DTTM_NA_UNK': 'del_lnd_push_unk',
    'PUSHSTART_DTTM':'del_lnd_push_dttm',
    
    
    # 5. Induction of labour 
    'INDUCTN_LAB_NY': 'del_lnd_iol_yn',
    'INDICAT_FOR_INDUCTN': 'del_lnd_iol_reason',
    'FETAL_INDUCTN_NY': 'del_iol_fet_yn',
    'ATYP_ABN_FETAL_SURV': 'del_iol_fet_atypical',
    'FETAL_ANOM_NY': 'del_iol_fet_anomaly',
    'IUGR_INDN_NY': 'del_iol_fet_iugr',
    'MACROSOMNIA1_NY': 'del_iol_fet_macrosomia',
    'MULT_GESTN_NY': 'del_iol_fet_multiple',
    'POSTDATES_NY': 'del_iol_fet_post_dat',
    'OTH_FETAL_COMPLICATN': 'del_iol_fet_oth',
    'TERMINATN_PREG_NY': 'del_iol_fet_termination',
    'SPECIFYOTHERFETALCOMPLICN': 'del_iol_fet_oth_spec',
    'MATERNAL_INDN_NY': 'del_iol_mat_yn',
    'PRELABOR_RUP_MEM_NY': 'del_iol_mat_prom',
    'DIABETES_INDIC_NY': 'del_iol_mat_diabetes',
    'HX_PRECIP_DELIV_NY': 'del_iol_mat_precipitous',
    'OLIGOHY_DECR_AMNIOT_FLUID': 'del_iol_mat_oligo',
    'OTH_OBSTET_COMPLICATN': 'del_iol_mat_oth',
    'PREEXIST_MAT_MEDCONDNS': 'del_iol_mat_pre_med_cond',
    'HYPERTEN_DISORDER_PREG': 'del_iol_mat_preeclampsia',
    'SPECIFYOTHEROBSTETCOMP': 'del_iol_mat_oth_spec',
    'OTHER_INDICATN_INDUCTN': 'del_iol_oth_yn',
    'MATERN_REQUEST_NY': 'del_iol_oth_mat_req',
    'HOSP_DISTANCE_NY': 'del_iol_oth_safety',
    'ACOMODATE_CAREGIVER': 'del_iol_oth_accom',
    'OTHER_INDICATN_OTHER_NY': 'del_iol_oth_oth',
    'OTHER_OTH_SPECIFY': 'del_iol_oth_oth_spec',
    'METHOD_INDUCTION': 'del_iol_method',
    'AMNIOTOM_NY': 'del_lnd_iol_meth_amni',
    'OXYTOCIN': 'del_lnd_iol_meth_pit',
    'PROSTAGLAN1': 'del_lnd_iol_meth_prost',
    'BALOON_MECH_DEVIC_NY': 'del_lnd_iol_meth_foley',
    'LAMINAR_ARTIFIC_TENT': 'del_lnd_iol_meth_tent',
    'SWEEP_MEMBRAN_NY': 'del_lnd_iol_meth_sweep',
    'BISHOPSCOR_UNK': 'del_lnd_iol_bishop_unk',
    'BISHOPSCOR': 'del_lnd_iol_bishop',
    
    # 6. Augmentation of labour
    'AUGMENTATN_LABOR': 'del_lnd_aug_yn', 
    'AMNIOTOM2_NY': 'del_lnd_aug_rom', 
    'OXYTOCIN2': 'del_lnd_aug_pit', 
    'PROSTAGLAN2_NY': 'del_lnd_aug_prost',
    # 7. Type of birth
    'TYPEOFBIRTH3': 'del_mod', 
    'TYPEVAGINALBIRTH': 'del_mod_svd_avd', 
    'FORCEPS': 'del_mod_avd_forcep', 
    'VACUUM': 'del_mod_avd_vacuum', 
    'CERVDILAT_CSECT_UNK': 'del_mod_cs_dil_unk', 
    'CERVDILAT_CSECTN': 'del_mod_cs_dil', 
    'TYPE_CSECTN': 'del_mod_cs_type', 
    'INDICATN_FOR_C_SECTN_KUNK': 'del_mod_cs_ind', 
    'FETAL_INDICAT_NY': 'del_mod_cs_fet', 
    'FETAL_ANOMAL_CSECTN': 'del_mod_cs_fet_anom', 
    'CORD_PROLAPS_CSECTN': 'del_mod_cs_fet_cord', 
    'IUGR_CSECTN': 'del_mod_cs_fet_iugr', 
    'MACROSOM_CSECTN': 'del_mod_cs_fet_lga', 
    'MALPOSITN_CSECTN': 'del_mod_cs_fet_pos', 
    'WELLBEINCONCERN': 'del_mod_cs_fet_nrfh', 
    'OTHERFETAL_INDICATN': 'del_mod_cs_fet_oth', 
    'SPECFY_OTH_FET_IDICAT': 'del_mod_cs_fet_oth_spec', 
    'MATERN_INDICATN': 'del_mod_cs_mat', 
    'FAIL_FORCEP_VAC': 'del_mod_cs_mat_fail_avd', 
    'HYPER_DIS_PREG': 'del_mod_cs_mat_htn', 
    'MULTI_GESTN': 'del_mod_cs_mat_multi', 
    'NON_PROGRES_1STSTAGE': 'del_mod_cs_mat_np_1st', 
    'NON_PROGRES_2NDSTAGE': 'del_mod_cs_mat_np_2st', 
    'PLACENT_PREVIA': 'del_mod_cs_mat_previa', 
    'PLACENT_ABRUPTN': 'del_mod_cs_mat_abrupt', 
    'PREEXIST_HEALTH_PROBM': 'del_mod_cs_mat_cond', 
    'PREV_CSECTN': 'del_mod_cs_mat_prev_cs', 
    'PREV_UTERIN_INCISN': 'del_mod_cs_mat_myo', 
    'SUSPECT_CHORIOAMNION': 'del_mod_cs_mat_chorio', 
    'UNSUCCESS_VBAC1': 'del_mod_cs_mat_vbac', 
    'UTERIN_RUPT': 'del_mod_cs_mat_rupture', 
    'OTH_OBSTET_COMPLICAT': 'del_mod_cs_mat_other', 
    'SPECFY_OTH_OBSTET_COMP': 'del_mod_cs_mat_oth_spec', 
    'OTHER_INDICATN_FORCSECTN': 'del_mod_cs_oth', 
    'ACCOMODATE_CAREPROVID': 'del_mod_cs_oth_accom', 
    'MAT_REQUEST': 'del_mod_cs_oth_mat_req', 
    'OTH_INDICAT_UNK': 'del_mod_cs_oth_unk', 
    'OTHER_OTHER_INDICATNFOR_CSECTN': 'del_mod_cs_oth_oth', 
    'SPECFY_OTH_OTH_INDI_CSECTN': 'del_mod_cs_oth_oth_spec',
    # 8. Presentation at delivery
    'PRESENTATN_ATDELIVRY': 'del_presentation', 
    'TYPEOFBREECH': 'del_breech_spec', 
    'TYPEOFCEPHALIC': 'del_cephalic_spec',
    
    # 9. Birth location
    'BIRTHLOCATION1': 'del_birth_location',
    'NAMEOFHOSPITAL1': 'del_hospital_spec',
    'NAMEOFHOSPITAL1_SPEC': 'del_hospital_oth_spec',


    # 10. During labour and delivery, did any complications occur?
    'COMPLICATN_LAB_DEL': 'del_lnd_comp_yn', 
    'ATYPICAL_ABN_FETAL_SURVELNCE_NY': 'del_lnd_comp_fetal_surv', 
    'CLINICAL_INFECTION_NY': 'del_lnd_comp_infxn', 
    'MATERNAL_FEVER_NY': 'del_lnd_comp_mat_fever', 
    'FETAL_TACHYCARDIA1_NY': 'del_lnd_comp_fet_tachy', 
    'CLIN_CHORIOAMNION_NY': 'del_lnd_comp_chorio', 
    'POSITIV_BLOOD_CULT_NY': 'del_lnd_comp_culture', 
    'CORD_PROLAPSE1_NY': 'del_lnd_comp_prolapse', 
    'HYPERTEN_DISORDER_PREG1_NY': 'del_lnd_comp_htn', 
    'ECLAMPSIA2_NY': 'del_lnd_comp_eclampsia', 
    'GESTATIONAL_HYPERTEN2_NY': 'del_lnd_comp_ghtn', 
    'HELLP2_NY': 'del_lnd_comp_hellp', 
    'PREECLAMPSIA2_NY': 'del_lnd_comp_pet', 
    'PRE_HYPERTEN_IMPOS_PREECLAMP_NY': 'del_lnd_comp_htn_pet', 
    'HYSTERECTOMY2_NY': 'del_lnd_comp_hyst', 
    'MECONIUM2_NY': 'del_lnd_comp_mec', 
    'NON_PROGRESS_1STSTAGE_LABOUR_NY': 'del_lnd_comp_np_1st', 
    'NON_PROGRESS_2NDSTAGE_LABOR': 'del_lnd_comp_np_2st', 
    'NON_PROGRESIV_LABOR_DESC_DYS_NY': 'del_lnd_comp_np_gen', 
    'PERINEAL_HEMATOMA2_NY': 'del_lnd_comp_hematoma', 
    'PLACENTAL_ABRUPTION3_NY': 'del_lnd_comp_abruption', 
    'POSTPARTUM_HEMORHAGE2_NY': 'del_lnd_comp_pph', 
    'TRANSFUSION2_NY': 'del_lnd_comp_pph_trans', 
    'PULMONARY_EMBOLISM2_NY': 'del_lnd_comp_pe', 
    'RETAINED_PLACENTA_MAN_REMOV2_NY': 'del_lnd_comp_plac_man', 
    'RETAIN_PLACENTA_SURG_REMOV2_NY': 'del_lnd_comp_plac_sx', 
    'SHOULDER_DYSTOCIA1_NY': 'del_lnd_comp_shoulder', 
    'UTERINE_ATONY2_NY': 'del_lnd_comp_atony', 
    'UTERINE_DEHISCENCE2_NY': 'del_lnd_comp_dehis', 
    'UTERINE_RUPTURE2_NY': 'del_lnd_comp_rupture', 
    'OTHER_COMPLICATIONS_NY': 'del_lnd_comp_other', 
    'ESTIMAT_BLOOD_LOSS2': 'del_lnd_comp_blood_loss', 
    'SPECIFY_OTHER_COMPLICATIONS': 'del_lnd_comp_oth_spec',
    # 12. Pain Management for labour and/or vaginal delivery
    'PAIN_MGMT_NY': 'del_pain_mgmt_yn',
    'NITROUS_OXIDE_NY': 'del_pain_nitrous', 
    'OPIOIDS_NY': 'del_pain_opiod', 
    'EPIDURAL_NY': 'del_pain_epi', 
    'COMBN_SPINAL_EPIDURAL_NY': 'del_pain_cse', 
    'STER_WATER_SALINE_INJECTN_NY': 'del_pain_sterile_h2o', 
    'TENS': 'del_pain_tens', 'TUB_SHOWER_NY': 'del_pain_tub', 
    'LOCAL_NY': 'del_pain_local', 
    'PUDENTAL_NY': 'del_pain_pudendal', 
    'OTHER_PAIN_MGNT': 'del_pain_oth', 
    'UNKNOWN_PAIN_NY': 'del_pain_unk', 
    'SPECIFY_OTHER_PAIN_MGMT': 'del_pain_oth_spc', 
    # 13. Anesthesia for Cesarian Section
    'ANESTHESIA_CSECTION_NY': 'del_pain_cs_yn', 
    'EPIDURAL_CS_NY': 'del_pain_cs_epi', 
    'SPINAL_NY': 'del_pain_cs_spinal', 
    'GENERAL_NY': 'del_pain_cs_general', 
    'ANATHESIA_UNK': 'del_pain_cs_unk',
    # 14. Was the placenta:
    'PLACENTA_SENT_PATHOLOG_NY': 'del_pp_pathology', 
    'PLACENTA_PICKUP_BIOBANK_NY': 'del_pp_biobank', 
    'DISCHARGED_HOME_NY': 'del_pp_home', 
    'TRANSFER_TO_OTH_HOSP_NY': 'del_pp_oth_hospital', 
    'TRANSFER_TO_ICU_NY': 'del_pp_icu', 
    'MATERNAL_DEATH_UNREL_PREG_BIRTH': 'del_pp_death', 
    'MAT_DEATH_RELAT_PREG_BIRTH_NY': 'del_pp_death_preg', 
    # 15. Maternal outcome:
    'DISCHARG_HOME_DT': 'del_pp_home_date', 
    'HOSP_TRANSFER_DT': 'del_pp_oth_hospital_date', 
    'TRANSFER_ICU_DT': 'del_pp_icu_date', 
    'DATE_MATERNAL_DEATH_UNREL_PREG': 'del_pp_death_date', 
    'MATERNAL_DEATH_RELAT_PREG_DT': 'del_pp_death_preg_date'      
}

del_ant = obs_clinic_migration.RedcapConv(del_ant_dict, 0, master_df = obs_data_sets.rave_clinic)

Column 'del_num_fetus' has an issue with the variable 'Singleton'.
Column 'del_num_fetus' has an issue with the variable 'Twins'.
Column 'del_baby_obsid' has an issue with the variable 'B0'.
Column 'del_baby_obsid' has an issue with the variable '91600122B-01'.
Column 'del_baby_obsid' has an issue with the variable '91201767-BOI'.
Column 'del_ant_iugr_per' has an issue with the variable '= 3 or < 10 Percentile'.
Column 'del_ant_aph_ebl' has an issue with the variable '<500 cc'.
Column 'del_breech_spec' has an issue with the variable 'Breech Type Unknown'.
Column 'del_cephalic_spec' has an issue with the variable 'Cephalic Type Unknown'.
Column 'del_hospital_spec' has an issue with the variable 'St. Michael’s Hospital'.


In [187]:
## Delivery Data: Antepartum (1) - Post-processing

In [188]:
# Replace the Rave labels that RedcapConv reported above as having "an issue"
# with the spelling given on the right-hand side, then call remove_na().
del_ant.change_str({
    'del_num_fetus': {
        'Singleton': 'Singleton (1)',
        'Twins': 'Twins (2)',
    },
    'del_ant_iugr_per': {
        '= 3 or < 10 Percentile': '≥3 or < 10 Percentile',
    },
    'del_ant_aph_ebl': {
        '<500 cc': '< 500 cc',
    },
    'del_breech_spec': {
        'Breech Type Unknown': 'Unknown',
    },
    'del_cephalic_spec': {
        'Cephalic Type Unknown': 'Unknown',
    },
    'del_hospital_spec': {
        # curly apostrophe in Rave, straight apostrophe in the replacement
        "St. Michael’s Hospital": "St. Michael's Hospital",
    },
    'del_baby_obsid': {
        '91201767-BOI': 'B01',
        # TODO: check if 91202009 and 2149 are correct
        'B0': 'B01',
        '91600122B-01': 'B01',
    },
})
del_ant.remove_na()

1    2259
2      57
Name: del_num_fetus, dtype: int64
2    44
1     6
Name: del_ant_iugr_per, dtype: int64
1    13
4    11
3     1
Name: del_ant_aph_ebl, dtype: int64
5    102
4     23
3     11
1      6
Name: del_breech_spec, dtype: int64
4    2092
5       9
2       3
3       1
1       1
Name: del_cephalic_spec, dtype: int64
1    2196
2      96
3      15
Name: del_hospital_spec, dtype: int64
1    2316
Name: del_baby_obsid, dtype: int64


In [189]:
## Delivery Data: Antepartum (1) - Compare Data

In [190]:
# Compare the converted Rave data with obs_data_sets.redcap_clinic; the
# displayed table carries a 'Source' column (presumably it lists rows that
# differ between the two sources - confirm in obs_clinic_migration).
del_ant.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,del_num_fetus,del_chor_amnio,del_baby_obsid,del_ant_comp_yn,...,del_pp_oth_hospital_date,del_pp_icu_date,del_pp_death_date,del_pp_death_preg_date,Source


In [191]:
## Delivery Data: Antepartum (1) - Prepare and Save Data

In [192]:
# prep_imp presumably tags the rows with the REDCap event name and the
# form-complete field before import - TODO confirm in obs_clinic_migration.
del_ant.prep_imp('delivery_data_arm_1', 'delivery_data_complete')
# Write the converted data to CSV without the pandas index column.
del_ant.data.to_csv('../data/processed/del_ant.csv', index = False)

In [193]:
# Delivery Data: Antepartum (2)

In [194]:
## Delivery Data: Antepartum (2) - Preprocessing

In [195]:
## Delivery Data: Antepartum (2) - Processing

In [196]:
# Rave column -> REDCap field for the two del_mod_cs_dil* fields, converted
# separately from del_ant.
del_lnd_comp_dict = {
    'CERVDILAT_CSECT_UNK': 'del_mod_cs_dil_unk',
    'CERVDILAT_CSECTN': 'del_mod_cs_dil',
}

# Second argument 0: presumably "not a repeating instrument" - TODO confirm.
del_lnd_comp = obs_clinic_migration.RedcapConv(
    del_lnd_comp_dict,
    0,
    master_df=obs_data_sets.rave_clinic,
)

In [197]:
## Delivery Data: Antepartum (2) - Post-processing

In [198]:
# Same post-processing step used for del_ant; what counts as "NA" is defined
# in obs_clinic_migration (not visible here).
del_lnd_comp.remove_na()

In [199]:
## Delivery Data: Antepartum (2) - Compare Data

In [200]:
# Compare the converted Rave data with obs_data_sets.redcap_clinic; only the
# header row was displayed here (presumably meaning no differences - confirm).
del_lnd_comp.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,del_mod_cs_dil_unk,del_mod_cs_dil,Source


In [201]:
## Delivery Data: Antepartum (2) - Prepare and Save Data
# Same event / form-complete arguments as the del_ant export, so both CSVs
# target the same REDCap form (prep_imp semantics: TODO confirm).
del_lnd_comp.prep_imp('delivery_data_arm_1', 'delivery_data_complete')
# Write the converted data to CSV without the pandas index column.
del_lnd_comp.data.to_csv('../data/processed/del_lnd_comp.csv', index = False)

In [202]:
# Delivery Data: Antibiotics during Labour and Delivery

In [203]:
## Delivery Data: Antibiotics during Labour and Delivery - Preprocessing

In [204]:
# Rave holds the antibiotic entries in numbered column sets 1..17.
for entry in range(1, 18):
    suffix = str(entry)

    # Add the "Other ..., please specify" columns.
    # In Rave, 'Other antibiotic, please specify' data is intermingled with
    # the associated labelled data, so create_specify_col derives a separate
    # specify column for both the antibiotic and its indication.
    # Each spec is (create_col, coded_col, label_col, label_code).
    for create_prefix, coded_prefix, label_prefix, other_code in (
        ('ANTIBIOTIC_SPEC_', 'ANTIBIOTIC_STD_', 'ANTIBIOTIC_', '12'),
        ('INDICATION_ANTIBIOT_SPEC_', 'INDICATION_ANTIBIOT_STD_',
         'INDICATION_ANTIBIOT_', '9'),
    ):
        obs_data_sets.rave_clinic = obs_clinic_migration_preprocessing.create_specify_col(
            create_col=create_prefix + suffix,
            coded_col=coded_prefix + suffix,
            label_col=label_prefix + suffix,
            label_code=other_code,
            label_ans='Other',
            df=obs_data_sets.rave_clinic,
        )

    # Fix date formats: 'dd/mm/YYYY hh:MM AM/PM' -> 'YYYY-mm-dd HH:MM'.
    onset_col = 'ANTI_BIOT_ONSET_DT_TM_' + suffix
    onset_parsed = pd.to_datetime(
        obs_data_sets.rave_clinic[onset_col],
        format='%d/%m/%Y %I:%M %p',
    )
    obs_data_sets.rave_clinic[onset_col] = onset_parsed.dt.strftime('%Y-%m-%d %H:%M')
    

    

In [205]:
## Delivery Data: Antibiotics during Labour and Delivery - Processing

In [206]:
# Rave column prefix -> REDCap field for the antibiotics instrument.
# Keys end in '_' because the columns are numbered per entry; the 17 passed to
# RedcapConv matches the range(1, 18) used in preprocessing.
abx_labr_dict = {
    'ANTIBIOTICS_LAB_DEL_NY_': 'abx_yn',
    'ANTIBIOTIC_': 'abx_med_spec',
    'ANTIBIOTIC_SPEC_': 'abx_med_oth_spec',
    'INDICATION_ANTIBIOT_': 'abx_indc',
    'INDICATION_ANTIBIOT_SPEC_': 'abx_indc_oth_spec',
    'ANTI_BIOT_ONSET_DT_TM_': 'abx_dt_time',
}

abx_labr = obs_clinic_migration.RedcapConv(
    abx_labr_dict,
    17,
    master_df=obs_data_sets.rave_clinic,
)

In [207]:
## Delivery Data: Antibiotics during Labour and Delivery - Post-processing

In [208]:
## Delivery Data: Antibiotics during Labour and Delivery - Compare Data

In [209]:
# Compare the converted antibiotics data with obs_data_sets.redcap_clinic;
# only the header row was displayed (presumably no differences - confirm).
abx_labr.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,abx_yn,abx_med_spec,abx_med_oth_spec,abx_indc,abx_indc_oth_spec,abx_dt_time,Source


In [210]:
## Delivery Data: Antibiotics during Labour and Delivery - Prepare and Save Data

In [211]:
# Third argument is only passed for numbered (repeating) data sets; presumably
# it is the REDCap repeat-instrument name - TODO confirm in obs_clinic_migration.
abx_labr.prep_imp('delivery_data_arm_1', 'antibiotics_during_labour_delivery_complete', 'antibiotics_during_labour_delivery')
# Write the converted data to CSV without the pandas index column.
abx_labr.data.to_csv('../data/processed/abx_labr.csv', index = False)

In [212]:
# Neonatal Form: Delivery Data

In [213]:
## Neonatal Form: Delivery Data - Preprocessing

In [214]:
# RAVE has date and time in one column; separate columns in REDCap.
# Column sets are numbered 1 and 2 (one per baby slot).
for i in range(1, 3):
    suffix = str(i)

    # Split 'ddMonYYYY HH:MM:SS' on the first space into date and time parts.
    date_time = obs_data_sets.rave_clinic['DELIVERY_DT_TM_' + suffix].str.split(
        ' ', n = 1, expand = True
    )
    obs_data_sets.rave_clinic['DELIVERY_DATE_' + suffix] = pd.to_datetime(
        date_time[0], format = '%d%b%Y'
    ).dt.strftime('%Y-%m-%d')
    obs_data_sets.rave_clinic['DELIVERY_TIME_' + suffix] = pd.to_datetime(
        date_time[1], format = '%H:%M:%S'
    ).dt.strftime('%H:%M')

    # Decode the "not recorded" / "not available" checkbox columns.
    # The result is assigned back instead of using replace(..., inplace=True)
    # on the column selection: chained in-place mutation does not update the
    # parent DataFrame once pandas Copy-on-Write is active.
    for neonatal_nr in ['BIRTHWT_NR_', 'HEADCIRCUM_NR_', 'LENGTH_NR_',  'APGAR_SCORE1_NA_', 'APGAR_SCORE5_NA_', 'APGAR_SCORE10_NA_']:
        flag_col = neonatal_nr + suffix
        obs_data_sets.rave_clinic[flag_col] = obs_data_sets.rave_clinic[flag_col].replace(
            {'1': 'Not recorded', '0': 'Yes'}
        )

In [215]:
## Neonatal Form: Delivery Data - Processing

In [216]:
# Rave column prefix -> REDCap field for the neonatal delivery data.
# Keys end in '_' because the Rave columns are numbered per baby (the
# preprocessing loop above uses suffixes 1 and 2); the 2 passed to RedcapConv
# is presumably that instance count - TODO confirm in obs_clinic_migration.
# Commented-out entries are Rave columns that are currently not migrated here.
ante_temp_dict = {
    'DELIVERY_DATE_': 'neo_del_date', 
    'DELIVERY_TIME_': 'neo_del_time', 
    #'BABYOBSID_NEO_': 'neo_baby_num', 
    #'GEST_AGE_DEL_WEEK_': 'neo_del_ga_weeks', 
    #'GEST_AGE_DEL_DAYS_': 'neo_del_ga_days', 
    #'MATERN_AGE_ATDELVRY_': 'neo_del_mat_age', 
    'PREGNANCY_OUTCOME_': 'neo_del_preg_outcome', 
    'PREGNANCY_LOSS_': 'neo_del_loss_20wk', 
    'STILLBIRTH_': 'neo_del_stillbirth', 
    'INFANTSEX_': 'neo_del_fet_sex', 
    'BIRTHWT_NR_': 'neo_del_birthwt_yn', 
    'BIRTHWT_': 'neo_del_birthwt', 
    'HEADCIRCUM_NR_': 'neo_del_head_circ_yn', 
    'HEADCIRCUM_': 'neo_del_head_circ', 
    'LENGTH_NR_': 'neo_del_length_yn', 
    'LENGTH_': 'neo_del_length', 
    'APGAR_SCORE1_NA_': 'neo_del_apgar_1_yn', 
    'APGAR_SCORE1_': 'neo_del_apgar_1', 
    'APGAR_SCORE5_NA_': 'neo_del_apgar_5_yn', 
    'APGAR_SCORE5_': 'neo_del_apgar_5', 
    'APGAR_SCORE10_NA_': 'neo_del_apgar_10_yn', 
    'APGAR_SCORE10_': 'neo_del_apgar_10'
}



# NOTE(review): the data frame is passed positionally here but as master_df=
# in the delivery-data cells; presumably the same parameter - confirm.
ante_temp = obs_clinic_migration.RedcapConv(ante_temp_dict, 2, obs_data_sets.rave_clinic)

Column 'neo_del_preg_outcome' has an issue with the variable 'Stillbirth at >=20wks or >=500g'.
Column 'neo_del_stillbirth' has an issue with the variable 'Spontaneous - occurred during antepartum period'.
Column 'neo_del_stillbirth' has an issue with the variable 'Spontaneous - occurred during intrapartum period'.


In [217]:
## Neonatal Form: Delivery Data - Post-processing

In [218]:
# Replace the Rave labels that RedcapConv reported above as having "an issue"
# with the spelling given on the right-hand side.
ante_temp.change_str({
    'neo_del_preg_outcome': {
        'Stillbirth at >=20wks or >=500g': 'Stillbirth at >20wks or >500g',
    },
    'neo_del_stillbirth': {
        # only the capitalisation of "occurred" differs
        'Spontaneous - occurred during antepartum period':
            'Spontaneous - Occurred during antepartum period',
        'Spontaneous - occurred during intrapartum period':
            'Spontaneous - Occurred during intrapartum period',
    },
})

1    2337
3      19
2      12
Name: neo_del_preg_outcome, dtype: int64
1    9
2    6
3    2
Name: neo_del_stillbirth, dtype: int64


In [219]:
## Neonatal Form: Delivery Data - Compare Data

In [220]:
# Compare the converted neonatal delivery data with obs_data_sets.redcap_clinic;
# only the header row was displayed (presumably no differences - confirm).
ante_temp.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_del_date,neo_del_time,neo_del_preg_outcome,...,neo_del_apgar_5_yn,neo_del_apgar_5,neo_del_apgar_10_yn,neo_del_apgar_10,Source


In [221]:
## Neonatal Form: Delivery Data - Prepare and Save Data

In [222]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
ante_temp.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')
# Write the converted data to CSV without the pandas index column.
ante_temp.data.to_csv('../data/processed/ante_temp.csv', index = False)

In [223]:
# Neonatal Form: Cord Blood

In [224]:
## Neonatal Form: Cord Blood - Preprocessing



# Check that 'BABY_NUM3_1' goes with 'BABY_NUM3_2' and
# 'BABY_NUM3_3' goes with 'BABY_NUM3_4' (column sets 1/2 are renamed below to
# the arterial/venous pair of baby 1, sets 3/4 to the pair of baby 2).
# NOTE(review): none of the four expressions below is the last expression of
# the cell and none is assigned, printed or asserted, so their results are
# discarded when the cell runs. Wrap them in assert/display if the check is
# meant to be enforced or shown.
obs_data_sets.rave_clinic['BABY_NUM3_1'].equals(obs_data_sets.rave_clinic['BABY_NUM3_2'])
obs_data_sets.rave_clinic['BABY_NUM3_3'].equals(obs_data_sets.rave_clinic['BABY_NUM3_4'])

# Subjects whose two samples of a pair carry the same ARTERIAL_VENOUS value.
obs_data_sets.rave_clinic.loc[
    obs_data_sets.rave_clinic['ARTERIAL_VENOUS_1'] == obs_data_sets.rave_clinic['ARTERIAL_VENOUS_2'],
    'Subject'
]
obs_data_sets.rave_clinic.loc[
    obs_data_sets.rave_clinic['ARTERIAL_VENOUS_3'] == obs_data_sets.rave_clinic['ARTERIAL_VENOUS_4'],
    'Subject'
]

# Decode the "unknown" checkbox of every cord-blood gas value (column sets
# 1..4) and blank it where no sample was collected.
for var_w_unknown in [
    'PH_UNKNOWN_', 'HCO3_UNKNOWN_', 'PCO2_UNKNOWN_',
    'PO2_UNKNOWN_', 'BASE_EXCESS_UNKNOWN_'
]:
    for i in range(1, 5):
        flag_col = var_w_unknown + str(i)

        # Assign the result back instead of replace(..., inplace=True) on the
        # column selection: chained in-place mutation does not update the
        # parent DataFrame once pandas Copy-on-Write is active.
        obs_data_sets.rave_clinic[flag_col] = obs_data_sets.rave_clinic[flag_col].replace(
            {'1': 'Unknown', '0': 'Known'}
        )

        # No sample collected -> the flag is not applicable.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        not_collected = obs_data_sets.rave_clinic['COLLECTED_NY_' + str(i)] == 'No'
        obs_data_sets.rave_clinic.loc[not_collected, flag_col] = np.nan
        



# Rename the four numbered Rave cord-blood column sets so that arterial and
# venous samples become separate, per-baby numbered columns:
#   <FIELD>_1 -> ART_<FIELD>_1    <FIELD>_2 -> VEN_<FIELD>_1
#   <FIELD>_3 -> ART_<FIELD>_2    <FIELD>_4 -> VEN_<FIELD>_2
# The mapping is generated from the field list rather than typed out, so every
# field is guaranteed to follow the same pattern.
cord_gas_fields = [
    'BABY_NUM3', 'COLLECTED_NY',
    'PH', 'PH_UNKNOWN',
    'HCO3', 'HCO3_UNKNOWN',
    'PCO2', 'PCO2_UNKNOWN',
    'PO2', 'PO2_UNKNOWN',
    'BASE_EXCESS', 'BASE_EXCESS_UNKNOWN',
]
# Rave set number -> (vessel prefix, baby number)
cord_gas_slots = {
    1: ('ART', 1),
    2: ('VEN', 1),
    3: ('ART', 2),
    4: ('VEN', 2),
}

obs_data_sets.rave_clinic.rename(columns={
    field + '_' + str(rave_set): vessel + '_' + field + '_' + str(baby)
    for rave_set, (vessel, baby) in cord_gas_slots.items()
    for field in cord_gas_fields
}, inplace=True)

In [225]:
## Neonatal Form: Cord Blood - Processing

In [226]:
# Rave column prefix -> REDCap field for the cord blood gas values.
# The ART_*/VEN_* prefixes are the column names produced by the rename in the
# preprocessing cell; the trailing number (1 or 2) is left to RedcapConv
# (second argument 2 - presumably the instance count, TODO confirm).
neo_cbg_dict = {
    'ART_BABY_NUM3_': 'neo_baby_num',
    'ART_COLLECTED_NY_': 'neo_cbg_art_yn',
    'ART_PH_': 'neo_cbg_art_ph',
    'ART_PH_UNKNOWN_': 'neo_cbg_art_ph_unk',
    'ART_HCO3_': 'neo_cbg_art_hco3',
    'ART_HCO3_UNKNOWN_': 'neo_cbg_art_hco3_unk',
    'ART_PCO2_': 'neo_cbg_art_pco2',
    'ART_PCO2_UNKNOWN_': 'neo_cbg_art_pco2_unk', 
    'ART_PO2_': 'neo_cbg_art_po2',
    'ART_PO2_UNKNOWN_': 'neo_cbg_art_po2_unk',
    'ART_BASE_EXCESS_': 'neo_cbg_art_base',
    'ART_BASE_EXCESS_UNKNOWN_': 'neo_cbg_art_base_unk',
    
    # The venous baby-number column is not mapped; neo_baby_num is taken from
    # the arterial column only.
    #'VEN_BABY_NUM3_1',
    'VEN_COLLECTED_NY_': 'neo_cbg_ven_yn',
    'VEN_PH_': 'neo_cbg_ven_ph',
    'VEN_PH_UNKNOWN_': 'neo_cbg_ven_ph_unk',
    'VEN_HCO3_': 'neo_cbg_ven_hco3',
    'VEN_HCO3_UNKNOWN_': 'neo_cbg_ven_hco3_unk',
    'VEN_PCO2_': 'neo_cbg_ven_pco2',
    'VEN_PCO2_UNKNOWN_': 'neo_cbg_ven_pco2_unk',
    'VEN_PO2_': 'neo_cbg_ven_po2',
    'VEN_PO2_UNKNOWN_': 'neo_cbg_ven_po2_unk',
    'VEN_BASE_EXCESS_': 'neo_cbg_ven_base',
    'VEN_BASE_EXCESS_UNKNOWN_': 'neo_cbg_ven_base_unk',
    }


neo_cbg = obs_clinic_migration.RedcapConv(neo_cbg_dict, 2, obs_data_sets.rave_clinic)

In [227]:
## Neonatal Form: Cord Blood - Post-processing

In [228]:
# Normalise the one non-standard baby number found in the cord blood data.
# NOTE(review): here '001' is mapped to '1', whereas the other neonatal cells
# map '001' to 'B01' - confirm which form change_str expects.
neo_cbg.change_str({
    'neo_baby_num': {
        '001': '1',
    },
})

1    2304
2      54
Name: neo_baby_num, dtype: int64


In [229]:
## Neonatal Form: Cord Blood - Compare Data

In [230]:
# Compare the converted cord blood data with obs_data_sets.redcap_clinic;
# only the header row was displayed (presumably no differences - confirm).
neo_cbg.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_cbg_art_yn,neo_cbg_art_ph_unk,...,neo_cbg_ven_hco3,neo_cbg_ven_pco2,neo_cbg_ven_po2,neo_cbg_ven_base,Source


In [231]:
## Neonatal Form: Cord Blood - Prepare and Save Data

In [232]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
neo_cbg.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')
# Write the converted data to CSV without the pandas index column.
neo_cbg.data.to_csv('../data/processed/neo_cbg.csv', index = False)

In [233]:
# Neonatal Form: Neonatal Resuscitation (1)

In [234]:
## Neonatal Form: Neonatal Resuscitation (1) - Preprocessing

In [235]:
# Rave columns involved (per baby slot 1 and 2):
# 'LEVEL2_NY_1',
# #'LEVEL2_NY_STD_1',
# 'LEVEL2_NUM_DYS_1',
# 'LEVEL3_NY_1',
# #'LEVEL3_NY_STD_1',
# 'LEVEL3_NUM_DYS_1',
for i in range(1, 3):
    suffix = str(i)
    # Rows with no baby number recorded for this slot.
    no_baby = obs_data_sets.rave_clinic['BABY_NUM4_' + suffix].isna()

    for level in ('LEVEL2', 'LEVEL3'):
        # Recode the Rave coded column: '2' -> '1' and '1' -> '0'.
        std_col = level + '_NY_STD_' + suffix
        obs_data_sets.rave_clinic[std_col] = obs_data_sets.rave_clinic[std_col].replace(
            {'2': '1', '1': '0'}
        )

        # Build the checkbox column: '1' when the Rave label is 'Yes', else '0'.
        checkbox_col = level + '_NY_CB_' + suffix
        obs_data_sets.rave_clinic[checkbox_col] = np.where(
            obs_data_sets.rave_clinic[level + '_NY_' + suffix] == 'Yes', '1', '0'
        )
        # Leave the checkbox empty where the slot has no baby.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        obs_data_sets.rave_clinic.loc[no_baby, checkbox_col] = np.nan
    

In [236]:
## Neonatal Form: Neonatal Resuscitation (1) - Processing

In [237]:
# Rave column prefix -> REDCap field for the care-level checkboxes built in
# the preprocessing cell (LEVEL*_NY_CB_*).
neo_resus_lvl_dict = {
    'BABY_NUM4_': 'neo_baby_num',
    'LEVEL2_NY_CB_': 'neo_care_levels___2',
    'LEVEL3_NY_CB_': 'neo_care_levels___3',
}

# recode_long=False is specific to this conversion; the checkbox columns
# already hold '1'/'0' (effect of the flag: see obs_clinic_migration).
neo_resus_lvl = obs_clinic_migration.RedcapConv(
    neo_resus_lvl_dict,
    2,
    obs_data_sets.rave_clinic,
    recode_long=False,
)

In [238]:
## Neonatal Form: Neonatal Resuscitation (1) - Post-processing

In [239]:
# Normalise mistyped baby numbers to 'B01'.
neo_resus_lvl.change_str({
    'neo_baby_num': {
        '001': 'B01',
        'BO1': 'B01',  # letter O typed instead of zero
        # manually checked: the subject with ' B0' only had a singleton
        ' B0': 'B01',
    },
})

1    2309
2      54
Name: neo_baby_num, dtype: int64


In [240]:
## Neonatal Form: Neonatal Resuscitation (1) - Compare Data

In [241]:
# Compare the converted care-level data with obs_data_sets.redcap_clinic.
# One row was displayed: obs_id 91200444, repeat instance 2, Source REDCapDDE
# (presumably a row present only in the REDCap export - confirm).
neo_resus_lvl.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_care_levels___2,neo_care_levels___3,Source
13,91200444,2,2,0,0,REDCapDDE


In [242]:
## Neonatal Form: Neonatal Resuscitation (1) - Prepare and Save Data

In [243]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
neo_resus_lvl.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')

# Write the converted data to CSV without the pandas index column.
neo_resus_lvl.data.to_csv('../data/processed/neo_resus_lvl.csv', index = False)

In [244]:
# Neonatal Form: Neonatal Resuscitation (2)

In [245]:
## Neonatal Form: Neonatal Resuscitation (2) - Preprocessing

In [246]:
## Neonatal Form: Neonatal Resuscitation (2) - Processing

In [247]:
# Rave column prefix -> REDCap field for the neonatal resuscitation items.
# Keys end in '_' because the Rave columns are numbered per baby slot; the 2
# passed to RedcapConv is presumably that slot count - TODO confirm.
neo_resus_dict = {
    'BABY_NUM4_': 'neo_baby_num',
    'NEONAT_RESUSCITATION_NY_': 'neo_resus_initial_yn',
    'CHEST_COMPRESN_': 'neo_resus_init_cpr',
    'CPAP_AIR_': 'neo_resus_init_cpap_air',
    'CPAP_OXYGEN_': 'neo_resus_init_cpap_o2',
    'EPINEPHRINE_NY_': 'neo_resus_init_epi',
    'FFO2_NY_': 'neo_resus_init_ffo2',
    'INTUBATION_PPV_NY_': 'neo_resus_init_ppv',
    'INTUBATN_TRACH_SUCTION_NY_': 'neo_resus_init_suction',
    'LARYN_MASK_AIRWAY_NY_': 'neo_resus_init_lma',
    'NARCAN_NY_': 'neo_resus_init_narcan',
    'PPV_AIR_': 'neo_resus_init_ppv_air',
    'PPV_OXYGEN_': 'neo_resus_init_ppv_o2',
    'VOLUM_EXPAND_': 'neo_resus_init_vol_exp',
    'UNKNOWN_': 'neo_resus_init_unk',
    'OTHER_': 'neo_resus_init_oth',
    'OTHER_SPECIFY_': 'neo_resus_init_oth_spec',
    'RECV_HIGH_LEVEL_CARE_': 'neo_care_yn',

    # 04APR2021: open question - are the care-level checkboxes already taken
    # care of by neo_resus_lvl_dict? They are left commented out here.
    #'LEVEL2_NY_STD_' : 'neo_care_levels___2', # should this be 1
    #'LEVEL3_NY_STD_' : 'neo_care_levels___3', # should this be 2, according to data dictionary
    
    #'RECV_HIGH_LEVEL_CARE_LEVEL_': 'neo_care_levels',
    'LEVEL2_NUM_DYS_': 'neo_care_level2',
    'LEVEL3_NUM_DYS_': 'neo_care_level3',
    'RESPIRATORY_PROB_AFT_RESUS_NY_': 'neo_resp_yn'
}


neo_resus = obs_clinic_migration.RedcapConv(neo_resus_dict, 2, obs_data_sets.rave_clinic)

Column 'neo_baby_num' has an issue with the variable ' B0'.
Column 'neo_baby_num' has an issue with the variable 'BO1'.


In [248]:
## Neonatal Form: Neonatal Resuscitation (2) - Post-processing

In [249]:
# Normalise the baby numbers that RedcapConv reported above as having
# "an issue" (plus '001') to 'B01'.
neo_resus.change_str({
    'neo_baby_num': {
        '001': 'B01',
        'BO1': 'B01',  # letter O typed instead of zero
        # manually checked: the subject with ' B0' only had a singleton
        ' B0': 'B01',
    },
})

1    2309
2      54
Name: neo_baby_num, dtype: int64


In [250]:
## Neonatal Form: Neonatal Resuscitation (2) - Compare Data

In [251]:
# Compare the converted resuscitation data with obs_data_sets.redcap_clinic.
# One row was displayed: obs_id 91200444, repeat instance 2, Source REDCapDDE
# (presumably a row present only in the REDCap export - confirm).
neo_resus.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_resus_initial_yn,neo_resus_init_cpr,...,neo_care_yn,neo_care_level2,neo_care_level3,neo_resp_yn,Source
13,91200444,2,2,,,...,,,,,REDCapDDE


In [252]:
## Neonatal Form: Neonatal Resuscitation (2) - Prepare and Save Data

In [253]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
neo_resus.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')
# Write the converted data to CSV without the pandas index column.
neo_resus.data.to_csv('../data/processed/neo_resus.csv', index = False)

In [254]:
# Neonatal Form: Neonatal Resuscitation (3)

In [255]:
## Neonatal Form: Neonatal Resuscitation (3) - Preprocessing

In [256]:
## Neonatal Form: Neonatal Resuscitation (3) - Processing

In [257]:
# Rave column prefix -> REDCap field for the respiratory diagnosis and
# ventilation-support items. Commented-out names are Rave columns (hours /
# hours-unknown) that are currently not migrated.
neo_resp_dict = {
    'BABY_NUM5_': 'neo_baby_num',
    'RESP_DISTRESS_SYNDROME_': 'neo_resp_dx_rds',
    'MECON_ASPIR_SYNDROM_': 'neo_resp_dx_mec',
    'PNEUMONIA_': 'neo_resp_dx_pneu',
    'PNEUM_THORX_MEDIASTM_': 'neo_resp_dx_pnthorax',
    'TRANS_TACHYPNOEA_': 'neo_resp_dx_tachypnoea',
    'PULMON_HYPERTEN_': 'neo_resp_dx_pul_htn',
    'OTHER_CLIN_DX_': 'neo_resp_dx_oth',
    'SPECIFY_OTHER_DX_': 'neo_resp_dx_oth_spec',
    'ADDN_VENT_SUPPORT_': 'neo_resp_vent_sup_yn',
    'SUPPLEMENT_OXYGEN_': 'neo_resp_vent_o2',
    # 'SUPPLEMENT_OXYG_HRS_1',
    # 'SUPP_OXY_HRS_UNK_1',
    'INTUBN_VENTN_ETT_': 'neo_resp_vent_ett',
    # 'INTUBN_VENT_HRS_1',
    # 'INTUBN_VENT_HRS_UNK_1',
    'CPAP_': 'neo_resp_vent_cpap',
    # 'CPAP_HRS_1',
    # 'CPAP_HRS_UNK_1',
    'OTHER_VENT_SUPPORT_': 'neo_resp_vent_oth',
    'SPECIFY_OTH_VENT_SUPPORT_': 'neo_resp_vent_oth_spec',
    # 'OTHER_VENT_SUP_HRS_1',
    # 'OTH_VENT_SUP_HRS_UNK_1',
    
    }


# NOTE(review): unlike the other neonatal conversions, no data frame is passed
# here, so RedcapConv falls back to its default. obs_data_sets.rave_clinic is
# reassigned during antibiotics preprocessing, so confirm the default still
# refers to the preprocessed frame.
neo_resp = obs_clinic_migration.RedcapConv(neo_resp_dict, 2)

In [258]:
## Neonatal Form: Neonatal Resuscitation (3) - Post-processing

In [259]:
## Neonatal Form: Neonatal Resuscitation (3) - Compare Data

In [260]:
# Compare the converted respiratory data with obs_data_sets.redcap_clinic;
# only the header row was displayed (presumably no differences - confirm).
neo_resp.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_resp_dx_rds,neo_resp_dx_mec,...,neo_resp_vent_ett,neo_resp_vent_cpap,neo_resp_vent_oth,neo_resp_vent_oth_spec,Source


In [261]:
## Neonatal Form: Neonatal Resuscitation (3) - Prepare and Save Data

In [262]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
neo_resp.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')
# Write the converted data to CSV without the pandas index column.
neo_resp.data.to_csv('../data/processed/neo_resp.csv', index = False)

In [263]:
# Neonatal Form: Labwork

In [264]:
## Neonatal Form: Labwork - Preprocessing

In [265]:
# FutureWarning: the string comparisons below can trigger NumPy's
# "elementwise comparison failed" warning, see
# https://stackoverflow.com/questions/40659212/futurewarning-elementwise-comparison-failed-returning-scalar-but-in-the-futur
for i in range(1, 3):
    suffix = str(i)
    organism_std_col = 'ORGANISM_IDENTIFIED_STD_' + suffix
    organism_text_col = 'ORGANISM_IDENTIFIED_' + suffix

    # Decode the organism category code.
    # Results of replace() are assigned back rather than using
    # replace(..., inplace=True) on the column selection: chained in-place
    # mutation does not update the parent DataFrame once pandas Copy-on-Write
    # is active.
    obs_data_sets.rave_clinic[organism_std_col] = obs_data_sets.rave_clinic[organism_std_col].replace(
        {'1': 'Group B strep',
         '2': 'Other Gram positive',
         '3': 'Gram negative'}
    )

    # Copy the Rave organism text into a category-specific column for rows in
    # that category; all other rows stay missing.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    for specify_col, category in (
        ('ORGANISM_IDENTIFIED_GRAM_POS_' + suffix, 'Other Gram positive'),
        ('ORGANISM_IDENTIFIED_GRAM_NEG_' + suffix, 'Gram negative'),
    ):
        obs_data_sets.rave_clinic[specify_col] = np.nan
        in_category = obs_data_sets.rave_clinic[organism_std_col] == category
        obs_data_sets.rave_clinic.loc[in_category, specify_col] = (
            obs_data_sets.rave_clinic.loc[in_category, organism_text_col]
        )

    # Decode the "value not available" checkboxes of the CBC items.
    for unk_col in ('BANDS_UNK_' + suffix, 'NEUTROPHILS_UNK_' + suffix):
        obs_data_sets.rave_clinic[unk_col] = obs_data_sets.rave_clinic[unk_col].replace(
            {'0': 'Available', '1': 'Not available'}
        )

# : 'neo_lab_cbc_band_na'


# : 'neo_lab_cbc_neut_na'





# : 'neo_lab_sepsis_gram_neg'

  res_values = method(rvalues)


In [266]:
## Neonatal Form: Labwork - Processing

In [267]:
# Rave column prefix -> REDCap field for the neonatal labwork items.
# Commented-out names are the Rave *_STD_ (coded) twins of mapped columns,
# which are not migrated. The ORGANISM_IDENTIFIED_GRAM_POS_/_GRAM_NEG_ columns
# are created in the preprocessing cell.
neo_lab_dict = {
    'BABY_NUM6_': 'neo_baby_num',
    'CBC_24HRS_': 'neo_lab_cbc_yn',
    #'CBC_24HRS_STD_1',
    'HGB_': 'neo_lab_cbc_hgb',
    'PLATELETS_': 'neo_lab_cbc_plat',
    'BANDS_24HRS_': 'neo_lab_cbc_band',
    'BANDS_UNK_': 'neo_lab_cbc_band_na',
    'LEUK_WBC_24HRS_': 'neo_lab_cbc_wbc',
    'NEUTROPHIL_POLYCYT_': 'neo_lab_cbc_neut',
    'NEUTROPHILS_UNK_': 'neo_lab_cbc_neut_na',
    'CULTURE_SEPSIS_': 'neo_lab_sepsis_yn',
    #'CULTURE_SEPSIS_STD_1',
    'WHEN_SEPSIS_PROVEN_': 'neo_lab_sepsis_dx_time',
    #'WHEN_SEPSIS_PROVEN_STD_1',
    
    
    # The decoded category column (not the free-text organism column) feeds
    # neo_lab_sepsis_path.
    #'ORGANISM_IDENTIFIED_1': 'neo_lab_sepsis_path',
    'ORGANISM_IDENTIFIED_STD_': 'neo_lab_sepsis_path',
    
    'ORGANISM_IDENTIFIED_GRAM_POS_': 'neo_lab_sepsis_gram_pos',
    'ORGANISM_IDENTIFIED_GRAM_NEG_': 'neo_lab_sepsis_gram_neg', 
    
    
    'CULT_NEG_SEPSIS_RECV_ANBIOTICS_': 'neo_lab_sepsis_cult_neg_yn',
    #'CULT_NEG_SEPSIS_RECV_ANBIO_STD_1',
    'NUM_NEG_CULTURES_': 'neo_lab_sepsis_cult_neg',
  
}


neo_lab = obs_clinic_migration.RedcapConv(neo_lab_dict, 2, obs_data_sets.rave_clinic)

Column 'neo_lab_sepsis_dx_time' has an issue with the variable '= 48 hours after birth'.


In [268]:
## Neonatal Form: Labwork - Post-processing

In [269]:
# Replace the sepsis-timing label that RedcapConv reported above as having
# "an issue", and normalise baby numbers to 'B01'.
neo_lab.change_str({
    'neo_lab_sepsis_dx_time': {
        '= 48 hours after birth': '≤48 hours after birth',
    },
    'neo_baby_num': {
        # TODO: should this be moved to preprocessing?
        '001': 'B01',
        '01': 'B01',
    },
})

2    6
1    3
Name: neo_lab_sepsis_dx_time, dtype: int64
1    2308
2      55
Name: neo_baby_num, dtype: int64


In [270]:
## Neonatal Form: Labwork - Compare Data

In [271]:
# Compare the converted labwork data with obs_data_sets.redcap_clinic.
# One row was displayed: obs_id 91200444, repeat instance 2, Source REDCapDDE
# (presumably a row present only in the REDCap export - confirm).
neo_lab.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_lab_cbc_yn,neo_lab_cbc_hgb,...,neo_lab_sepsis_gram_pos,neo_lab_sepsis_gram_neg,neo_lab_sepsis_cult_neg_yn,neo_lab_sepsis_cult_neg,Source
13,91200444,2,2,,,...,,,,,REDCapDDE


In [272]:
## Neonatal Form: Labwork - Prepare and Save Data

In [273]:
# Arguments are presumably event name, form-complete field and repeat
# instrument - TODO confirm in obs_clinic_migration.
neo_lab.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')
# Write the converted data to CSV without the pandas index column.
neo_lab.data.to_csv('../data/processed/neo_lab.csv', index = False)

In [274]:
# Neonatal Form: Discharge Data

In [275]:
## Neonatal Form: Discharge Data - Preprocessing

In [276]:
for i in range(1, 3):
    for date_time in [
            'FIRST_DISCHARGE_HOME_DT_TM_', 'BABY_HOSP_READMIT_DTTM_',
            'FINAL_DISCHARGE_DT_TM_','BABY_DEATH_DT_TM_'
    ]:
        obs_data_sets.rave_clinic[date_time + 'DATE_' + str(i)] = pd.to_datetime(
            obs_data_sets.rave_clinic[date_time + str(i)], 
            format = '%d%b%Y %H:%M:%S'
        ).dt.strftime('%Y-%m-%d')
        obs_data_sets.rave_clinic[date_time + 'TIME_' + str(i)] = pd.to_datetime(
            obs_data_sets.rave_clinic[date_time + str(i)], 
            format = '%d%b%Y %H:%M:%S'
        ).dt.strftime('%H:%M')
    
    
    
    
    
    obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_yn_' + str(i)] = np.NaN
    obs_data_sets.rave_clinic.loc[
        (   
            (obs_data_sets.rave_clinic['BABYOBSID_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DIS_HOME_NA_' + str(i)] == '1')
        ),
        'FIRST_DISCHARGE_HOME_DT_TM_yn_' + str(i)
    ] = 'Not applicable'
    
    obs_data_sets.rave_clinic.loc[
        (   
            (obs_data_sets.rave_clinic['BABYOBSID_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DIS_HOME_NA_' + str(i)] == '0')
        ),
        'FIRST_DISCHARGE_HOME_DT_TM_yn_' + str(i)
    ] = 'Yes'
    
    
    
    
    
    
    
    
    
    obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_yn_date_' + str(i)] = np.NaN
    
    obs_data_sets.rave_clinic.loc[
        (   
            (obs_data_sets.rave_clinic['BABYOBSID_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_' + str(i)].isna())
            & (obs_data_sets.rave_clinic['FIRST_DIS_HOME_NA_' + str(i)] == '0')
        ),
        'FIRST_DISCHARGE_HOME_DT_TM_yn_date_' + str(i)
    ] = 'No'
    obs_data_sets.rave_clinic.loc[
        (   
            (obs_data_sets.rave_clinic['BABYOBSID_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_' + str(i)].notna())
            & (obs_data_sets.rave_clinic['FIRST_DIS_HOME_NA_' + str(i)] == '0')
        ),
        'FIRST_DISCHARGE_HOME_DT_TM_yn_date_' + str(i)
    ] = 'Yes'
    
    obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_yn_time_' + str(i)] = obs_data_sets.rave_clinic[
        'FIRST_DISCHARGE_HOME_DT_TM_yn_date_' + str(i)
    ]
    
    
    
    
    
    
    

    
    
    
    # Final-discharge flags for baby `i`.
    # RAVE's FINAL_DISCHARG_DTTM_NA_ column is empty, so the yes/no answers
    # are derived here from the readmission answer and the recorded dates.
    # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
    final_dc_yn_col = 'FINAL_DISCHARGE_DT_TM_yn_' + str(i)
    final_dc_date_yn_col = 'FINAL_DISCHARGE_DT_TM_yn_date_' + str(i)
    final_dc_time_yn_col = 'FINAL_DISCHARGE_DT_TM_yn_time_' + str(i)

    baby_readmitted = obs_data_sets.rave_clinic['BABY_READMIT_HOSP_' + str(i)] == 'Yes'
    final_dc_dttm_known = obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_' + str(i)].notna()
    final_dc_year_known = obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_YYYY_' + str(i)].notna()

    # 'Yes' when the baby was readmitted and at least part of the final
    # discharge date is recorded. A year on its own counts; an earlier
    # version of this mask required the full date-time.
    obs_data_sets.rave_clinic[final_dc_yn_col] = np.nan
    obs_data_sets.rave_clinic.loc[
        baby_readmitted & (final_dc_dttm_known | final_dc_year_known),
        final_dc_yn_col
    ] = 'Yes'

    # Among babies flagged 'Yes' above: 'Yes' when the full date-time is
    # recorded, 'No' when it is not. Everyone else stays NaN.
    final_dc_flagged = obs_data_sets.rave_clinic[final_dc_yn_col] == 'Yes'
    obs_data_sets.rave_clinic[final_dc_date_yn_col] = np.nan
    obs_data_sets.rave_clinic.loc[
        final_dc_flagged & final_dc_dttm_known,
        final_dc_date_yn_col
    ] = 'Yes'
    obs_data_sets.rave_clinic.loc[
        final_dc_flagged & ~final_dc_dttm_known,
        final_dc_date_yn_col
    ] = 'No'

    # The time flag mirrors the date flag (both come from one date-time column).
    obs_data_sets.rave_clinic[final_dc_time_yn_col] = obs_data_sets.rave_clinic[
        final_dc_date_yn_col
    ]
    
    
    
    # obs_data_sets.rave_clinic[key + 'yn_date_' + str(i)] = np.NaN
    #     obs_data_sets.rave_clinic.loc[
    #         (
    #             (obs_data_sets.rave_clinic[val + str(i)] == 'Yes')
    #             & (obs_data_sets.rave_clinic[key + str(i)].isna())
    #         ),
    #         key + 'yn_date_' + str(i)
    #     ] = 'No'
    #     obs_data_sets.rave_clinic.loc[
    #         (
    #             (obs_data_sets.rave_clinic[val + str(i)] == 'Yes')
    #             & (obs_data_sets.rave_clinic[key + str(i)].notna())
    #         ),
    #         key + 'yn_date_' + str(i)
    #     ] = 'Yes'
    #     obs_data_sets.rave_clinic[key + 'yn_time_' + str(i)] = obs_data_sets.rave_clinic[
    #         key + 'yn_date_' + str(i)
    #     ]
    
    # # working on during Wednesday meeting
    # obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_yn_time_' + str(i)] = np.NaN
    # obs_data_sets.rave_clinic.loc[
    #     (   
    #         (obs_data_sets.rave_clinic['BABY_READMIT_HOSP_' + str(i)] == 'Yes')
    #         & (obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_' + str(i)].notna())
    #     ),
    #     'FINAL_DISCHARGE_DT_TM_yn_time_' + str(i)
    # ] = 'Yes'
    # # obs_data_sets.rave_clinic.loc[
    # #     (   
            
    # #         (obs_data_sets.rave_clinic['BABY_READMIT_HOSP_' + str(i)] == 'Yes')
    # #         & (obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_' + str(i)].isna())

    # #     ),
    # #     'FINAL_DISCHARGE_DT_TM_yn_date_' + str(i)
    # # ] = 'Yes'
    
    # obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_yn_time_' + str(i)] = obs_data_sets.rave_clinic[
    #     'FINAL_DISCHARGE_DT_TM_yn_date_' + str(i)
    # ]
    
    
    
    
    
    
    
    
    
    # Date/time "recorded?" flags for events gated by a single yes/no question.
    # Maps the RAVE date-time column prefix to the prefix of the yes/no column
    # that says whether the event happened. FINAL_DISCHARGE_DT_TM_ is not
    # listed here because it is handled separately earlier in this loop.
    # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
    for key, val in {
            'BABY_HOSP_READMIT_DTTM_': 'BABY_READMIT_HOSP_',
            'BABY_DEATH_DT_TM_': 'BABY_DIE_FIRST28DYS_'
    }.items():
        event_happened = obs_data_sets.rave_clinic[val + str(i)] == 'Yes'
        dttm_recorded = obs_data_sets.rave_clinic[key + str(i)].notna()
        recorded_date_col = key + 'yn_date_' + str(i)

        # NaN unless the event happened; then 'Yes'/'No' by whether a
        # date-time value exists for it.
        obs_data_sets.rave_clinic[recorded_date_col] = np.nan
        obs_data_sets.rave_clinic.loc[
            event_happened & ~dttm_recorded,
            recorded_date_col
        ] = 'No'
        obs_data_sets.rave_clinic.loc[
            event_happened & dttm_recorded,
            recorded_date_col
        ] = 'Yes'

        # The time flag mirrors the date flag (both come from one date-time column).
        obs_data_sets.rave_clinic[key + 'yn_time_' + str(i)] = obs_data_sets.rave_clinic[
            recorded_date_col
        ]








      
    
# neo_dc_rave = ['BABY_NUM8_',
# 'OTHER_DX_PRIOR_DISCHARGE_NY_',
# 'NOENATAL_ENCEPHAL_',
# 'INTRACRAN_HEM_',
# 'HYPOGLYCEMIA_',
# 'HYPERBILIRUB_REQ_TX_',
# 'NEONATAL_ABSTIN_SYND_',
# 'INTESTINAL_OBSTRUCTN_',
# 'NECROTIZ_ENTEROCOLITIS_',
# 'INTESTINAL_PERFORN_',
# 'RENAL_FAILURE_',
# 'DEHYDRATION_',
# 'ELECTROLYTE_ABNORM_',
# 'METABOLIC_DISORDER_',
# 'WEIGHT_LOSS_GT10PC_NY_',
# 'BRACHIAL_PLEXUS_INJURY_NY_',
# 'CAPUT_SUCCEDANEUM_NY_',
# 'CEPHALOHEMATOMA_NY_',
# 'CLAVICULAR_FRACTURE_NY_',
# 'FACIAL_NERVE_INJURY_NY_',
# 'OTHER_DX_DISCHARGE_',
# 'SPECIFY_DX_DISCHARGE_',
# 'FIRST_DISCHARGE_HOME_DATE_',
# 'FIRST_DISCHARGE_HOME_TIME_',
# 'BABY_READMIT_HOSP_',
# 'BABY_HOSP_READMIT_DATE_',
# 'BABY_HOSP_READMIT_TIME_',
# 'REASON_ADMIT_HOSP_',
# 'FINAL_DISCHARGE_DATE_',
# 'FINAL_DISCHARGE_TIME_',
# 'BABY_DIE_FIRST28DYS_',
# 'BABY_DEATH_DATE_',
# 'BABY_DEATH_TIME_',
# 'AUTOPSY_PERFORMED_NYU_',
# ]

# neo_dc_redcap = ['neo_baby_num',
# 'neo_dc_dx_oth_yn',
# 'neo_dc_dx_enceph',
# 'neo_dc_dx_ich',
# 'neo_dc_dx_hypogly',
# 'neo_dc_dx_bili',
# 'neo_dc_dx_nas',
# 'neo_dc_dx_int_obst',
# 'neo_dc_dx_nec',
# 'neo_dc_dx_int_perf',
# 'neo_dc_dx_renal_fail',
# 'neo_dc_dx_deh2o',
# 'neo_dc_dx_elec_abn',
# 'neo_dc_dx_metabolic',
# 'neo_dc_dx_wt_loss',
# 'neo_dc_dx_brachia',
# 'neo_dc_dx_caput',
# 'neo_dc_dx_cepha_hema',
# 'neo_dc_dx_fracture',
# 'neo_dc_dx_face_nerve',
# 'neo_dc_dx_oth',
# 'neo_dc_dx_oth_oth_spec',
# 'neo_dc_1st_date',
# 'neo_dc_1st_time',
# 'neo_dc_readmit_yn',
# 'neo_dc_readmit_date',
# 'neo_dc_readmit_time',
# 'neo_dc_readmit_reason',
# 'neo_dc_final_date',
# 'neo_dc_final_time',
# 'neo_dc_death_yn',
# 'neo_dc_death_date',
# 'neo_dc_death_time',
# 'neo_dc_autopsy',
# ]
    
    
# neo_dc = REDCap_Conv(neo_dc_rave, neo_dc_redcap, 2)
  

In [277]:
## Neonatal Form: Discharge Data - Processing

In [334]:
# RAVE column prefix -> REDCap field name for the neonatal discharge section.
# Entry order is kept exactly as before.
neo_dc_dict = {
    'BABY_NUM8_': 'neo_baby_num',

    # --- diagnoses prior to discharge ---
    'OTHER_DX_PRIOR_DISCHARGE_NY_': 'neo_dc_dx_oth_yn',
    'NOENATAL_ENCEPHAL_': 'neo_dc_dx_enceph',
    'INTRACRAN_HEM_': 'neo_dc_dx_ich',
    'HYPOGLYCEMIA_': 'neo_dc_dx_hypogly',
    'HYPERBILIRUB_REQ_TX_': 'neo_dc_dx_bili',
    'NEONATAL_ABSTIN_SYND_': 'neo_dc_dx_nas',
    'INTESTINAL_OBSTRUCTN_': 'neo_dc_dx_int_obst',
    'NECROTIZ_ENTEROCOLITIS_': 'neo_dc_dx_nec',
    'INTESTINAL_PERFORN_': 'neo_dc_dx_int_perf',
    'RENAL_FAILURE_': 'neo_dc_dx_renal_fail',
    'DEHYDRATION_': 'neo_dc_dx_deh2o',
    'SHOCK_': 'neo_dc_dx_shock',
    'HEMATOL_ABNORM_': 'neo_dc_dx_hemat_ab',
    'ELECTROLYTE_ABNORM_': 'neo_dc_dx_elec_abn',
    'METABOLIC_DISORDER_': 'neo_dc_dx_metabolic',
    'WEIGHT_LOSS_GT10PC_NY_': 'neo_dc_dx_wt_loss',
    'BRACHIAL_PLEXUS_INJURY_NY_': 'neo_dc_dx_brachia',
    'CAPUT_SUCCEDANEUM_NY_': 'neo_dc_dx_caput',
    'CEPHALOHEMATOMA_NY_': 'neo_dc_dx_cepha_hema',
    'CLAVICULAR_FRACTURE_NY_': 'neo_dc_dx_fracture',
    'FACIAL_NERVE_INJURY_NY_': 'neo_dc_dx_face_nerve',
    'OTHER_DX_DISCHARGE_': 'neo_dc_dx_oth',
    'SPECIFY_DX_DISCHARGE_': 'neo_dc_dx_oth_oth_spec',

    # --- first discharge home (the *_yn_* columns are built in preprocessing) ---
    'FIRST_DISCHARGE_HOME_DT_TM_yn_date_': 'neo_dc_1st_date_yn',
    'FIRST_DISCHARGE_HOME_DT_TM_yn_time_': 'neo_dc_1st_time_yn',
    'FIRST_DISCHARGE_HOME_DT_TM_DATE_': 'neo_dc_1st_date',
    'FIRST_DISCHARGE_HOME_DT_TM_TIME_': 'neo_dc_1st_time',

    # --- readmission ---
    'BABY_READMIT_HOSP_': 'neo_dc_readmit_yn',
    'BABY_HOSP_READMIT_DTTM_yn_date_': 'neo_dc_readmit_date_yn',
    'BABY_HOSP_READMIT_DTTM_yn_time_': 'neo_dc_readmit_time_yn',
    'BABY_HOSP_READMIT_DTTM_DATE_': 'neo_dc_readmit_date',
    'BABY_HOSP_READMIT_DTTM_TIME_': 'neo_dc_readmit_time',
    'REASON_ADMIT_HOSP_': 'neo_dc_readmit_reason',

    # --- final discharge ---
    'FINAL_DISCHARGE_DT_TM_yn_': 'neo_dc_final_yn',
    'FINAL_DISCHARGE_DT_TM_yn_date_': 'neo_dc_final_date_yn',
    'FINAL_DISCHARGE_DT_TM_yn_time_': 'neo_dc_final_time_yn',
    'FINAL_DISCHARGE_DT_TM_DATE_': 'neo_dc_final_date',
    'FINAL_DISCHARGE_DT_TM_TIME_': 'neo_dc_final_time',

    # --- death within the first 28 days ---
    'BABY_DIE_FIRST28DYS_': 'neo_dc_death_yn',
    'BABY_DEATH_DT_TM_yn_date_': 'neo_dc_death_date_yn',
    'BABY_DEATH_DT_TM_yn_time_': 'neo_dc_death_time_yn',
    'BABY_DEATH_DT_TM_DATE_': 'neo_dc_death_date',
    'BABY_DEATH_DT_TM_TIME_': 'neo_dc_death_time',
    'AUTOPSY_PERFORMED_NYU_': 'neo_dc_autopsy',
}

# NOTE(review): the second argument (2) is presumably the number of
# numerically-suffixed column sets per subject -- confirm against RedcapConv.
neo_dc = obs_clinic_migration.RedcapConv(neo_dc_dict, 2, obs_data_sets.rave_clinic)

In [279]:
## Neonatal Form: Discharge Data - Post-processing

In [335]:
# Normalise mistyped baby numbers to 'B01'.
# ' B0' was checked manually: that subject only had a singleton.
neo_dc.change_str(
    {
        'neo_baby_num': dict.fromkeys(['001', 'BO1', ' B0'], 'B01'),
    }
)

1    2308
2      56
Name: neo_baby_num, dtype: int64


In [281]:
## Neonatal Form: Discharge Data - Compare Data

In [336]:
# Compare the converted discharge data with the REDCap double-data-entry export.
# NOTE(review): the displayed table presumably lists only rows that disagree,
# labelled by 'Source' -- confirm against RedcapConv.compare_conv_dde.
neo_dc.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,neo_baby_num,neo_dc_dx_oth_yn,neo_dc_dx_enceph,...,neo_dc_death_date,neo_dc_death_time_yn,neo_dc_death_time,neo_dc_autopsy,Source
13,91200444,2,2,,,...,,,,,REDCapDDE


In [283]:
## Neonatal Form: Discharge Data - Prepare and Save Data

In [284]:
# Prepare the converted data for REDCap import, then write it to CSV.
# NOTE(review): the arguments look like (event name, "<form>_complete" column,
# repeat instrument) -- confirm against RedcapConv.prep_imp.
neo_dc.prep_imp('neonatal_data_arm_1', 'neonatal_form_complete', 'neonatal_form')

# neo_dc_1st_yn has no equivalent RAVE column, so it is not part of this export
neo_dc.data.to_csv('../data/processed/neo_dc.csv', index = False)

In [285]:
# Neonatal Form: Anomaly dx

In [286]:
## Neonatal Form: Anomaly dx - Preprocessing

In [287]:
## Neonatal Form: Anomaly dx - Processing

In [288]:
# RAVE column prefix -> REDCap field name for the congenital anomaly form.
anom_dx_dict = {
    'BABY_NUM7_': 'anom_baby_num',
    'DX_CONGENITAL_ANOMALY_NY_': 'anom_dx_yn',
    'ANOMALY_CATEGORY_': 'anom_dx_cat',
    'SPECIFY_ANOMALY2_': 'anom_dx_spec',
    'DIAGNOSIS_': 'anom_dx_prepost',
}

# NOTE(review): the second argument (6) is presumably the number of
# numerically-suffixed column sets per subject -- confirm against RedcapConv.
anom_dx = obs_clinic_migration.RedcapConv(
    anom_dx_dict,
    6,
    master_df = obs_data_sets.rave_clinic,
)

Column 'anom_baby_num' has an issue with the variable 'BOI'.
Column 'anom_baby_num' has an issue with the variable 'B)!'.
Column 'anom_dx_cat' has an issue with the variable 'Genitourinary Tract'.
Column 'anom_dx_cat' has an issue with the variable 'Extremities – Skeletal'.
Column 'anom_dx_cat' has an issue with the variable 'Head – Cranium & Brain'.
Column 'anom_dx_cat' has an issue with the variable 'Spine – Back'.
Column 'anom_dx_cat' has an issue with the variable 'Structural – Other'.
Column 'anom_dx_cat' has an issue with the variable 'Abdominal Wall'.


In [289]:
## Neonatal Form: Anomaly dx - Post-processing

In [290]:
# Rewrite the values RedcapConv reported an issue with during conversion.
# Category labels: the en dash becomes a hyphen and capitalisation is adjusted.
# Baby numbers: mistyped entries are normalised to 'B01'.
anom_dx.change_str(
    {
        'anom_dx_cat': {
            'Head – Cranium & Brain': 'Head - Cranium & brain',
            'Genitourinary Tract': 'Genitourinary tract',
            'Extremities – Skeletal': 'Extremities - Skeletal',
            'Spine – Back': 'Spine - Back',
            'Structural – Other': 'Structural - Other',
            'Abdominal Wall': 'Abdominal wall',
        },
        'anom_baby_num': dict.fromkeys(['001', 'B)!', 'BOI'], 'B01'),
    }
)

8     57
2     46
6     43
5     18
9     17
11    16
12    13
7      9
17     9
3      5
15     4
13     3
1      2
10     2
Name: anom_dx_cat, dtype: int64
1    2354
2      58
Name: anom_baby_num, dtype: int64


In [291]:
## Neonatal Form: Anomaly dx - Compare Data

In [292]:
# Compare the converted anomaly data with the REDCap double-data-entry export.
# NOTE(review): an empty table presumably means no discrepancies -- confirm
# against RedcapConv.compare_conv_dde.
anom_dx.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,anom_baby_num,anom_dx_yn,anom_dx_cat,anom_dx_spec,anom_dx_prepost,Source


In [293]:
## Neonatal Form: Anomaly dx - Prepare and Save Data

In [294]:
# Prepare the converted anomaly data for REDCap import, then write it to CSV.
# NOTE(review): the arguments look like (event name, "<form>_complete" column,
# repeat instrument) -- confirm against RedcapConv.prep_imp.
anom_dx.prep_imp('neonatal_data_arm_1', 'neonatal_congenital_anomaly_complete', 'neonatal_congenital_anomaly')
anom_dx.data.to_csv('../data/processed/anom_dx.csv', index = False)

In [295]:
# Supplemental Delivery Data: Antepartum

In [296]:
## Supplemental Delivery Data: Antepartum - Preprocessing

In [297]:
## Supplemental Delivery Data: Antepartum - Processing

In [298]:
# RAVE column -> REDCap field name for the supplemental delivery form
# (antepartum section).
supp_del_ant_dict = {
    'BABY_SSD': 'supp_baby_obs_id',
    'COMPLICATIONS_SSD_NY': 'supp_ant_comp_yn',

    # --- fetal complications ---
    'FETAL_COMPLICATION_NY': 'supp_ant_fet_yn',
    'FETAL_ANOMALY_NY': 'supp_ant_fet_anom',
    'ISOIMMUN_ALLOIMMUN_NY': 'supp_ant_fet_isoimm',
    'INTRAUTERIN_GROWTH_RESTRICTN_NY': 'supp_ant_fet_iugr',
    'LARGE_GESTN_AGE': 'supp_ant_fet_lga',
    'OLIGOHYDRAMNIOS2_NY': 'supp_ant_fet_oligo',
    'POLYHYDRAMNIOS2_NY': 'supp_ant_fet_poly',
    'OTHER_FETAL_COMPLICATN2_NY': 'supp_ant_fet_oth',
    'SPECIFY_ANOMALY3': 'supp_ant_fet_anom_spec',
    'IUGR_PERCENTILE2': 'supp_ant_fet_iugr_spec',
    'SPECIFY_OTH_FETAL_COMPLICATN': 'supp_ant_fet_oth_spec',

    # --- placental complications ---
    'PLACENTAL_COMPLICATION_NY': 'supp_ant_plac_yn',
    'PLACENTAL_ABRUPTION4_NY': 'supp_ant_plac_abrptn',
    'SEVERITY_PLACEN_ABRUPTN': 'supp_ant_plac_abrptn_sev',

    # --- maternal complications ---
    'MATERNAL_COMPLICATION_NY': 'supp_ant_mat_yn',
    'PRETERM_PREMAT_RUPTUE_MEMBRAN': 'supp_ant_mat_pprom',
    'OTHER_MATERNAL_COMPLICATN': 'supp_ant_mat_oth',
    'SPECIFY_OTH_MAT_COMPLICATN': 'supp_ant_mat_oth_spec',
}

# NOTE(review): 0 presumably means the RAVE columns carry no numeric
# instance suffix -- confirm against RedcapConv.
supp_del_ant = obs_clinic_migration.RedcapConv(supp_del_ant_dict, 0)

Column 'supp_ant_fet_iugr_spec' has an issue with the variable '= 3 or < 10 Percentile'.


In [299]:
## Supplemental Delivery Data: Antepartum - Post-processing

In [300]:
# The "≥" sign was not read correctly from RAVE (it arrives as "="), so
# restore it in the IUGR percentile value.
supp_del_ant.change_str(
    {
        'supp_ant_fet_iugr_spec': {
            "= 3 or < 10 Percentile": "≥3 or < 10 Percentile",
        },
    }
)

# Remove rows with no baby id (i.e. no second baby and no data).
# Earlier manual version, kept for reference:
#supp_del_ant.data.drop(supp_del_ant.data[supp_del_ant.data['supp_baby_obs_id'].astype(str) == 'nan'].index, inplace = True)
supp_del_ant.remove_na()

# every remaining row is given repeat instance 1
supp_del_ant.data['redcap_repeat_instance'] = 1

1    1
2    1
Name: supp_ant_fet_iugr_spec, dtype: int64


In [301]:
## Supplemental Delivery Data: Antepartum - Compare Data

In [302]:
# Compare the converted antepartum data with the REDCap double-data-entry export.
# NOTE(review): an empty table presumably means no discrepancies -- confirm
# against RedcapConv.compare_conv_dde.
supp_del_ant.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,supp_baby_obs_id,supp_ant_comp_yn,supp_ant_fet_yn,...,supp_ant_mat_yn,supp_ant_mat_pprom,supp_ant_mat_oth,supp_ant_mat_oth_spec,Source


In [303]:
## Supplemental Delivery Data: Antepartum - Prepare and Save Data

In [304]:
# Prepare the converted antepartum data for REDCap import, then write it to CSV.
# NOTE(review): the arguments look like (event name, "<form>_complete" column,
# repeat instrument) -- confirm against RedcapConv.prep_imp.
supp_del_ant.prep_imp('supplementary_deli_arm_1', 'supplemental_delivery_data_complete', 'supplemental_delivery_data')

supp_del_ant.data.to_csv('../data/processed/supp_del_ant.csv', index = False)

In [305]:
# Supplemental Delivery Data: Labour and Delivery

In [306]:
## Supplemental Delivery Data: Labour and Delivery - Preprocessing

In [307]:
# Recode the "rupture of membranes date/time unknown" flag from the RAVE
# codes '0'/'1' to the labels 'Known'/'Unknown'.
# The result is assigned back to the column rather than calling
# .replace(..., inplace=True) on a column selection: that chained in-place
# form updates only a temporary copy under pandas Copy-on-Write and raises a
# warning in recent pandas versions.
obs_data_sets.rave_clinic['RUP_MEMBRANE_DTTM_UNK'] = (
    obs_data_sets.rave_clinic['RUP_MEMBRANE_DTTM_UNK'].replace(
        {'0': 'Known', '1': 'Unknown'}
    )
)

# Re-format the rupture of membranes timestamp as 'YYYY-MM-DD HH:MM' text.
obs_data_sets.rave_clinic.loc[:, 'RUPTUR_MEMBRANE_DTTM'] = pd.to_datetime(
    obs_data_sets.rave_clinic['RUPTUR_MEMBRANE_DTTM']
).dt.strftime('%Y-%m-%d %H:%M')

In [308]:
## Supplemental Delivery Data: Labour and Delivery - Processing

In [309]:
# RAVE column -> REDCap field name for the supplemental delivery form
# (labour and delivery section).
supp_del_lnd_dict = {
    'BABY_NUM9': 'supp_baby_obs_id',

    # --- rupture of membranes ---
    'RUP_MEMBRANE_DTTM_UNK': 'supp_lnd_rom_dttm_unk',
    'RUPTUR_MEMBRANE_DTTM': 'supp_lnd_rom_dttm',
    'RUPTURE_MEMBRANE_NY': 'supp_lnd_rom_meth',

    # --- type of birth ---
    'TYPE_BIRTH': 'supp_lnd_mod',
    'SUPP_TYPE_VAGIN_BIRTH': 'supp_lnd_vag_type',
    'FORCEPS_ASSIST': 'supp_lnd_avd_forcep',
    'VACUUM_ASSIST': 'supp_lnd_avd_vacuum',
    'TYPE_C_SECTION': 'supp_lnd_cs_type',
    'INDICATN_C_SECTION': 'supp_lnd_cs_ind',

    # --- C-section: fetal indications ---
    'FETAL_INDICATN': 'supp_lnd_cs_fet_yn',
    'FETALANOMALY': 'supp_lnd_cs_fet_anom',
    'CORDPROLAPSE': 'supp_lnd_cs_fet_cord',
    'IUGR3': 'supp_lnd_cs_fet_iugr',
    'MACROSOMNIA1': 'supp_lnd_cs_fet_lga',
    'MALPOSITION1': 'supp_lnd_cs_fet_pos',
    'WELL_BEING_CONCERNS1': 'supp_lnd_cs_fet_wellbeing',
    'OTHER_FETAL_INDICATN': 'supp_lnd_cs_fet_oth',
    'SPECIFY_OTHER_INDICATN1': 'supp_lnd_cs_fet_oth_spec',

    # --- C-section: maternal indications ---
    'MATERNAL_INDICATN_C_SECTN': 'supp_lnd_cs_mat_yn',
    'FAILEDFORCEPS_VACUM': 'supp_lnd_cs_mat_avd',
    'HYPERTENSIVE_DISORD_PREG': 'supp_lnd_cs_mat_htn',
    'MULTIPLE_GESTATION': 'supp_lnd_cs_mat_mg',
    'NON_PROGRESIV_1ST_STAGE': 'supp_lnd_cs_mat_npfs',
    'NON_PROGRESIV_2ND_STAGE': 'supp_lnd_cs_mat_npss',
    'PLACENTAPREVIA': 'supp_lnd_cs_mat_prev',
    'PLACENTAL_ABRUPTN': 'supp_lnd_cs_mat_abrpt',
    'PRE_EXISTHEALTH_PROBMS': 'supp_lnd_cs_mat_hea',
    'PREV_C_SECTION': 'supp_lnd_cs_mat_pvcs',
    'PREV_UTERINE_INCISION': 'supp_lnd_cs_mat_myo',
    'SUSPECTED_CHORIOAMNIONITIS': 'supp_lnd_cs_mat_chro',
    'UNSUCCESS_VBAC': 'supp_lnd_cs_mat_vbac',
    'UTERINE_RUPTUR': 'supp_lnd_cs_mat_rup',
    'OTHER_OBSTET_COMPLICATN': 'supp_lnd_cs_mat_oth',
    'SPECIFY_OTH_MAT_INDICATN': 'supp_lnd_cs_mat_oth_spec',

    # --- C-section: other indications ---
    'OTHER_INDICATN_C_SECTION': 'supp_lnd_cs_oth_yn',
    'ACCOMDATE_CAREPROVID_ORG': 'supp_lnd_cs_oth_accom',
    'MATERNALREQUEST': 'supp_lnd_cs_oth_mat_req',
    'OTHER_UNKNOWN': 'supp_lnd_cs_oth_unk',
    'OTHER_OTHER2': 'supp_lnd_cs_oth_oth',
    'SPECIFY_OTH_INDICATION': 'supp_lnd_cs_oth_spec',

    # --- presentation at delivery ---
    'PRESENTATION_DELIVERY': 'supplnd_present',
    'TYPE_BREECH': 'supplnd_breech',
    'CEPHALIC_TYPE': 'supplnd_pres_vtx',

    # --- labour and delivery complications ---
    'LABOR_DELIVERY_COMPLICATIONS_NY': 'supp_lnd_complications_yn',
    'ATYPICAL_ABNORM_FETAL_SURVEIL': 'supplnd_nrfhr',
    'CLIN_INFECTION_NY': 'supplnd_infxn',
    'FETAL_TACHYCARDIA2_NY': 'supplnd_fet_tachy',
    'CLIN_CHORIOAMNIONITIS_NY': 'supplnd_chorio',
    'CORD_PROLAPSE2_NY': 'supplnd_prolapse',
    'MECONIUM3_NY': 'supplnd_mec',
    'PLACENTAL_ABRUPTION5_NY': 'supplnd_abrpt',
    'SHOULDER_DYSTOCIA3_NY': 'supplnd_shoulder',

    # --- placenta handling ---
    'PLACENTA_SENT_PATHOLOGY_NYUNK': 'supplnd_plac_path_yn',
    'PLACENTA_PICKEDUP_BIOBANK': 'supplnd_biobank_yn',
}

# NOTE(review): 0 presumably means the RAVE columns carry no numeric
# instance suffix -- confirm against RedcapConv.
supp_del_lnd = obs_clinic_migration.RedcapConv(supp_del_lnd_dict, 0, obs_data_sets.rave_clinic)

Column 'supp_lnd_mod' has an issue with the variable 'C-Section'.
Column 'supplnd_breech' has an issue with the variable 'Breech Type Unknown'.


In [310]:
## Supplemental Delivery Data: Labour and Delivery - Post-processing

In [311]:
# Rewrite the two values RedcapConv reported an issue with during conversion.
supp_del_lnd.change_str(
    {
        'supp_lnd_mod': {'C-Section': 'C-section'},
        'supplnd_breech': {'Breech Type Unknown': 'Unknown'},
    }
)

# Remove rows with no baby id (i.e. no second baby and no data).
# Earlier manual version, kept for reference:
#supp_del_lnd.data.drop(supp_del_lnd.data[supp_del_lnd.data['supp_baby_obs_id'].astype(str) == 'nan'].index, inplace = True)
supp_del_lnd.remove_na()

# every remaining row is given repeat instance 1
supp_del_lnd.data['redcap_repeat_instance'] = 1

2    40
1    18
Name: supp_lnd_mod, dtype: int64
5    19
3     4
Name: supplnd_breech, dtype: int64


In [312]:
## Supplemental Delivery Data: Labour and Delivery - Compare Data

In [313]:
# Compare the converted labour and delivery data with the REDCap
# double-data-entry export.
# NOTE(review): an empty table presumably means no discrepancies -- confirm
# against RedcapConv.compare_conv_dde.
supp_del_lnd.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,redcap_repeat_instance,supp_baby_obs_id,supp_lnd_rom_dttm_unk,supp_lnd_rom_dttm,...,supplnd_abrpt,supplnd_shoulder,supplnd_plac_path_yn,supplnd_biobank_yn,Source


In [314]:
## Supplemental Delivery Data: Labour and Delivery - Prepare and Save Data

In [315]:
# Prepare the converted labour and delivery data for REDCap import, then
# write it to CSV.
# NOTE(review): the arguments look like (event name, "<form>_complete" column,
# repeat instrument) -- confirm against RedcapConv.prep_imp.
supp_del_lnd.prep_imp('supplementary_deli_arm_1', 'supplemental_delivery_data_complete', 'supplemental_delivery_data')

supp_del_lnd.data.to_csv('../data/processed/supp_del_lnd.csv', index = False)

In [316]:
# Withdrawal

In [317]:
## Withdrawal - Preprocessing

In [318]:
# Parse the RAVE withdrawal timestamp (day, abbreviated month name, year,
# then a time with fractional seconds) and keep only the date as
# 'YYYY-MM-DD' text.
obs_data_sets.rave_clinic.loc[:, 'WITHDRAWAL_DT'] = (
    pd.to_datetime(
        obs_data_sets.rave_clinic['WITHDRAWAL_DT'],
        format = '%d%b%Y %H:%M:%S.%f',
    )
    .dt.strftime('%Y-%m-%d')
)


#withdrawal.compare_conv_dde(obs_data_sets.redcap_clinic)

In [319]:
## Withdrawal - Processing
# RAVE column -> REDCap field name for the withdrawal form.
withdrawal_dict = {
    'WITHDRAW_NY': 'wd_yn',
    'WITHDRAWAL_DT': 'wd_dt',
    'TYPEWITHDRAWAL': 'wd_type',
}

# NOTE(review): 0 presumably means the RAVE columns carry no numeric
# instance suffix -- confirm against RedcapConv.
withdrawal = obs_clinic_migration.RedcapConv(
    withdrawal_dict,
    0,
    master_df = obs_data_sets.rave_clinic,
)

Column 'wd_type' has an issue with the variable 'No Contact'.
Column 'wd_type' has an issue with the variable 'No Use'.
Column 'wd_type' has an issue with the variable 'No Access'.


In [320]:
## Withdrawal - Post-processing
# Lower-case the second word of the withdrawal types RedcapConv reported an
# issue with, then remove rows without data.
withdrawal.change_str(
    {
        'wd_type': {
            'No Access': 'No access',
            'No Contact': 'No contact',
            'No Use': 'No use',
        },
    }
)
withdrawal.remove_na()

3    66
1    19
2    19
4     9
Name: wd_type, dtype: int64


In [321]:
## Withdrawal - Compare Data

In [322]:
# Compare the converted withdrawal data with the REDCap double-data-entry export.
# NOTE(review): an empty table presumably means no discrepancies -- confirm
# against RedcapConv.compare_conv_dde.
withdrawal.compare_conv_dde(obs_data_sets.redcap_clinic)

Unnamed: 0,obs_id,wd_yn,wd_dt,wd_type,Source


In [323]:
## Withdrawal - Prepare and Save Data

In [324]:
# Prepare the converted withdrawal data for REDCap import, then write it to CSV.
# NOTE(review): the arguments look like (event name, "<form>_complete" column);
# no repeat instrument is passed here -- confirm against RedcapConv.prep_imp.
withdrawal.prep_imp('withdrawal_arm_1', 'withdrawal_complete')

withdrawal.data.to_csv('../data/processed/withdrawal.csv', index = False)

In [None]:
## Overall check for missing columns

In [339]:
# Overall completeness check: list every column of the REDCap double data
# entry file that no part of the migration accounts for.
# An empty list is expected when the migration is complete.

# Mapping dictionaries (RAVE column -> REDCap column) created during the
# migration process; their values are the REDCap columns already captured.
migration_dicts = [
    inc_excl_dict, base_dem_dict, base_curr_dict, obhx_dict, medhx_dict,
    surhx_dict, rxhx_preg_dict, supp_ant_dict, supp_ant_spec_dict,
    base_all_dict, supp_ant_yn_dict, ante_visit_dict, ante_hcp_dict, ante_cond_dict,
    ante_sur_dict, ante_meds_dict, del_ant_dict, del_lnd_comp_dict,
    abx_labr_dict, ante_temp_dict, neo_cbg_dict, neo_resus_lvl_dict, neo_resus_dict,
    neo_resp_dict, neo_lab_dict, neo_dc_dict, supp_del_ant_dict,
    supp_del_lnd_dict, anom_dx_dict, withdrawal_dict,
]
present_columns = [
    redcap_column
    for migration_dict in migration_dicts
    for redcap_column in migration_dict.values()
]

# Regular expressions for REDCap columns that have a valid reason to be
# ignored. A column is ignored as soon as any one pattern matches.
ignored_column_patterns = [
    # created in post-processing phase so not in the mapping dictionaries
    '^redcap_event_name$',
    '^redcap_repeat_instrument$',
    '_complete$',
    # created during processing so not in the mapping dictionaries
    '^redcap_repeat_instance$',
    # represents who has access to the data; no impact on data completeness
    '^redcap_data_access_group$',
    # inclusion/exclusion for Diabetes in Pregnancy subgroup not captured in RAVE
    '^incl_dip_',
    '^excl_dip_',
    '^dip_',
    # inclusion/exclusion for late entry subgroup not captured in RAVE
    '^incl_le_',
    '^excl_le_',
    '^le_prev_',
    # pertains to OBS subgroups not captured in RAVE
    '^incl_excl_entry_type$',
    # metric values imported so no need to import imperial
    '^base_dem_ht_imp$',
    '^base_dem_prepreg_wt_imp$',
    # no equivalent column in RAVE
    '^neo_dc_1st_yn$',
    # no equivalent column in RAVE
    '^obhx_preg_yn$',
    # hypertensive disorders subgroup not captured in RAVE
    '^hdp_',
    # preterm labour subgroup not captured in RAVE
    '^thptl_',
    # questionnaire data not captured in RAVE
    '^quest_',
    # abbreviated baseline questionnaire data not captured in RAVE
    '^abq_',
    # ultrasound data not captured in RAVE
    '^us_',
]

# columns that will be produced/calculated by REDCap itself
calc_columns = list(obs_data_sets.redcap_data_dict.loc[
    (obs_data_sets.redcap_data_dict['Field Type'] == 'calc'),
    'Variable / Field Name'
])

# Set lookup keeps the membership test cheap, and any() stops at the first
# matching pattern. The original order of the REDCap columns is preserved.
accounted_columns = set(present_columns) | set(calc_columns)
missing_columns = [
    redcap_column
    for redcap_column in obs_data_sets.redcap_clinic.columns
    if redcap_column not in accounted_columns
    and not any(
        re.search(pattern, redcap_column)
        for pattern in ignored_column_patterns
    )
]

# print the missing_columns that still need to be accounted for
# expecting empty list when complete
print(missing_columns)
# []

[]


In [None]:
#%% inclusion_exclusion_criteria








#%% Baseline Assessment: Demographics

# make note about how this wasn't fixed at time of import

# all are entered as kg
   





#base_dem.find_cols_issue(obs_data_sets.redcap_clinic)




#%% Baseline Assessment: Current Pregnancy


#base_curr.change_str




# base_curr.find_cols_issue(obs_data_sets.redcap_clinic)



#%% Baseline Assessment: Obstetrical History

# 'obhx_preg_yn',
# ,
# ,
# ,
# ,
# ,



# ,
# ,

# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,

# ,
# ,
# ,
# ,
# ,
# ,
# ,
# ,



# 'PREGNANCY_1',
# ,
# ,
# ,
# 'FOETUSES_STD_1',
# ,
# 'TYPEOFTWIN_STD_1',
# ,
# 'PREGOUTCOME_STD_1',
# 'DELIVERY_DT_1',
# ,
# 'CHILDCURRENTLYALIVE_STD_1',
# ,
# 'SEX_STD_1',
# ,
# ,
# ,
# 'GEST_WT_STD_1',
# 'GEST_WT_STD_UN_1',

# ,
# 'TYPE_OF_DELIVERY_STD_1',
# ,
# 'COMPLICATIONS_NY_STD_1',
# ,
# 'GESTATIONAL_HYPERTENSION_STD_1',
# ,
# 'PREECLAMPSIA_NY_STD_1',
# ,
# 'HELLPSYNDROME_NY_STD_1',
# ,
# 'IUGR_NY_STD_1',
# ,
# 'PRETERMLABOUR_NY_STD_1',
# ,
# 'GESTATN_DIABETES_NY_STD_1',
# ,
# 'CHOLESTASIS_NY_STD_1',
# ,
# 'PLACENTA_PREVIA_NY_STD_1',
# ,
# 'PLACENTAL_ABRUPTION_NY_STD_1',
# ,
# 'FETAL_CONGENITAL_ANOM_NY_STD_1',
# ,
# ,
# 'FETAL_CHROM_ABNORM_STD_1',
# ,
# ,
# 'OTHER_COMPLICATION_NY_STD_1',
# ,
 










#%% Baseline Assessment: Pre-existing Medical Conditions medical history
#picked up by REDCap






#%% Baseline Assessment: Allergies





#%% surgical_history


## first surgery column is inconsistently named with the rest of the columns
# surg_cols = ['SURGERY_NY_', 'SURGERY_NY_STD_', 'SURG_PROCEDURE_', 
#              'SURG_PROCEDURE_STD_', 'SPECIFY_SURGICAL_PROCEDURE_', 
#              'SURGERY_YEAR_', 'SURGERY_YEAR_YYYY_', 'SURGERY_YEAR_MM_', 
#              'SURGERY_YEAR_DD_', 'PROCEDUR_DT_UNK_', 'SURG_REASON_', 
#              'SURG_COMMEN_']

# for i in range(8, 1, -1):
#     for surg_col in surg_cols:
#         obs_data_sets.rave_clinic.rename(columns = {surg_col + str(i - 1): surg_col + str(i)}, 
#                                  inplace = True)
    
# obs_data_sets.rave_clinic.rename(columns = {'SURGERY_NY': 'SURGERY_NY_1', 
#                          #'SURGERY_NY_STD': 'SURGERY_NY_STD_1', 
#                          'SURG_PROCEDURE': 'SURG_PROCEDURE_1', 
#                          'SPECIFY_SURGICAL_PROCEDURE': 'SPECIFY_SURGICAL_PROCEDURE_1',
#                          #'SURG_PROCEDURE_STD': 'SURG_PROCEDURE_STD_1', 
#                          #'SURGERY_YEAR':  'SPECIFY_SURGICAL_PROCEDURE_1', 
#                          #'SURGERY_YEAR_INT': 'SURGERY_YEAR_1', 
#                          'SURGERY_YEAR_YYYY': 'SURGERY_YEAR_YYYY_1', 
#                          'SURGERY_YEAR_MM': 'SURGERY_YEAR_MM_1', 
#                          'SURGERY_YEAR_DD': 'SURGERY_YEAR_DD_1', 
#                          'PROCEDUR_DT_UNK': 'PROCEDUR_DT_UNK_1', # probably don't need this column, replaced with SURGERY_YEAR_yn_date_
#                          'SURG_REASON': 'SURG_REASON_1', 
#                          'SURG_COMMEN': 'SURG_COMMEN_1'}, inplace = True)

# obs_data_sets.rave_clinic.loc[
#     obs_data_sets.rave_clinic['SURGERY_NY_1'] == 'No', 'SURGERY_NY_2'
# ] = np.NaN
 


# # for i in range(1, 9):


# #     obs_data_sets.rave_clinic['PROCEDUR_DT_UNK_' + str(i)].replace({'0': 'Yes', '1': 'No'}, inplace = True)
# obs_data_sets.rave_clinic = rave_date_unknown(obs_data_sets.rave_clinic, 'SURGERY_NY_', 'Yes','SURGERY_YEAR_', 8)



# surhx_dict = {
#     'SURGERY_NY_': 'surhx_yn',
#     'SURG_PROCEDURE_': 'surhx_procedure',
#     'SPECIFY_SURGICAL_PROCEDURE_': 'surhx_proc_spec',
    
#     'SURGERY_YEAR_yn_date_': 'surhx_proc_date_yn',
#     #'PROCEDUR_DT_UNK_': 'surhx_proc_date_yn',
    
#     'SURGERY_YEAR_DD_': 'surhx_surg_day',
#     'SURGERY_YEAR_MM_': 'surhx_surg_month',
#     'SURGERY_YEAR_YYYY_': 'surhx_surg_year',
#     'SURG_REASON_': 'surhx_indication',
#     'SURG_COMMEN_': 'surhx_comments'        
# }
    
# surhx = obs_clinic_migration.RedcapConv(surhx_dict, 7, master_df = obs_data_sets.rave_clinic)

# surhx.change_str(
#         {'surhx_procedure': 
#             {'Other, specify': 'Other'},
#         }
# )

# foo = surhx.compare_conv_dde(redcap_clinic, additional_ignore_cols = ['redcap_repeat_instance'], remove_text_cols = True)







## first surgery column is inconsistently named with the rest of the columns
# surg_cols = ['SURGERY_NY_', 'SURGERY_NY_STD_', 'SURG_PROCEDURE_', 
#              'SURG_PROCEDURE_STD_', 'SPECIFY_SURGICAL_PROCEDURE_', 
#              'SURGERY_YEAR_', 'SURGERY_YEAR_YYYY_', 'SURGERY_YEAR_MM_', 
#              'SURGERY_YEAR_DD_', 'PROCEDUR_DT_UNK_', 'SURG_REASON_', 
#              'SURG_COMMEN_']

# for i in range(8, 1, -1):
#     for surg_col in surg_cols:
#         obs_data_sets.rave_clinic.rename(columns = {surg_col + str(i - 1): surg_col + str(i)}, 
#                                  inplace = True)
    
# obs_data_sets.rave_clinic.rename(columns = {'SURGERY_NY': 'SURGERY_NY_1', 
#                          #'SURGERY_NY_STD': 'SURGERY_NY_STD_1', 
#                          'SURG_PROCEDURE': 'SURG_PROCEDURE_1', 
#                          'SPECIFY_SURGICAL_PROCEDURE': 'SPECIFY_SURGICAL_PROCEDURE_1',
#                          #'SURG_PROCEDURE_STD': 'SURG_PROCEDURE_STD_1', 
#                          #'SURGERY_YEAR':  'SPECIFY_SURGICAL_PROCEDURE_1', 
#                          #'SURGERY_YEAR_INT': 'SURGERY_YEAR_1', 
#                          'SURGERY_YEAR_YYYY': 'SURGERY_YEAR_YYYY_1', 
#                          'SURGERY_YEAR_MM': 'SURGERY_YEAR_MM_1', 
#                          'SURGERY_YEAR_DD': 'SURGERY_YEAR_DD_1', 
#                          'PROCEDUR_DT_UNK': 'PROCEDUR_DT_UNK_1', # probably don't need this column, replaced with SURGERY_YEAR_yn_date_
#                          'SURG_REASON': 'SURG_REASON_1', 
#                          'SURG_COMMEN': 'SURG_COMMEN_1'}, inplace = True)

# obs_data_sets.rave_clinic.loc[
#     obs_data_sets.rave_clinic['SURGERY_NY_1'] == 'No', 'SURGERY_NY_2'
# ] = np.NaN
 


# # for i in range(1, 9):


# #     obs_data_sets.rave_clinic['PROCEDUR_DT_UNK_' + str(i)].replace({'0': 'Yes', '1': 'No'}, inplace = True)








# ignore surgery column without iterator suffix as it is a repeat of the last surgery instance



#%% medications_taken_since_beginning_of_pregnancy

#%% supplements_taken_since_beginning_of_pregnancy
### this needs work
# add checkbox into function

# try:
    
#     obs_data_sets.rave_clinic['LABELLED_SUPPLEMENT_PRENAT_SPEC_' + str(i)] = np.NaN

            
#     obs_data_sets.rave_clinic.loc[
#         obs_data_sets.rave_clinic['SUPPLEMENT_STD_' + str(i)] == '90',
#         'LABELLED_SUPPLEMENT_PRENAT_SPEC_' + str(i)
#     ] = obs_data_sets.rave_clinic['SUPPLEMENT_' + str(i)][
#             obs_data_sets.rave_clinic['SUPPLEMENT_STD_' + str(i)] == '90'
#         ]
#     obs_data_sets.rave_clinic.loc[
#         obs_data_sets.rave_clinic['SUPPLEMENT_STD_' + str(i)] == '90',
#         'SUPPLEMENT_' + str(i)
#     ] = 'Other prenatal vitamin'
    

    
    
    
    
    


#%% Antenatal Visits: Visit Data


#%% Antenatal Visits: Antenatal Health Care Provider








#%% Antenatal Visits: Conditions/Abnormalities Diagnosed During Pregnancy


 



#%% Antenatal Visits: Surgical procedures performed during pregnancy
# surgical_procedures_performed_during_pregnancy

#%% Antenatal Visits: Medications
# medications

    

#%% Delivery Data: Antepartum






# need to remove empty rows before importing; some OBS subjects do not have 
# delivery data










#%% antibiotics_during_labour_delivery



# def create_specify_col(
#         create_col, coded_col, label_col, label_code, label_ans,
#         df = obs_data_sets.rave_clinic
# ):
#     '''
#     Add 'please specify' column to RAVE dataframe
    
#     The new REDCap database has a separate column for 'please specify'. In the
#     RAVE database 'please specify' is intermingled in a column with the 
#     associated labelled data. This function separates the 'please specify' 
#     answer into a unique column based on the coded column.

#     Parameters
#     ----------
#     create_col : str
#         Name of new column created in df.
#     coded_col : str
#         Name of column containing the coded values.
#     label_col : str
#         Name of column containing the label values.
#     label_code : str
#         Coded value that corresponds to the 'please specify' value
#     label_ans : str
#         Corrected label associated with the 'please specify' value. This value 
#         will replace the 'please specify' value in the column with the label 
#         values (label_col). It is expected that the label will have a 
#         corresponding coded value in the REDCap data dictionary.
#     df : pandas.dataframe, optional
#         Dataframe the function will be performed on. The default is 
#         obs_data_sets.rave_clinic.

#     Returns
#     -------
#     None.

#     '''
    
#     try:
#         df[create_col] = np.where(
#             df[coded_col] == label_code, 
#             df.loc[
#                 df[coded_col] == label_code, 
#                 label_col
#             ], 
#             np.NaN
#         )
#         df.loc[
#             df[coded_col] == label_code,
#             label_col
#         ] = label_ans
#     except:
#         df[create_col] = np.NaN
        



#%% Neonatal form




    
    
#%% Neonatal Form: Cord Blood    
 
    
# #BABY_NUM3_1
# #ARTERIAL_VENOUS_1
# 'COLLECTED_NY_1': 'neo_cbg_art_yn',
# #'COLLECTED_NY_STD_1'
# 'PH_1': 'neo_cbg_art_ph',
# 'PH_UNKNOWN_1': 'neo_cbg_art_ph_unk',
# 'HCO3_1': 'neo_cbg_art_hco3',
# 'HCO3_UNKNOWN_1': 'neo_cbg_art_hco3_unk',
# 'PCO2_1': 'neo_cbg_art_pco2',
# 'PCO2_UNKNOWN_1': 'neo_cbg_art_pco2_unk',
# 'PO2_1': 'neo_cbg_art_po2',
# 'PO2_UNKNOWN_1': 'neo_cbg_art_po2_unk',
# 'BASE_EXCESS_1': 'neo_cbg_art_base',
# 'BASE_EXCESS_UNKNOWN_1': 'neo_cbg_art_base_unk',

# BABY_NUM3_2
# 'ARTERIAL_VENOUS_2'
# 'COLLECTED_NY_2': 'neo_cbg_ven_yn',
# #'COLLECTED_NY_STD_2'
# 'PH_2': 'neo_cbg_ven_ph',
# 'PH_UNKNOWN_2': 'neo_cbg_ven_ph_unk',
# 'HCO3_2': 'neo_cbg_ven_hco3',
# 'HCO3_UNKNOWN_2': 'neo_cbg_ven_hco3_unk',
# 'PCO2_2': 'neo_cbg_ven_pco2',
# 'PCO2_UNKNOWN_2': 'neo_cbg_ven_pco2_unk',
# 'PO2_2': 'neo_cbg_ven_po2',
# 'PO2_UNKNOWN_2': 'neo_cbg_ven_po2_unk',
# 'BASE_EXCESS_2': 'neo_cbg_ven_base',
# 'BASE_EXCESS_UNKNOWN_2': 'neo_cbg_ven_base_unk',



















    
#%% Neonatal Form: Neonatal Resuscitation   
    








# for i in range(1, 3):
#     obs_data_sets.rave_clinic['RECV_HIGH_LEVEL_CARE_LEVEL_' + str(i)] = np.NaN
#     obs_data_sets.rave_clinic.loc[
#         obs_data_sets.rave_clinic['LEVEL2_NY_' + str(i)] == 'Yes',
#         'RECV_HIGH_LEVEL_CARE_LEVEL_' + str(i)] = 'Level 2'
#     # overwrite 'Level 2' with the more significant 'Level 3'
#     obs_data_sets.rave_clinic.loc[
#         obs_data_sets.rave_clinic['LEVEL3_NY_' + str(i)] == 'Yes',
#         'RECV_HIGH_LEVEL_CARE_LEVEL_' + str(i)] = 'Level 3'









# 'BABY_NUM4_1',
# 'NEONAT_RESUSCITATION_NY_1': 'neo_resus_initial_yn',
# #'NEONAT_RESUSCITATION_NY_STD_1',
# 'CHEST_COMPRESN_1': 'neo_resus_init_cpr',
# #'CHEST_COMPRESN_STD_1',
# 'CPAP_AIR_1': 'neo_resus_init_cpap_air',
# #'CPAP_AIR_STD_1',
# 'CPAP_OXYGEN_1': 'neo_resus_init_cpap_o2',
# #'CPAP_OXYGEN_STD_1',
# 'EPINEPHRINE_NY_1': 'neo_resus_init_epi',
# #'EPINEPHRINE_NY_STD_1',
# 'FFO2_NY_1': 'neo_resus_init_ffo2',
# #'FFO2_NY_STD_1',
# 'INTUBATION_PPV_NY_1': 'neo_resus_init_ppv',
# #'INTUBATION_PPV_NY_STD_1',
# 'INTUBATN_TRACH_SUCTION_NY_1': 'neo_resus_init_suction',
# #'INTUBATN_TRACH_SUCTION_NY_STD_1',
# 'LARYN_MASK_AIRWAY_NY_1': 'neo_resus_init_lma',
# #'LARYN_MASK_AIRWAY_NY_STD_1',
# 'NARCAN_NY_1': 'neo_resus_init_narcan',
# #'NARCAN_NY_STD_1',
# 'PPV_AIR_1': 'neo_resus_init_ppv_air',
# #'PPV_AIR_STD_1',
# 'PPV_OXYGEN_1': 'neo_resus_init_ppv_o2',
# #'PPV_OXYGEN_STD_1',
# 'VOLUM_EXPAND_1': 'neo_resus_init_vol_exp',
# #'VOLUM_EXPAND_STD_1',
# 'UNKNOWN_1': 'neo_resus_init_unk',
# #'UNKNOWN_STD_1',
# 'OTHER_1': 'neo_resus_init_oth',
# #'OTHER_STD_1',
# 'OTHER_SPECIFY_1': 'neo_resus_init_oth_spec',
# 'RECV_HIGH_LEVEL_CARE_1': 'neo_care_yn',
# #'RECV_HIGH_LEVEL_CARE_STD_1',



# #'RESPIRATORY_PROB_AFT_RESUS_STD_1',
# 'BABY_NUM5_1',
# 'RESP_DISTRESS_SYNDROME_1': 'neo_resp_dx_rds',
# #'RESP_DISTRESS_SYNDROME_STD_1',
# 'MECON_ASPIR_SYNDROM_1': 'neo_resp_dx_mec',
# #'MECON_ASPIR_SYNDROM_STD_1',
# 'PNEUMONIA_1': 'neo_resp_dx_pneu',
# #'PNEUMONIA_STD_1',
# 'PNEUM_THORX_MEDIASTM_1': 'neo_resp_dx_pnthorax',
# #'PNEUM_THORX_MEDIASTM_STD_1',
# 'TRANS_TACHYPNOEA_1': 'neo_resp_dx_tachypnoea',
# #'TRANS_TACHYPNOEA_STD_1',
# 'PULMON_HYPERTEN_1': 'neo_resp_dx_pul_htn',
# #'PULMON_HYPERTEN_STD_1',
# 'OTHER_CLIN_DX_1': 'neo_resp_dx_oth',
# #'OTHER_CLIN_DX_STD_1',
# 'SPECIFY_OTHER_DX_1': 'neo_resp_dx_oth_spec',
# 'ADDN_VENT_SUPPORT_1': 'neo_resp_vent_sup_yn',
# #'ADDN_VENT_SUPPORT_STD_1',
# 'SUPPLEMENT_OXYGEN_1': 'neo_resp_vent_o2',
# #'SUPPLEMENT_OXYGEN_STD_1',
# 'SUPPLEMENT_OXYG_HRS_1',
# 'SUPP_OXY_HRS_UNK_1',
# 'INTUBN_VENTN_ETT_1': 'neo_resp_vent_ett',
# #'INTUBN_VENTN_ETT_STD_1',
# 'INTUBN_VENT_HRS_1',
# 'INTUBN_VENT_HRS_UNK_1',
# 'CPAP_1': 'neo_resp_vent_cpap',
# #'CPAP_STD_1',
# 'CPAP_HRS_1',
# 'CPAP_HRS_UNK_1',
# 'OTHER_VENT_SUPPORT_1': 'neo_resp_vent_oth',
# #'OTHER_VENT_SUPPORT_STD_1',
# 'SPECIFY_OTH_VENT_SUPPORT_1': 'neo_resp_vent_oth_spec',
# 'OTHER_VENT_SUP_HRS_1',
# 'OTH_VENT_SUP_HRS_UNK_1',


#%% Neonatal Form: Labwork


#%% Neonatal Form: Discharge Data

# RAVE dates are not split into date and time    
    


# TODO: reformat date and time, e.g. pd.to_datetime(obs_data_sets.rave_clinic['RUPTUR_MEMBRANE_DTTM']).dt.strftime('%Y-%m-%d %H:%M')
# TODO: redcap_repeat_instances are empty -- possibly due to the change in date format?
    
# for i in range(1, 3):
#     date_time = obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DT_TM_' + str(i)].str.split(' ', 
#                        n = 1, expand = True)
#     #obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DATE_' + str(i)] = date_time[0]
#     obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_DATE_' + str(i)] = pd.to_datetime(date_time[0]).dt.strftime('%Y-%m-%d')
#     obs_data_sets.rave_clinic['FIRST_DISCHARGE_HOME_TIME_' + str(i)] = pd.to_datetime(date_time[1]).dt.strftime('%H:%M')

    
#     date_time = obs_data_sets.rave_clinic['BABY_HOSP_READMIT_DTTM_' + str(i)].str.split(' ', 
#                        n = 1, expand = True)
#     obs_data_sets.rave_clinic['BABY_HOSP_READMIT_DATE_' + str(i)] = date_time[0]
#     obs_data_sets.rave_clinic['BABY_HOSP_READMIT_TIME_' + str(i)] = date_time[1]
    
    
#     date_time = obs_data_sets.rave_clinic['FINAL_DISCHARGE_DT_TM_' + str(i)].str.split(' ', 
#                        n = 1, expand = True)
#     obs_data_sets.rave_clinic['FINAL_DISCHARGE_DATE_' + str(i)] = date_time[0]
#     obs_data_sets.rave_clinic['FINAL_DISCHARGE_TIME_' + str(i)] = date_time[1]
    
#     date_time = obs_data_sets.rave_clinic['BABY_DEATH_DT_TM_' + str(i)].str.split(' ', 
#                        n = 1, expand = True)
#     obs_data_sets.rave_clinic['BABY_DEATH_DATE_' + str(i)] = date_time[0]
#     obs_data_sets.rave_clinic['BABY_DEATH_TIME_' + str(i)] = date_time[1]




















































#%% Supplemental Delivery Data: Antepartum, Did any complications occur


#%% Supplemental Delivery Data: Labour and Delivery














#%% 'anomally dx'


#%% 'withdrawal'