In [1]:
from src import utils
import pandas as pd

In [24]:
# Ask the project loader for just the two tables requested here.
data = utils.data_load(
    data_keys={'surveys', 'medications'},
)

In [25]:
# Pull the 'medications' entry out of the loaded data; the cells below refer to it as `metadata`.
metadata = data['medications']

In [26]:
# Render the medications table; the saved output shows only its first and last rows.
metadata

Unnamed: 0,record_id,redcap_event_name,meds_1,meds_1_list,meds_2,meds_2_list,medications_complete
0,4,ci_1_arm_1,1,"prenatal vitamin (NatureMade, ""Prenatal Multi+...",,,2
1,4,ci_2_arm_1,0,,0.0,,2
2,4,ci_3_arm_1,0,,1.0,"prenatal vitamin (NatureMade, ""Prenatal Multi+...",2
3,4,ci_4_arm_1,1,"prenatal vitamin (NatureMade, ""Prenatal Multi+...",,,2
4,4,ci_5_arm_1,0,,0.0,,2
...,...,...,...,...,...,...,...
3867,842,ci_3_arm_1,0,,0.0,,2
3868,842,ci_4_arm_1,0,,0.0,,2
3869,854,ci_1_arm_1,1,Ritual (Prenatal Vitamins); NA; 1qday; START: ...,,,2
3870,862,ci_1_arm_1,1,Nature Made (prenatal vitamins); NA; 1qday; ST...,,,2


In [29]:
# Distinct values recorded in the 'meds_2_list' column, in order of first appearance.
meds_2_entries = metadata['meds_2_list']
unique = pd.unique(meds_2_entries)

In [1]:
from src.s3_utils import *


In [29]:
# Build `survey_df`: phq9 survey answers enriched with a `user_id` and the date
# of the check-in each row belongs to. Also extracts the label and answer
# choices of question `phq9_1` from the survey metadata.
bucket = 'fouryouandme-study-data'

# read survey
key = 'bump/redcap/wave_4/phq9.csv.gz'
survey_df = pandas_from_csv_s3(bucket, key=key, compression='gzip')

# read study ids
key = 'bump/redcap/wave_4/study_ids.csv.gz'
id_df = pandas_from_csv_s3(bucket, key=key, compression='gzip')
# Select and rename in one chained expression so a fresh frame is produced;
# renaming a column slice with inplace=True can emit SettingWithCopyWarning.
id_df = id_df[['record_id', 'evidation_id']].rename(columns={'evidation_id': 'user_id'})

# add ids to survey
# merge() defaults to an inner join: survey rows whose record_id is missing
# from id_df are dropped here rather than receiving the -1 placeholder.
survey_df = survey_df.merge(id_df, on=['record_id'])
# -1 stands in for a missing user_id so the column can be cast to int.
survey_df['user_id'] = survey_df['user_id'].fillna(-1).astype(int)

# read check-in dates
key = 'bump/redcap/wave_4/check_in_adherence_log.csv.gz'
ci_df = pandas_from_csv_s3(bucket, key=key, compression='gzip')
# keep only the join key and the per-check-in '*_date' columns
cols = ['record_id'] + [col for col in ci_df.columns if col.endswith('_date')]
ci_df = ci_df[cols]

# add dates to survey
survey_df = survey_df.merge(ci_df, on=['record_id'])
# Event names look like 'ci_<n>_arm_1'; take the text between 'ci_' and '_arm'
# as the check-in number. Names without 'ci_' yield NaN and become 0.
survey_df['checkin_number'] = (
    survey_df['redcap_event_name']
    .str.split('ci_').str[1]
    .str.split('_arm').str[0]
    .fillna(0)
    .astype(int)
)
# Pick the date from the 'checkin_<n>_date' column matching each row's check-in
# number; rows with check-in number 0 get no date.
survey_df['date'] = survey_df.apply(
    lambda row: row[f'checkin_{row.checkin_number}_date'] if row.checkin_number > 0 else None,
    axis=1,
)
# Drop every helper column whose name contains 'checkin_': the wide per-check-in
# date columns and 'checkin_number' itself.
survey_df = survey_df.drop(columns=survey_df.filter(regex='checkin_').columns)

# read survey metadata (for survey result interpretation)
key = 'bump/redcap/wave_4/metadata.csv.gz'
met_df = pandas_from_csv_s3(bucket, key=key, compression='gzip')

# Filter the metadata once and read both fields from the same row(s).
phq9_1_meta = met_df.loc[met_df['field_name'] == 'phq9_1']
label = phq9_1_meta['field_label'].values[0]
choices = phq9_1_meta['select_choices_or_calculations'].values[0]

In [28]:
# Inspect the record_id -> user_id lookup table.
# NOTE(review): the saved output under this cell lists phq9_* survey columns rather than
# record_id/user_id, so it looks stale (left over from an earlier run) — re-run to confirm.
id_df

Unnamed: 0,record_id,redcap_event_name,phq9_1,phq9_2,phq9_3,phq9_4,phq9_5,phq9_6,phq9_7,phq9_8,phq9_9,phq9_10,phq9_complete
0,4,ci_1_arm_1,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,2
1,4,ci_2_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2
2,4,ci_3_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2
3,4,ci_4_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2
4,4,ci_5_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3840,842,ci_3_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3841,842,ci_4_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3842,854,ci_1_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2
3843,862,ci_1_arm_1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2


In [24]:
# Inspect the merged survey frame; the saved output shows the phq9 answers with user_id and date appended.
survey_df

Unnamed: 0,record_id,redcap_event_name,phq9_1,phq9_2,phq9_3,phq9_4,phq9_5,phq9_6,phq9_7,phq9_8,phq9_9,phq9_10,phq9_complete,user_id,date
0,4,ci_1_arm_1,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,2,28,2021-03-18
1,4,ci_2_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2,28,2021-04-01
2,4,ci_3_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2,28,2021-04-15
3,4,ci_4_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2,28,2021-04-29
4,4,ci_5_arm_1,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,2,28,2021-05-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3840,842,ci_3_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2109,2022-06-24
3841,842,ci_4_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2109,2022-07-07
3842,854,ci_1_arm_1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2,2134,2022-07-18
3843,862,ci_1_arm_1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2,2142,2022-07-13
