Paper: "Women’s Preferences for Treatment of Perinatal Depression and Anxiety: A Discrete Choice Experiment"

Author(s): Jemimah Ride, Emily Lancsar

Year: 2016

Model(s): Multinomial Logit, Mixed Logit

Main findings: \_\_\_\_

# Import needed libraries

In [1]:
import sys
from collections import OrderedDict

import scipy.stats

import numpy as np
import pandas as pd

import seaborn as sbn
import matplotlib.pyplot as plt

from statsmodels.formula.api import logit

sys.path.insert(0, '/Users/timothyb0912/Documents/pylogit/')
import pylogit as pl

sys.path.insert(0, '../../src/')
from visualization import predictive_viz as viz

%matplotlib inline



# Import data

In [2]:
data_path =\
    '../../data/raw/ride_2016/ride_2016_data.dta'
df = (pd.read_stata(data_path)
        .sort_values(['id', 'cset', 'alt']))

# Look at data

In [3]:
df.shape

(10416, 50)

In [4]:
df.head().T

Unnamed: 0,41,43,44,14,18
choice,1,0,0,0,1
alt,A,B,N,A,B
id,101,101,101,101,101
cset,1,1,1,2,2
block,2,2,2,2,2
preg,Currently pregnant,Currently pregnant,Currently pregnant,Currently pregnant,Currently pregnant
post,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months
lact,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding
age,26,26,26,26,26
educ,postgrad,postgrad,postgrad,postgrad,postgrad


In [5]:
set(df.columns.tolist())

{'ASC',
 'IRDdecile',
 'MYE',
 'age',
 'alt',
 'block',
 'child',
 'children',
 'choice',
 'clinic',
 'combo',
 'cost',
 'couns',
 'country',
 'cset',
 'educ',
 'effect',
 'employ',
 'epc',
 'group',
 'hcc',
 'helpseek',
 'herb',
 'home',
 'id',
 'income',
 'lact',
 'lang',
 'married',
 'med',
 'online',
 'pasmbu',
 'pastcouns',
 'pastepc',
 'pastgrp',
 'pastherb',
 'pastinpt',
 'pastmedanx',
 'pastmeddep',
 'pastmedpsy',
 'pastnone',
 'pastpeer',
 'pastsleepmedication',
 'pastyoga',
 'peer',
 'phi',
 'phone',
 'post',
 'preg',
 'support'}

In [6]:
# How many individuals are in the dataset?
df.id.unique().size

217

In [7]:
# How many choice sets per person?
df.cset.unique().size

16

# Clean data

In [8]:
# Give names to the alternatives
alt_id_map = {'A': 1, 'B': 2, 'N':3}
alt_id_reverse_map = {alt_id_map[k]: k for k in alt_id_map}

In [9]:
# Initialize the clean datafram
clean_df = df.copy()

# Create an observation id column for each choice set
categorical_obs_ids =\
    sorted(list(set((x, y) for x, y in 
                    clean_df[['id', 'cset']].values)))
obs_id_map =\
    {k: (pos + 1) for pos, k in enumerate(categorical_obs_ids)}

def get_obs_id(series, mapping):
    decision_maker_id = series.at['id']
    choice_set_id = series.at['cset']
    key = (decision_maker_id, choice_set_id)
    return mapping[key]

clean_df['obs_id'] =\
    clean_df.apply(get_obs_id, axis=1, args=(obs_id_map,))

# Convert the alternative id column to be numeric
clean_df['alt'] = clean_df['alt'].map(alt_id_map)


In [10]:
# Look at the shape of clean_df
print('clean_df.shape == {}'.format(clean_df.shape))

# Look at some records in clean_df
clean_df.head(6).T


clean_df.shape == (10416, 51)


Unnamed: 0,41,43,44,14,18,45
choice,1,0,0,0,1,0
alt,1,2,3,1,2,3
id,101,101,101,101,101,101
cset,1,1,1,2,2,2
block,2,2,2,2,2,2
preg,Currently pregnant,Currently pregnant,Currently pregnant,Currently pregnant,Currently pregnant,Currently pregnant
post,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months,Has not had baby in last 12 months
lact,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding,not currently breastfeeding
age,26,26,26,26,26,26
educ,postgrad,postgrad,postgrad,postgrad,postgrad,postgrad


In [11]:
# Record the columns that denote the various treatment types
treatment_types = ['couns', 'combo', 'peer', 'group',
                   'epc', 'herb', 'MYE']
# Note the names of these variables as they should be displayed
# in the estimation results table
treatment_names =\
    ['Counseling',
     'Counseling & medication',
     'Peer support',
     'Group counseling',
     'Early parenting centre programme',
     'Natural, herbal or traditional Chinese medicine',
     'Meditation, yoga or exercise',
    ]

# Record the varioius consulatation types and the names that
# should be used for these types in the estimation results table.
consultation_types = ['home', 'phone', 'online']
consultation_names = ['Home visit', 'Telephone', 'Online']


# Compute needed transformations / derived features

In [12]:
# Compute the socio-demographics interacted with the alternative
# specific constant
clean_df['Age'] = clean_df['age'] * clean_df['ASC']

clean_df['Employed'] =\
    (clean_df['employ'].isin(['full time', 'part time']) * 
     clean_df['ASC'])

clean_df['Unemployed'] =\
    (clean_df['employ'].isin(['unemployed', 'student', 'unable']) *
     clean_df['ASC'])

clean_df['low_support'] =\
    (clean_df['support'] != 'a lot') * clean_df['ASC']

clean_df['seek_help'] =\
    (clean_df['helpseek'] == 'Y') * clean_df['ASC']


In [13]:
# Create a numeric income column
income_map = {'$12999 or less': 12999,
              '$13000-$25999': 19500,
              '$26000-$51999': 39000,
              '$52000-88399': 70200,
              '88400-$155999': 122200,
              '$156000 or more': 156000}

clean_df['income_numeric'] = clean_df['income'].map(income_map)
clean_df['income_numeric_thousands'] =\
    clean_df['income_numeric'] / 1000.0

In [14]:
# Interact socio-demographics with treatment attributes
clean_df['income_times_cost'] =\
    clean_df['income_numeric_thousands'] * clean_df['cost']

clean_df['insurance_times_cost'] =\
    (clean_df['phi'] == 'has phi') * clean_df['cost']

clean_df['up_to_highschool'] =\
    clean_df['educ'].isin(['less than high school',
                           'Completed high school',
                           '7'])
highschool_treatment_types =\
    [x + '_highschool' for x in treatment_types]
highschool_treatment_names =\
    [x + ' (Highschool)' for x in treatment_names]
for new_col in highschool_treatment_types:
    orig_col = new_col[:new_col.find('_highschool')]
    clean_df[new_col] =\
        clean_df['up_to_highschool'] * clean_df[orig_col]

breastfeeding_treatment_types =\
    [x + '_breastfeeding' for x in treatment_types]
breastfeeding_treatment_names =\
    [x + ' (Breastfeeding)' for x in treatment_names]
for new_col in breastfeeding_treatment_types:
    orig_col = new_col[:new_col.find('_breastfeeding')]
    clean_df[new_col] =\
        ((clean_df['lact'] == 'currently breastfeeding') *
         clean_df[orig_col])

pregnant_treatment_types =\
    [x + '_pregnant' for x in treatment_types]
pregnant_treatment_names =\
    [x + ' (Pregnant)' for x in treatment_names]
for new_col in pregnant_treatment_types:
    orig_col = new_col[:new_col.find('_pregnant')]
    clean_df[new_col] =\
        ((clean_df['preg'] == 'Currently pregnant') *
         clean_df[orig_col])


In [15]:
# Determine if a person has had prior combination treatment
# Note, it was not immediately clear from the article that
# pastmedpsy was to be included in past_medication.
clean_df['past_medication'] =\
    ((clean_df['pastmedanx'] == 'Y') |
     (clean_df['pastmeddep'] == 'Y') |
     (clean_df['pastmedpsy'] == 'Y')).astype(int)
clean_df['past_combo'] =\
    (clean_df['past_medication'] * clean_df['pastcouns'] )

# Map each treatment type to it's corresponding "past" variable
past_to_treatment_map =\
    {'couns': 'pastcouns',
     'combo': 'past_combo',
     'peer': 'pastpeer',
     'group': 'pastgrp',
     'epc': 'pastepc',
     'herb': 'pastherb',
     'MYE': 'pastyoga',
     'med': 'past_medication',
    }
past_treatment_cols = past_to_treatment_map.values()

# Determine whether each row corresponds to a choice situation
# where an individual has been assigned a treatment type they
# have previous experience with
clean_df['prior_experience_of_treatment_type'] = 0
current_col = 'prior_experience_of_treatment_type'
for row_idx in viz.PROGRESS(clean_df.index.values):
    row = clean_df.loc[row_idx]
    for col in treatment_types + ['med']:
        past_col = past_to_treatment_map[col]
        if (row.at[col] == 1) and row.at[past_col] in ('Y', 1):
            clean_df.loc[row_idx, current_col] = 1

# Determine if one has had past experience with any treatment types
clean_df['past_experience_any_type'] =\
    ((clean_df[past_treatment_cols]
              .isin(['Y', 1])
              .sum(axis=1) > 0) *
     clean_df['ASC'])


100%|██████████| 10416/10416 [00:08<00:00, 1248.80it/s]


# Save the dataset

In [16]:
clean_data_path =\
    '../../data/processed/ride_2016/ride_2016_final_data.csv'
clean_df.to_csv(clean_data_path, index=False)