In [1]:
import pandas as pd
import numpy as np

# Load the cleaned survey export, then keep only respondents who recorded a
# treatment answer for at least one of Cases 1-6 (rows with all six missing are dropped).
# NOTE(review): this is broader than "completed Case 1" -- confirm the intended inclusion rule.
treatment_cols = ['case1_q3', 'case2_q6', 'case3_q8', 'case4_q11', 'case5_q14', 'case6_q17']
pvp_clean = pd.read_csv('C:/Users/julienw/OneDrive - Michigan Medicine/Documents/PVP/Data/pvp_clean_fixed.csv')
answered_any_case = pvp_clean[treatment_cols].notna().any(axis=1)
pvp_clean = pvp_clean[answered_any_case].reset_index(drop=True)

In [2]:
# Factors that were not randomized in a given case are hard-coded to a constant level.
# Each entry is (factor prefix, question numbers where the factor is fixed, constant level).
# The resulting column name is '<factor>q<question>_n'; columns are created in the
# order listed so the frame's column order is unchanged.
fixed_factor_levels = (
    ('exam', (8, 11, 14, 17), 3),
    ('pmh', (8, 11, 14, 17), 4),
    ('o2', (3, 6, 14, 17), 3),
    ('rr', (3, 6, 14, 17), 4),
    ('lactate', (3, 6, 8, 11), 4),
    ('aki', (3, 6, 8, 11), 4),
)
for factor, question_numbers, fixed_level in fixed_factor_levels:
    for question_no in question_numbers:
        pvp_clean[f'{factor}q{question_no}_n'] = fixed_level

In [3]:
# Derive a binary 'fluid N' indicator for Cases 1-6 from the recommended-treatment answer:
#   NaN when the answer is missing, 1 when the answer is coded 1 or 2, 0 for any other answer.
# NOTE(review): presumably codes 1/2 are the options that include fluids -- confirm against the codebook.
treatment_cols = ['case1_q3', 'case2_q6', 'case3_q8', 'case4_q11', 'case5_q14', 'case6_q17']
for case_no, treatment_col in enumerate(treatment_cols, start=1):
    answer = pvp_clean[treatment_col]
    chose_fluid = answer.isin([1, 2])
    pvp_clean[f'fluid {case_no}'] = np.where(answer.isna(), np.nan, np.where(chose_fluid, 1, 0))

In [4]:
# Derive a binary 'pressor N' indicator for Cases 1-6 from the recommended-treatment answer:
#   NaN when the answer is missing, 1 when the answer is coded 2 or 3, 0 for any other answer.
# NOTE(review): presumably codes 2/3 are the options that include vasopressors -- confirm against the codebook.
treatment_cols = ['case1_q3', 'case2_q6', 'case3_q8', 'case4_q11', 'case5_q14', 'case6_q17']
for case_no, treatment_col in enumerate(treatment_cols, start=1):
    answer = pvp_clean[treatment_col]
    chose_pressor = answer.isin([2, 3])
    pvp_clean[f'pressor {case_no}'] = np.where(answer.isna(), np.nan, np.where(chose_pressor, 1, 0))

In [5]:
# Derive a binary 'perip N' indicator for Cases 1-6.
# Cases 1 and 3-6 use their route answer: NaN when missing, 1 when coded 1 or 2, else 0.
# Case 2 has no route column here: it is NaN only when all four Case 2 factor
# columns are missing, and 0 otherwise.
# NOTE(review): presumably route codes 1/2 denote peripheral administration -- confirm against the codebook.
route_cols = {1: 'route1_q3', 3: 'route3_q8', 4: 'route4_q11', 5: 'route5_q14', 6: 'route6_q17'}
case2_factor_cols = ['examq6_n', 'pmhq6_n', 'fluidq6_n', 'mapq6_n']
for case_no in range(1, 7):
    if case_no in route_cols:
        route = pvp_clean[route_cols[case_no]]
        is_peripheral = route.isin([1, 2])
        pvp_clean[f'perip {case_no}'] = np.where(route.isna(), np.nan, np.where(is_peripheral, 1, 0))
    else:
        case2_all_missing = pvp_clean[case2_factor_cols].isna().all(axis=1)
        pvp_clean[f'perip {case_no}'] = np.where(case2_all_missing, np.nan, 0)

In [6]:
# Derive a binary 'cvc N' indicator for Cases 7-10 from the continuation-route answer:
#   NaN when the answer is missing,
#   1 when the answer is coded 3, or coded 4 with the alternative-access answer coded 3,
#   0 otherwise.
# NOTE(review): presumably code 3 is the central-line option in both questions -- confirm against the codebook.
continuation_question = {7: 21, 8: 24, 9: 27, 10: 30}
for case_no, question_no in continuation_question.items():
    route = pvp_clean[f'case{case_no}_q{question_no}']
    alt_access = pvp_clean[f'altacc{case_no}_q{question_no}']
    chose_cvc = (route == 3) | ((route == 4) & (alt_access == 3))
    pvp_clean[f'cvc {case_no}'] = np.where(route.isna(), np.nan, np.where(chose_cvc, 1, 0))


In [7]:
# Rename every per-case column to '<stub> <case>' so pd.wide_to_long can split on the stub.
# The mapping is generated instead of spelled out:
#   Cases 1-6  : '<factor>q<question>_n' -> '<stub> <case>'  and 'diff<case>_q<question>'   -> 'diff <case>'
#   Cases 7-10 : '<factor>q<question>_n' -> '<factor> <case>' and 'altacc<case>_q<question>' -> 'altacc <case>'
# The only factor whose stub differs from its source prefix is fluid -> volfluid.
treatment_question = {1: 3, 2: 6, 3: 8, 4: 11, 5: 14, 6: 17}
continuation_question = {7: 21, 8: 24, 9: 27, 10: 30}
treatment_factor_stubs = {
    'exam': 'exam', 'pmh': 'pmh', 'fluid': 'volfluid', 'map': 'map',
    'o2': 'o2', 'rr': 'rr', 'lactate': 'lactate', 'aki': 'aki',
}
continuation_factors = ['location', 'trend', 'dose', 'duration']

stub_names = {}
for case_no, question_no in treatment_question.items():
    for source_prefix, stub in treatment_factor_stubs.items():
        stub_names[f'{source_prefix}q{question_no}_n'] = f'{stub} {case_no}'
    stub_names[f'diff{case_no}_q{question_no}'] = f'diff {case_no}'
for case_no, question_no in continuation_question.items():
    for factor in continuation_factors:
        stub_names[f'{factor}q{question_no}_n'] = f'{factor} {case_no}'
    stub_names[f'altacc{case_no}_q{question_no}'] = f'altacc {case_no}'

pvp_clean.rename(columns=stub_names, inplace=True)

In [8]:
# Reshape to long format: one row per (id, case).
# Each stub keeps its trailing space so it matches the '<stub> <case>' column names;
# the space is stripped from the resulting column names afterwards.
stubs_with_space = [
    'fluid ', 'pressor ', 'perip ', 'cvc ',
    'exam ', 'pmh ', 'volfluid ', 'map ',
    'rr ', 'o2 ', 'aki ', 'lactate ',
    'location ', 'trend ', 'dose ', 'duration ',
    'diff ', 'altacc ',
]
long = (
    pd.wide_to_long(pvp_clean, stubnames=stubs_with_space, i='id', j='case')
    .reset_index()
    .rename(columns={stub: stub.rstrip() for stub in stubs_with_space})
)

In [9]:
# Convert gender variable to 3 levels.
# pd.factorize numbers the distinct values in order of first appearance and marks
# missing values with -1, so after the +1 shift missing is 0 and observed values are 1, 2, ...
# Codes 4 and 0 (missing) are then folded into level 3.
# NOTE(review): the numeric codes depend on the row order of the data -- confirm this is intended.
gender_codes = pd.factorize(long['gender'])[0] + 1
long['gender'] = gender_codes
long['gender'] = long['gender'].replace({4: 3, 0: 3})

# Create cctrain variable: 1 when the specialty answer equals 1 or its text contains '1',
# 2 for any other answer, 3 when specialty is missing.
specialty = long['specialty']
includes_code_1 = (specialty == 1) | specialty.str.contains('1')
long['cctrain'] = np.where(specialty.isna(), np.nan, np.where(includes_code_1, 1, 2))
long['cctrain'] = long['cctrain'].fillna(3)

# Create spec_area variable (not included in model).
# Missing specialty becomes 7; each multi-select combination is collapsed to a single code,
# and the result is cast to int.
long['specialty'] = long['specialty'].fillna(7)
combo_to_area = {
    '1,2': '2', '2,6': '2', '1,2,6': '2',
    '1,3': '3', '3,6': '3', '1,2,3': '3', '1,3,6': '3',
    '1,4': '4', '1,2,4': '4', '1,2,4,6': '4',
    '1,5': '5', '1,2,5': '5',
    '1,6': '6',
}
long['spec_area'] = long['specialty'].replace(combo_to_area).astype(int)

In [10]:
# Subset outcome and factor variables for regression
model_cols = ['id', 'case', 'exam', 'pmh', 'o2', 'rr', 'aki', 'lactate',
              'volfluid', 'map', 'fluid', 'pressor', 'perip', 'role', 'experience',
              'gender', 'cctrain', 'spec_area', 'practype', 'icutype', 'beds', 'where',
              'dose', 'trend', 'duration', 'location', 'cvc', 'diff']
all_vars = long[model_cols].copy(deep=True)

# Dichotomize difficulty variable: NaN stays NaN, codes 1 or 2 -> 1, any other answer -> 0
all_vars['diff'] = np.where(all_vars['diff'].isna(), np.nan,
                            np.where(all_vars['diff'].isin([1, 2]), 1, 0))

# Combine adjacent levels in experience and beds (1,2 -> 1; 3,4 -> 2; ...) to match
# the regression table levels
all_vars['experience'] = all_vars['experience'].add(1).floordiv(2)
all_vars['beds'] = all_vars['beds'].add(1).floordiv(2)


def _whole_floats_to_int(frame):
    """Return a copy of `frame` with float columns cast to int64 where that is lossless.

    A column is cast only when it is float-typed, has no missing values and every
    value is a whole number; all other columns are returned unchanged. This stands
    in for `fillna(..., downcast='infer')`, whose `downcast` keyword is deprecated
    in pandas 2.x.
    """
    converted = frame.copy()
    for name in converted.columns:
        col = converted[name]
        if pd.api.types.is_float_dtype(col) and col.notna().all() and col.mod(1).eq(0).all():
            converted[name] = col.astype('int64')
    return converted


# Missing role / practype / icutype / where -> that column's largest observed code
max_fill_cols = ['role', 'practype', 'icutype', 'where']
all_vars[max_fill_cols] = _whole_floats_to_int(
    all_vars[max_fill_cols].fillna(all_vars[max_fill_cols].max()))

# Missing experience / beds -> one above that column's largest (collapsed) level
next_level_cols = ['experience', 'beds']
all_vars[next_level_cols] = _whole_floats_to_int(
    all_vars[next_level_cols].fillna(all_vars[next_level_cols].max() + 1))

In [11]:
# Write the regression data set to CSV (absolute path below), omitting the index column
export_path = 'C:/Users/julienw/OneDrive - Michigan Medicine/Documents/PVP/Data/all_variables.csv'
all_vars.to_csv(export_path, index=False)

### Supplementary Table 2: counts and percentages of `altacc` responses

In [12]:
# Tabulate the non-missing 'altacc' answers: raw count and share (in percent) per code
altacc = long['altacc']
counts = altacc.value_counts()
percs = altacc.value_counts(normalize=True).mul(100)
pd.concat({'count': counts, 'percentage': percs}, axis=1)

Unnamed: 0,count,percentage
3.0,101,66.447368
2.0,25,16.447368
1.0,24,15.789474
4.0,2,1.315789
