In [1]:
import pandas as pd
import itertools
import numpy as np
import statsmodels.imputation.mice as mice
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
data4 = pd.read_csv('~/Desktop/Data_TB/MDR.csv', low_memory=False)

data4.shape

(12072, 162)

In [10]:
data4['Author'].value_counts()

SouthAfrica2018    2842
Cegielski          1734
Brust              1201
Podewils            555
Gegia               380
Kvasnovsky          338
Koh                 314
Viiklepp            293
Seung               283
Janssen             230
Migliori            222
Bonnet              204
LEUNG201213         199
Smith               196
Ahmad               180
endTB               158
Koenig              155
RIEKS201213         150
Kempker             150
Lange               146
BDQtrial            132
MiglioriBDQ         130
Anderson            129
Isaakidis           124
Russia2018          123
Yim                 123
Marks               112
ODonnell            111
Dheda               103
Belarus2018          98
Brazil2018           97
van der Werf         90
Ndjeka               86
Shim                 71
AHUJA2010            67
BARRYCALIF           52
Laniado-Laborin      50
Milanov              49
Hughes               48
Guglielmetti         44
Latvia2018           40
BARRYKOREA      

In [11]:
# Pre-cleaning: removing unknown/outlier values
data4['AIntNo'] = data4['AIntNo'].map(lambda x: np.nan if x >= 100 else x)
data4['AContNo'] = data4['AContNo'].map(lambda x: np.nan if x >= 100 else x)
data4['ATotalDur'] = data4['ATotalDur'].map(lambda x: np.nan if x >= 100 else x)
data4['Site'] = data4['Site'].map(lambda x: np.nan if x == 'U' else x)

In [12]:
# Combine Cs and Trd use (NB: no unknown expected in the use of drugs)
data4['CsTrd'] = data4[['Cs', 'Trd']].apply(lambda x: 'Yes' if (x[1] == 'Yes' or x[0] == 'Yes')
                                     else 'No' if (x[1] == 'No' and x[0] == 'No')
                                     else 'U', axis = 1)

# Combine Eto and Pto use (NB: no unknown expected in the use of drugs)
data4['EtoPto'] = data4[['Eto', 'Pto']].apply(lambda x: 'Yes' if (x[1] == 'Yes' or x[0] == 'Yes')
                                     else 'No' if (x[1] == 'No' and x[0] == 'No')
                                     else 'U', axis = 1)

# Combine Ipm and Mpm use (NB: no unknown expected in the use of drugs)
data4['MpmIpm'] = data4[['Mpm', 'Ipm']].apply(lambda x: 'Yes' if (x[1] == 'Yes' or x[0] == 'Yes')
                                     else 'No' if (x[1] == 'No' and x[0] == 'No')
                                     else 'U', axis = 1)

# Combine Eto and Pto drug resistances
data4['DSTEtoPto'] = data4[['DSTEto','DSTPto']].apply(lambda x: 'R' if (x[1] == 'R' or x[0] == 'R')
                                     else 'S' if (x[1] == 'S' and x[0] == 'S')
                                     else 'S' if (x[1] == 'U' and x[0] == 'S')
                                     else 'S' if (x[1] == 'S' and x[0] == 'U')
                                     else 'U', axis = 1)

In [13]:
# Define hierachical resistance to second line injectables (SLI) - as Jonathon did
data4['DSTAmKm'] = data4[['DSTAm', 'DSTKm','Author']].apply(lambda x: 'R' if (x[1] == 'R' or x[0] == 'R')
                                     else 'S' if (x[1] == 'S' and x[0] == 'S')
                                     else 'S' if (x[1] == 'U' and x[0] == 'S')
                                     else 'S' if (x[1] == 'S' and x[0] == 'U')
                                     else 'S' if (x[1] == 'U' and x[0] == 'U' and x[2] == 'SouthAfrica2018')
                                     else 'U', axis = 1)
data4['DSTCm'] = data4[['DSTCm', 'Author']].apply(lambda x: 'R' if x[0]=='R' else 'S' if (x[0]=='S') or (x[0]=='U' and x[1]=='SouthAfrica2018') else 'U', axis = 1)
data4['DSTSLI'] = data4[['DSTCm', 'DSTAmKm']].apply(lambda x: 'R' if (x[0]=='R') or (x[1]=='R') 
                                                 else 'S' if (x[0]=='S') and (x[1]=='S')
                                                 else 'S' if (x[0]=='U') and (x[1]=='S')
                                                 else 'S' if (x[0]=='S') and (x[1]=='U')
                                                 else 'U',axis=1)

data4['DSTSLI'].value_counts(dropna=False)

S    8510
R    3222
U     340
Name: DSTSLI, dtype: int64

In [14]:
# Define hierarchical resistance to FQs - as Jonathon did
data4.loc[data4['DSTFQ']=='','DSTFQ'] = 'U'
data4.loc[data4['DSTFQ'].isna(),'DSTFQ'] = 'U'

data4.loc[(data4['DSTOfx']=='U') & (data4['DSTMfx']=='U') &
          (data4['DSTLfx']=='U') & (data4['DSTCfx']=='U') & (data4['DSTFQ']=='R'),'DSTOfx'] = 'R'
data4.loc[(data4['DSTOfx']=='U') & (data4['DSTMfx']=='U') &
          (data4['DSTLfx']=='U') & (data4['DSTCfx']=='U') & (data4['DSTFQ']=='S'),'DSTOfx'] = 'S'

data4['DSTOfxCfx'] = data4[['DSTCfx','DSTOfx']].apply(lambda x: 'R' if (x[0]=='R') or (x[1]=='R') 
                                                      else 'S' if (x[0]=='S') and (x[1]=='S')
                                                      else 'S' if (x[0]=='U') and (x[1]=='S')
                                                      else 'S' if (x[0]=='S') and (x[1]=='U')
                                                      else 'U',axis=1)

data4['DSTMfxLfx'] = data4[['DSTMfx','DSTLfx']].apply(lambda x: 'R' if (x[0]=='R') or (x[1]=='R') 
                                                      else 'S' if (x[0]=='S') and (x[1]=='S')
                                                      else 'S' if (x[0]=='U') and (x[1]=='S')
                                                      else 'S' if (x[0]=='S') and (x[1]=='U')
                                                      else 'U',axis=1)

data4['DSTFQnew'] = data4[['DSTOfxCfx','DSTMfxLfx','DSTFQ','Author']].apply(lambda x: 'R' if x[1]=='R'
                                                                           else 'S' if x[1]=='S'
                                                                           else 'R' if (x[0]=='R' and x[1]=='U')
                                                                           else 'S' if (x[0]=='S' and x[1]=='U')
                                                                           else 'S' if (x[0]=='U' and x[1]=='U' and x[2]=='SouthAfrica2018')
                                                                           else 'U', axis=1)
data4['DSTFQnew'].value_counts(dropna=False)

S    6576
R    3002
U    2494
Name: DSTFQnew, dtype: int64

In [15]:
# Define number of effective drugs as Jonathon did

# Correction in accounting of resistance to Am and Km (cf JOnathon's code)
data4['DSTAmNew'] = data4[['DSTAm','DSTKm']].apply(lambda x: 'R' if x[0]=='R'
                                                            else 'S' if x[0]=='S'
                                                            else 'R' if (x[0]=='U' and x[1]=='R')
                                                            else 'S' if (x[0]=='U' and x[1]=='S')
                                                            else 'U', axis=1)
data4['DSTKmNew'] = data4[['DSTKm','DSTAm']].apply(lambda x: 'R' if x[0]=='R'
                                                            else 'S' if x[0]=='S'
                                                            else 'R' if (x[0]=='U' and x[1]=='R')
                                                            else 'S' if (x[0]=='U' and x[1]=='S')
                                                            else 'U', axis=1)
data4.loc[(data4['DSTAmNew']=='U') & (data4['Author']=='SouthAfrica2018'),'DSTAmNew'] = 'S'
data4.loc[(data4['DSTKmNew']=='U') & (data4['Author']=='SouthAfrica2018'),'DSTKmNew'] = 'S'
data4['DSTAmUpdate'] = data4[['DSTAmNew','DSTAmKm']].apply(lambda x: 'R' if x[0]=='R' else 'S' if x[0]=='S'
                                                           else 'R' if (x[0]=='U' and x[1]=='R') else 'S', axis=1)
data4['DSTKmUpdate'] = data4[['DSTKmNew','DSTAmKm']].apply(lambda x: 'R' if x[0]=='R' else 'S' if x[0]=='S'
                                                           else 'R' if (x[0]=='U' and x[1]=='R') else 'S', axis=1)

# Assess the effective drugs
data4['effE'] = data4[['E','DSTE']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]=='S') else 0)
data4['effZ'] = data4[['Z','DSTZ']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]=='S') else 0)
data4['effInj'] = data4[['Am','Km','Cm','S',
                         'DSTAmUpdate','DSTKmUpdate',
                         'DSTCm','DSTS']].apply(lambda x: 1 if (x[0]=='Yes' and x[4]=='S') or
                                                (x[1]=='Yes' and x[5]=='S') or
                                                (x[2]=='Yes' and x[6]=='S') or
                                                (x[3]=='Yes' and x[7]=='S')
                                                else 0, axis=1)
data4['effCsTrd'] = data4[['CsTrd','DSTCs']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]!='R')
                                                  else 0, axis=1)
data4['effEtoPto'] = data4[['EtoPto','DSTEtoPto']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]=='S') else 0)
data4['effPAS'] = data4[['PAS','DSTPAS']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]=='S') else 0)
data4['effLzd'] = data4[['Lzd','DSTLzd']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]!='R')
                                                  else 0, axis=1)
data4['effCfz'] = data4[['Cfz','DSTCfz']].apply(lambda x: 1 if (x[0]=='Yes' and x[1]!='R')
                                                  else 0, axis=1)
data4['effBdq'] = data4['Bdq'].apply(lambda x: 1 if x=='Yes' else 0)
data4['effDlm'] = data4['Dlm'].apply(lambda x: 1 if x=='Yes' else 0)
data4['effMpmIpm'] = data4['MpmIpm'].apply(lambda x: 1 if x=='Yes' else 0)
data4['effFq'] = data4[['Ofx','Mfx','Lfx',
                        'Cfx','Gfx','DSTFQnew']].apply(lambda x: 1 if (x[0]=='Yes' or
                                                                       x[1]=='Yes' or
                                                                       x[2]=='Yes' or
                                                                       x[3]=='Yes' or
                                                                       x[4]=='Yes') and (x[5]=='S')
                                                       else 0, axis=1)

# Total number of effective drugs
data4['NeffTot'] = data4[['effE','effZ','effFq','effInj','effCsTrd','effEtoPto',
                          'effPAS','effLzd','effBdq','effDlm','effMpmIpm']].sum(axis=1)
data4['NeffTot'].value_counts(dropna=False)

3.0    5370
2.0    4365
1.0    1067
4.0     770
0.0     376
5.0     115
6.0       9
Name: NeffTot, dtype: int64

In [16]:
key_drugs = data4[['effBdq', 'effLzd', 'effCfz', 'effCsTrd']]

# Covariate for use of Lzd and Bdq
#1: none, 2: only Lzd, 3: only Bdq, 4: both
eff_lzd_bdq = data4[['effLzd','effBdq']].apply(lambda x: 1 if (x[0]==0 and x[1]==0)
                                              else 2 if (x[0]==1 and x[1]==0)
                                              else 3 if (x[0]==0 and x[1]==1)
                                              else 4 if (x[0]==1 and x[1]==1)
                                              else np.nan, axis=1).rename('effLzdBdq')

# Covariate for use of Cfz and Cs/Trd
#1: none, 2: only Cfz, 3: only Cs/Trd, 4: both
eff_cfz_cs = data4[['effCfz','effCsTrd']].apply(lambda x: 1 if (x[0]==0 and x[1]==0)
                                              else 2 if (x[0]==1 and x[1]==0)
                                              else 3 if (x[0]==0 and x[1]==1)
                                              else 4 if (x[0]==1 and x[1]==1)
                                              else np.nan, axis=1).rename('effCfzCsTrd')

# Age categories
# 1: <25, 2: 25 to 45, 3: >45
age = data4['Age'].map(lambda x: 1 if x <= 25 else 2 if (x > 25)&(x <= 45) else 3 if x > 45 else np.nan)

# Gender
sex = data4['Sex'].map(lambda x: 1 if x == 'F' else 0 if x == 'M'  else np.nan)

# TB site
site = data4['Site'].map(lambda x: 0 if x == 'Pulm' else 1 if x =='Both' else np.nan)

# HIV
hiv = data4['HIV'].map(lambda x: 1 if x == 'Pos' else 0 if x == 'Neg' else np.nan)

# ART
art = data4[['HIV', 'ART']].apply(lambda x: 'NotApplic' if x[0] == 'Neg' else x[1], axis = 1).rename('ART')
art = art.map(lambda x: 0 if x == 'No' else 1 if x == 'Yes' else 2 if x == 'NotApplic' else np.nan)

# Past treatment
pasttx = data4['PastTx'].map(lambda x: 1 if x == 'Yes' else 0 if x == 'No' else np.nan)

# Past treatment type
pasttype = data4[['PastTx', 'PastType']].apply(lambda x: 'NotApplic' if x[0] == 'No' else x[1], axis = 1).rename('PastType')
pasttype = pasttype.map(lambda x: 1 if x == 'FLD' else 2 if x == 'SLD' else 3 if x == 'NotApplic' else np.nan)

# Total treatment duration
# 1: <18, 2: 18 to 24, 3: >24
atotaldur = data4['ATotalDur'].map(lambda x: 1 if x < 18 else 2 if (x >= 18)&(x <= 24) else 3 if x > 24 else np.nan) 

# Country income
# 1: LowMid, 2: UpMid, 3: High
country = data4['Country'].map(lambda x: 1 if (x == 'Armenia' or x == 'Lesotho' or  x == 'Georgia' or x == 'Kenya' or x == 'Haiti' or x == 'Nepal' or x == 'North Korea' or x == 'Egypt' or x == 'India' or x == 'Mongolia' or x == 'Pakistan' or x == 'Philippines') else 3 if (x == 'Australia' or x == 'France' or x == 'Latvia' or x == 'South Korea' or x == 'Belgium' or x == 'Canada' or x == 'Denmark' or x == 'Estonia' or x == 'Germany' or x == 'Greece' or x == 'Hong Kong' or x == 'Italy' or x == 'Japan' or x == 'Netherlands' or x == 'Portugal' or x == 'Slovakia' or x == 'Spain' or x == 'Sweden' or x == 'Taiwan' or x == 'UK' or x == 'USA') else 2 if (x == 'Belarus' or x == 'Brazil' or x == 'Russia' or x == 'South Africa' or x == 'Argentina' or x == 'Botswana' or x == 'Bulgaria' or x == 'China' or x == 'Dominican Republic' or x == 'Ecuador' or x == 'Iran' or x == 'Mexico' or x == 'Peru' or x == 'Thailand' or x == 'Asia' or x == 'Europe') else np.nan)

# Total number of effective drugs
# 1: <= 2, 2: 3-4, 3: >=5
nefftot = data4['NeffTot'].map(lambda x: 1 if x <=2 else 2 if (x >= 3)&(x <= 4) else 3 if x >= 5 else np.nan)

covariates = pd.concat([country, age, sex, hiv, art, pasttx, pasttype, site, nefftot, atotaldur, eff_lzd_bdq, eff_cfz_cs], axis=1)
resistances = data4[['DSTFQnew','DSTSLI']].applymap(lambda x: 1 if x == 'R' else 0 if x == 'S' else np.nan)
outcomes = data4[['Outcome', 'Relapse']].apply(lambda x: 1 if (x[0] == 'Death' or x[0] == 'Lost' or x[0] == 'Fail' or x[0] == 'Transfer' or x[1] == 'Yes') else 0, axis = 1).rename('Outcome')


In [17]:
full = pd.concat([covariates, resistances, outcomes], axis=1)
full.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12062,12063,12064,12065,12066,12067,12068,12069,12070,12071
Country,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
Age,2.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,3.0,...,2.0,1.0,2.0,2.0,2.0,1.0,3.0,1.0,2.0,2.0
Sex,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,...,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0
HIV,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
ART,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0
PastTx,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0
PastType,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,...,3.0,1.0,1.0,3.0,1.0,2.0,1.0,3.0,3.0,3.0
Site,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
NeffTot,2.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
ATotalDur,,,,,,,,,,,...,1.0,1.0,1.0,1.0,3.0,1.0,1.0,2.0,2.0,2.0


In [18]:
# Counts per covariate before imputation
forcounts = full.copy()
forcounts['Age'] = forcounts['Age'].map(lambda x: '0Lt25' if x == 1 else '25to45' if x == 2 else 'Gt45' if x == 3 else np.nan)
forcounts['Sex'] = forcounts['Sex'].map(lambda x: 'F' if x == 1 else 'M' if x == 0 else np.nan) 
forcounts['Site'] = forcounts['Site'].map(lambda x: '0Pulm' if x == 0 else 'Both' if x == 1 else np.nan)
forcounts['HIV'] = forcounts['HIV'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else np.nan)
forcounts['ART'] = forcounts['ART'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else 'NotApplic' if x== 2 else np.nan)
forcounts['HivArt'] = forcounts[['HIV','ART']].apply(lambda x: 'ART' if (x[1] == 'Yes' and x[0] == 'Yes')
                                                  else 'HIV' if (x[0] == 'Yes' and x[1] == 'No')
                                                  else '0Neg' if (x[0] == 'No') else np.nan, axis=1)
forcounts['NeffTot'] = forcounts['NeffTot'].map(lambda x: 'Lt2' if x == 1 else '03to4' if x == 2
                                                else 'Gt5' if x == 3 else np.nan)
forcounts['ATotalDur'] = forcounts['ATotalDur'].map(lambda x: 'Lt18' if x == 1 else '018to24' if x == 2
                                                    else 'Gt24' if x == 3 else np.nan)
forcounts['PastTx'] = forcounts['PastTx'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else np.nan)  
forcounts['PastType'] = forcounts['PastType'].map(lambda x: '0FLD' if x == 1 else 'SLD' if x == 2
                                                  else 'NotApplic' if x == 3 else np.nan)                       
forcounts['Past'] = forcounts[['PastTx','PastType']].apply(lambda x: '0FLD' if (x[0] == 'Yes' and x[1] == '0FLD')
                                                         else 'SLD' if (x[0] == 'Yes' and x[1] == 'SLD')
                                                        else 'NotApplic' if (x[0] == 'No') else np.nan, axis=1)
forcounts['Country'] = forcounts['Country'].map(lambda x: '0LowMid' if x == 1 else 'UpMid' if x == 2 else 'High') 
forcounts['effLzdBdq'] = forcounts['effLzdBdq'].map(lambda x: '0None' if x == 1 else 'Lzd' if x==2 else 'Bdq' if x==3 else 'Lzd+Bdq') 
forcounts['effCfzCsTrd'] = forcounts['effCfzCsTrd'].map(lambda x: '0None' if x == 1 else 'Cfz' if x==2 else 'Cs/Trd' if x==3 else 'Cfz+CS/Trd') 

forcounts['DrPattern'] = forcounts[['DSTFQnew','DSTSLI']].apply(lambda x: '0FirstLineOnly' if (x[0]==0 and x[1]==0)
                                                               else 'FqOnly' if (x[0]==1 and x[1]==0)
                                                               else 'InjOnly' if (x[0]==0 and x[1]==1)
                                                               else 'FqInj' if (x[0]==1 and x[1]==1)
                                                               else np.nan, axis=1)

counts_to_save = pd.DataFrame({'Variable':['Sample_size'],'Level':['-'],'Count':[len(forcounts)],'Fraction':[1.0]})

for cv in forcounts.columns.tolist():
    blop = forcounts[cv].value_counts(dropna=False)
    blup = [x / len(forcounts) for x in blop]
    toadd = pd.DataFrame({'Variable':[cv]*len(blop),'Level':blop.index.tolist(),'Count':blop.tolist(),
                          'Fraction':blup})
    counts_to_save = counts_to_save.append(toadd,ignore_index=True)
    del blop, blup, toadd
del cv

counts_to_save.to_csv('~/Desktop/covariates_counts_before_imputation.csv',na_rep='NaN')

counts_to_save

Unnamed: 0,Variable,Level,Count,Fraction
0,Sample_size,-,12072,1.000000
1,Country,UpMid,6764,0.560305
2,Country,0LowMid,2697,0.223410
3,Country,High,2611,0.216286
4,Age,25to45,6896,0.571239
5,Age,Gt45,2963,0.245444
6,Age,0Lt25,2213,0.183317
7,Sex,M,7408,0.613651
8,Sex,F,4664,0.386349
9,HIV,No,8083,0.669566


In [19]:
# Further analysis of counts: per DR pattern, per drug used, and per outcome
drpat = forcounts['DrPattern'].unique()
lzdbdq = forcounts['effLzdBdq'].unique()
outc = forcounts['Outcome'].unique()

# Initiate the table saving the counts
counts_per_dr = pd.DataFrame({'DRgroup':['any'],'LzdBdqUse':['any'],'Outcome':['any'],'Count':[len(forcounts)]})

# Loop on the drug resistance groups
for dr in drpat:
    # Total nb of persons in the DR group
    blop = forcounts.loc[forcounts['DrPattern']==dr]
    toadd = pd.DataFrame({'DRgroup':[dr],'LzdBdqUse':['any'],'Outcome':['any'],'Count':[len(blop)]})
    counts_per_dr = counts_per_dr.append(toadd,ignore_index=True)
    del blop, toadd
    # Loop on the Lzd/Bdq use
    for lb in lzdbdq:
        blop = forcounts.loc[(forcounts['DrPattern']==dr) & (forcounts['effLzdBdq']==lb)]
        toadd = pd.DataFrame({'DRgroup':[dr],'LzdBdqUse':[lb],'Outcome':['any'],'Count':[len(blop)]})
        counts_per_dr = counts_per_dr.append(toadd,ignore_index=True)
        del blop, toadd
        # Loop on the outcome
        for oc in outc:
            blop = forcounts.loc[(forcounts['DrPattern']==dr) & (forcounts['effLzdBdq']==lb) & (forcounts['Outcome']==oc)]
            toadd = pd.DataFrame({'DRgroup':[dr],'LzdBdqUse':[lb],'Outcome':[oc],'Count':[len(blop)]})
            counts_per_dr = counts_per_dr.append(toadd,ignore_index=True)
            del blop, toadd
del dr, lb, oc, drpat, lzdbdq, outc

counts_per_dr.to_csv('~/Desktop/counts_per_dr_drug_outcome_before_imputation.csv',na_rep='NaN')

counts_per_dr

Unnamed: 0,DRgroup,LzdBdqUse,Outcome,Count
0,any,any,any,12072
1,0FirstLineOnly,any,any,5457
2,0FirstLineOnly,0None,any,5021
3,0FirstLineOnly,0None,0,3221
4,0FirstLineOnly,0None,1,1800
5,0FirstLineOnly,Lzd,any,229
6,0FirstLineOnly,Lzd,0,169
7,0FirstLineOnly,Lzd,1,60
8,0FirstLineOnly,Lzd+Bdq,any,49
9,0FirstLineOnly,Lzd+Bdq,0,35


In [20]:
imp = mice.MICEData(full)
imp.update_all(10)
imputed = imp.data.astype('int32')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [21]:
imputed.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12062,12063,12064,12065,12066,12067,12068,12069,12070,12071
Country,1,1,1,1,1,1,1,1,1,1,...,2,2,2,2,2,2,2,2,2,2
Age,2,1,1,1,3,2,1,1,2,3,...,2,1,2,2,2,1,3,1,2,2
Sex,1,1,1,1,0,1,1,0,1,1,...,0,1,0,1,0,1,1,1,1,0
HIV,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,1
ART,2,2,2,2,2,2,2,2,2,2,...,2,1,2,1,2,2,2,2,2,1
PastTx,1,1,1,1,1,1,1,1,1,0,...,0,1,1,0,1,1,1,0,0,0
PastType,1,2,1,1,1,1,1,2,2,3,...,3,1,1,3,1,2,1,3,3,3
Site,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
NeffTot,2,1,2,1,1,2,1,2,2,1,...,1,1,1,1,1,1,1,1,1,1
ATotalDur,3,3,2,2,1,2,1,3,2,1,...,1,1,1,1,3,1,1,2,2,2


In [22]:
res_injectables = (imputed['DSTSLI'] == 1)
res_fq = (imputed['DSTFQnew'] == 1)

res_first_line_only = ~res_injectables & ~res_fq
res_injectables_only = res_injectables & ~res_fq
res_fq_only = res_fq & ~res_injectables
res_fq_injectables = res_injectables & res_fq

prevars = pd.concat([res_first_line_only, res_injectables_only, res_fq_only, res_fq_injectables], axis=1)

dr_pattern = prevars.apply(lambda x: '0FirstLineOnly' if x[0] else 'InjOnly' if x[1]
                           else 'FqOnly' if x[2] else 'FqInj', axis = 1).rename('DrPattern')

i_covariates = imputed[['Country', 'Age', 'Sex', 'HIV', 'ART', 'PastTx', 'PastType', 'Site','NeffTot',
                        'ATotalDur','effLzdBdq','effCfzCsTrd']]

i_outcomes = imputed['Outcome']

In [23]:
model_data = pd.concat([i_covariates, dr_pattern, i_outcomes], axis=1)

In [24]:
model_data.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12062,12063,12064,12065,12066,12067,12068,12069,12070,12071
Country,1,1,1,1,1,1,1,1,1,1,...,2,2,2,2,2,2,2,2,2,2
Age,2,1,1,1,3,2,1,1,2,3,...,2,1,2,2,2,1,3,1,2,2
Sex,1,1,1,1,0,1,1,0,1,1,...,0,1,0,1,0,1,1,1,1,0
HIV,0,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,1
ART,2,2,2,2,2,2,2,2,2,2,...,2,1,2,1,2,2,2,2,2,1
PastTx,1,1,1,1,1,1,1,1,1,0,...,0,1,1,0,1,1,1,0,0,0
PastType,1,2,1,1,1,1,1,2,2,3,...,3,1,1,3,1,2,1,3,3,3
Site,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
NeffTot,2,1,2,1,1,2,1,2,2,1,...,1,1,1,1,1,1,1,1,1,1
ATotalDur,3,3,2,2,1,2,1,3,2,1,...,1,1,1,1,3,1,1,2,2,2


In [25]:
model_data['Age'] = model_data['Age'].map(lambda x: '0Lt25' if x == 1 else '25to45' if x == 2
                                          else 'Gt45' if x == 3 else np.nan)
model_data['Sex'] = model_data['Sex'].map(lambda x: 'F' if x == 1 else 'M' if x == 0 else np.nan) 
model_data['Site'] = model_data['Site'].map(lambda x: '0Pulm' if x == 0 else 'Both' if x == 1 else np.nan)
model_data['HIV'] = model_data['HIV'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else np.nan)
model_data['ART'] = model_data['ART'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else 'NotApplic' if x== 2
                                          else np.nan)
model_data['HivArt'] = model_data[['HIV','ART']].apply(lambda x: 'ART' if (x[1] == 'Yes' and x[0] == 'Yes')
                                                  else 'HIV' if (x[0] == 'Yes' and x[1] == 'No')
                                                  else '0Neg' if (x[0] == 'No') else np.nan, axis=1)
model_data['NeffTot'] = model_data['NeffTot'].map(lambda x: 'Lt2' if x == 1 else '03to4' if x == 2
                                                else 'Gt5' if x == 3 else np.nan)
model_data['ATotalDur'] = model_data['ATotalDur'].map(lambda x: 'Lt18' if x == 1 else '018to24' if x == 2
                                                    else 'Gt24' if x == 3 else np.nan)
model_data['PastTx'] = model_data['PastTx'].map(lambda x: 'No' if x == 0 else 'Yes' if x == 1 else np.nan)  
model_data['PastType'] = model_data['PastType'].map(lambda x: '0FLD' if x == 1 else 'SLD' if x == 2
                                                  else 'NotApplic' if x == 3 else np.nan)                       
model_data['Past'] = model_data[['PastTx','PastType']].apply(lambda x: '0FLD' if (x[0] == 'Yes' and x[1] == '0FLD')
                                                         else 'SLD' if (x[0] == 'Yes' and x[1] == 'SLD')
                                                        else 'NotApplic' if (x[0] == 'No') else np.nan, axis=1)
model_data['Country'] = model_data['Country'].map(lambda x: '0LowMid' if x == 1 else 'UpMid' if x == 2 else 'High') 
model_data['effLzdBdq'] = model_data['effLzdBdq'].map(lambda x: '0None' if x == 1
                                                      else 'Lzd' if x==2 else 'Bdq' if x==3 else 'Lzd+Bdq') 
model_data['effCfzCsTrd'] = model_data['effCfzCsTrd'].map(lambda x: '0None' if x == 1 else 'Cfz' if x==2
                                                          else 'Cs/Trd' if x==3 else 'Cfz+CS/Trd') 
model_data.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12062,12063,12064,12065,12066,12067,12068,12069,12070,12071
Country,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,0LowMid,...,UpMid,UpMid,UpMid,UpMid,UpMid,UpMid,UpMid,UpMid,UpMid,UpMid
Age,25to45,0Lt25,0Lt25,0Lt25,Gt45,25to45,0Lt25,0Lt25,25to45,Gt45,...,25to45,0Lt25,25to45,25to45,25to45,0Lt25,Gt45,0Lt25,25to45,25to45
Sex,F,F,F,F,M,F,F,M,F,F,...,M,F,M,F,M,F,F,F,F,M
HIV,No,No,No,No,No,No,No,No,No,No,...,No,Yes,No,Yes,No,No,No,No,No,Yes
ART,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,...,NotApplic,Yes,NotApplic,Yes,NotApplic,NotApplic,NotApplic,NotApplic,NotApplic,Yes
PastTx,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,...,No,Yes,Yes,No,Yes,Yes,Yes,No,No,No
PastType,0FLD,SLD,0FLD,0FLD,0FLD,0FLD,0FLD,SLD,SLD,NotApplic,...,NotApplic,0FLD,0FLD,NotApplic,0FLD,SLD,0FLD,NotApplic,NotApplic,NotApplic
Site,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,...,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,0Pulm,Both
NeffTot,03to4,Lt2,03to4,Lt2,Lt2,03to4,Lt2,03to4,03to4,Lt2,...,Lt2,Lt2,Lt2,Lt2,Lt2,Lt2,Lt2,Lt2,Lt2,Lt2
ATotalDur,Gt24,Gt24,018to24,018to24,Lt18,018to24,Lt18,Gt24,018to24,Lt18,...,Lt18,Lt18,Lt18,Lt18,Gt24,Lt18,Lt18,018to24,018to24,018to24


In [27]:
# Univariate regression on each covariate
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effLzdBdq','effCfzCsTrd'] # The covariates used in the model

or_table = pd.DataFrame() # Initialization (empty dataframe)

for cv in covars:
    # The model calculation
    model = smf.glm('Outcome ~ '+cv, data=model_data)
    results = model.fit()
    
    # Get proper ORs (i.e. exponential) with associated CIs
    coefs = results.params[1:]
    cis = results.conf_int()[1:]
    ors = np.exp(coefs)
    orcis = np.exp(cis)
    lors = orcis[0]
    uors = orcis[1]

    # Get the p-value for the model (to be corrected)
    pvals = results.wald_test_terms().pvalues[1:]
    pvals = pd.DataFrame({'pval':pvals},index=ors.index)
    
    # Put ORs, associated CIs, and p-values in proper table
    toadd = pd.concat([ors, lors, uors,pvals], axis=1)
    toadd.columns = ['OR','LCI','UCI','pval']
    
    if or_table.empty:
        or_table = toadd
    else:
        or_table = or_table.append(toadd)
        
    # Cleaning
    del model, results, coefs, cis, ors, orcis, lors, uors, pvals, toadd
    
toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effLzdBdq[T.0None]','effCfzCsTrd[T.0None]'])

or_table = or_table.append(toadd)
or_table = or_table.sort_index()

# Save the ORs
or_table.to_csv('~/Desktop/GLM_univariate.csv')
or_table

Unnamed: 0,OR,LCI,UCI,pval
ATotalDur[T.018to24],1.0,1.0,1.0,
ATotalDur[T.Gt24],1.094737,1.075355,1.114468,0.0
ATotalDur[T.Lt18],1.993529,1.963812,2.023696,0.0
Age[T.0Lt25],1.0,1.0,1.0,
Age[T.25to45],1.030654,1.006754,1.055122,0.0257046
Age[T.Gt45],1.012785,0.985823,1.040483,0.0257046
Country[T.0LowMid],1.0,1.0,1.0,
Country[T.High],0.906191,0.882974,0.930019,3.64312e-86
Country[T.UpMid],1.120952,1.097077,1.145346,3.64312e-86
DrPattern[T.0FirstLineOnly],1.0,1.0,1.0,


In [44]:
#Multivariate with effCfzCsTrd
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effLzdBdq + effCfzCsTrd', data=model_data)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effLzdBdq','effCfzCsTrd'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effLzdBdq[T.0None]','effCfzCsTrd[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

or_table_multi

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,LCI,OR,UCI,pval,nb_after_imputation
ATotalDur[T.018to24],1.0,1.0,1.0,0.0,5396.0
ATotalDur[T.Gt24],1.042027,1.060252,1.078796,0.0,2414.0
ATotalDur[T.Lt18],1.898151,1.926253,1.954771,0.0,4262.0
Age[T.0Lt25],1.0,1.0,1.0,0.2533286,2213.0
Age[T.25to45],0.997148,1.014679,1.032519,0.2533286,6896.0
Age[T.Gt45],0.992885,1.013059,1.033644,0.2533286,2963.0
Country[T.0LowMid],1.0,1.0,1.0,2.065979e-49,2697.0
Country[T.High],0.859355,0.878221,0.897501,2.065979e-49,2611.0
Country[T.UpMid],0.997525,1.016878,1.036608,2.065979e-49,6764.0
DrPattern[T.0FirstLineOnly],1.0,1.0,1.0,1.009324e-47,6262.0


In [45]:
#Multivariate without effCfzCsTrd
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effLzdBdq', data=model_data)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effLzdBdq'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effLzdBdq[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_without_cfzcstrd.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

or_table_multi

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,LCI,OR,UCI,pval,nb_after_imputation
ATotalDur[T.018to24],1.0,1.0,1.0,0.0,5396.0
ATotalDur[T.Gt24],1.043282,1.061522,1.08008,0.0,2414.0
ATotalDur[T.Lt18],1.89879,1.926905,1.955435,0.0,4262.0
Age[T.0Lt25],1.0,1.0,1.0,0.2373732,2213.0
Age[T.25to45],0.997486,1.015023,1.032867,0.2373732,6896.0
Age[T.Gt45],0.993196,1.013367,1.033948,0.2373732,2963.0
Country[T.0LowMid],1.0,1.0,1.0,3.5942989999999997e-48,2697.0
Country[T.High],0.864107,0.882689,0.90167,3.5942989999999997e-48,2611.0
Country[T.UpMid],1.000339,1.019498,1.039025,3.5942989999999997e-48,6764.0
DrPattern[T.0FirstLineOnly],1.0,1.0,1.0,2.865776e-46,6262.0


In [46]:
# ANALYSIS ON THE GROUP WHERE NEITHER LZD NOR BDQ ARE USED

model_data_sub = model_data.loc[model_data['effLzdBdq']=='0None']

##################################
# Univariate
##################################
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model

or_table = pd.DataFrame() # Initialization (empty dataframe)

for cv in covars:
    # The model calculation
    model = smf.glm('Outcome ~ '+cv, data=model_data_sub)
    results = model.fit()
    
    # Get proper ORs (i.e. exponential) with associated CIs
    coefs = results.params[1:]
    cis = results.conf_int()[1:]
    ors = np.exp(coefs)
    orcis = np.exp(cis)
    lors = orcis[0]
    uors = orcis[1]

    # Get the p-value for the model (to be corrected)
    pvals = results.wald_test_terms().pvalues[1:]
    pvals = pd.DataFrame({'pval':pvals},index=ors.index)
    
    # Put ORs, associated CIs, and p-values in proper table
    toadd = pd.concat([ors, lors, uors,pvals], axis=1)
    toadd.columns = ['OR','LCI','UCI','pval']
    
    if or_table.empty:
        or_table = toadd
    else:
        or_table = or_table.append(toadd)
        
    # Cleaning
    del model, results, coefs, cis, ors, orcis, lors, uors, pvals, toadd
    
toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])

or_table = or_table.append(toadd)
or_table = or_table.sort_index()

# Save the ORs
or_table.to_csv('~/Desktop/GLM_univariate_no_lzd_bdq.csv')



##################################
# Multivariate with effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effCfzCsTrd',
                data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_no_lzd_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals



##################################
# Multivariate without effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur', data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_without_cfzcstrd_no_lzd_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [47]:
# ANALYSIS ON THE GROUP WHERE ONLY LZD IS USED

model_data_sub = model_data.loc[model_data['effLzdBdq']=='Lzd']

##################################
# Univariate
##################################
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model

or_table = pd.DataFrame() # Initialization (empty dataframe)

for cv in covars:
    # The model calculation
    model = smf.glm('Outcome ~ '+cv, data=model_data_sub)
    results = model.fit()
    
    # Get proper ORs (i.e. exponential) with associated CIs
    coefs = results.params[1:]
    cis = results.conf_int()[1:]
    ors = np.exp(coefs)
    orcis = np.exp(cis)
    lors = orcis[0]
    uors = orcis[1]

    # Get the p-value for the model (to be corrected)
    pvals = results.wald_test_terms().pvalues[1:]
    pvals = pd.DataFrame({'pval':pvals},index=ors.index)
    
    # Put ORs, associated CIs, and p-values in proper table
    toadd = pd.concat([ors, lors, uors,pvals], axis=1)
    toadd.columns = ['OR','LCI','UCI','pval']
    
    if or_table.empty:
        or_table = toadd
    else:
        or_table = or_table.append(toadd)
        
    # Cleaning
    del model, results, coefs, cis, ors, orcis, lors, uors, pvals, toadd
    
toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])

or_table = or_table.append(toadd)
or_table = or_table.sort_index()

# Save the ORs
or_table.to_csv('~/Desktop/GLM_univariate_only_lzd.csv')



##################################
# Multivariate with effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effCfzCsTrd',
                data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_only_lzd.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals



##################################
# Multivariate without effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur', data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_without_cfzcstrd_only_lzd.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [48]:
# ANALYSIS ON THE GROUP WHERE ONLY BDQ IS USED

model_data_sub = model_data.loc[model_data['effLzdBdq']=='Bdq']

##################################
# Univariate
##################################
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model

or_table = pd.DataFrame() # Initialization (empty dataframe)

for cv in covars:
    # The model calculation
    model = smf.glm('Outcome ~ '+cv, data=model_data_sub)
    results = model.fit()
    
    # Get proper ORs (i.e. exponential) with associated CIs
    coefs = results.params[1:]
    cis = results.conf_int()[1:]
    ors = np.exp(coefs)
    orcis = np.exp(cis)
    lors = orcis[0]
    uors = orcis[1]

    # Get the p-value for the model (to be corrected)
    pvals = results.wald_test_terms().pvalues[1:]
    pvals = pd.DataFrame({'pval':pvals},index=ors.index)
    
    # Put ORs, associated CIs, and p-values in proper table
    toadd = pd.concat([ors, lors, uors,pvals], axis=1)
    toadd.columns = ['OR','LCI','UCI','pval']
    
    if or_table.empty:
        or_table = toadd
    else:
        or_table = or_table.append(toadd)
        
    # Cleaning
    del model, results, coefs, cis, ors, orcis, lors, uors, pvals, toadd
    
toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])

or_table = or_table.append(toadd)
or_table = or_table.sort_index()

# Save the ORs
or_table.to_csv('~/Desktop/GLM_univariate_only_bdq.csv')



##################################
# Multivariate with effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effCfzCsTrd',
                data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_only_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals



##################################
# Multivariate without effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur', data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_without_cfzcstrd_only_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [49]:
# ANALYSIS ON THE GROUP WHERE BOTH LZD AND BDQ ARE USED

model_data_sub = model_data.loc[model_data['effLzdBdq']=='Lzd+Bdq']

##################################
# Univariate
##################################
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model

or_table = pd.DataFrame() # Initialization (empty dataframe)

for cv in covars:
    # The model calculation
    model = smf.glm('Outcome ~ '+cv, data=model_data_sub)
    results = model.fit()
    
    # Get proper ORs (i.e. exponential) with associated CIs
    coefs = results.params[1:]
    cis = results.conf_int()[1:]
    ors = np.exp(coefs)
    orcis = np.exp(cis)
    lors = orcis[0]
    uors = orcis[1]

    # Get the p-value for the model (to be corrected)
    pvals = results.wald_test_terms().pvalues[1:]
    pvals = pd.DataFrame({'pval':pvals},index=ors.index)
    
    # Put ORs, associated CIs, and p-values in proper table
    toadd = pd.concat([ors, lors, uors,pvals], axis=1)
    toadd.columns = ['OR','LCI','UCI','pval']
    
    if or_table.empty:
        or_table = toadd
    else:
        or_table = or_table.append(toadd)
        
    # Cleaning
    del model, results, coefs, cis, ors, orcis, lors, uors, pvals, toadd
    
toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])

or_table = or_table.append(toadd)
or_table = or_table.sort_index()

# Save the ORs
or_table.to_csv('~/Desktop/GLM_univariate_both_lzd_bdq.csv')



##################################
# Multivariate with effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur + effCfzCsTrd',
                data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur','effCfzCsTrd'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]','effCfzCsTrd[T.0None]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_both_lzd_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals



##################################
# Multivariate without effCfzCsTrd
##################################
model = smf.glm('Outcome ~ DrPattern + Country + Age + Sex + HivArt + Past + Site + NeffTot + ATotalDur', data=model_data_sub)
results_multi = model.fit()

# Get proper ORs (i.e. exponential) with associated CIs
coefs = results_multi.params[1:]
#dir(results)
cis = results_multi.conf_int()[1:]
ors = np.exp(coefs)
orcis = np.exp(cis)
lors = orcis[0]
uors = orcis[1]

# Get the p-value for the model (to be corrected)
wtest = results_multi.wald_test_terms().table
pvals = wtest['pvalue'][1:]
pvals
del wtest

# Put ORs, associated CIs, and p-values in proper table
covars = ['DrPattern','Country','Age','Sex','HivArt','Past','Site','NeffTot','ATotalDur'] # The covariates used in the model
or_table_multi = pd.concat([ors, lors, uors], axis=1)
or_table_multi.columns = ['OR','LCI','UCI']

toadd = pd.DataFrame({'OR':[1.0]*len(covars),'LCI':[1.0]*len(covars),'UCI':[1.0]*len(covars),'pval':[np.nan]*len(covars)},
                     index=['DrPattern[T.0FirstLineOnly]', 'Country[T.0LowMid]', 'Age[T.0Lt25]','Sex[T.F]','HivArt[T.0Neg]','Past[T.0FLD]',
                            'Site[T.0Pulm]','NeffTot[T.03to4]','ATotalDur[T.018to24]'])
or_table_multi = or_table_multi.append(toadd)
del toadd
or_table_multi = or_table_multi.sort_index()

# Add p-values per covariate + numbers for each level of each covariate
or_table_multi['pval'] = np.nan
or_table_multi['nb_after_imputation'] = np.nan
for cv in covars:
    # Find the p-value for the covariate
    idxp = [s for s in pvals.index if cv == s]
    pv = pvals.loc[idxp]
    # Find the rows corresponding to this covariate in the ORs table
    idx = [s for s in or_table_multi.index if cv in s]
    or_table_multi.at[idx,'pval'] = pv.tolist()[0]
    # Loop on those rows, find the corresponding level, and count
    for i in idx:
        lv = i.replace(cv,'').replace('[T.','').replace(']','')
        or_table_multi.at[i,'nb_after_imputation'] = model_data[cv].tolist().count(lv)
        del lv
    del idxp, pv, idx

# Save the ORs
or_table_multi.to_csv('~/Desktop/GLM_multivariate_without_cfzcstrd_both_lzd_bdq.csv')

# Cleaning
del model, coefs, cis, ors, orcis, lors, uors, pvals

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
