generated from opensafely/research-template
/
20a_SENS_SMOKEBMICOMPLETECASE.do
415 lines (332 loc) · 15.8 KB
/
20a_SENS_SMOKEBMICOMPLETECASE.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
*************************************************************************
*Do file: 08_hhClassif_an_mv_analysis_perEth5Group_HR_table.do
*
*Purpose: Create content that is ready to paste into a pre-formatted Word
* shell table containing minimally and fully-adjusted HRs for risk factors
* of interest, across 2 outcomes
*
*Requires: final analysis dataset (analysis_dataset.dta)
*
*Coding: K Wing, base on file from HFORBES, based on file from Krishnan Bhaskaran
*
*Date drafted: 17th June 2021
*************************************************************************
*CODE BEFORE MAKING KRISHNAN'S CHANGES - testing with a redone ethnicity variable where south asian is category one
local dataset `1'
/*for reference
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 i.imd##i.eth5 i.ageCatfor67Plus##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive i.smoke i.male i.coMorbCat, strata(utla_group) vce(cluster hh_id)
*/
*global demogadjlistWInts i.imd##i.eth5 i.smoke##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive##i.eth5 i.ageCatfor67Plus##i.eth5 i.male##i.eth5 i.coMorbCat##i.eth5
global demogadjlistWInts i.imd##i.eth5 i.ageCatfor67Plus##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive i.smoke i.male i.coMorbCat
prog drop _all
prog define outputHRsforvar
syntax, variable(string) catLabel(string) min(real) max(real) ethnicity(real) outcome(string)
*calculation of rates
*get total count of people by for each ethnicity
count if eth5==`ethnicity'
local total = r(N)
forvalues i=`min'/`max' {
display
*get overall number for each category
cou if `variable' == `i' & eth5==`ethnicity'
*get number of events
cou if `variable' == `i' & _d == 1 & eth5==`ethnicity'
local event = r(N)
*get person time and rate and counts
bysort `variable': egen total_follow_up = total(_t) if eth5==`ethnicity'
su total_follow_up if eth5==`ethnicity'
local n_people_All = r(N)
su total_follow_up if `variable' == `i' & eth5==`ethnicity'
local n_people = r(N)
local person_days = r(mean)
local person_years=`person_days'/365.25
local rate = 100000*(`event'/`person_years')
local percent=100*(`n_people'/`n_people_All')
*get HRs for each regression analysis
*crude
estimates restore crude
*cap lincom `i'.`variable', eform
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
local hr_crude = r(estimate)
local lb_crude = r(lb)
local ub_crude = r(ub)
*age adjusted
estimates restore ageAdj
*cap lincom `i'.`variable', eform
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
local hr_ageAdj = r(estimate)
local lb_ageAdj = r(lb)
local ub_ageAdj = r(ub)
*mv adjusted
estimates restore mvAdj
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
local hr_mvAdj = r(estimate)
local lb_mvAdj = r(lb)
local ub_mvAdj = r(ub)
*mv adjusted with hh size
capture noisily estimates restore mvAdjWHHSize
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
capture noisily local hr_mvAdjWHHSize = r(estimate)
capture noisily local lb_mvAdjWHHSize = r(lb)
capture noisily local ub_mvAdjWHHSize = r(ub)
*mv adjusted with hh size CONTINOUS
/*
capture noisily estimates restore mvAdjWHHSizeCONT
cap noisily lincom `i'.`variable', eform
capture noisily local hr_mvAdjWHHSizeCONT = r(estimate)
capture noisily local lb_mvAdjWHHSizeCONT = r(lb)
capture noisily local ub_mvAdjWHHSizeCONT = r(ub)
*/
*get variable name
local lab: variable label `variable'
*file write tablecontents _tab (`i') _n
*get category name
local category: label `catLabel' `i'
display "Category label: `category'"
*write each row hg
if `i'==1 {
*write the total
file write tablecontents "(Ethnicity="(`ethnicity') ")" _n
file write tablecontents "(N="(`total') ")" _n
file write tablecontents _tab ("`category'") _tab (`n_people') (" (") %3.1f (`percent') (")") _tab (`event') _tab %3.0f (`person_years') _tab %3.0f (`rate') _tab "1" _tab "1" _tab "1" _n
}
else {
file write tablecontents _tab ("`category'") _tab (`n_people') (" (") %3.1f (`percent') (")") _tab (`event') _tab %3.0f (`person_years') _tab %3.0f (`rate') _tab %4.2f (`hr_crude') " (" %4.2f (`lb_crude') "-" %4.2f (`ub_crude') ")" _tab %4.2f (`hr_ageAdj') " (" %4.2f (`lb_ageAdj') "-" %4.2f (`ub_ageAdj') ")" _tab %4.2f (`hr_mvAdj') " (" %4.2f (`lb_mvAdj') "-" %4.2f (`ub_mvAdj') ")" _n
}
drop total_follow_up
}
*variable category
end
********Code that calls program and outputs tables*******
/*I think what I want here FOR EACH WAVE is
- A single page PER OUTCOME containing
- Results FOR EACH ETHNICITY for that outcome
*/
*Testing outcomes
*use ./output/hhClassif_analysis_dataset_STSET_covidDeath_ageband_3`dataset'.dta, clear
*foreach outcome in covidDeath covidHosp covidHospOrDeath nonCovidDeath {
foreach outcome in covidHospOrDeath {
* Open a log file
capture log close
log using "./logs/20a_SENS_SMOKEBMICOMPLETECASE_`outcome'_`dataset'", text replace
*open dataset
use ./output/hhClassif_analysis_dataset_STSET_`outcome'_ageband_3`dataset'.dta, clear
*drop people with missing smoking or BMI
drop if smokeMissing==1
drop if missingBMI==1
*open table
file open tablecontents using ./output/20a_SENS_SMOKEBMICOMPLETECASE_`outcome'_`dataset'.txt, t w replace
*write table title and column headers
file write tablecontents "Wave: `dataset', Outcome: `outcome'" _n
file write tablecontents _tab _tab ("MV adjusted - MI") _n
**REGRESSIONS**
*only need to do the regressions once, so putting that code here and editing the outputHRsforvar program accordingly
strate hhRiskCatExp_5cats
**cox regressiona**
*need to account for different models for wave 1 (only interaction is with hhrisk) versus wave 2 (multiple interactions)
if "`dataset'"=="MAIN" {
*crude (only utla matched)
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store crude
*age-adjusted
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 i.ageCatfor67Plus##i.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store ageAdj
*MV adjusted (without household size)
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 i.ageCatfor67Plus##i.eth5 i.imd i.obese4cat i.rural_urbanFive i.smoke i.male i.coMorbCat, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store mvAdj
}
else if "`dataset'"=="W2" {
*crude (only utla matched)
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store crude
*age-adjusted
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 i.ageCatfor67Plus##i.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store ageAdj
*MV adjusted (without household size)
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 $demogadjlistWInts, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store mvAdj
*MV adjusted (with household size categorical)
*capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 $demogadjlistWInts i.hh_total_cat, strata(utla_group) vce(cluster hh_id)
*capture noisily estimates store mvAdjWHHSize
}
*MV adjusted (with household size continuous)
/*
capture noisily stcox i.`variable' $demogadjlist $comorbidadjlist i.imd i.hh_size, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store mvAdjWHHSizeCONT
*/
*helper variables
sum eth5
local maxEth5=r(max)
forvalues e=1/`maxEth5' {
display "*************Ethnicity: `e'************ "
display "`e'"
*next line: commented out while testing testparm etc
cap noisily outputHRsforvar, variable(hhRiskCatExp_5cats) catLabel(hhRiskCatExp_5cats) min(1) max(5) ethnicity(`e') outcome(`outcome')
file write tablecontents _n
}
cap file close tablecontents
cap log close
*output excel
*insheet using ./output/hhClassif_tablecontents_HRtable_`outcome'_`dataset'.txt, clear
*export excel using ./output/hhClassif_tablecontents_HRtable_`outcome'_`dataset'.xlsx, replace
}
*this code checks that I have baseline correct
/*
local dataset `1'
/*for reference
capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 i.imd##i.eth5 i.ageCatfor67Plus##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive i.smoke i.male i.coMorbCat, strata(utla_group) vce(cluster hh_id)
*/
*global demogadjlistWInts i.imd##i.eth5 i.smoke##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive##i.eth5 i.ageCatfor67Plus##i.eth5 i.male##i.eth5 i.coMorbCat##i.eth5
global demogadjlistWInts i.imd##i.eth5 i.ageCatfor67Plus##i.eth5 i.obese4cat##i.eth5 i.rural_urbanFive i.smoke i.male i.coMorbCat
prog drop _all
prog define outputHRsforvar
syntax, variable(string) catLabel(string) min(real) max(real) ethnicity(real) outcome(string)
display "CHECK 1"
*calculation of rates
strate `variable'
*get total count of people by for each ethnicity
count if eth5==`ethnicity'
local total = r(N)
display "CHECK 2"
forvalues i=`min'/`max' {
*display
*get overall number for each category
cou if `variable' == `i' & eth5==`ethnicity'
display "CHECK 2.1"
*get number of events
cou if `variable' == `i' & _d == 1 & eth5==`ethnicity'
local event = r(N)
display "CHECK 2.2"
*get person time and rate and counts
bysort `variable': egen total_follow_up = total(_t) if eth5==`ethnicity'
su total_follow_up if eth5==`ethnicity'
local n_people_All = r(N)
su total_follow_up if `variable' == `i' & eth5==`ethnicity'
local n_people = r(N)
local person_days = r(mean)
local person_years=`person_days'/365.25
local rate = 100000*(`event'/`person_years')
local percent=100*(`n_people'/`n_people_All')
display "CHECK 3"
*get HRs for each regression analysis
*crude
estimates restore crude_`ethnicity'
*cap lincom `i'.`variable', eform
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
*amazing new Krishnan way that has same ethnicity as baseline (see email Fri 4 March - couldnt' get this to work so doing it the other way suggested in same email)
*capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5#`i'.`variable', eform
local hr_crude = r(estimate)
local lb_crude = r(lb)
local ub_crude = r(ub)
display "CHECK 4"
*age adjusted
estimates restore ageAdj_`ethnicity'
*cap lincom `i'.`variable', eform
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
local hr_ageAdj = r(estimate)
local lb_ageAdj = r(lb)
local ub_ageAdj = r(ub)
display "CHECK 5"
*mv adjusted
estimates restore mvAdj_`ethnicity'
capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
local hr_mvAdj = r(estimate)
local lb_mvAdj = r(lb)
local ub_mvAdj = r(ub)
display "CHECK 6"
*mv adjusted with hh size
*capture noisily estimates restore mvAdjWHHSize
*capture noisily lincom `i'.`variable' + `i'.`variable'#`ethnicity'.eth5, eform
*capture noisily local hr_mvAdjWHHSize = r(estimate)
*capture noisily local lb_mvAdjWHHSize = r(lb)
*capture noisily local ub_mvAdjWHHSize = r(ub)
*mv adjusted with hh size CONTINOUS
/*
capture noisily estimates restore mvAdjWHHSizeCONT
cap noisily lincom `i'.`variable', eform
capture noisily local hr_mvAdjWHHSizeCONT = r(estimate)
capture noisily local lb_mvAdjWHHSizeCONT = r(lb)
capture noisily local ub_mvAdjWHHSizeCONT = r(ub)
*/
*get variable name
local lab: variable label `variable'
*file write tablecontents _tab (`i') _n
*get category name
local category: label `catLabel' `i'
display "Category label: `category'"
*write each row hg
if `i'==1 {
*write the total
file write tablecontents "(Ethnicity="(`ethnicity') ")" _n
file write tablecontents "(N="(`total') ")" _n
file write tablecontents _tab ("`category'") _tab (`n_people') (" (") %3.1f (`percent') (")") _tab (`event') _tab %3.0f (`person_years') _tab %3.0f (`rate') _tab "1" _tab "1" _tab "1" _n
}
else {
file write tablecontents _tab ("`category'") _tab (`n_people') (" (") %3.1f (`percent') (")") _tab (`event') _tab %3.0f (`person_years') _tab %3.0f (`rate') _tab %4.2f (`hr_crude') " (" %4.2f (`lb_crude') "-" %4.2f (`ub_crude') ")" _tab %4.2f (`hr_ageAdj') " (" %4.2f (`lb_ageAdj') "-" %4.2f (`ub_ageAdj') ")" _tab %4.2f (`hr_mvAdj') " (" %4.2f (`lb_mvAdj') "-" %4.2f (`ub_mvAdj') ")" _n
}
drop total_follow_up
}
*variable category
end
********Code that calls program and outputs tables*******
/*I think what I want here FOR EACH WAVE is
- A single page PER OUTCOME containing
- Results FOR EACH ETHNICITY for that outcome
*/
*Testing outcomes
*use ./output/hhClassif_analysis_dataset_STSET_covidDeath_ageband_3`dataset'.dta, clear
*foreach outcome in covidDeath covidHosp covidHospOrDeath nonCovidDeath {
foreach outcome in covidHospOrDeath {
* Open a log file
capture log close
log using "./logs/20a_hhClassif_an_mv_an_wInteractions_67alone_HR_`outcome'_`dataset'", text replace
*open dataset
use ./output/hhClassif_analysis_dataset_STSET_`outcome'_ageband_3`dataset'.dta, clear
*open table
file open tablecontents using ./output/20a_hhClassif_an_mv_an_wInteractions_67alone_HR_`outcome'_`dataset'.txt, t w replace
*write table title and column headers
file write tablecontents "Wave: `dataset', Outcome: `outcome'" _n
file write tablecontents _tab _tab ("N (%)") _tab ("Events") _tab ("Person years follow up") _tab ("Rate (per 100 000 person years)") _tab ("Crude") _tab ("Age adjusted") _tab ("MV adjusted") _n
*helper variables
sum eth5
local maxEth5=r(max)
forvalues e=1/`maxEth5' {
display "*************Ethnicity: `e'************ "
display "`e'"
*run the regressions once per ethnicity, so that the baseline can be change each time (see Krishnan email Fri 4th March) - this is what the "ib.`e'" code is doing
if "`dataset'"=="MAIN" {
*crude (only utla matched)
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store crude_`e'
*age-adjusted
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5 i.ageCatfor67Plus, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store ageAdj_`e'
*MV adjusted (without household size)
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5 i.ageCatfor67Plus i.imd i.obese4cat i.rural_urbanFive i.smoke i.male i.coMorbCat, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store mvAdj_`e'
}
else if "`dataset'"=="W2" {
*crude (only utla matched)
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store crude_`e'
*age-adjusted
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5 i.ageCatfor67Plus##i.eth5, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store ageAdj_`e'
*MV adjusted (without household size)
capture noisily stcox i.hhRiskCatExp_5cats##ib`e'.eth5 $demogadjlistWInts, strata(utla_group) vce(cluster hh_id)
capture noisily estimates store mvAdj_`e'
*MV adjusted (with household size categorical)
*capture noisily stcox i.hhRiskCatExp_5cats##i.eth5 $demogadjlistWInts i.hh_total_cat, strata(utla_group) vce(cluster hh_id)
*capture noisily estimates store mvAdjWHHSize
}
*next line: commented out while testing testparm etc
outputHRsforvar, variable(hhRiskCatExp_5cats) catLabel(hhRiskCatExp_5cats) min(1) max(5) ethnicity(`e') outcome(`outcome')
file write tablecontents _n
}
cap file close tablecontents
cap log close
*output excel
*insheet using ./output/hhClassif_tablecontents_HRtable_`outcome'_`dataset'.txt, clear
*export excel using ./output/hhClassif_tablecontents_HRtable_`outcome'_`dataset'.xlsx, replace
}
*/