generated from opensafely/research-template
/
000_define_covariates.do
1397 lines (1197 loc) · 65.6 KB
/
000_define_covariates.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
version 16

/*==============================================================================
DO FILE NAME:			define covariates
PROJECT:				EIA OpenSAFELY project
DATE: 					07/03/2022
AUTHOR:					J Galloway / M Russell
						adapted from C Rentsch
DESCRIPTION OF FILE:	data management for EIA project
						reformat variables
						categorise variables
						label variables
DATASETS USED:			data in memory (from output/input.csv)
DATASETS CREATED: 		analysis files
OTHER OUTPUT: 			logfiles, printed to folder $logdir
USER-INSTALLED ADO:
 (place .ado file(s) in analysis folder)
==============================================================================*/

**Set filepaths
* The project root is the current working directory, so this do-file must be
* started from the repository root.
*global projectdir "C:\Users\k1754142\OneDrive\PhD Project\OpenSAFELY\Github Practice"
*global projectdir "C:\Users\Mark\OneDrive\PhD Project\OpenSAFELY\Github Practice"
global projectdir `c(pwd)'

* Create the output folders; -capture- suppresses the error raised when a folder already exists
capture mkdir "$projectdir/output/data"
capture mkdir "$projectdir/output/figures"
capture mkdir "$projectdir/output/tables"

global logdir "$projectdir/logs"
* -log using- below stops the run if the log folder is missing, so create it like the output folders
capture mkdir "$logdir"

**Open a log file (any log left open by an earlier run is closed first)
cap log close
log using "$logdir/cleaning_dataset.log", replace

* Echo the resolved paths into the log
di "$projectdir"
di "$logdir"

**Load the extracted cohort, replacing any data in memory
import delimited "$projectdir/output/input.csv", clear

**Set Ado file path
adopath + "$projectdir/analysis/extra_ados"

**Set index dates (stored as day/month/year strings; start_date and end_date are parsed later with the "DMY" mask)
global year_preceding = "01/04/2018"
global start_date = "01/04/2019"
global appt_date = "01/04/2021"
global end_date = "01/01/2023"
**Shorten variable names that are too long for Stata to handle
rename chronic_respiratory_disease chronic_resp_disease

**Convert year-month date strings to Stata daily dates
* These variables are supplied with month/year only, so day 15 is appended to let them be parsed as "YMD".
* Each input variable is replaced, in the same column position, by a variable named <input>_date.
foreach var of varlist hba1c_mmol_per_mol_date ///
	hba1c_percentage_date ///
	creatinine_date ///
	bmi_date_measured ///
	abatacept_date ///
	adalimumab_date ///
	baricitinib_date ///
	certolizumab_date ///
	etanercept_date ///
	golimumab_date ///
	guselkumab_date ///
	infliximab_date ///
	ixekizumab_date ///
	methotrexate_hcd_date ///
	rituximab_date ///
	sarilumab_date ///
	secukinumab_date ///
	tocilizumab_date ///
	tofacitinib_date ///
	upadacitinib_date ///
	ustekinumab_date {
	capture confirm string variable `var'
	if _rc == 0 {
		* String input: an empty value becomes "-15", which date() cannot parse, so the result is missing
		gen `var'_date = date(`var' + "-15", "YMD")
		order `var'_date, after(`var')
		drop `var'
	}
	else {
		* Non-string input is only accepted when every value is missing
		assert `var' == .
		rename `var' `var'_date
	}
	format `var'_date %td
}
**Convert date strings that already include the day to Stata daily dates
* Each input variable is replaced, in the same column position, by a variable named <input>_date.
* NOTE(review): 15 days are added to every parsed date in this loop although the input already
* carries a day - confirm that this offset is intended.
foreach var of varlist died_date_ons ///
	eia_code_date ///
	rheum_appt_date ///
	rheum_appt_any_date ///
	rheum_appt2_date ///
	rheum_appt3_date ///
	ra_code_date ///
	psa_code_date ///
	anksp_code_date ///
	undiff_code_date ///
	last_gp_prerheum_date ///
	last_gp_precode_date ///
	last_gp_refrheum_date ///
	last_gp_refcode_date ///
	referral_rheum_prerheum ///
	referral_rheum_precode ///
	chronic_cardiac_disease ///
	diabetes ///
	hypertension ///
	chronic_resp_disease ///
	copd ///
	chronic_liver_disease ///
	stroke ///
	lung_cancer ///
	haem_cancer ///
	other_cancer ///
	esrf ///
	organ_transplant ///
	hydroxychloroquine_date ///
	leflunomide_date ///
	methotrexate_date ///
	methotrexate_inj_date ///
	sulfasalazine_date {
	capture confirm string variable `var'
	if _rc == 0 {
		* String input: parse as year-month-day, then shift by 15 days
		gen `var'_date = date(`var', "YMD") + 15
		order `var'_date, after(`var')
		drop `var'
	}
	else {
		* Non-string input is only accepted when every value is missing
		assert `var' == .
		rename `var' `var'_date
	}
	format `var'_date %td
}
**Tidy variable names after the date conversion loops
* Inputs whose names already ended in _date came out of the loops as <stem>_date_date; strip the duplicate suffix
foreach stem in rheum_appt rheum_appt2 rheum_appt3 rheum_appt_any ///
	eia_code ra_code psa_code anksp_code undiff_code ///
	last_gp_prerheum last_gp_refrheum last_gp_refcode last_gp_precode ///
	hba1c_mmol_per_mol hba1c_percentage creatinine ///
	hydroxychloroquine leflunomide methotrexate methotrexate_inj sulfasalazine ///
	abatacept adalimumab baricitinib certolizumab etanercept golimumab ///
	guselkumab infliximab ixekizumab methotrexate_hcd rituximab sarilumab ///
	secukinumab tocilizumab tofacitinib upadacitinib ustekinumab {
	rename `stem'_date_date `stem'_date
}

* Names that do not follow the simple pattern
rename died_date_ons_date died_ons_date
rename bmi_date_measured_date bmi_date

* Measurement values get a _value suffix; the bare creatinine name is reused later for a 0/1 indicator
rename creatinine creatinine_value
rename bmi bmi_value
**Create binary indicator variables for relevant conditions
* For every <stem>_date variable below, create <stem> = 1 when a date is recorded and 0 otherwise,
* placed immediately after the date variable.
* Date ranges are applied in python, so presence of a date indicates presence of
* disease in the correct time frame.
foreach var of varlist eia_code_date ///
	rheum_appt_date ///
	rheum_appt2_date ///
	rheum_appt3_date ///
	rheum_appt_any_date ///
	ra_code_date ///
	psa_code_date ///
	anksp_code_date ///
	undiff_code_date ///
	died_ons_date ///
	last_gp_prerheum_date ///
	last_gp_precode_date ///
	last_gp_refrheum_date ///
	last_gp_refcode_date ///
	referral_rheum_prerheum_date ///
	referral_rheum_precode_date ///
	chronic_cardiac_disease_date ///
	diabetes_date ///
	hypertension_date ///
	chronic_resp_disease_date ///
	copd_date ///
	chronic_liver_disease_date ///
	stroke_date ///
	lung_cancer_date ///
	haem_cancer_date ///
	other_cancer_date ///
	esrf_date ///
	creatinine_date ///
	organ_transplant_date ///
	hydroxychloroquine_date ///
	leflunomide_date ///
	methotrexate_date ///
	methotrexate_inj_date ///
	sulfasalazine_date ///
	abatacept_date ///
	adalimumab_date ///
	baricitinib_date ///
	certolizumab_date ///
	etanercept_date ///
	golimumab_date ///
	guselkumab_date ///
	infliximab_date ///
	ixekizumab_date ///
	methotrexate_hcd_date ///
	rituximab_date ///
	sarilumab_date ///
	secukinumab_date ///
	tocilizumab_date ///
	tofacitinib_date ///
	upadacitinib_date ///
	ustekinumab_date {
	* Drop the trailing "_date" (5 characters) to obtain the indicator name
	local stem = substr("`var'", 1, strlen("`var'") - strlen("_date"))
	gen `stem' = cond(`var' == ., 0, 1)
	order `stem', after(`var')
}
**Create and label variables
**Demographics
***Sex: 1 when sex is "M", 0 when "F", missing for any other value
gen male = cond(sex == "M", 1, cond(sex == "F", 0, .))
lab define male 0 "No" 1 "Yes", modify
lab val male male
lab var male "Male"
tab male, missing

***Ethnicity
* Unrecorded ethnicity is given the extended missing code .u ("Not known")
replace ethnicity = .u if ethnicity == .

* Rearrange in order of prevalence: south asian (3) to 2, black (4) to 3, mixed (2) and other (5) combined as 4.
* A pre-existing code 6 is also mapped to 4.
recode ethnicity (2 5 6 = 4) (3 = 2) (4 = 3)

label define ethnicity ///
	1 "White" ///
	2 "Asian/Asian British" ///
	3 "Black" ///
	4 "Mixed/Other" ///
	.u "Not known"
label values ethnicity ethnicity
lab var ethnicity "Ethnicity"
tab ethnicity, missing

* Two-level ethnicity: 0 for White, 1 for categories 2-4, .u when not known
gen ethnicity_bme = cond(ethnicity == 1, 0, ///
	cond(ethnicity > 1 & ethnicity < 5, 1, ///
	cond(ethnicity == .u, .u, .)))
label define ethnicity_bme ///
	0 "White" ///
	1 "Non-white" ///
	.u "Not known"
label values ethnicity_bme ethnicity_bme
lab var ethnicity_bme "Ethnicity"
tab ethnicity_bme, missing

***STP
* Replace the original STP values with consecutive integers: flag the first row of each
* sorted STP group, then take the running total of the flags.
rename stp stp_old
bysort stp_old: gen stp = 1 if _n == 1
replace stp = sum(stp)
drop stp_old
***Regions
* Numeric coding of region is taken before empty strings are relabelled
encode region, gen(nuts_region)
tab region, missing
replace region = "Not known" if region == ""

* Copy of region with the spaces removed from the listed region names
* (single-word names are unchanged, and "Not known" keeps its space)
gen region_nospace = region
foreach r in "South West" "East Midlands" "North East" "North West" ///
	"South East" "West Midlands" "Yorkshire and The Humber" {
	replace region_nospace = subinstr("`r'", " ", "", .) if region == "`r'"
}

***IMD
* A value of 0 is treated as not known
replace imd = .u if imd == 0
label define imd ///
	1 "1 most deprived" ///
	2 "2" ///
	3 "3" ///
	4 "4" ///
	5 "5 least deprived" ///
	.u "Not known"
label values imd imd
lab var imd "Index of multiple deprivation"
tab imd, missing
***Age variables
*Nb. works if ages 18 and over
* Restrict to ages 18-109; rows with missing age are removed as well
keep if inrange(age, 18, 109)
lab var age "Age"

*Create categorised age
recode age ///
	(18/39.9999 = 1) ///
	(40/49.9999 = 2) ///
	(50/59.9999 = 3) ///
	(60/69.9999 = 4) ///
	(70/79.9999 = 5) ///
	(80/max = 6), gen(agegroup)

label define agegroup ///
	1 "18-39" ///
	2 "40-49" ///
	3 "50-59" ///
	4 "60-69" ///
	5 "70-79" ///
	6 "80+"
label values agegroup agegroup
lab var agegroup "Age group"
tab agegroup, missing

*Create binary age (0 = under 70, 1 = 70 and over)
recode age (min/69.999 = 0) (70/max = 1), gen(age70)
***Body Mass Index
*Recode strange values: anything outside 10-80 (which includes zero) is set to missing
replace bmi_value = . if !inrange(bmi_value, 10, 80)

*Restrict to within 10 years of EIA diagnosis date and aged>16
* bmi_time = years between the BMI measurement and the EIA code; bmi_age = age at measurement
gen bmi_time = (eia_code_date - bmi_date)/365.25
gen bmi_age = age - bmi_time
replace bmi_value = . if bmi_age < 16
replace bmi_value = . if bmi_time > 10 & bmi_time != .

*Set to missing if no date, and vice versa
replace bmi_value = . if bmi_date == .
foreach v in bmi_date bmi_time bmi_age {
	replace `v' = . if bmi_value == .
}

*Create BMI categories
* Start from "not known", then overwrite from the widest bound down to the narrowest so that
* each value ends in the lowest band whose upper limit it falls below.
gen bmicat = .u
replace bmicat = 6 if bmi_value < .
replace bmicat = 5 if bmi_value < 40
replace bmicat = 4 if bmi_value < 35
replace bmicat = 3 if bmi_value < 30
replace bmicat = 2 if bmi_value < 25
replace bmicat = 1 if bmi_value < 18.5

label define bmicat ///
	1 "Underweight (<18.5)" ///
	2 "Normal (18.5-24.9)" ///
	3 "Overweight (25-29.9)" ///
	4 "Obese I (30-34.9)" ///
	5 "Obese II (35-39.9)" ///
	6 "Obese III (40+)" ///
	.u "Not known"
label values bmicat bmicat
lab var bmicat "BMI"
tab bmicat, missing

*Create less granular categorisation (unknown BMI is grouped with "no record of obesity")
recode bmicat (1/3 .u = 1) (4 = 2) (5 = 3) (6 = 4), gen(obese4cat)
label define obese4cat ///
	1 "No record of obesity" ///
	2 "Obese I (30-34.9)" ///
	3 "Obese II (35-39.9)" ///
	4 "Obese III (40+)"
label values obese4cat obese4cat
order obese4cat, after(bmicat)
***Smoking
* Map the status codes: N = never, E = former, S = current; M or empty = not known.
* Any other code is left as system missing.
gen smoke = cond(smoking_status == "N", 1, ///
	cond(smoking_status == "E", 2, ///
	cond(smoking_status == "S", 3, ///
	cond(inlist(smoking_status, "M", ""), .u, .))))
label define smoke 1 "Never" 2 "Former" 3 "Current" .u "Not known"
label values smoke smoke
lab var smoke "Smoking status"
drop smoking_status
tab smoke, missing

*Create non-missing 3-category variable for current smoking (assumes missing smoking is never smoking)
recode smoke (.u = 1), gen(smoke_nomiss)
order smoke_nomiss, after(smoke)
label values smoke_nomiss smoke
**Clinical comorbidities
***eGFR
* Derives eGFR from the most recent creatinine, categorises it, and combines it with
* end-stage renal failure into a single CKD flag and CKD date.

*Set implausible creatinine values to missing (Note: zero changed to missing)
replace creatinine_value = . if !inrange(creatinine_value, 20, 3000)

*Remove creatinine dates if no measurements, and vice versa
replace creatinine_value = . if creatinine_date == .
replace creatinine_date = . if creatinine_value == .
*Reset the 0/1 creatinine indicator to 0 when no usable measurement remains
replace creatinine = . if creatinine_value == .
recode creatinine .=0
tab creatinine, missing

*Divide by 88.4 (to convert umol/l to mg/dl)
gen SCr_adj = creatinine_value/88.4

* First CKD-EPI term: (SCr/k)^alpha, floored at 1; k and alpha depend on sex
gen min = .
replace min = SCr_adj/0.7 if male==0
replace min = SCr_adj/0.9 if male==1
replace min = min^-0.329 if male==0
replace min = min^-0.411 if male==1
replace min = 1 if min<1

* Second CKD-EPI term: (SCr/k)^-1.209, capped at 1.
* max>1 is also true when max is missing; egfr still comes out missing because min is missing.
gen max=.
replace max=SCr_adj/0.7 if male==0
replace max=SCr_adj/0.9 if male==1
replace max=max^-1.209
replace max=1 if max>1

gen egfr=min*max*141
replace egfr=egfr*(0.993^age)
replace egfr=egfr*1.018 if male==0
label var egfr "egfr calculated using CKD-EPI formula with no ethnicity"

*Categorise into ckd stages
* ckd_egfr: 0 for eGFR 60 and over, 2 for 45-59, 3 for 30-44, 4 for 15-29, 5 for under 15; missing when eGFR is missing
egen egfr_cat_all = cut(egfr), at(0, 15, 30, 45, 60, 5000)
recode egfr_cat_all 0 = 5 15 = 4 30 = 3 45 = 2 60 = 0, generate(ckd_egfr)

gen egfr_cat = .
recode egfr_cat . = 3 if egfr < 30
recode egfr_cat . = 2 if egfr < 60
recode egfr_cat . = 1 if egfr < .
replace egfr_cat = .u if egfr >= .

label define egfr_cat 1 ">=60" ///
	2 "30-59" ///
	3 "<30" ///
	.u "Not known"
label values egfr_cat egfr_cat
lab var egfr_cat "eGFR"
tab egfr_cat, missing

*If missing eGFR, assume normal
gen egfr_cat_nomiss = egfr_cat
replace egfr_cat_nomiss = 1 if egfr_cat == .u

label define egfr_cat_nomiss 1 ">=60/not known" ///
	2 "30-59" ///
	3 "<30"
label values egfr_cat_nomiss egfr_cat_nomiss
lab var egfr_cat_nomiss "eGFR"
tab egfr_cat_nomiss, missing

gen egfr_date = creatinine_date
format egfr_date %td

*Add in end stage renal failure and create a single CKD variable
*Missing assumed to not have CKD
gen ckd = 0
replace ckd = 1 if ckd_egfr != . & ckd_egfr >= 1
replace ckd = 1 if esrf == 1

label define ckd 0 "No" 1 "Yes"
label values ckd ckd
label var ckd "Chronic kidney disease"
tab ckd, missing

*Create date (most recent measure prior to index)
* Stata treats missing as larger than any number, so a missing ckd_egfr must be excluded
* explicitly; otherwise patients without a computable eGFR would receive a CKD date while ckd is 0.
gen temp1_ckd_date = creatinine_date if ckd_egfr >= 1 & ckd_egfr != .
gen temp2_ckd_date = esrf_date if esrf == 1
* Later of the two candidate dates (max() ignores a missing argument)
gen ckd_date = max(temp1_ckd_date,temp2_ckd_date)
format ckd_date %td

drop temp1_ckd_date temp2_ckd_date SCr_adj min max ckd_egfr egfr_cat_all
***HbA1c
* Cleans the two HbA1c measures (percentage and mmol/mol), expresses each on both scales,
* categorises them, and splits diabetes by HbA1c control.

*Set zero or negative to missing
replace hba1c_percentage = . if hba1c_percentage <= 0
replace hba1c_mmol_per_mol = . if hba1c_mmol_per_mol <= 0

*Change implausible values to missing
replace hba1c_percentage = . if !inrange(hba1c_percentage, 1, 20)
replace hba1c_mmol_per_mol = . if !inrange(hba1c_mmol_per_mol, 10, 200)

*Set most recent values of >24 months prior to EIA diagnosis date to missing
* (24 months is approximated as 24*30 = 720 days)
replace hba1c_percentage = . if (eia_code_date - hba1c_percentage_date) > 24*30 & hba1c_percentage_date != .
replace hba1c_mmol_per_mol = . if (eia_code_date - hba1c_mmol_per_mol_date) > 24*30 & hba1c_mmol_per_mol_date != .

*Clean up dates
replace hba1c_percentage_date = . if hba1c_percentage == .
replace hba1c_mmol_per_mol_date = . if hba1c_mmol_per_mol == .

*Express all values as percentage
* When both measures are present, the converted mmol/mol value overrides the recorded percentage
noi summ hba1c_percentage hba1c_mmol_per_mol
gen hba1c_pct = hba1c_percentage
replace hba1c_pct = (hba1c_mmol_per_mol/10.929)+2.15 if hba1c_mmol_per_mol<.

*Valid % range between 1-20
replace hba1c_pct = . if !inrange(hba1c_pct, 1, 20)
replace hba1c_pct = round(hba1c_pct, 0.1)

*Categorise HbA1c and diabetes
*Group hba1c pct
gen hba1ccat = 0 if hba1c_pct < 6.5
replace hba1ccat = 1 if hba1c_pct >= 6.5 & hba1c_pct < 7.5
replace hba1ccat = 2 if hba1c_pct >= 7.5 & hba1c_pct < 8
replace hba1ccat = 3 if hba1c_pct >= 8 & hba1c_pct < 9
replace hba1ccat = 4 if hba1c_pct >= 9 & hba1c_pct !=.
label define hba1ccat 0 "<6.5%" 1">=6.5-7.4" 2">=7.5-7.9" 3">=8-8.9" 4">=9"
label values hba1ccat hba1ccat
tab hba1ccat, missing

*Express all values as mmol
* Here the recorded mmol/mol value is kept and the percentage is only converted when mmol/mol is missing
gen hba1c_mmol = hba1c_mmol_per_mol
replace hba1c_mmol = (hba1c_percentage*10.929)-23.5 if hba1c_percentage<. & hba1c_mmol==.

*Group hba1c mmol
gen hba1ccatmm = 0 if hba1c_mmol < 58
replace hba1ccatmm = 1 if hba1c_mmol >= 58 & hba1c_mmol !=.
replace hba1ccatmm =.u if hba1ccatmm==.
label define hba1ccatmm 0 "HbA1c <58mmol/mol" 1 "HbA1c >=58mmol/mol" .u "Not known"
label values hba1ccatmm hba1ccatmm
lab var hba1ccatmm "HbA1c"
tab hba1ccatmm, missing

*Create diabetes, split by control/not (assumes missing = no diabetes)
gen diabcatm = 1 if diabetes==0
replace diabcatm = 2 if diabetes==1 & hba1ccatmm==0
replace diabcatm = 3 if diabetes==1 & hba1ccatmm==1
replace diabcatm = 4 if diabetes==1 & hba1ccatmm==.u

* Category 3 is hba1ccatmm==1, i.e. HbA1c of 58 or above, so its label uses >= to match
label define diabcatm 1 "No diabetes" ///
	2 "Diabetes with HbA1c <58mmol/mol" ///
	3 "Diabetes with HbA1c >=58mmol/mol" ///
	4 "Diabetes with no HbA1c measure"
label values diabcatm diabcatm
lab var diabcatm "Diabetes"
*Create cancer variable: 1 when any of the lung, haematological or other cancer flags is set
gen cancer = (lung_cancer == 1 | haem_cancer == 1 | other_cancer == 1)
lab define cancer 0 "No" 1 "Yes", modify
lab val cancer cancer
lab var cancer "Cancer"
tab cancer, missing

*Create other comorbid variables: 1 when chronic cardiac disease or stroke is recorded
gen combined_cv_comorbid = (chronic_cardiac_disease == 1 | stroke == 1)

*Label variables
lab var hypertension "Hypertension"
lab var diabetes "Diabetes"
lab var stroke "Stroke"
lab var chronic_resp_disease "Chronic respiratory disease"
lab var copd "COPD"
lab var esrf "End-stage renal failure"
lab var chronic_liver_disease "Chronic liver disease"
lab var chronic_cardiac_disease "Chronic cardiac disease"
lab var rheum_appt "Rheumatology appointment"

* Every 0/1 indicator gets its own No/Yes value label, named after the variable
foreach v in hypertension diabetes stroke chronic_resp_disease copd esrf ///
	chronic_liver_disease chronic_cardiac_disease rheum_appt {
	lab define `v' 0 "No" 1 "Yes", modify
	lab val `v' `v'
}
*Ensure everyone has EIA code
**All patients should have EIA code; anyone without one is removed
tab eia_code, missing
drop if eia_code != 1

*Check rheum appt dates relative to EIA code date
**Rheumatology appt
* Original author notes on the four appointment variables (definitions live in the study definition; data only April 2019 onwards):
*  rheum_appt     - outpatient date with first attendance option selected, in the 12 months before EIA code appeared in GP record
*  rheum_appt_any - outpatient date without first attendance option selected, in the 6 months before EIA code
*  rheum_appt2    - outpatient date without first attendance option selected, in the 6 months before EIA code
*  rheum_appt3    - outpatient date without first attendance option selected, in the 2 years before EIA code
tab rheum_appt, missing
tab rheum_appt_any, missing
tab rheum_appt2, missing
tab rheum_appt3, missing

**Check timeframe of rheum appt relative to EIA code
tab rheum_appt if rheum_appt_date>eia_code_date & rheum_appt_date!=. //had a rheum appt, dated after the EIA code
tab rheum_appt if rheum_appt_date>(eia_code_date + 30) & rheum_appt_date!=. //rheum appt more than 30 days after the EIA code
tab rheum_appt if rheum_appt_date>(eia_code_date + 60) & rheum_appt_date!=. //rheum appt more than 60 days after the EIA code

**Discard appointments more than 60 days after the EIA code: indicator set to 0, then date set to missing
foreach appt in rheum_appt rheum_appt_any rheum_appt2 rheum_appt3 {
	replace `appt' = 0 if `appt'_date > (eia_code_date + 60) & `appt'_date != .
	replace `appt'_date = . if `appt'_date > (eia_code_date + 60) & `appt'_date != .
}
*Check first csDMARD/biologic was after rheum appt date
**csDMARDs (not including high cost MTX; wouldn't be shared care)
gen csdmard = (hydroxychloroquine == 1 | leflunomide == 1 | methotrexate == 1 | methotrexate_inj == 1 | sulfasalazine == 1)
tab csdmard, missing

**csDMARDs (including high cost MTX)
gen csdmard_hcd = (csdmard == 1 | methotrexate_hcd == 1)
tab csdmard_hcd, missing

**Date of first csDMARD script (not including high cost MTX prescriptions)
* min() ignores missing arguments, so the result is missing only when no drug has a date
gen csdmard_date = min(hydroxychloroquine_date, leflunomide_date, methotrexate_date, methotrexate_inj_date, sulfasalazine_date)
format %td csdmard_date

**Date of first csDMARD script (including high cost MTX prescriptions)
gen csdmard_hcd_date = min(csdmard_date, methotrexate_hcd_date)
format %td csdmard_hcd_date

**Biologic use: 1 when any of the listed drugs is recorded
local biologics abatacept adalimumab baricitinib certolizumab etanercept golimumab ///
	guselkumab infliximab ixekizumab rituximab sarilumab secukinumab ///
	tocilizumab tofacitinib upadacitinib ustekinumab
gen biologic = 0
foreach drug of local biologics {
	replace biologic = 1 if `drug' == 1
}
tab biologic, missing

**Date of first biologic script: running minimum over the drug dates
gen biologic_date = .
foreach drug of local biologics {
	replace biologic_date = min(biologic_date, `drug'_date)
}
format %td biologic_date

**Exclude if first csdmard or biologic was before first rheum appt
* The same checks are run for csDMARDs and then for biologics
foreach tx in csdmard biologic {
	tab `tx' if rheum_appt_date!=. & `tx'_date!=. & `tx'_date<rheum_appt_date
	tab `tx' if rheum_appt_date!=. & `tx'_date!=. & (`tx'_date + 60)<rheum_appt_date
	* drop if first script was more than 60 days before the first-attendance rheum appt
	drop if rheum_appt_date!=. & `tx'_date!=. & (`tx'_date + 60)<rheum_appt_date
	tab `tx' if rheum_appt_date==. & rheum_appt_any_date!=. & `tx'_date!=. & (`tx'_date + 60)<rheum_appt_any_date
	* with no first-attendance appt, apply the same rule to the first captured appt without the first attendance tag
	drop if rheum_appt_date==. & rheum_appt_any_date!=. & `tx'_date!=. & (`tx'_date + 60)<rheum_appt_any_date
}
*Generate diagnosis date
*Use eia code date (in GP record) as diagnosis date
gen diagnosis_date = eia_code_date
format diagnosis_date %td

*Refine diagnostic window
**Keep patients whose diagnosis date is on or after the start date and before the end date
* Lower bound: remove diagnoses before $start_date and rows with no diagnosis date
drop if diagnosis_date < date("$start_date", "DMY") | diagnosis_date == .
tab eia_code, missing
* Upper bound: remove diagnoses on or after $end_date
drop if diagnosis_date >= date("$end_date", "DMY") | diagnosis_date == .
tab eia_code, missing
*Include only most recent EIA sub-diagnosis=============================================*/
* Each sub-diagnosis indicator is cleared when another sub-diagnosis has a later code date.
* Comparisons with a missing date on the right-hand side are false, and a missing date on the
* left is excluded explicitly, so only recorded dates can clear an indicator.
* When two codes share the same date, the mix of > and >= below keeps exactly one of them,
* in the order RA, then PsA, then AxSpA, then undifferentiated IA.
replace ra_code =0 if psa_code_date > ra_code_date & psa_code_date !=.
replace ra_code =0 if anksp_code_date > ra_code_date & anksp_code_date !=.
replace ra_code =0 if undiff_code_date > ra_code_date & undiff_code_date !=.
replace psa_code =0 if ra_code_date >= psa_code_date & ra_code_date !=.
replace psa_code =0 if anksp_code_date > psa_code_date & anksp_code_date !=.
replace psa_code =0 if undiff_code_date > psa_code_date & undiff_code_date !=.
replace anksp_code =0 if psa_code_date >= anksp_code_date & psa_code_date !=.
replace anksp_code =0 if ra_code_date >= anksp_code_date & ra_code_date !=.
replace anksp_code =0 if undiff_code_date > anksp_code_date & undiff_code_date !=.
replace undiff_code =0 if ra_code_date >= undiff_code_date & ra_code_date !=.
replace undiff_code =0 if psa_code_date >= undiff_code_date & psa_code_date !=.
replace undiff_code =0 if anksp_code_date >= undiff_code_date & anksp_code_date !=.
* Collapse the four indicators into a single categorical diagnosis
gen eia_diagnosis=1 if ra_code==1
replace eia_diagnosis=2 if psa_code==1
replace eia_diagnosis=3 if anksp_code==1
replace eia_diagnosis=4 if undiff_code==1
lab define eia_diagnosis 1 "RA" 2 "PsA" 3 "AxSpA" 4 "Undiff IA", modify
lab val eia_diagnosis eia_diagnosis
tab eia_diagnosis, missing
drop if eia_diagnosis==. //should be none
* String version of the diagnosis; the space in "Undiff IA" is replaced by an underscore
decode eia_diagnosis, gen(eia_diag)
replace eia_diag="Undiff_IA" if eia_diagnosis==4
*Number of EIA diagnoses in time windows
**Month/Year of EIA code
gen year_diag = year(eia_code_date)
format year_diag %ty
gen month_diag = month(eia_code_date)
* Monthly date of the EIA code, plus a string copy formatted like 2019m04
gen mo_year_diagn = mofd(eia_code_date)
format mo_year_diagn %tmMon-CCYY
gen str16 mo_year_diagn_s = string(mo_year_diagn, "%tmCCYY!mNN")
lab var mo_year_diagn "Month/Year of Diagnosis"
lab var mo_year_diagn_s "Month/Year of Diagnosis"

**Month/Year of rheum appt (missing when there is no appointment date)
gen year_appt = year(rheum_appt_date)
format year_appt %ty
gen month_appt = month(rheum_appt_date)
gen mo_year_appt = mofd(rheum_appt_date)
format mo_year_appt %tmMon-CCYY
gen str16 mo_year_appt_s = string(mo_year_appt, "%tmCCYY!mNN")
**Separate into 3-month time windows (for diagnosis date)
* Window 1 is Apr-Jun 2019 (calendar quarter 2019q2) and window 15 is Oct-Dec 2022, so the
* window number is the count of calendar quarters since 2019q2, plus one.
* Dates outside 01apr2019 to 31dec2022 are left missing.
gen diagnosis_3m = qofd(diagnosis_date) - tq(2019q2) + 1 ///
	if diagnosis_date >= td(01apr2019) & diagnosis_date < td(01jan2023)

lab define diagnosis_3m ///
	1 "Apr 2019-Jun 2019" ///
	2 "Jul 2019-Sep 2019" ///
	3 "Oct 2019-Dec 2019" ///
	4 "Jan 2020-Mar 2020" ///
	5 "Apr 2020-Jun 2020" ///
	6 "Jul 2020-Sep 2020" ///
	7 "Oct 2020-Dec 2020" ///
	8 "Jan 2021-Mar 2021" ///
	9 "Apr 2021-Jun 2021" ///
	10 "Jul 2021-Sep 2021" ///
	11 "Oct 2021-Dec 2021" ///
	12 "Jan 2022-Mar 2022" ///
	13 "Apr 2022-Jun 2022" ///
	14 "Jul 2022-Sep 2022" ///
	15 "Oct 2022-Dec 2022", modify
lab val diagnosis_3m diagnosis_3m
lab var diagnosis_3m "Time period for diagnosis"
tab diagnosis_3m, missing
bys eia_diagnosis: tab diagnosis_3m, missing
**Separate into 6-month time windows (for diagnosis date)
*Window number 1-8 spans two calendar quarters, starting 01apr2019 and then every six months.
*Each window includes its first day and excludes the first day of the next window; dates
*outside 01apr2019 to 31mar2023 and missing dates are left missing.
gen diagnosis_6m = .
forvalues dx6_w = 1/8 {
	local dx6_from = dofq(tq(2019q2) + 2*(`dx6_w' - 1))
	local dx6_upto = dofq(tq(2019q2) + 2*`dx6_w')
	replace diagnosis_6m = `dx6_w' if diagnosis_date >= `dx6_from' & diagnosis_date < `dx6_upto'
}
lab define diagnosis_6m ///
	1 "Apr 2019-Oct 2019" ///
	2 "Oct 2019-Apr 2020" ///
	3 "Apr 2020-Oct 2020" ///
	4 "Oct 2020-Apr 2021" ///
	5 "Apr 2021-Oct 2021" ///
	6 "Oct 2021-Apr 2022" ///
	7 "Apr 2022-Oct 2022" ///
	8 "Oct 2022-Apr 2023", modify
lab val diagnosis_6m diagnosis_6m
lab var diagnosis_6m "Time period for diagnosis"
*Counts per window, overall and then within each diagnosis category
tabulate diagnosis_6m, missing
bysort eia_diagnosis: tabulate diagnosis_6m, missing
**Separate into 12-month time windows (for diagnosis date)
*Window number 1-4 runs from 1 April of one year up to, but not including, 1 April of the next,
*starting 01apr2019; dates outside 01apr2019 to 31mar2023 and missing dates are left missing.
gen diagnosis_year = .
forvalues dxy_w = 1/4 {
	local dxy_from = mdy(4, 1, 2018 + `dxy_w')
	local dxy_upto = mdy(4, 1, 2019 + `dxy_w')
	replace diagnosis_year = `dxy_w' if diagnosis_date >= `dxy_from' & diagnosis_date < `dxy_upto'
}
lab define diagnosis_year ///
	1 "Apr 2019-Apr 2020" ///
	2 "Apr 2020-Apr 2021" ///
	3 "Apr 2021-Apr 2022" ///
	4 "Apr 2022-Apr 2023", modify
lab val diagnosis_year diagnosis_year
lab var diagnosis_year "Year of diagnosis"
*Counts per window, overall and then within each diagnosis category
tabulate diagnosis_year, missing
bysort eia_diagnosis: tabulate diagnosis_year, missing
**Separate into 3-month time windows (for appt date)
*The list holds the 16 window boundaries; window number 1-15 runs from one boundary (inclusive)
*to the next (exclusive). Appointment dates outside the list range and missing dates stay missing.
local appt3_cuts 01apr2019 01jul2019 01oct2019 01jan2020 01apr2020 01jul2020 01oct2020 01jan2021 ///
	01apr2021 01jul2021 01oct2021 01jan2022 01apr2022 01jul2022 01oct2022 01jan2023
gen appt_3m = .
forvalues a3_w = 1/15 {
	local a3_from : word `a3_w' of `appt3_cuts'
	local a3_upto : word `=`a3_w' + 1' of `appt3_cuts'
	replace appt_3m = `a3_w' if rheum_appt_date >= td(`a3_from') & rheum_appt_date < td(`a3_upto')
}
lab define appt_3m ///
	1 "Apr 2019-Jun 2019" ///
	2 "Jul 2019-Sep 2019" ///
	3 "Oct 2019-Dec 2019" ///
	4 "Jan 2020-Mar 2020" ///
	5 "Apr 2020-Jun 2020" ///
	6 "Jul 2020-Sep 2020" ///
	7 "Oct 2020-Dec 2020" ///
	8 "Jan 2021-Mar 2021" ///
	9 "Apr 2021-Jun 2021" ///
	10 "Jul 2021-Sep 2021" ///
	11 "Oct 2021-Dec 2021" ///
	12 "Jan 2022-Mar 2022" ///
	13 "Apr 2022-Jun 2022" ///
	14 "Jul 2022-Sep 2022" ///
	15 "Oct 2022-Dec 2022", modify
lab val appt_3m appt_3m
lab var appt_3m "Time period for first rheumatology appt"
*Counts per window, overall and then within each diagnosis category
tabulate appt_3m, missing
bysort eia_diagnosis: tabulate appt_3m, missing
**Separate into 6-month time windows (for appt date)
*The list holds the 9 window boundaries; window number 1-8 runs from one boundary (inclusive)
*to the next (exclusive). Appointment dates outside the list range and missing dates stay missing.
local appt6_cuts 01apr2019 01oct2019 01apr2020 01oct2020 01apr2021 01oct2021 01apr2022 01oct2022 01apr2023
gen appt_6m = .
forvalues a6_w = 1/8 {
	local a6_from : word `a6_w' of `appt6_cuts'
	local a6_upto : word `=`a6_w' + 1' of `appt6_cuts'
	replace appt_6m = `a6_w' if rheum_appt_date >= td(`a6_from') & rheum_appt_date < td(`a6_upto')
}
lab define appt_6m ///
	1 "Apr 2019-Oct 2019" ///
	2 "Oct 2019-Apr 2020" ///
	3 "Apr 2020-Oct 2020" ///
	4 "Oct 2020-Apr 2021" ///
	5 "Apr 2021-Oct 2021" ///
	6 "Oct 2021-Apr 2022" ///
	7 "Apr 2022-Oct 2022" ///
	8 "Oct 2022-Apr 2023", modify
lab val appt_6m appt_6m
lab var appt_6m "Time period for first rheumatology appt"
*Counts per window, overall and then within each diagnosis category
tabulate appt_6m, missing
bysort eia_diagnosis: tabulate appt_6m, missing
**Separate into 12-month time windows (for appt date)
*The list holds the 5 window boundaries; window number 1-4 runs from one 1 April (inclusive)
*to the next (exclusive). Appointment dates outside the list range and missing dates stay missing.
local appty_cuts 01apr2019 01apr2020 01apr2021 01apr2022 01apr2023
gen appt_year = .
forvalues ay_w = 1/4 {
	local ay_from : word `ay_w' of `appty_cuts'
	local ay_upto : word `=`ay_w' + 1' of `appty_cuts'
	replace appt_year = `ay_w' if rheum_appt_date >= td(`ay_from') & rheum_appt_date < td(`ay_upto')
}
lab define appt_year ///
	1 "Apr 2019-Apr 2020" ///
	2 "Apr 2020-Apr 2021" ///
	3 "Apr 2021-Apr 2022" ///
	4 "Apr 2022-Apr 2023", modify
lab val appt_year appt_year
lab var appt_year "Year of first rheumatology appt"
*Counts per window, overall and then within each diagnosis category
tabulate appt_year, missing
bysort eia_diagnosis: tabulate appt_year, missing
*Define appointments and referrals======================================*/
**Proportion of patients with at least 6 or 12 months of GP registration after rheum appt (i.e. diagnosis date)
tabulate has_6m_follow_up
tabulate has_12m_follow_up
tabulate mo_year_diagn has_6m_follow_up
tabulate mo_year_diagn has_12m_follow_up
*For appt and csDMARD analyses, all patients must have 1) rheum appt 2) GP appt before rheum appt 3) 12m follow-up after rheum appt 4) 12m of registration after appt
*6m flag: 1 when a rheum appt date exists and is earlier than the end_date global minus 180 days,
*has_6m_follow_up is 1 and last_gp_prerheum is 1; 0 in every other case
generate has_6m_post_appt = ( ///
	rheum_appt_date != . ///
	& rheum_appt_date < (date("$end_date", "DMY") - 180) ///
	& has_6m_follow_up == 1 ///
	& last_gp_prerheum == 1 )
lab define has_6m_post_appt 0 "No" 1 "Yes", modify
lab val has_6m_post_appt has_6m_post_appt
lab var has_6m_post_appt "GP/rheum/registration 6m+"
*12m flag: same rule with a 365-day margin and has_12m_follow_up
generate has_12m_post_appt = ( ///
	rheum_appt_date != . ///
	& rheum_appt_date < (date("$end_date", "DMY") - 365) ///
	& has_12m_follow_up == 1 ///
	& last_gp_prerheum == 1 )
lab define has_12m_post_appt 0 "No" 1 "Yes", modify
lab val has_12m_post_appt has_12m_post_appt
lab var has_12m_post_appt "GP/rheum/registration 12m+"
**Rheumatology appt
tabulate rheum_appt, missing //proportion of patients with a rheum outpatient date in the 12 months before EIA code appeared in GP record; data only April 2019 onwards
tabulate rheum_appt2, missing //proportion of patients with a rheum outpatient date in the 6 months before EIA code appeared in GP record; data only April 2019 onwards
tabulate rheum_appt3, missing //proportion of patients with a rheum outpatient date in the 2 years before EIA code appeared in GP record; data only April 2019 onwards
*Rheum appt indicator for those with 12m follow-up: copies rheum_appt when the appt date is
*earlier than the end_date global minus 365 days; rows left system-missing are then set to 0
generate rheum_appt_to21 = rheum_appt if rheum_appt_date < (date("$end_date", "DMY") - 365)
replace rheum_appt_to21 = 0 if rheum_appt_to21 == .
lab define rheum_appt_to21 0 "No" 1 "Yes", modify
lab val rheum_appt_to21 rheum_appt_to21
lab var rheum_appt_to21 "Rheumatology appt 12m+"
*Rheum appt indicator for those with 6m+ follow-up: same rule with a 180-day margin
generate rheum_appt_to6m = rheum_appt if rheum_appt_date < (date("$end_date", "DMY") - 180)
replace rheum_appt_to6m = 0 if rheum_appt_to6m == .
lab define rheum_appt_to6m 0 "No" 1 "Yes", modify
lab val rheum_appt_to6m rheum_appt_to6m
lab var rheum_appt_to6m "Rheumatology appt 6m+"
**Check number of rheumatology appts in the year before EIA code
*Summary statistics overall, then by year of diagnosis and by year of appointment
tabstat rheum_appt_count, statistics(n mean sd p50 p25 p75)
bysort diagnosis_year: tabstat rheum_appt_count, statistics(n mean sd p50 p25 p75)
bysort appt_year: tabstat rheum_appt_count, statistics(n mean sd p50 p25 p75)
**Check medium used for rheumatology appointment
tabulate rheum_appt_medium, missing
*Collapse the raw codes: 3 is merged into 2 (telemedicine counted as telephone), every remaining
*code above 2 that is not system-missing becomes 10 (Other), and system-missing becomes .u
replace rheum_appt_medium = 2 if rheum_appt_medium == 3
replace rheum_appt_medium = 10 if rheum_appt_medium > 2 & rheum_appt_medium != .
replace rheum_appt_medium = .u if rheum_appt_medium == .
lab define rheum_appt_medium ///
	1 "Face-to-face" ///
	2 "Telephone" ///
	10 "Other" ///
	.u "Missing", modify
lab val rheum_appt_medium rheum_appt_medium
lab var rheum_appt_medium "Rheumatology consultation medium"
*Distribution among patients flagged has_12m_post_appt, overall and by year of appointment
tabulate rheum_appt_medium if has_12m_post_appt == 1, missing
bysort appt_year: tabulate rheum_appt_medium if has_12m_post_appt == 1, missing
**Rheumatology referrals (Nb. low capture of coded rheumatology referrals at present, therefore last GP appt used as proxy of referral date currently - see below)
tabulate referral_rheum_prerheum //last rheum referral in the 2 years before rheumatology outpatient (requires rheum appt to have been present)
tabulate referral_rheum_prerheum ///
	if rheum_appt != 0 & referral_rheum_prerheum_date <= rheum_appt_date //last rheum referral in the 2 years before rheumatology outpatient, assuming ref date before rheum appt date (should be accounted for by Python code)
tabulate referral_rheum_precode //last rheum referral in the 2 years before EIA code
*Combined referral date: the pre-rheum-appt referral date where it is not system-missing,
*otherwise the pre-EIA-code referral date (which may itself be missing)
generate referral_rheum_comb_date = cond(referral_rheum_prerheum_date != ., referral_rheum_prerheum_date, referral_rheum_precode_date)
format %td referral_rheum_comb_date
**GP appointments
tabulate last_gp_refrheum //proportion with last GP appointment in 2 years before rheum referral (pre-rheum appt); requires there to have been a rheum referral before a rheum appt
*GP-appt indicator for those with 12m follow-up: copies last_gp_prerheum when a rheum appt date
*exists and is earlier than the end_date global minus 365 days; rows left system-missing are set to 0
generate last_gp_prerheum_to21 = last_gp_prerheum ///
	if rheum_appt_date != . & rheum_appt_date < (date("$end_date", "DMY") - 365)
replace last_gp_prerheum_to21 = 0 if last_gp_prerheum_to21 == .
lab define last_gp_prerheum_to21 0 "No" 1 "Yes", modify
lab val last_gp_prerheum_to21 last_gp_prerheum_to21
lab var last_gp_prerheum_to21 "GP and rheum appt 12m+"
*Same rule with a 180-day margin
generate last_gp_prerheum_to6m = last_gp_prerheum ///
	if rheum_appt_date != . & rheum_appt_date < (date("$end_date", "DMY") - 180)
replace last_gp_prerheum_to6m = 0 if last_gp_prerheum_to6m == .
lab define last_gp_prerheum_to6m 0 "No" 1 "Yes", modify
lab val last_gp_prerheum_to6m last_gp_prerheum_to6m
lab var last_gp_prerheum_to6m "GP and rheum appt 6m+"
*all_appts is 1 when all three events are flagged and their dates are in the order
*last GP appt, then rheum referral, then rheum appt (ties allowed); 0 otherwise
generate all_appts = ( ///
	last_gp_refrheum == 1 ///
	& referral_rheum_prerheum == 1 ///
	& rheum_appt == 1 ///
	& last_gp_refrheum_date <= referral_rheum_prerheum_date ///
	& referral_rheum_prerheum_date <= rheum_appt_date )
tabulate all_appts, missing //proportion who had a last gp appt, then rheum ref, then rheum appt
tabulate last_gp_refcode //last GP appointment before rheum ref (pre-eia code ref); requires there to have been a rheum referral before an EIA code (i.e. rheum appt could have been missing)
tabulate last_gp_prerheum //last GP appointment before rheum appt; requires there to have been a rheum appt before an EIA code
tabulate last_gp_precode //last GP appointment before EIA code
*Time to rheum referral (see notes above)=============================================*/
**Time from last GP to rheum ref before rheum appt (i.e. if appts are present and in correct time order)
*Days between the last GP appt and the referral; only computed when both dates and the rheum appt
*date are present and the order is GP appt, then referral, then rheum appt (ties allowed)
generate time_gp_rheum_ref_appt = (referral_rheum_prerheum_date - last_gp_refrheum_date) ///
	if referral_rheum_prerheum_date != . ///
	& last_gp_refrheum_date != . ///
	& rheum_appt_date != . ///
	& referral_rheum_prerheum_date >= last_gp_refrheum_date ///
	& referral_rheum_prerheum_date <= rheum_appt_date
tabstat time_gp_rheum_ref_appt, statistics(n mean p50 p25 p75) //all patients (should be same number as all_appts)
*Three-level category: 1 for up to 3 days, 2 for more than 3 and up to 7 days, 3 for more than 7 days
generate gp_ref_cat = cond(time_gp_rheum_ref_appt <= 3, 1, cond(time_gp_rheum_ref_appt <= 7, 2, 3)) ///
	if time_gp_rheum_ref_appt != .
lab define gp_ref_cat ///
	1 "Within 3 days" ///
	2 "Between 3-7 days" ///
	3 "More than 7 days", modify
lab val gp_ref_cat gp_ref_cat
lab var gp_ref_cat "Time to GP referral"
tabulate gp_ref_cat, missing
*Two-level category: 1 for up to 3 days, 2 for more than 3 days
generate gp_ref_3d = cond(time_gp_rheum_ref_appt <= 3, 1, 2) if time_gp_rheum_ref_appt != .
lab define gp_ref_3d ///
	1 "Within 3 days" ///
	2 "More than 3 days", modify
lab val gp_ref_3d gp_ref_3d
lab var gp_ref_3d "Time to GP referral"
tabulate gp_ref_3d, missing
**Time from last GP to rheum ref before eia code (sensitivity analysis; includes those with no rheum appt)
*Same interval using the pre-EIA-code referral and GP dates; the referral must fall on or after
*the GP appt and on or before the EIA code date
generate time_gp_rheum_ref_code = (referral_rheum_precode_date - last_gp_refcode_date) ///
	if referral_rheum_precode_date != . ///
	& last_gp_refcode_date != . ///
	& referral_rheum_precode_date >= last_gp_refcode_date ///
	& referral_rheum_precode_date <= eia_code_date
tabstat time_gp_rheum_ref_code, statistics(n mean p50 p25 p75)
**Time from last GP to rheum ref (combined - sensitivity analysis; includes those with no rheum appt)
*Uses the pre-rheum-appt interval where available, otherwise the pre-EIA-code interval
generate time_gp_rheum_ref_comb = cond(time_gp_rheum_ref_appt != ., time_gp_rheum_ref_appt, time_gp_rheum_ref_code)
tabstat time_gp_rheum_ref_comb, statistics(n mean p50 p25 p75)
*Time to rheum appointment=============================================*/
**Time from last GP pre-rheum appt to first rheum appt (proxy for referral to appt delay)
*Days from the last GP appt to the rheum appt; only computed when both dates are present
*and the rheum appt is on or after the GP appt
generate time_gp_rheum_appt = (rheum_appt_date - last_gp_prerheum_date) ///
	if rheum_appt_date != . ///
	& last_gp_prerheum_date != . ///
	& rheum_appt_date >= last_gp_prerheum_date
tabstat time_gp_rheum_appt, statistics(n mean p50 p25 p75)
**Time from rheum ref to rheum appt (i.e. if appts are present and in correct order)
generate time_ref_rheum_appt = (rheum_appt_date - referral_rheum_prerheum_date) ///
	if rheum_appt_date != . ///
	& referral_rheum_prerheum_date != . ///
	& referral_rheum_prerheum_date <= rheum_appt_date
tabstat time_ref_rheum_appt, statistics(n mean p50 p25 p75)
*Three-level category of GP-to-rheum-appt time: 1 for up to 21 days, 2 for more than 21 and
*up to 42 days, 3 for more than 42 days
generate gp_appt_cat = cond(time_gp_rheum_appt <= 21, 1, cond(time_gp_rheum_appt <= 42, 2, 3)) ///
	if time_gp_rheum_appt != .
lab define gp_appt_cat ///
	1 "Within 3 weeks" ///
	2 "Between 3-6 weeks" ///
	3 "More than 6 weeks", modify
lab val gp_appt_cat gp_appt_cat
lab var gp_appt_cat "Time to rheumatology assessment, overall"
tabulate gp_appt_cat, missing
*One copy of gp_appt_cat per appointment year: suffix 19, 20, 21, 22 corresponds to appt_year 1, 2, 3, 4
local gpcat_suffixes 19 20 21 22
forvalues gpcat_y = 1/4 {
	local gpcat_s : word `gpcat_y' of `gpcat_suffixes'
	generate gp_appt_cat_`gpcat_s' = gp_appt_cat if appt_year == `gpcat_y'
}
*Each yearly copy gets its own value label set with the same three categories
foreach gpcat_s of local gpcat_suffixes {
	lab define gp_appt_cat_`gpcat_s' 1 "Within 3 weeks" 2 "Between 3-6 weeks" 3 "More than 6 weeks", modify
	lab val gp_appt_cat_`gpcat_s' gp_appt_cat_`gpcat_s'
}
lab var gp_appt_cat_19 "Time to rheumatology assessment, Apr 2019-2020"
lab var gp_appt_cat_20 "Time to rheumatology assessment, Apr 2020-2021"
lab var gp_appt_cat_21 "Time to rheumatology assessment, Apr 2021-2022"
lab var gp_appt_cat_22 "Time to rheumatology assessment, Apr 2022-2023"
*Two-level category: 1 for up to 21 days, 2 for more than 21 days
generate gp_appt_3w = cond(time_gp_rheum_appt <= 21, 1, 2) if time_gp_rheum_appt != .
lab define gp_appt_3w ///
	1 "Within 3 weeks" ///
	2 "More than 3 weeks", modify
lab val gp_appt_3w gp_appt_3w
lab var gp_appt_3w "Time to rheumatology assessment, overall"
tabulate gp_appt_3w, missing
*Three-level category of referral-to-rheum-appt time: 1 for up to 21 days, 2 for more than 21
*and up to 42 days, 3 for more than 42 days; rows with a system-missing time stay missing
generate ref_appt_cat = cond(time_ref_rheum_appt <= 21, 1, cond(time_ref_rheum_appt <= 42, 2, 3)) ///
	if time_ref_rheum_appt != .
lab define ref_appt_cat ///
	1 "Within 3 weeks" ///
	2 "Between 3-6 weeks" ///
	3 "More than 6 weeks", modify
lab val ref_appt_cat ref_appt_cat