generated from opensafely/research-template
/
define_dataset_table.py
152 lines (131 loc) · 5.29 KB
/
define_dataset_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#########################################################
# This script extracts relevant demographics and
# opioid prescribing data for people from Apr-Jun 2022
# for inclusion in Table 1
#########################################################
from ehrql import Dataset, case, when, months, days, years, weeks, Measures, INTERVAL
from ehrql.tables.beta.tpp import (
patients,
medications,
addresses,
practice_registrations,
clinical_events)
import codelists
from dataset_definition import make_dataset_opioids
dataset = make_dataset_opioids(index_date="2022-04-01", end_date="2022-06-30")
# Define population #
dataset.define_population(
(patients.age_on("2022-04-01") >= 18)
& (patients.age_on("2022-04-01") < 110)
& ((patients.sex == "male") | (patients.sex == "female"))
& (patients.date_of_death.is_after("2022-04-01") | patients.date_of_death.is_null())
& (practice_registrations.for_patient_on("2022-04-01").exists_for_patient())
)
# Demographics #
# Age
age = patients.age_on("2022-04-01")
dataset.age_group = case(
when(age < 30).then("18-29"),
when(age < 40).then("30-39"),
when(age < 50).then("40-49"),
when(age < 60).then("50-59"),
when(age < 70).then("60-69"),
when(age < 80).then("70-79"),
when(age < 90).then("80-89"),
when(age >= 90).then("90+"),
default="missing",
)
# Age for standardisation
dataset.age_stand = case(
when(age < 25).then("18-24"),
when(age < 30).then("25-29"),
when(age < 35).then("30-34"),
when(age < 40).then("35-39"),
when(age < 45).then("40-44"),
when(age < 50).then("45-49"),
when(age < 55).then("50-54"),
when(age < 60).then("55-59"),
when(age < 65).then("60-64"),
when(age < 70).then("65-69"),
when(age < 75).then("70-74"),
when(age < 80).then("75-79"),
when(age < 85).then("80-84"),
when(age < 90).then("85-89"),
when(age >= 90).then("90+"),
default="missing",
)
# Sex
dataset.sex = patients.sex
# IMD decile
imd = addresses.for_patient_on("2022-04-01").imd_rounded
dataset.imd10 = case(
when((imd >= 0) & (imd < int(32844 * 1 / 10))).then("1 (most deprived)"),
when(imd < int(32844 * 2 / 10)).then("2"),
when(imd < int(32844 * 3 / 10)).then("3"),
when(imd < int(32844 * 4 / 10)).then("4"),
when(imd < int(32844 * 5 / 10)).then("5"),
when(imd < int(32844 * 6 / 10)).then("6"),
when(imd < int(32844 * 7 / 10)).then("7"),
when(imd < int(32844 * 8 / 10)).then("8"),
when(imd < int(32844 * 9 / 10)).then("9"),
when(imd >= int(32844 * 9 / 10)).then("10 (least deprived)"),
default="unknown"
)
# Ethnicity 16 categories
ethnicity16 = clinical_events.where(clinical_events.snomedct_code.is_in(codelists.ethnicity_codes_16)
).where(
clinical_events.date.is_on_or_before("2022-04-01")
).sort_by(
clinical_events.date
).last_for_patient().snomedct_code.to_category(codelists.ethnicity_codes_16)
dataset.ethnicity16 = case(
when(ethnicity16 == "1").then("White - British"),
when(ethnicity16 == "2").then("White - Irish"),
when(ethnicity16 == "3").then("White - Other"),
when(ethnicity16 == "4").then("Mixed - White/Black Caribbean"),
when(ethnicity16 == "5").then("Mixed - White/Black African"),
when(ethnicity16 == "6").then("Mixed - White/Asian"),
when(ethnicity16 == "7").then("Mixed - Other"),
when(ethnicity16 == "8").then("Asian or Asian British - Indian"),
when(ethnicity16 == "9").then("Asian or Asian British - Pakistani"),
when(ethnicity16 == "10").then("Asian or Asian British - Bangladeshi"),
when(ethnicity16 == "11").then("Asian or Asian British - Other"),
when(ethnicity16 == "12").then("Black - Caribbean"),
when(ethnicity16 == "13").then("Black - African"),
when(ethnicity16 == "14").then("Black - Other"),
when(ethnicity16 == "15").then("Other - Chinese"),
when(ethnicity16 == "16").then("Other - Other"),
default="Unknown"
)
# Ethnicity 6 categories
ethnicity6 = clinical_events.where(
clinical_events.snomedct_code.is_in(codelists.ethnicity_codes_6)
).where(
clinical_events.date.is_on_or_before("2022-04-01")
).sort_by(
clinical_events.date
).last_for_patient().snomedct_code.to_category(codelists.ethnicity_codes_6)
dataset.ethnicity6 = case(
when(ethnicity6 == "1").then("White"),
when(ethnicity6 == "2").then("Mixed"),
when(ethnicity6 == "3").then("South Asian"),
when(ethnicity6 == "4").then("Black"),
when(ethnicity6 == "5").then("Other"),
when(ethnicity6 == "6").then("Not stated"),
default="Unknown"
)
# Practice region
dataset.region = practice_registrations.for_patient_on("2022-04-01").practice_nuts1_region_name
# In care home based on primis codes/TPP address match
carehome_primis = clinical_events.where(
clinical_events.snomedct_code.is_in(codelists.carehome_primis_codes)
).where(
clinical_events.date.is_on_or_before("2022-04-01")
).exists_for_patient()
carehome_tpp = addresses.for_patient_on("2022-04-01").care_home_is_potential_match
dataset.carehome = case(
when(carehome_primis).then(True),
when(carehome_tpp).then(True),
default=False
)
##############################################