generated from opensafely/research-template
/
counts.py
143 lines (128 loc) · 5.59 KB
/
counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
from config import start_date, end_date, date_cols
import sys
sys.path.append('lib/')
from functions import *
def redact_small_numbers(df, column, threshold=5):
    """Blank out every row whose value in ``column`` is a small count.

    A row is selected when its value in ``column`` equals one of the
    integers ``1..threshold`` (inclusive).  Zero, values above the
    threshold and missing values are left alone.  For each selected row,
    *all* columns are set to NaN, not just ``column``.

    Args:
        df: DataFrame to redact.  It is modified in place.
        column: Label of the column holding the counts to inspect.
        threshold: Largest count that is still treated as "small".
            Defaults to 5, i.e. the values 1, 2, 3, 4 and 5 are redacted.

    Returns:
        The same ``df`` object that was passed in, after redaction.
    """
    # Imported here because the module's own import lines never bind `np`;
    # without this the function only works if `from functions import *`
    # happens to export numpy under that name.
    import numpy as np

    small_counts = list(range(1, threshold + 1))
    mask = df[column].isin(small_counts)
    # Whole-row assignment: every column of a matching row becomes NaN.
    df.loc[mask, :] = np.nan
    return df
# Load the extracted data; every column named in date_cols is parsed as a date
df = pd.read_csv('output/input.csv', parse_dates=date_cols)

# Empty frame whose index holds one entry per day from start_date to end_date
study_days = pd.date_range(start=start_date, end=end_date, freq="D")
consec_dates = pd.DataFrame(index=study_days)

# Keep only the date columns, then strip the "date_" text from their names
activity_dates = df.filter(items=date_cols)
stripped_names = activity_dates.columns.str.replace("date_", "")
activity_dates.columns = stripped_names


def _daily_counts(event_dates):
    """Run eventcountseries on one date column against the study-day frame."""
    return eventcountseries(event_dates=event_dates, date_range=consec_dates)


# Code activity per day, one column per date variable
codecounts_day = activity_dates.apply(_daily_counts)

# Code activity per week: daily counts summed within each 'W' resample bin
weekly_bins = codecounts_day.resample('W')
codecounts_week = weekly_bins.sum()

# Total code activity over the whole period: column-wise sum of the weekly counts
codecounts_total = codecounts_week.sum()
# Row labels for the summary table, one per codelist.  Later in the script
# these become the DataFrame index that the total counts are merged onto, so
# they presumably have to match the date column names with "date_" removed
# (verify against config.date_cols).
tableindex = [
    "probable_covid",
    "probable_covid_pos_test",
    "probable_covid_sequelae",
    "suspected_covid_advice",
    "suspected_covid_had_test",
    "suspected_covid_isolation",
    "suspected_covid_nonspecific",
    "suspected_covid",
    "historic_covid",
    "potential_historic_covid",
    "exposure_to_disease",
    "antigen_negative",
    "covid_unrelated_to_case_status"
]

# Column-oriented contents of the summary table.  Every list has exactly one
# entry per label in `tableindex`, in the same order.
tabledata = {
    # Top-level grouping; an empty string means "same category as the row above".
    'Category': [
        'Probable case',
        '',
        '',
        'Suspected case',
        '',
        '',
        '',
        '',
        'Historic case',
        'Potential historic case',
        'Exposure to disease',
        'Antigen test negative',
        'COVID-19 related but case status not specified',
    ],
    # Finer grouping inside a category; '-' where a category has no sub-division.
    'Sub-category': [
        'Clinical code',
        'Positive test',
        'Sequelae',  # spelling corrected (was 'Sequalae')
        'Advice',
        'Had test',
        'Isolation code',
        'Non-specific clinical assessment',
        'Suspected codes',
        '-',
        '-',
        '-',
        '-',
        '-',
    ],
    # Display name of each codelist.
    'Codelist': [
        'Probable case: clinical code',
        'Probable case: positive test',
        'Probable case: sequelae',
        'Suspected case: advice',
        'Suspected case: had test',
        'Suspected case: isolation code',
        'Suspected case: non-specific clinical assessment',
        'Suspected case: suspected codes',
        'Historic case',
        'Potential historic case',
        'Exposure to disease',
        'Antigen test negative',
        'COVID-19 related but case status not specified',
    ],
    # Short plain-language description of what each codelist captures.
    'Description': [
        'Clinical diagnosis of COVID-19 made',
        'Record of positive test result for SARS-CoV-2 (active infection)',
        'Symptom or condition recorded as secondary to SARS-CoV-2',
        'General advice given about SARS-CoV-2',
        'Record of having had a test for active infection with SARS-CoV-2',
        'Self- or household-isolation recorded',
        'Clinical assessments plausibly related to COVID-19',
        '"Suspect" mentioned, or previous COVID-19 reported',
        'SARS-CoV-2 antibodies or immunity recorded',
        'Has had a test for SARS-CoV-2 antibodies',
        'Record of contact/exposure/procedure',
        'Record of negative test result for SARS-CoV-2',
        'Healthcare contact related to COVID-19 but not case status',
    ],
    # URL of each codelist on codelists.opensafely.org.
    'link': [
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-probable-covid-clinical-code/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-probable-covid-positive-test/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-probable-covid-sequelae/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-suspected-covid-advice/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-suspected-covid-had-test/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-suspected-covid-isolation-code/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-suspected-covid-nonspecific-clinical-assessment/2020-07-16/",
        # NOTE(review): this slug does not follow the "suspected-covid-..."
        # pattern of the four sibling links above -- confirm the URL resolves.
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-suspected-codes-suspected-codes/2020-07-16/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-historic-case/2020-06-23/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-potential-historic-case/2020-06-23/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-exposure-to-disease/2020-06-23/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-antigen-test-negative/2020-06-24/",
        "https://codelists.opensafely.org/codelist/opensafely/covid-identification-in-primary-care-unrelated-to-case-status/2020-06-23/",
    ]
}
# Turn the column dict into a table with one row per codelist label
tabledata = pd.DataFrame(data=tabledata, index=tableindex)

# Wrap each codelist name in an HTML anchor that opens its link in a new tab
anchor_open = "<a href='" + tabledata['link'] + "' target='_blank'>"
tabledata['Codelist'] = anchor_open + tabledata['Codelist'] + "</a>"

# Attach the totals as a "Count" column, matching rows on the index
codecounts_total.name = "Count"
tabledata = tabledata.merge(
    codecounts_total,
    left_index=True,
    right_index=True,
)

# redact_small_numbers blanks entire rows whose "Count" is 1-5 before saving
redacted_table = redact_small_numbers(tabledata, "Count")
redacted_table.to_csv("output/tabledata.csv")

# Apply the same redaction to the weekly counts, one column at a time;
# a small count in any column blanks that whole week's row
for code_name in list(codecounts_week.columns):
    codecounts_week = redact_small_numbers(codecounts_week, code_name)

codecounts_week.to_csv("output/codecounts_week.csv")