generated from opensafely/covid-vaccine-research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
count_prevalences.py
56 lines (40 loc) · 1.7 KB
/
count_prevalences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import pandas as pd
# Pickled cohort that this script reads.
input_path = "output/cohort.pickle"

# Backend name comes from the environment; "expectations" is used when
# OPENSAFELY_BACKEND is not set.
backend = os.environ.get("OPENSAFELY_BACKEND", "expectations")

# Tables go into a per-backend directory, created here if it is missing.
output_path = f"output/{backend}/tables"
os.makedirs(output_path, exist_ok=True)

cohort = pd.read_pickle(input_path)
def count_prevalences(cohort):
    """Tabulate patient counts per wave, plus an overall "total" row.

    Args:
        cohort: DataFrame with one row per patient. It must contain the
            columns ``patient_id``, ``wave`` and ``high_level_ethnicity``,
            and the five group columns listed in ``group_cols`` below, which
            are used directly as boolean row masks.

    Returns:
        DataFrame indexed by wave with a final row labelled ``"total"``.
        Count columns are ``total``, one per group column and
        ``ethnicity_1`` .. ``ethnicity_6``; every count is rounded down to a
        multiple of 7 and stored as an integer. Each count column ``c`` also
        has a ``c_percent`` column holding ``100 * c / total`` rounded to one
        decimal place, with undefined values (0 / 0) reported as 0.
    """
    group_cols = [
        "vacc_group",
        "decline_group",
        "other_reason_group",
        "declined_accepted_group",
        "preg_group",
    ]
    ethnicity_categories = [1, 2, 3, 4, 5, 6]

    # Per-wave number of patients; waves become the index.
    prevalences = pd.DataFrame(
        {"total": cohort.groupby(["wave"])["patient_id"].count()}
    )

    # Per-wave number of patients flagged in each group. Waves with no
    # flagged patient are missing from the right-hand side and become NaN.
    for col in group_cols:
        prevalences[col] = (
            cohort[cohort[col]].groupby(["wave"])["patient_id"].count()
        )

    # Whole-cohort figures, added as one extra row labelled "total".
    totals = cohort[group_cols].sum()
    totals["total"] = cohort["patient_id"].count()
    total_row = totals.rename("total").to_frame().T
    # DataFrame.append no longer exists in pandas >= 2.0, so the row is added
    # with concat. Giving it the same index name keeps the "wave" label on
    # the combined index.
    total_row.index.name = prevalences.index.name
    prevalences = pd.concat([prevalences, total_row])

    for category in ethnicity_categories:
        column = f"ethnicity_{category}"
        in_category = cohort[cohort["high_level_ethnicity"] == category]
        prevalences[column] = (
            in_category.groupby(["wave"])["patient_id"].count()
        )
        # Write the total in one .loc call. A chained
        # `prevalences[column].loc["total"] = ...` would modify a temporary
        # object under pandas Copy-on-Write and leave this cell empty.
        prevalences.loc["total", column] = in_category["patient_id"].count()

    # Missing combinations count as zero; every count is then rounded down to
    # a multiple of 7 (presumably for disclosure control - confirm).
    prevalences = prevalences.fillna(0)
    prevalences = ((prevalences // 7) * 7).astype(int)

    # Snapshot the count columns first, because the loop adds new columns.
    for count_col in list(prevalences.columns):
        prevalences[f"{count_col}_percent"] = (
            100 * prevalences[count_col] / prevalences["total"]
        ).round(1)

    # A total rounded down to 0 gives 0 / 0 = NaN above; report that as 0.
    prevalences = prevalences.fillna(0)
    return prevalences
# Build the summary table for the loaded cohort and save it as CSV inside the
# backend's tables directory.
prevalences = count_prevalences(cohort)
prevalences.to_csv(f"{output_path}/prevalences.csv")