generated from opensafely/research-template
/
get_patients_counts.py
55 lines (41 loc) · 1.25 KB
/
get_patients_counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
import os
import json
import numpy as np
# Measures to count patients for. Each name is used as a column of every
# input file; a row counts towards a measure when that column equals 1.
# ("alt" was previously listed twice; the duplicate only repeated work.)
sentinel_measures = [
    "qrisk2",
    "asthma",
    "copd",
    "sodium",
    "cholesterol",
    "alt",
    "tsh",
    "rbc",
    "hba1c",
    "systolic_bp",
    "medication_review",
]

# Values of the second "_"-separated filename component that mark an
# "input*" file as one to skip.
EXCLUDED_FILE_TAGS = ("ethnicity.feather", "practice")


def is_measure_input(filename):
    """Return True if *filename* names an input file that should be counted.

    A file qualifies when its name starts with "input" and its second
    "_"-separated component is not one of ``EXCLUDED_FILE_TAGS``. A name
    with no "_" has no second component and is therefore accepted (the
    previous inline check raised IndexError on such names).
    """
    if not filename.startswith("input"):
        return False
    parts = filename.split("_")
    return len(parts) < 2 or parts[1] not in EXCLUDED_FILE_TAGS


def count_unique_patients(frames, measures):
    """Count unique patients per measure across *frames*, in millions.

    Args:
        frames: iterable of DataFrames, each with a "patient_id" column and
            one column per entry in *measures*. May be a generator, so only
            one frame has to be held in memory at a time.
        measures: iterable of column names to count.

    Returns:
        dict mapping each measure to the number of distinct "patient_id"
        values whose measure column equals 1 in at least one frame, divided
        by 1,000,000. Empty when *frames* yields nothing.
    """
    measures = list(measures)  # re-iterated once per frame
    ids_by_measure = {}
    for df in frames:
        for measure in measures:
            flagged = df.loc[df[measure] == 1, "patient_id"].to_numpy()
            # De-duplicate per frame so the retained ids stay small; the
            # final np.unique below removes repeats across frames.
            ids_by_measure.setdefault(measure, []).append(np.unique(flagged))

    return {
        measure: len(np.unique(np.concatenate(chunks))) / 1000000
        for measure, chunks in ids_by_measure.items()
    }


def main(output_dir="output"):
    """Read the input files in *output_dir* and write patient_count.json.

    Every file accepted by ``is_measure_input`` is loaded with
    ``pd.read_feather``; the per-measure unique patient counts (in millions)
    are written to ``<output_dir>/patient_count.json`` under the key
    "num_patients".
    """
    frames = (
        pd.read_feather(os.path.join(output_dir, name))
        for name in os.listdir(output_dir)
        if is_measure_input(name)
    )
    patient_counts_dict = count_unique_patients(frames, sentinel_measures)

    with open(os.path.join(output_dir, "patient_count.json"), "w") as f:
        json.dump({"num_patients": patient_counts_dict}, f)


if __name__ == "__main__":
    main()