generated from opensafely/research-template
/
get_patients_counts.py
40 lines (26 loc) · 1.07 KB
/
get_patients_counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import os
import json
import numpy as np
# Indicator columns whose patients are counted; each is expected to be a
# column of every input file, with the value 1 marking a matching row.
sentinel_measures = [
    "antipsychotics_first_gen",
    "antipsychotics_second_gen",
    "antipsychotics_injectable_and_depot",
    "Prochlorperazine",
]

# Directory scanned for input files, and the JSON file the counts go to.
DATA_DIR = 'output/data'
OUTPUT_PATH = 'output/data/patient_count.json'

# Counts are reported in millions of patients.
PATIENTS_PER_MILLION = 1000000


def _collect_unique_patient_ids(data_dir, measures):
    """Return {measure: array of unique patient ids} across all input files.

    Every file in *data_dir* whose name starts with ``input`` is read as a
    CSV.  For each measure, the ``patient_id`` values of rows where the
    measure column equals 1 are merged into a running array of unique ids,
    so memory grows with the number of distinct patients rather than with
    the total number of matching rows across all files.

    Raises:
        KeyError: if an input file lacks a measure column or ``patient_id``.
    """
    unique_ids = {}
    for file_name in os.listdir(data_dir):
        if not file_name.startswith('input'):
            continue
        df = pd.read_csv(os.path.join(data_dir, file_name))
        for measure in measures:
            ids = df.loc[df[measure] == 1, 'patient_id'].to_numpy()
            if measure in unique_ids:
                # Merge and de-duplicate in one step.
                unique_ids[measure] = np.union1d(unique_ids[measure], ids)
            else:
                # First file seen: the key is created even when no row
                # matches, so the measure is reported with a count of 0.
                unique_ids[measure] = np.unique(ids)
    return unique_ids


def _counts_in_millions(unique_ids):
    """Convert {measure: unique ids} into {measure: count in millions}."""
    return {
        measure: len(ids) / PATIENTS_PER_MILLION
        for measure, ids in unique_ids.items()
    }


def main(data_dir=DATA_DIR, output_path=OUTPUT_PATH, measures=None):
    """Count unique patients per measure and write the result as JSON.

    Args:
        data_dir: directory containing the ``input*`` CSV files.
        output_path: path of the JSON file to write.
        measures: indicator column names; defaults to ``sentinel_measures``.

    Returns:
        The ``{measure: unique patients in millions}`` dict, which is also
        written to *output_path* under the ``"num_patients"`` key.  If no
        input file is found the dict is empty.
    """
    if measures is None:
        measures = sentinel_measures
    unique_ids = _collect_unique_patient_ids(data_dir, measures)
    patient_counts_dict = _counts_in_millions(unique_ids)
    with open(output_path, 'w') as f:
        json.dump({"num_patients": patient_counts_dict}, f)
    return patient_counts_dict


if __name__ == "__main__":
    main()