generated from opensafely/research-template
/
get_patient_count.py
64 lines (48 loc) · 1.59 KB
/
get_patient_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import json
import pandas as pd
import numpy as np
# Names of the monthly extract files (first of each month, 2019-09 through
# 2021-09) that contribute to the counts. Any other file found in the output
# directory is ignored.
dates = [
    "input_2019-09-01.csv",
    "input_2019-10-01.csv",
    "input_2019-11-01.csv",
    "input_2019-12-01.csv",
    "input_2020-01-01.csv",
    "input_2020-02-01.csv",
    "input_2020-03-01.csv",
    "input_2020-04-01.csv",
    "input_2020-05-01.csv",
    "input_2020-06-01.csv",
    "input_2020-07-01.csv",
    "input_2020-08-01.csv",
    "input_2020-09-01.csv",
    "input_2020-10-01.csv",
    "input_2020-11-01.csv",
    "input_2020-12-01.csv",
    "input_2021-01-01.csv",
    "input_2021-02-01.csv",
    "input_2021-03-01.csv",
    "input_2021-04-01.csv",
    "input_2021-05-01.csv",
    "input_2021-06-01.csv",
    "input_2021-07-01.csv",
    "input_2021-08-01.csv",
    "input_2021-09-01.csv",
]


def _count_unique(id_arrays):
    """Return the number of distinct values across a list of 1-D arrays."""
    if not id_arrays:
        # np.concatenate raises on an empty list; no files means no patients.
        return 0
    return len(np.unique(np.concatenate(id_arrays)))


def main(output_dir='output'):
    """Count distinct patients across the monthly extract files.

    Every file in ``output_dir`` whose name appears in ``dates`` is read as a
    CSV. Two counts are produced over all of those files combined:

    * ``mechanical_valve`` -- distinct ``patient_id`` values in any row.
    * ``mechanical_valve_doac`` -- distinct ``patient_id`` values in rows
      where the ``doac`` column equals 1.

    The result is written to ``patient_count.csv`` inside ``output_dir``
    (with the default integer index as the first column) and also returned.

    Args:
        output_dir: Directory holding the input CSVs; the result file is
            written to the same directory.

    Returns:
        A two-row DataFrame with columns ``group`` and ``count``.
    """
    all_ids = []
    doac_ids = []

    for file in os.listdir(output_dir):
        if file not in dates:
            continue

        df = pd.read_csv(os.path.join(output_dir, file))

        # De-duplicate per file first so the arrays kept in memory stay small;
        # the final np.unique removes patients repeated across months.
        all_ids.append(np.unique(df['patient_id']))
        doac_ids.append(np.unique(df.loc[df['doac'] == 1, 'patient_id']))

    count_df = pd.DataFrame(
        [
            ['mechanical_valve', _count_unique(all_ids)],
            ['mechanical_valve_doac', _count_unique(doac_ids)],
        ],
        columns=['group', 'count'],
    )
    count_df.to_csv(os.path.join(output_dir, 'patient_count.csv'))
    return count_df


if __name__ == "__main__":
    main()