generated from opensafely/research-template
/
calculate_measures.py
41 lines (28 loc) · 1.58 KB
/
calculate_measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
from utilities import OUTPUT_DIR, match_input_files, get_date_input_file, calculate_rate, redact_small_numbers
from study_definition import indicators_list
# Demographic columns by which every indicator is broken down.
demographics = ["age_band", "sex", "region", "imd", "care_home_type"]

# Indicators that use the shared "indicator_me_denominator" column instead of
# a denominator column named after the indicator itself.
SHARED_ME_DENOMINATOR_INDICATORS = frozenset({"me_no_fbc", "me_no_lft"})


def denominator_column(indicator):
    """Return the name of the denominator column used for *indicator*.

    Args:
        indicator: Indicator identifier (an entry of ``indicators_list``).

    Returns:
        ``"indicator_me_denominator"`` for the indicators that share the
        "me" denominator, otherwise ``f"indicator_{indicator}_denominator"``.
    """
    if indicator in SHARED_ME_DENOMINATOR_INDICATORS:
        return "indicator_me_denominator"
    return f"indicator_{indicator}_denominator"


def matching_input_files(directory, is_input_file):
    """Return the entries of *directory* accepted by *is_input_file*, sorted.

    ``Path.iterdir()`` yields entries in arbitrary order, so the result is
    sorted to make the row order of the generated CSV files reproducible
    from run to run.

    Args:
        directory: Path-like object exposing ``iterdir()``.
        is_input_file: Callable taking a file name and returning a truthy
            value for files that should be processed.

    Returns:
        A list of the matching paths in ascending path order.
    """
    return sorted(
        path for path in directory.iterdir() if is_input_file(path.name)
    )


def summarise_indicator(df, demographic, indicator, date):
    """Aggregate one indicator by one demographic for a single input file.

    Args:
        df: Data frame read from one input file.
        demographic: Name of the column to group by.
        indicator: Indicator identifier; selects the numerator and
            denominator columns to sum.
        date: Value stored in the ``date`` column of the result (whatever
            ``get_date_input_file`` returned for the file).

    Returns:
        A data frame with one row per value of *demographic*, holding the
        summed numerator and denominator plus ``rate`` and ``date`` columns.
    """
    numerator = f"indicator_{indicator}_numerator"
    denominator = denominator_column(indicator)
    event = (
        df.groupby(by=[demographic])[[numerator, denominator]]
        .sum()
        .reset_index()
    )
    # NOTE(review): the 1000 is presumably a "per 1,000" scaling factor --
    # confirm against utilities.calculate_rate.
    event["rate"] = calculate_rate(event, numerator, denominator, 1000)
    event["date"] = date
    return event


def main():
    """Build one measures CSV per (indicator, demographic) pair.

    Every matching input file in ``OUTPUT_DIR`` is read once; for each
    demographic and indicator the per-file summary is collected, and the
    summaries are then concatenated and written to
    ``OUTPUT_DIR / "indicator_measure_<indicator>_<demographic>.csv"``.

    Raises:
        ValueError: Raised by ``pd.concat`` when no input file matched, as
            there is then nothing to concatenate.
    """
    # demographic -> indicator -> list of per-file summary frames
    df_dict = {d: {i: [] for i in indicators_list} for d in demographics}

    # Sorted iteration keeps the rows of each output file in a stable order.
    for file in matching_input_files(OUTPUT_DIR, match_input_files):
        # Paths yielded by iterdir() already include the directory.
        df = pd.read_feather(file)
        date = get_date_input_file(file.name)

        for d in demographics:
            for i in indicators_list:
                df_dict[d][i].append(summarise_indicator(df, d, i, date))

    for demographic_key, events_by_indicator in df_dict.items():
        for indicator_key, events in events_by_indicator.items():
            df_combined = pd.concat(events, axis=0)
            df_combined.to_csv(
                OUTPUT_DIR
                / f"indicator_measure_{indicator_key}_{demographic_key}.csv"
            )


if __name__ == "__main__":
    main()