generated from opensafely/research-template
/
generate_measures_demographics.py
37 lines (27 loc) · 1.39 KB
/
generate_measures_demographics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import pandas as pd
# Columns of the input files to break the counts down by (each combined
# with "AgeGroup").
demographics = ["region", "imd", "ethnicity"]

# Columns that are summed within each group; one output file is written per
# (outcome, demographic) pair.
OUTCOMES = ["CVD", "cancer", "respiratory_disease"]

# Input files are those in the input directory whose name starts with this.
INPUT_PREFIX = "input_2"


def _date_from_filename(filename):
    """Return the date label embedded in an input file name.

    The label is the text after the last underscore with the file suffix
    removed, e.g. ``input_2020-01-01.csv`` -> ``2020-01-01``.
    """
    tail = filename.split("_")[-1]
    # Strip the suffix explicitly rather than slicing off a fixed four
    # characters, which would mangle the date for e.g. ``.csv.gz`` files.
    for suffix in (".csv.gz", ".csv"):
        if tail.endswith(suffix):
            return tail[: -len(suffix)]
    return os.path.splitext(tail)[0]


def _summarise_by_demographic(df, var, date):
    """Aggregate one input frame by ``AgeGroup`` and the column *var*.

    Returns a frame with one row per (AgeGroup, var) group holding the sum
    of every outcome column, a ``population`` column with the number of rows
    in the group, and a ``date`` column set to *date*.
    """
    grouped = df.groupby(["AgeGroup", var])
    values = grouped.agg({outcome: "sum" for outcome in OUTCOMES}).reset_index()
    # size() and agg() come from the same groupby object, so their groups are
    # in the same order and can be matched up positionally.
    values["population"] = grouped.size().to_numpy()
    values["date"] = date
    return values


def main(input_dir="output", output_dir="output"):
    """Build the per-demographic measure files from the input extracts.

    Every file in *input_dir* whose name starts with ``input_2`` is read as
    CSV, summarised by ``AgeGroup`` and each entry of ``demographics``, and
    the summaries of all files are stacked. For each demographic and outcome
    a file ``measure_<outcome>_rate_<demographic>.csv`` is written to
    *output_dir*.

    Raises:
        ValueError: if *input_dir* contains no matching input file.
    """
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the output files is reproducible.
    input_files = sorted(
        name for name in os.listdir(input_dir) if name.startswith(INPUT_PREFIX)
    )
    if not input_files:
        raise ValueError(
            f"No files starting with {INPUT_PREFIX!r} found in {input_dir!r}"
        )

    summaries = {var: [] for var in demographics}
    for name in input_files:
        date = _date_from_filename(name)
        df = pd.read_csv(os.path.join(input_dir, name))
        for var in demographics:
            summaries[var].append(_summarise_by_demographic(df, var, date))

    for var, frames in summaries.items():
        combined = pd.concat(frames)
        for outcome in OUTCOMES:
            out_path = os.path.join(
                output_dir, f"measure_{outcome}_rate_{var}.csv"
            )
            # The index is written as the first column, as this script has
            # always done, so the file layout stays the same.
            combined[["AgeGroup", var, outcome, "population", "date"]].to_csv(
                out_path
            )


if __name__ == "__main__":
    main()