generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
population_count.py
52 lines (41 loc) · 1.7 KB
/
population_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas as pd
from pathlib import Path
from utilities import (
get_patients_left_tpp,
get_patients_joined_tpp,
concatenate_patients_moved,
match_input_files,
get_date_input_file,
save_dict_as_json,
)
# Script: compare every monthly population extract in ``output/joined``
# against the baseline month and record the patients that the project helpers
# report as having left or joined, then write the aggregated counts to JSON.
#
# NOTE(review): the exact semantics of the ``utilities`` helpers (what counts
# as "left"/"joined", how duplicates are resolved) are defined outside this
# file -- confirm against ``utilities`` before relying on the descriptions here.

# The month every other extract is compared against.
BASELINE_DATE = "2019-01-01"
INPUT_DIR = Path("output/joined")
# Demographic columns forwarded to the helpers for each comparison.
DEMOGRAPHIC_COLUMNS = ("sex", "age_band", "ethnicity_x", "imd", "region")

# this will contain dataframes of patients who have joined or left during the
# study period. There will be duplicates
moved = []

first_month = pd.read_feather(
    f"output/joined/input_population_{BASELINE_DATE}.feather"
)

# ``Path.iterdir`` yields entries in arbitrary order; sorting keeps the order
# of the collected frames (and therefore the run) reproducible.
for file in sorted(INPUT_DIR.iterdir()):
    if not match_input_files(file.name):
        continue

    # Check the date *before* loading the file so the baseline extract, which
    # is already held in ``first_month``, is not read from disk a second time.
    date = get_date_input_file(file.name)
    if date == BASELINE_DATE:
        # the baseline is the month being compared, so we ignore it here
        continue

    df = pd.read_feather(file)

    # A fresh list is passed to each call, as the original code did, so the
    # helpers never share (or mutate) a common column list.
    demographics_patients_left = get_patients_left_tpp(
        df,
        first_month,
        list(DEMOGRAPHIC_COLUMNS),
    )
    demographics_patients_joined = get_patients_joined_tpp(
        df,
        first_month,
        "age",
        "age_start",
        list(DEMOGRAPHIC_COLUMNS),
    )

    # Cast ethnicity to str in both frames.
    # NOTE(review): presumably to give the column a uniform dtype before the
    # frames are concatenated -- confirm.
    demographics_patients_left["ethnicity_x"] = demographics_patients_left[
        "ethnicity_x"
    ].astype(str)
    demographics_patients_joined["ethnicity_x"] = demographics_patients_joined[
        "ethnicity_x"
    ].astype(str)

    moved.extend([demographics_patients_left, demographics_patients_joined])

# Combine everything collected above and persist the two summaries.
total_moved, dem_counts = concatenate_patients_moved(moved)
save_dict_as_json(total_moved, "output/moved_count.json")
save_dict_as_json(dem_counts, "output/moved_demographic_count.json")