generated from opensafely/research-template
/
plot_measures.py
71 lines (53 loc) · 1.99 KB
/
plot_measures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
import numpy as np
from pathlib import Path
from utilities import *
# Script body: for every measure/denominator pair, read the joined measure
# CSV, convert the measure value to a rate (value * 100) and draw the chart
# via the project helpers imported from `utilities`.

# Make sure the figures directory exists. `exist_ok=True` replaces the
# separate existence check (no race between check and create) and
# `parents=True` also covers a missing OUTPUT_DIR.
(OUTPUT_DIR / "figures").mkdir(parents=True, exist_ok=True)

# Measures for which no demographic breakdown plots are produced.
measures_without_breakdown = ["RRT", "dialysis", "kidney_tx", "ckd", "ckd_primis_1_5"]

for i in ["cr_cl", "creatinine", "eGFR", "RRT", "dialysis", "kidney_tx", "ckd", "ckd_primis_1_5"]:
    # The set of breakdowns depends only on the measure, so it is decided
    # once per measure rather than once per (measure, denominator) pair.
    if i in measures_without_breakdown:
        demographics = []
    else:
        demographics = ["age_band", "sex", "imd", "region"]

    for j in ["population", "at_risk", "diabetes", "hypertension"]:
        df = pd.read_csv(
            OUTPUT_DIR / f"joined/measure_{i}_{j}_rate.csv", parse_dates=["date"]
        )
        df = drop_irrelevant_practices(df)

        # Replace the raw "value" column with "rate" (= value * 100).
        # `assign` returns a new frame, so nothing is written into a frame
        # that may be a filtered view of another one.
        df = df.assign(rate=df["value"] * 100).drop(["value"], axis=1)
        # Infinite rates are turned into NaN before plotting.
        df = df.replace(np.inf, np.nan)

        # NOTE(review): the result is not used anywhere in this script;
        # the chart below is given `df`. Kept because the helper's side
        # effects are not visible from here - confirm and remove if redundant.
        df_deciles = compute_redact_deciles(df, "date", i, "rate")

        # NOTE(review): this path is hard-coded relative to the working
        # directory, whereas the directory created above is derived from
        # OUTPUT_DIR - confirm both always point to the same place.
        deciles_chart(
            df,
            filename=f"output/figures/plot_{i}_{j}.jpeg",
            period_column="date",
            column="rate",
            count_column=i,
            ylabel="Percentage",
        )

        # demographic plots
        for d in demographics:
            df = pd.read_csv(
                OUTPUT_DIR / f"joined/measure_{i}_{d}_{j}_rate.csv",
                parse_dates=["date"],
            )

            # Drop rows whose category value is not wanted in the plot.
            if d == "sex":
                df = df[df["sex"].isin(["M", "F"])]
            elif d == "imd":
                df = df[df["imd"] != 0]
            elif d == "age_band":
                df = df[df["age_band"] != "missing"]
            elif d == "region":
                df = df[df["region"].notnull()]

            # `df` is a boolean-filtered selection at this point; `assign`
            # builds a new frame instead of setting a column on that
            # selection, which avoids pandas' SettingWithCopyWarning.
            df = df.assign(rate=df["value"] * 100)
            df = redact_small_numbers(df, 10, i, j, "rate", "date")

            # NOTE(review): the plotted column is value * 100 but the axis
            # label says "Proportion" (the decile chart above uses
            # "Percentage") - confirm which label is intended.
            plot_measures(
                df=df,
                filename=f"plot_{i}_{d}_{j}",
                title=f"{i} by {d}",
                column_to_plot="rate",
                y_label="Proportion",
                as_bar=False,
                category=d,
            )