generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plots.py
88 lines (75 loc) · 4.38 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pandas as pd
from utilities import *
# Ensure the figures output directory exists before any plotting happens.
# A single idempotent mkdir replaces the separate exists()/mkdir pair: it
# cannot race with another process creating the directory, and parents=True
# also covers the case where OUTPUT_DIR itself has not been created yet.
(OUTPUT_DIR / "figures").mkdir(parents=True, exist_ok=True)
# Breakdown names iterated for every medication review measure. Each name is
# substituted into the "redacted_measure_<review>_<breakdown>_rate.csv"
# filename and into the output figure filenames.
breakdowns = [
    "age_band",
    "sex",
    "imdQ5",
    "region",
    "ethnicity",
    "nhome",
    "learning_disability",
    "care_home_type",
    # The four medication breakdowns below use a different column name inside
    # the CSV than in the filename; see columnlookupdict.
    "addictive_meds",
    "dmards",
    "highrisk_meds",
    "teratogenic_meds",
]
# Breakdown name (as used in filenames) -> name of the matching column in the
# loaded table. Breakdowns not listed here use their own name as the column.
columnlookupdict = {
    "addictive_meds": "addictivemeds_last12m",
    "dmards": "dmards_last12m",
    "highrisk_meds": "highriskmeds_last12m",
    "teratogenic_meds": "teratogenicmeds_last12m",
}
def checkColumnDict(dic, key):
    """Return the value stored for ``key`` in ``dic``, or ``key`` itself.

    Args:
        dic: Mapping of names to their replacements.
        key: Name to look up.

    Returns:
        ``dic[key]`` when ``key`` is present, otherwise ``key`` unchanged.
        When the key is present, a "Present, " marker is also written to
        stdout without a trailing newline.
    """
    if key not in dic:
        return key
    print("Present, ", end=" ")
    return dic[key]
# Measure identifiers, in the order they are processed. Each identifier is
# substituted into input CSV filenames and output figure filenames.
med_review_type = ["smr", "smr12m", "mr", "mr12m", "allmedrv", "allmedrv12m"]

# Measure identifier -> human-readable description interpolated into the
# y-axis labels ("People who received a <description> ...").
# The "allmedrv" wording is aligned with its 12-month counterpart ("any type
# of ...") so every "<id>12m" label is the base label plus the same suffix.
med_review_dict = {
    "smr": "structured medication review",
    "mr": "medication review",
    "allmedrv": "any type of medication review",
    "smr12m": "structured medication review in preceding 12 months",
    "mr12m": "medication review in preceding 12 months",
    "allmedrv12m": "any type of medication review in preceding 12 months",
}
# Measures whose numerator column does not follow the "had_<measure>" pattern:
# the column title doesn't match the filename for the allmedrv measures.
numerator_overrides = {
    "allmedrv": "had_anymedrev",
    "allmedrv12m": "had_anymedrev12m",
}

# Breakdown columns passed to convert_binary, with the two label strings
# handed to it (in the order they are passed).
binary_labels = {
    "care_home_type": (
        'Record of positive care home status',
        'No record of positive care home status',
    ),
    "learning_disability": (
        'Record of learning disability',
        'No record of learning disability',
    ),
    "nhome": (
        'Record of individual living at a nursing home',
        'No record of individual living at a nursing home',
    ),
}

# For every medication review measure, draw the whole-population charts, one
# pair of charts per breakdown, and the practice-level deciles charts.
for med_review in med_review_type:
    numerator_col = numerator_overrides.get(med_review, f'had_{med_review}')

    # Axis labels shared by the charts for this measure.
    review_name = med_review_dict[med_review]
    per_thousand_label = f"People who received a {review_name} per 1000 registered patients"
    percentage_label = f"Percentage of people who received a {review_name}"

    # Whole-population charts.
    df = pd.read_csv(
        OUTPUT_DIR / f"redacted/redacted_measure_{med_review}_population_rate.csv",
        parse_dates=["date"],
    )
    # calculate_rate is called for its effect on df (its return value is not
    # used); the "rate" column is plotted straight afterwards. First per 1000
    # patients, then again per 100 for the percentage chart.
    calculate_rate(df, numerator_col, 'population', rate_per=1000, round_rate=False)
    plot_measures(
        df,
        filename=f"{med_review}_population_rate_perthousand",
        title="",
        column_to_plot="rate",
        y_label=per_thousand_label,
    )
    calculate_rate(df, numerator_col, 'population', rate_per=100, round_rate=False)
    plot_measures(
        df,
        filename=f"{med_review}_population_rate_percentage",
        title="",
        column_to_plot="rate",
        y_label="Percentage",
    )

    # Charts for each breakdown.
    for breakdownby in breakdowns:
        df = pd.read_csv(
            OUTPUT_DIR / f"redacted/redacted_measure_{med_review}_{breakdownby}_rate.csv",
            parse_dates=["date"],
        )
        # The category column may be named differently from the breakdown.
        breakdownbycol = checkColumnDict(columnlookupdict, breakdownby)
        df[breakdownbycol] = df[breakdownbycol].fillna('missing')

        # Breakdown-specific relabelling before plotting.
        if breakdownby == "care_home_type":
            df = binary_care_home_status(df, numerator_col, 'population')
        if breakdownby in binary_labels:
            first_label, second_label = binary_labels[breakdownby]
            convert_binary(df, breakdownby, first_label, second_label)
        elif breakdownby == "sex":
            df = relabel_sex(df)

        # Rate per 1000 patients, then percentage, as for the population.
        calculate_rate(df, numerator_col, 'population', rate_per=1000, round_rate=False)
        plot_measures(
            df,
            filename=f"{med_review}_{breakdownby}_rate_perthousand",
            title="",
            column_to_plot="rate",
            y_label=per_thousand_label,
            category=breakdownbycol,
        )
        calculate_rate(df, numerator_col, 'population', rate_per=100, round_rate=False)
        plot_measures(
            df,
            filename=f"{med_review}_{breakdownby}_rate_percentage",
            title="",
            column_to_plot="rate",
            y_label=percentage_label,
            category=breakdownbycol,
        )

    # Deciles charts: scale the "value" column to per-1000 and to percent.
    df = pd.read_csv(
        OUTPUT_DIR / f"joined/deciles_table_{med_review}_practice_rate.csv",
        parse_dates=["date"],
    )
    df['rateperthousand'] = df['value'] * 1000
    plot_measures(
        df,
        filename=f"deciles_chart_{med_review}_practice_rate_perthousand",
        title="",
        column_to_plot="rateperthousand",
        y_label=per_thousand_label,
        category="percentile",
        deciles=True,
    )
    df['percentage'] = df['value'] * 100
    plot_measures(
        df,
        filename=f"deciles_chart_{med_review}_practice_rate_percentage",
        title="",
        column_to_plot="percentage",
        y_label=percentage_label,
        category="percentile",
        deciles=True,
    )