Skip to content

Commit

Permalink
Merge pull request #85 from opensafely/update-local-actions
Browse files Browse the repository at this point in the history
Update local actions
  • Loading branch information
LFISHER7 committed Apr 15, 2024
2 parents 233a96c + 6f7b7d8 commit 6a1e818
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 185 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ model.log
__pycache__
.python-version
/output/*
released_outputs/*
metadata/*
venv/
.DS_Store
Expand Down
17 changes: 14 additions & 3 deletions analysis/report/panel_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def get_group_chart(
rows = rows + 1

# NOTE: constrained_layout=True available in matplotlib>=3.5
figure = plt.figure(figsize=(12 + 6 * (columns - 1), 4.8 * rows))
figure = plt.figure(figsize=(12 + 6 * (columns - 1), 4.8 * rows), dpi=300)

lgds = []
for index, panel in enumerate(groups):
Expand All @@ -395,6 +395,16 @@ def get_group_chart(
repeated,
autolabel=True,
)

if title =="Sore Throat Tonsillitis":
title = "Sore Throat/Tonsillitis"
elif title == "Invasive Strep A":
title = "iGAS"
elif title == "Age Band":
title = "Age Band (years)"
elif title == "Imd":
title = "Index of Multiple Deprivation (IMD) quintile"

ax.set_title(title)
# Filter out group, but ignore case
if exclude_group:
Expand Down Expand Up @@ -424,6 +434,7 @@ def get_group_chart(
lgd_params,
hide_legend,
)

# Save the season table only if there is more than one group
more_than_one_group = panel_group_data.group.nunique() > 1
if (produce_season_table and more_than_one_group) or mark_seasons:
Expand All @@ -432,7 +443,7 @@ def get_group_chart(
"group",
column_to_plot,
output_dir,
panel_group_data.iloc[0]["name"],
panel_group_data.iloc[0]["name"].replace("/", "_"),
)
if mark_seasons:
annotate_seasons(season_table, column_to_plot, ax)
Expand Down Expand Up @@ -677,7 +688,7 @@ def main():
produce_season_table=produce_season_table,
mark_seasons=mark_seasons,
)
write_group_chart(chart, lgds, output_dir / output_name, plot_title)
write_group_chart(chart, lgds, output_dir / output_name)
chart.close()


Expand Down
56 changes: 56 additions & 0 deletions analysis/report/pcnt_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import argparse
import pandas
import fnmatch
import re
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

from report_utils import ci_95_proportion, ci_to_str

Expand Down Expand Up @@ -38,6 +41,48 @@ def subset_table(measure_table, measures_pattern, date):

return table_subset

def plot_pcnt_over_time(measure_table, output_dir, type):
    """
    Plot the percentage over time for each measure and save it as a JPEG.

    One line is drawn per distinct ``name`` in ``measure_table``. The chart is
    written to ``output_dir / f"pcnt_over_time_{type}.jpg"`` at 300 dpi.

    Args:
        measure_table: DataFrame with ``date``, ``value`` and ``name``
            columns. ``value`` is multiplied by 100 before plotting
            (presumably it holds a proportion -- confirm against the
            caller). The frame passed in is left unmodified.
        output_dir: Directory the image is written to; it is joined to the
            file name with ``/``.
        type: Event type used in the chart title and the output file name.
            For ``"clinical"`` and ``"medication"`` the legend label is the
            title-cased text captured from names shaped like
            ``event_<x>_with_<type>_any_pcnt``; for any other value, or a
            non-matching name, the raw name is used. The parameter name
            shadows the builtin but is kept so existing callers still work.

    Returns:
        None. When no row has a non-null ``value`` a warning is issued and
        nothing is plotted or saved.
    """
    import warnings

    # Work on a copy: callers hand in a filtered slice of the full measures
    # table, and assigning into it in place would mutate their data (and
    # trigger pandas' SettingWithCopyWarning).
    plot_data = measure_table.copy()
    plot_data["date"] = pandas.to_datetime(plot_data["date"])
    plot_data["value"] = plot_data["value"] * 100

    # Trim the x-range to the span of dates that actually carry a value.
    valid_dates = plot_data.dropna(subset=["value"])["date"]
    if valid_dates.empty:
        # Without any value the date bounds would be NaT and the axis limits
        # below could not be set, so skip the chart instead of failing.
        warnings.warn(
            f"No non-null values to plot for {type} events; chart not written",
            stacklevel=2,
        )
        return

    start_date = valid_dates.min()
    end_date = valid_dates.max()
    in_range = (plot_data["date"] >= start_date) & (plot_data["date"] <= end_date)
    plot_data = plot_data[in_range]

    # Only the two known event types get a shortened legend label.
    label_pattern = None
    if type in ("clinical", "medication"):
        label_pattern = re.compile(rf"event_(\w+)_with_{type}_any_pcnt")

    plt.figure(figsize=(10, 6))
    try:
        for name, group in plot_data.groupby("name"):
            legend_label = name
            if label_pattern is not None:
                match = label_pattern.search(name)
                if match:
                    legend_label = match.group(1).title()

            plt.plot(group["date"], group["value"], label=legend_label)

        plt.xlabel("Date")
        plt.ylabel("Percentage")
        plt.title(f"Percentage with {type} event")
        # Legend sits outside the axes, to the right of the plot area.
        plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        plt.xticks(rotation=90)
        plt.grid(True)
        plt.ylim(bottom=0)
        plt.xlim(start_date, end_date)
        # One major tick per month, labelled like "Jan-2023".
        plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%b-%Y"))
        plt.tight_layout()
        plt.savefig(output_dir / f"pcnt_over_time_{type}.jpg", dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()

def match_paths(files, pattern):
    """Return the entries of ``files`` that match the shell-style ``pattern``."""
    return [candidate for candidate in files if fnmatch.fnmatch(candidate, pattern)]
Expand Down Expand Up @@ -106,6 +151,17 @@ def main():
table = pandas.DataFrame(joined)
table.to_html(output_dir / "pcnt_with_indication.html", index=True)

filtered_table_med = measure_table[
measure_table['name'].str.contains('with_medication_any_pcnt')
]
plot_pcnt_over_time(filtered_table_med, output_dir, "medication")

filtered_table_clinical = measure_table[
measure_table['name'].str.contains('with_clinical_any_pcnt')
]
plot_pcnt_over_time(filtered_table_clinical, output_dir, "clinical")



if __name__ == "__main__":
main()
12 changes: 6 additions & 6 deletions analysis/report/report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@
"display(Markdown(\"The below charts show the count of patients prescribed any of the above listed antibiotics each month, followed by a table with the underlying counts and a chart with of the rate of prescribing with the years stacked on top of each other.\"))\n",
"display_image(\"medications_bar_measures_count.jpeg\")\n",
"display_table(\"event_counts_medication.csv\")\n",
"display_image(\"all_medications_by_year.png\")\n",
"display_image(\"all_medications_by_year.jpg\")\n",
"display(Markdown(\"The below charts show the monthly count and rate of patients with any of the listed antibiotics across the study period, with a breakdown by key demographic subgroups.\"))\n",
"display(Markdown(\"##### Count\"))\n",
"display_image(\"medication_any_by_subgroup_count.png\")\n",
"display_image(\"medication_any_by_subgroup_count.jpg\")\n",
"display(Markdown(\"##### Rate\"))\n",
"display_image(\"medication_any_by_subgroup.png\")"
"display_image(\"medication_any_by_subgroup.jpg\")"
]
},
{
Expand Down Expand Up @@ -294,12 +294,12 @@
"display(Markdown(\"The below charts show the count of patients with any of the above listed clinical events each month, followed by a table with the underlying counts and a chart with the rate of clinical events with the years stacked on top of each other.\"))\n",
"display_image(\"clinical_bar_measures_count.jpeg\")\n",
"display_table(\"event_counts_clinical.csv\")\n",
"display_image(\"any_clinical_event_by_year.png\")\n",
"display_image(\"any_clinical_event_by_year.jpg\")\n",
"display(Markdown(\"The below charts show the monthly count and rate of patients with any of the listed clinical events across the study period, with a breakdown by key demographic subgroups.\"))\n",
"display(Markdown(\"##### Count\"))\n",
"display_image(\"clinical_any_by_subgroup_count.png\")\n",
"display_image(\"clinical_any_by_subgroup_count.jpg\")\n",
"display(Markdown(\"##### Rate\"))\n",
"display_image(\"clinical_any_by_subgroup.png\")"
"display_image(\"clinical_any_by_subgroup.jpg\")"
]
},
{
Expand Down
28 changes: 14 additions & 14 deletions analysis/report/report_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,12 @@ def subset_table(measure_table, measures_pattern, measures_list):
return measure_table[measure_table["name"].isin(measures_list)]


def write_group_chart(group_chart, lgds, path, plot_title):
    """Title the current figure and save the group chart to ``path``.

    Args:
        group_chart: Object whose ``savefig`` method writes the chart.
        lgds: Iterable of legend artists to keep inside the saved image.
        path: Destination passed straight to ``savefig``.
        plot_title: Text set as the figure-level title via ``plt.suptitle``.
    """
    suptitle = plt.suptitle(plot_title)
    # Legends and the suptitle are passed as extra artists so the tight
    # bounding box is computed with them included.
    group_chart.savefig(
        path, bbox_extra_artists=tuple(lgds) + (suptitle,), bbox_inches="tight"
    )

def write_group_chart(group_chart, lgds, path, plot_title=None):
    """Save the group chart as ``<path>.jpg`` at 300 dpi with a tight bounding box.

    Args:
        group_chart: Object whose ``savefig`` method writes the chart.
        lgds: Iterable of legend artists; only used when a title is given.
        path: Output path without extension; ``.jpg`` is appended.
        plot_title: Optional figure-level title. When truthy it is set with
            ``plt.suptitle`` and, together with ``lgds``, passed to
            ``savefig`` as ``bbox_extra_artists``.
    """
    target = f"{path}.jpg"
    if not plot_title:
        # Untitled chart: no explicit extra artists are passed.
        group_chart.savefig(fname=target, dpi=300, bbox_inches="tight")
        return

    figure_title = plt.suptitle(plot_title)
    group_chart.savefig(
        fname=target,
        dpi=300,
        bbox_inches="tight",
        bbox_extra_artists=tuple(lgds) + (figure_title,),
    )

# NOTE: These paths will only work for notebook generation, which is run on /workspace
REPORT_DIR = Path.cwd().parent.parent / "output/report"
Expand Down Expand Up @@ -551,7 +551,7 @@ def display_medicine(
)
)
display_image(
f"event_code_{medicine_path}_rate_top_5_codes_over_time.png",
f"event_code_{medicine_path}_rate_top_5_codes_over_time.jpg",
dir=results_dir,
)
display(
Expand All @@ -560,9 +560,9 @@ def display_medicine(
)
)
display(Markdown("##### Count"))
display_image(f"{medicine_path}_by_subgroup_count.png", dir=results_dir)
display_image(f"{medicine_path}_by_subgroup_count.jpg", dir=results_dir)
display(Markdown("##### Rate"))
display_image(f"{medicine_path}_by_subgroup.png", dir=results_dir)
display_image(f"{medicine_path}_by_subgroup.jpg", dir=results_dir)
if time_period == "month":
display(
Markdown(
Expand All @@ -575,7 +575,7 @@ def display_medicine(
)
)
display_image(
f"{medicine_path}_with_clinical_any_by_subgroup.png",
f"{medicine_path}_with_clinical_any_by_subgroup.jpg",
dir=results_dir,
)

Expand Down Expand Up @@ -625,7 +625,7 @@ def display_clinical(
)
)
display_image(
f"event_code_{clinical_path}_rate_top_5_codes_over_time.png",
f"event_code_{clinical_path}_rate_top_5_codes_over_time.jpg",
dir=results_dir,
)
display(
Expand All @@ -635,10 +635,10 @@ def display_clinical(
)
display(Markdown("##### Count"))
display_image(
f"{clinical_path}_by_subgroup_count.png", dir=results_dir
f"{clinical_path}_by_subgroup_count.jpg", dir=results_dir
)
display(Markdown("##### Rate"))
display_image(f"{clinical_path}_by_subgroup.png", dir=results_dir)
display_image(f"{clinical_path}_by_subgroup.jpg", dir=results_dir)
if time_period == "month":
display(Markdown("##### Rate with an antibiotic of interest"))
display(
Expand All @@ -647,6 +647,6 @@ def display_clinical(
)
)
display_image(
f"{clinical_path}_with_medication_any_by_subgroup.png",
f"{clinical_path}_with_medication_any_by_subgroup.jpg",
dir=results_dir,
)
8 changes: 4 additions & 4 deletions analysis/report/report_weekly.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@
"display_table(\"event_counts_medication.csv\", dir=WEEKLY_RESULTS_DIR)\n",
"display(Markdown(\"The below charts show the weekly count and rate of patients with any of the listed antibiotics across the study period, with a breakdown by key demographic subgroups.\"))\n",
"display(Markdown(\"##### Count\"))\n",
"display_image(\"medication_any_by_subgroup_count.png\", dir=WEEKLY_RESULTS_DIR)\n",
"display_image(\"medication_any_by_subgroup_count.jpg\", dir=WEEKLY_RESULTS_DIR)\n",
"display(Markdown(\"##### Rate\"))\n",
"display_image(\"medication_any_by_subgroup.png\", dir=WEEKLY_RESULTS_DIR)"
"display_image(\"medication_any_by_subgroup.jpg\", dir=WEEKLY_RESULTS_DIR)"
]
},
{
Expand Down Expand Up @@ -283,9 +283,9 @@
"display_table(\"event_counts_clinical.csv\", dir=WEEKLY_RESULTS_DIR)\n",
"display(Markdown(\"The below charts show the weekly count and rate of patients with any of the listed clinical events across the study period, with a breakdown by key demographic subgroups.\"))\n",
"display(Markdown(\"##### Count\"))\n",
"display_image(\"clinical_any_by_subgroup_count.png\", dir=WEEKLY_RESULTS_DIR)\n",
"display_image(\"clinical_any_by_subgroup_count.jpg\", dir=WEEKLY_RESULTS_DIR)\n",
"display(Markdown(\"##### Rate\"))\n",
"display_image(\"clinical_any_by_subgroup.png\", dir=WEEKLY_RESULTS_DIR)"
"display_image(\"clinical_any_by_subgroup.jpg\", dir=WEEKLY_RESULTS_DIR)"
]
},
{
Expand Down
42 changes: 42 additions & 0 deletions analysis/report/top_5_over_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import argparse
import pandas as pd
from pathlib import Path

from top_5_report import plot_top_codes_over_time


def parse_args():
    """Parse the command-line options for the top-5-codes-over-time script.

    Returns:
        argparse.Namespace with ``input_file``, ``output_dir`` (a ``Path``),
        ``frequency`` (``"month"`` or ``"week"``) and ``xtick_frequency``
        (an int).
    """
    # Declared as data and registered in order so the help output keeps the
    # same option ordering.
    option_specs = (
        (
            "--input-file",
            {"required": True, "help": "Path to single joined measures file"},
        ),
        (
            "--output-dir",
            {"required": True, "type": Path, "help": "Path to the output directory"},
        ),
        (
            "--frequency",
            {
                "default": "month",
                "choices": ["month", "week"],
                "help": "The frequency of the data",
            },
        ),
        (
            "--xtick-frequency",
            {"help": "Display every nth xtick", "type": int, "default": 1},
        ),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()


def main():
    """Read the joined measures file and plot the top codes over time.

    The measure plotted is fixed to ``sore_throat_tonsillitis_rate``; the
    input file, output directory and tick/frequency settings come from the
    command line.
    """
    options = parse_args()
    measures = pd.read_csv(options.input_file)
    plot_top_codes_over_time(
        measures,
        "sore_throat_tonsillitis_rate",
        options.output_dir,
        options.frequency,
        options.xtick_frequency,
    )


if __name__ == "__main__":
    main()
3 changes: 2 additions & 1 deletion analysis/report/top_5_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,9 @@ def plot_top_codes_over_time(
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")

plt.savefig(
output_dir / f"{measure}_top_5_codes_over_time.png",
output_dir / f"{measure}_top_5_codes_over_time.jpg",
bbox_inches="tight",
dpi=300,
)


Expand Down

0 comments on commit 6a1e818

Please sign in to comment.