Skip to content

Commit

Permalink
Make it easier to check the top 5 codes over time output
Browse files Browse the repository at this point in the history
1. Include the code label and the total count of codes in the top codes over
time CSV
2. Round code proportion to 2 decimals
3. Switch order of code proportion table creation and plotting, so that
we first make the table, and then use that table to generate the figure.
This will also make it possible to re-generate the plot locally in
Python from the underlying data.
  • Loading branch information
ccunningham101 committed Sep 19, 2023
1 parent 57fc927 commit 0fcc6b2
Showing 1 changed file with 34 additions and 17 deletions.
51 changes: 34 additions & 17 deletions analysis/report/top_5_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,25 +227,23 @@ def get_proportion_of_events(df, code, date):
if total_events == 0:
row["proportion"] = 0
else:
row["proportion"] = (code_events / total_events) * 100
row["proportion"] = round((code_events / total_events) * 100, 2)
row["total"] = total_events
row["count"] = code_events

return row


def plot_top_codes_over_time(
code_df, top_codes, measure, output_dir, frequency, xtick_frequency=1
):
def make_top_codes_over_time_dict(code_df, top_codes, measure, output_dir):
"""
Plots the top 5 codes over time for each measure.
Creates and saves df with proportion of each of the top codes
Args:
code_df: A codelist table.
top_codes: A dictionary of the top codes for each measure with corresponding descriptions.
measure: The measure ID.
output_dir: The directory to save the plot to.
output_dir: The directory to save the csv to.
"""

# Create a new dataframe with the proportion of events for each code on
# each date.
code_proportions = pd.DataFrame()
Expand All @@ -261,7 +259,27 @@ def plot_top_codes_over_time(
),
]
)
code_proportions["name"] = code_proportions["code"].map(top_codes)

# save the underlying data for the plot
code_proportions.to_csv(
output_dir / f"{measure}_top_5_codes_over_time.csv", index=False
)
return code_proportions


def plot_top_codes_over_time(
code_proportions, measure, output_dir, frequency, xtick_frequency=1
):
"""
Plots the top 5 codes over time for each measure.
Args:
code_proportions: a table of top code proportions.
measure: The measure ID.
output_dir: The directory to save the plot to.
frequency: month or week
xtick_frequency: interval to display x-axis tick label
"""
plt.figure(figsize=(10, 6))
# seaborn styling
sns.set_style("whitegrid")
Expand All @@ -274,10 +292,9 @@ def plot_top_codes_over_time(
code_proportions["date"] = pd.to_datetime(code_proportions["date"])
# Plot the proportion of events for each code on each date.
# Plots should be on the same graph.
for code, description in top_codes.items():
code_proportions.loc[code_proportions["code"] == code, :].plot(
x="date", y="count", label=description, ax=ax
)
groups = code_proportions.groupby("code")
for code, data in groups:
data.plot(x="date", y="count", label=data.iloc[0]["name"], ax=ax)

if frequency == "month":
xticks = pd.date_range(
Expand Down Expand Up @@ -311,11 +328,6 @@ def plot_top_codes_over_time(
bbox_inches="tight",
)

# save the underlying data for the plot
code_proportions.to_csv(
output_dir / f"{measure}_top_5_codes_over_time.csv", index=False
)


def main():
args = parse_args()
Expand Down Expand Up @@ -407,11 +419,16 @@ def main():
)

# plot the top codes over time
plot_top_codes_over_time(
code_proportions = make_top_codes_over_time_dict(
code_df=code_df,
top_codes=top_codes_dict,
measure=measure,
output_dir=output_dir,
)
plot_top_codes_over_time(
code_proportions=code_proportions,
measure=measure,
output_dir=output_dir,
frequency=frequency,
xtick_frequency=xtick_frequency,
)
Expand Down

0 comments on commit 0fcc6b2

Please sign in to comment.