Skip to content

Commit

Permalink
feat(python): add gantt chart plot to LazyFrame::profile (#5063)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 1, 2022
1 parent bbc3e17 commit 66c98b9
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,8 @@ def profile(
no_optimization: bool = False,
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
show_plot: bool = False,
truncate_nodes: int = 40,
) -> tuple[pli.DataFrame, pli.DataFrame]:
"""
Profile a LazyFrame.
Expand All @@ -773,6 +775,11 @@ def profile(
Slice pushdown optimization.
common_subplan_elimination
Will try to cache branching subplans that occur on self-joins or unions.
show_plot
Show a Gantt chart of the profiling result.
truncate_nodes
Truncate the node labels in the gantt chart to this number of
characters; "..." is appended to each label after slicing. A value of 0
or less disables truncation.
Returns
-------
Expand All @@ -792,7 +799,27 @@ def profile(
common_subplan_elimination,
)
df, timings = ldf.profile()
return pli.wrap_df(df), pli.wrap_df(timings)
(df, timings) = pli.wrap_df(df), pli.wrap_df(timings)

if show_plot:
try:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, figsize=(18, 8))
df_ = timings.reverse()
if truncate_nodes > 0:
df_ = df_.with_column(
pli.col("node").str.slice(0, truncate_nodes) + "..."
)
ax.barh(df_["node"], width=df_["end"] - df_["start"], left=df_["start"])
plt.show()

except ImportError:
raise ImportError(
"matplotlib should be installed to show profiling plot."
) from None

return df, timings

def collect(
self,
Expand Down

0 comments on commit 66c98b9

Please sign in to comment.