Skip to content

Commit

Permalink
feat(rust, python): small plan and profile chart improvements (#5067)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 1, 2022
1 parent 649648e commit 1a17950
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 52 deletions.
105 changes: 58 additions & 47 deletions polars/polars-lazy/polars-plan/src/logical_plan/format.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
use std::borrow::Cow;
use std::fmt;
use std::fmt::{Debug, Display, Formatter};
use std::path::Path;

use crate::prelude::*;

/// Writes the three-line textual description of a file scan node to `f`.
///
/// Every line is prefixed with `indent` spaces and terminated by a newline:
///
/// 1. `<name> SCAN <path>` (the path is rendered lossily),
/// 2. `PROJECT <n_columns>/<total_columns> COLUMNS`, where the first number is
///    replaced by `*` whenever `n_columns` is not strictly positive,
/// 3. `SELECTION: <predicate>` using the `Debug` rendering of the option.
///
/// Returns the first formatting error encountered, if any.
fn write_scan(
    f: &mut fmt::Formatter,
    name: &str,
    path: &Path,
    indent: usize,
    n_columns: i64,
    total_columns: usize,
    predicate: &Option<Expr>,
) -> fmt::Result {
    // An empty string padded to `indent` columns yields the leading whitespace.
    let pad = "";
    let location = path.to_string_lossy();
    writeln!(f, "{pad:indent$}{name} SCAN {location}")?;

    match n_columns {
        // A strictly positive count is shown as an explicit projection.
        count if count > 0 => {
            writeln!(f, "{pad:indent$}PROJECT {count}/{total_columns} COLUMNS")?
        }
        // Zero or negative values are rendered as "all columns".
        _ => writeln!(f, "{pad:indent$}PROJECT */{total_columns} COLUMNS")?,
    }

    writeln!(f, "{pad:indent$}SELECTION: {predicate:?}")
}

impl LogicalPlan {
fn _format(&self, f: &mut fmt::Formatter, mut indent: usize) -> fmt::Result {
indent += 2;
Expand All @@ -17,15 +40,19 @@ impl LogicalPlan {
options,
..
} => {
let total_columns = schema.len();
let mut n_columns = "*".to_string();
if let Some(columns) = &options.with_columns {
n_columns = format!("{}", columns.len());
}
writeln!(
let n_columns = options
.with_columns
.as_ref()
.map(|columns| columns.len() as i64)
.unwrap_or(-1);
write_scan(
f,
"{:indent$}{}; PROJECT {}/{} COLUMNS; SELECTION: {:?}",
"", options.fmt_str, n_columns, total_columns, predicate
options.fmt_str,
Path::new(""),
indent,
n_columns,
schema.len(),
predicate,
)
}
Union { inputs, .. } => {
Expand All @@ -48,19 +75,19 @@ impl LogicalPlan {
options,
..
} => {
let total_columns = schema.len();
let mut n_columns = "*".to_string();
if let Some(columns) = &options.with_columns {
n_columns = format!("{}", columns.len());
}
writeln!(
let n_columns = options
.with_columns
.as_ref()
.map(|columns| columns.len() as i64)
.unwrap_or(-1);
write_scan(
f,
"{:indent$}PARQUET SCAN {}; PROJECT {}/{} COLUMNS; SELECTION: {:?}",
"",
path.to_string_lossy(),
"PARQUET",
path,
indent,
n_columns,
total_columns,
predicate
schema.len(),
predicate,
)
}
#[cfg(feature = "ipc")]
Expand All @@ -71,20 +98,12 @@ impl LogicalPlan {
predicate,
..
} => {
let total_columns = schema.len();
let mut n_columns = "*".to_string();
if let Some(columns) = &options.with_columns {
n_columns = format!("{}", columns.len());
}
writeln!(
f,
"{:indent$}IPC SCAN {}; PROJECT {}/{} COLUMNS; SELECTION: {:?}",
"",
path.to_string_lossy(),
n_columns,
total_columns,
predicate
)
let n_columns = options
.with_columns
.as_ref()
.map(|columns| columns.len() as i64)
.unwrap_or(-1);
write_scan(f, "IPC", path, indent, n_columns, schema.len(), predicate)
}
Selection { predicate, input } => {
writeln!(f, "{:indent$}FILTER {:?} FROM", "", predicate)?;
Expand All @@ -102,20 +121,12 @@ impl LogicalPlan {
predicate,
..
} => {
let total_columns = schema.len();
let mut n_columns = "*".to_string();
if let Some(columns) = &options.with_columns {
n_columns = format!("{}", columns.len());
}
writeln!(
f,
"{:indent$}CSV SCAN {}; PROJECT {}/{} COLUMNS; SELECTION: {:?}",
"",
path.to_string_lossy(),
n_columns,
total_columns,
predicate
)
let n_columns = options
.with_columns
.as_ref()
.map(|columns| columns.len() as i64)
.unwrap_or(-1);
write_scan(f, "CSV", path, indent, n_columns, schema.len(), predicate)
}
DataFrameScan {
schema,
Expand Down
36 changes: 31 additions & 5 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,8 @@ def profile(
slice_pushdown: bool = True,
common_subplan_elimination: bool = True,
show_plot: bool = False,
truncate_nodes: int = 40,
truncate_nodes: int = 0,
figsize: tuple[int, int] = (18, 8),
) -> tuple[pli.DataFrame, pli.DataFrame]:
"""
Profile a LazyFrame.
Expand Down Expand Up @@ -780,6 +781,8 @@ def profile(
truncate_nodes
Truncate the label lengths in the gantt chart to this number of
characters.
figsize
matplotlib figsize of the profiling plot
Returns
-------
Expand All @@ -805,13 +808,36 @@ def profile(
try:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, figsize=(18, 8))
df_ = timings.reverse()
fig, ax = plt.subplots(1, figsize=figsize)

max_val = timings["end"][-1]
timings_ = timings.reverse()

if max_val > 1e9:
unit = "s"
timings_ = timings_.with_column(
pli.col(["start", "end"]) / 1_000_000
)
elif max_val > 1e6:
unit = "ms"
timings_ = timings_.with_column(pli.col(["start", "end"]) / 1000)
else:
unit = "us"
if truncate_nodes > 0:
df_ = df_.with_column(
timings_ = timings_.with_column(
pli.col("node").str.slice(0, truncate_nodes) + "..."
)
ax.barh(df_["node"], width=df_["end"] - df_["start"], left=df_["start"])

max_in_unit = timings_["end"][0]
ax.barh(
timings_["node"],
width=timings_["end"] - timings_["start"],
left=timings_["start"],
)

plt.title("Profiling result")
ax.set_xlabel(f"node duration in [{unit}], total {max_in_unit}{unit}")
ax.set_ylabel("nodes")
plt.show()

except ImportError:
Expand Down

0 comments on commit 1a17950

Please sign in to comment.