Skip to content

Commit

Permalink
fix(eda.plot_missing): new label texts and color mapping
Browse files Browse the repository at this point in the history
ci

aligned labels

fix
  • Loading branch information
eutialia authored and Brandon Lockhart committed Oct 19, 2020
1 parent cd675f3 commit 71a95f9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
2 changes: 1 addition & 1 deletion dataprep/eda/missing/compute/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
is_dtype,
)

LABELS = ["With Missing Remaining", "With Missing Dropped"]
# Display labels for the two data series compared in the missing-value plots:
# the data as given, and the data after missing values are dropped. The render
# module pairs these labels with palette colors to build the shared legend, so
# the text here is user-facing.
LABELS = ["Original data", "After drop missing values"]


def uni_histogram(
Expand Down
19 changes: 8 additions & 11 deletions dataprep/eda/missing/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This module implements the plot_missing(df, x, y) function's
visualization part.
"""
from typing import Any, Dict, List, Optional, Sequence, Tuple, Set
from typing import Any, Dict, List, Optional, Sequence, Tuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -193,6 +193,10 @@ def render_hist( # pylint: disable=too-many-arguments

relocate_legend(fig, "right")
else:
shown, total = meta["partial"]
if shown != total:
fig.xaxis.axis_label = f"Top {shown} out of {total}"
fig.xaxis.axis_label_standoff = 0
fig.vbar(
x="x",
width=radius,
Expand Down Expand Up @@ -604,25 +608,18 @@ def render_missing_impact_1vn(
dfs = itmdt["data"]
x = itmdt["x"]
meta = itmdt["meta"]
legend_set: Set[str] = set()
panels = []
for col, df in dfs.items():
fig = render_hist(df, col, meta[col], plot_width, plot_height, False)
shown, total = meta[col]["partial"]
fig.frame_height = plot_height
if shown != total:
fig.title = Title(text=f"Missing impact of {x} by ({shown} out of {total}) {col}")
else:
fig.title = Title(text=f"Missing impact of {x} by {col}")
fig.title = Title(text=f"Missing impact of {x} by {col}")
panels.append(Panel(child=fig, title=col))
legend_set = legend_set.union(set(df["label"].drop_duplicates().to_list()))
legend_labels = list(legend_set)
legend_colors = [CATEGORY10[count] for count in range(len(legend_labels))]
legend_colors = [CATEGORY10[count] for count in range(len(LABELS))]
return {
"layout": [panel.child for panel in panels],
"fig_width": plot_width,
"legend_labels": [
{"label": label, "color": color} for label, color in zip(legend_labels, legend_colors)
{"label": label, "color": color} for label, color in zip(LABELS, legend_colors)
],
}

Expand Down

0 comments on commit 71a95f9

Please sign in to comment.