Skip to content

Commit

Permalink
Rewrote `.loc[:, col]` assignments as direct column references (`df[col] = ...`), speeding up by 100x
Browse files Browse the repository at this point in the history
  • Loading branch information
dorisjlee committed Nov 11, 2020
1 parent 3ec1193 commit b6a7dd6
Showing 1 changed file with 11 additions and 19 deletions.
30 changes: 11 additions & 19 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,15 +332,11 @@ def apply_filter(
def execute_2D_binning(vis: Vis):
pd.reset_option("mode.chained_assignment")
with pd.option_context("mode.chained_assignment", None):
x_attr = vis.get_attr_by_channel("x")[0]
y_attr = vis.get_attr_by_channel("y")[0]
x_attr = vis.get_attr_by_channel("x")[0].attribute
y_attr = vis.get_attr_by_channel("y")[0].attribute

vis._vis_data.loc[:, "xBin"] = pd.cut(
vis._vis_data[x_attr.attribute], bins=40
)
vis._vis_data.loc[:, "yBin"] = pd.cut(
vis._vis_data[y_attr.attribute], bins=40
)
vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr], bins=40)
vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr], bins=40)

color_attr = vis.get_attr_by_channel("color")
if len(color_attr) > 0:
Expand All @@ -361,23 +357,19 @@ def execute_2D_binning(vis: Vis):
).reset_index()
result = result.dropna()
else:
groups = vis._vis_data.groupby(["xBin", "yBin"])[x_attr.attribute]
groups = vis._vis_data.groupby(["xBin", "yBin"])[x_attr]
result = groups.agg("count").reset_index(
name=x_attr.attribute
name=x_attr
) # .agg in this line throws SettingWithCopyWarning
result = result.rename(columns={x_attr.attribute: "count"})
result = result.rename(columns={x_attr: "count"})
result = result[result["count"] != 0]

# convert type to facilitate weighted correlation interestingness calculation
result.loc[:, "xBinStart"] = (
result["xBin"].apply(lambda x: x.left).astype("float")
)
result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right)
result["xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float")
result["xBinEnd"] = result["xBin"].apply(lambda x: x.right)

result.loc[:, "yBinStart"] = (
result["yBin"].apply(lambda x: x.left).astype("float")
)
result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right)
result["yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float")
result["yBinEnd"] = result["yBin"].apply(lambda x: x.right)

vis._vis_data = result.drop(columns=["xBin", "yBin"])

Expand Down

0 comments on commit b6a7dd6

Please sign in to comment.