Skip to content

Commit

Permalink
FEAT-quantopian#23: Add filter relevant history functionality (quanto…
Browse files Browse the repository at this point in the history
…pian#33)

Signed-off-by: Richard Lin <richard.lin.047@berkeley.edu>
  • Loading branch information
richardlin047 committed Mar 30, 2021
1 parent b70fd9e commit 8506ca7
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 15 deletions.
6 changes: 6 additions & 0 deletions js/src/spreadsheet.widget.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ class ModinSpreadsheetView extends widgets.DOMWidgetView {
text: 'Clear History'
});

append_btn({
loading_text: 'Filtering...',
event_type: 'filter_history',
text: 'Filter History'
});

append_btn({
loading_text: 'Resetting...',
text: 'Reset Filters',
Expand Down
20 changes: 20 additions & 0 deletions modin_spreadsheet/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Text placed in front of the joined history entries when the history cell
# text is built.
HISTORY_PREFIX = "# ---- spreadsheet transformation history ----\n"

# Every recorded transformation starts with one of the comment lines below,
# followed by a newline and the code for that transformation. History
# filtering classifies entries with ``str.startswith`` on these values, so
# each one must stay an unambiguous prefix.

# Row additions and removals
ADD_ROW = "# Add row"
REMOVE_ROWS = "# Remove rows"

# Cell edits
EDIT_CELL = "# Edit cell"

# Sorting
SORT_COLUMN = "# Sort column"
SORT_MIXED_TYPE_COLUMN = "# Sort mixed type column"
SORT_INDEX = "# Sort index"
RESET_SORT = "# Reset sort"

# Filtering
FILTER_COLUMNS = "# Filter columns"
RESET_FILTER = "# Reset filter"
RESET_ALL_FILTERS = "# Reset all filters"
75 changes: 62 additions & 13 deletions modin_spreadsheet/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from itertools import chain
from uuid import uuid4
from six import string_types
from . import constants

# versions of pandas prior to version 0.20.0 don't support the orient='table'
# when calling the 'to_json' function on DataFrames. to get around this we
Expand Down Expand Up @@ -145,7 +146,6 @@ def get_events(self):

defaults = _DefaultSettings()
handlers = _EventHandlers()
HISTORY_PREFIX = "# ---- spreadsheet transformation history ----\n"


def set_defaults(
Expand Down Expand Up @@ -1125,7 +1125,7 @@ def _update_sort(self):
self._df.sort_index(ascending=self._sort_ascending, inplace=True)
# Record sort index
self._record_transformation(
f"# Sort index\n"
f"{constants.SORT_INDEX}\n"
f"df.sort_index(ascending={self._sort_ascending}, inplace=True)"
)
else:
Expand All @@ -1136,7 +1136,7 @@ def _update_sort(self):
# Record sort index
# TODO: Fix level=level_index
self._record_transformation(
f"# Sort index\n"
f"{constants.SORT_INDEX}\n"
f"df.sort_index(level=level_index, ascending={self._sort_ascending}, "
f"inplace=True)"
)
Expand All @@ -1148,7 +1148,7 @@ def _update_sort(self):
)
# Record sort column
self._record_transformation(
f"# Sort column\n"
f"{constants.SORT_COLUMN}\n"
f"df.sort_values('{self._sort_field}', ascending={self._sort_ascending}, "
f"inplace=True)"
)
Expand All @@ -1168,7 +1168,7 @@ def _update_sort(self):
helper_col = self._sort_field + self._sort_col_suffix
self._record_transformation(
(
f"#Sort mixed type column\n"
f"{constants.SORT_MIXED_TYPE_COLUMN}\n"
f"df['{helper_col}'] = df['{self._sort_field}'].map(str)\n"
f"df.sort_values('{helper_col}', ascending={self._sort_ascending}, inplace=True)\n"
f"df.drop(columns='{helper_col}', inplace=True)"
Expand Down Expand Up @@ -1496,7 +1496,7 @@ def _handle_change_filter(self, content):
# Record reset filter
# Other filters and sorts are reapplied after
self._record_transformation(
f"# Reset filter\n" f"df = unfiltered_df.copy()"
f"{constants.RESET_FILTER}\n" f"df = unfiltered_df.copy()"
)
else:
combined_condition = conditions[0]
Expand All @@ -1509,7 +1509,7 @@ def _handle_change_filter(self, content):
["(" + c + ")" for c in self._filter_conditions]
)
self._record_transformation(
f"# Filter columns\n"
f"{constants.FILTER_COLUMNS}\n"
f"df = unfiltered_df[{record_combined_condition}].copy()"
)
# Reset filter conditions
Expand Down Expand Up @@ -1557,7 +1557,7 @@ def _handle_view_msg_helper(self, content):
self._df.loc[location] = val_to_set
# Record cell edit
self._record_transformation(
f"# Edit cell\n"
f"{constants.EDIT_CELL}\n"
f"df.loc[{location}]={repr(val_to_set)}\n"
f"unfiltered_df.loc[{location}]={repr(val_to_set)}"
)
Expand Down Expand Up @@ -1686,7 +1686,7 @@ def _handle_view_msg_helper(self, content):
self._resetting_filters = False
# Record reset filters before updating sort
self._record_transformation(
("# Reset all filters\n" "df = unfiltered_df.copy()")
(f"{constants.RESET_ALL_FILTERS}\n" "df = unfiltered_df.copy()")
)
self._update_sort()
self._update_table(triggered_by="reset_filters")
Expand All @@ -1703,6 +1703,9 @@ def _handle_view_msg_helper(self, content):
self._update_history_cell()
elif content["type"] == "clear_history":
self.clear_history(from_api=False)
elif content["type"] == "filter_history":
self._filter_relevant_history(persist=True)
self._notify_listeners({"name": "history_filtered", "source": "gui"})

def _notify_listeners(self, event):
# notify listeners at the module level
Expand Down Expand Up @@ -1800,7 +1803,7 @@ def _duplicate_last_row(self):
)
# Record row add
self._record_transformation(
f"# Add row\n"
f"{constants.ADD_ROW}\n"
f"last = df.loc[max(df.index)].copy()\n"
f"df.loc[last.name+1] = last.values\n"
f"unfiltered_df.loc[last.name+1] = last.values"
Expand Down Expand Up @@ -1930,7 +1933,7 @@ def _remove_rows(self, rows=None):
self._update_table(triggered_by="remove_row")
# Record remove rows
self._record_transformation(
f"# Remove rows\n"
f"{constants.REMOVE_ROWS}\n"
f"df.drop({selected_names}, inplace=True)\n"
f"unfiltered_df.drop({selected_names}, inplace=True)"
)
Expand Down Expand Up @@ -2015,7 +2018,7 @@ def _update_history_cell(self):
if not self.show_history:
return
cleaned_history = "\n".join(self._history)
cell_text = HISTORY_PREFIX + cleaned_history
cell_text = constants.HISTORY_PREFIX + cleaned_history
self.send(
{
"type": "update_history",
Expand Down Expand Up @@ -2058,10 +2061,56 @@ def reset_sort(self, from_api=True):
# Record sort index
# After update_table to prevent resetting in progress button prematurely
self._record_transformation(
f"# Reset sort\n" f"df.sort_index(ascending=True, inplace=True)"
f"{constants.RESET_SORT}\n" f"df.sort_index(ascending=True, inplace=True)"
)
source = "api" if from_api else "gui"
self._notify_listeners({"name": "sort_reset", "source": source})

def reset_filters(self):
    """Send a ``reset_filters`` message through ``self.send``."""
    message = {"type": "reset_filters"}
    self.send(message)

def filter_relevant_history(self, persist=True):
    """Filter the transformation history and announce it to listeners.

    Delegates the filtering to ``_filter_relevant_history`` and then
    notifies listeners with a ``history_filtered`` event whose source is
    ``"api"``.

    Parameters
    ----------
    persist : bool, default True
        Forwarded unchanged to ``_filter_relevant_history``.

    Returns
    -------
    The value returned by ``_filter_relevant_history``.
    """
    filtered = self._filter_relevant_history(persist)
    event = {"name": "history_filtered", "source": "api"}
    self._notify_listeners(event)
    return filtered

def _filter_relevant_history(self, persist):
    """Drop history entries that later entries have made redundant.

    The history is scanned from newest to oldest. Only the most recent
    filter entry and the most recent sort entry are kept, and only when no
    reset of the same kind was recorded after them; the reset entries
    themselves are dropped. Every other entry (cell edits, row additions
    and removals, initialization, ...) is kept. Kept entries stay in their
    original relative order.

    Parameters
    ----------
    persist : bool
        When true, replace ``self._history`` with the filtered entries and
        refresh the history cell.

    Returns
    -------
    list of str
        The filtered history, oldest entry first.
    """
    # ``str.startswith`` accepts a tuple, so each category is a single check.
    sort_prefixes = (
        constants.SORT_COLUMN,
        constants.SORT_INDEX,
        constants.SORT_MIXED_TYPE_COLUMN,
    )
    reset_filter_prefixes = (
        constants.RESET_FILTER,
        constants.RESET_ALL_FILTERS,
    )

    history = self.get_history()
    # Collected newest-first with O(1) appends and reversed once at the end,
    # instead of paying O(n) for an insert at index 0 on every kept entry.
    kept_newest_first = []
    # Whether a filter or sort can still be added
    add_filter = True
    add_sort = True
    for cmd in reversed(history):
        if cmd.startswith(constants.FILTER_COLUMNS):
            # Only the latest filter after the last filter reset matters
            if add_filter:
                kept_newest_first.append(cmd)
                add_filter = False
        elif cmd.startswith(sort_prefixes):
            # Only the latest sort after the last sort reset matters
            if add_sort:
                kept_newest_first.append(cmd)
                add_sort = False
        elif cmd.startswith(reset_filter_prefixes):
            # Any older filter is undone by this reset
            add_filter = False
        elif cmd.startswith(constants.RESET_SORT):
            # Any older sort is undone by this reset
            add_sort = False
        else:
            # Include edit cell, remove/add row, initialization, etc.
            kept_newest_first.append(cmd)

    relevant_history = kept_newest_first[::-1]
    # Change internal state if persisting
    if persist:
        # Store a separate list so that callers mutating the returned list
        # cannot silently alter the widget's own history.
        self._history = list(relevant_history)
        self._update_history_cell()
    return relevant_history
111 changes: 109 additions & 2 deletions modin_spreadsheet/tests/test_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,13 +850,13 @@ def test_get_history():
{"type": "change_sort", "sort_field": "A", "sort_ascending": True}
)
last_history = spreadsheet.get_history()[-1]
expected_history = "#Sort mixed type column\ndf['A_modin_spreadsheet_sort_column'] = df['A'].map(str)\ndf.sort_values('A_modin_spreadsheet_sort_column', ascending=True, inplace=True)\ndf.drop(columns='A_modin_spreadsheet_sort_column', inplace=True)"
expected_history = "# Sort mixed type column\ndf['A_modin_spreadsheet_sort_column'] = df['A'].map(str)\ndf.sort_values('A_modin_spreadsheet_sort_column', ascending=True, inplace=True)\ndf.drop(columns='A_modin_spreadsheet_sort_column', inplace=True)"
assert last_history == expected_history

# Reset filters
spreadsheet._handle_view_msg_helper({"type": "reset_filters_end"})
last_history = spreadsheet.get_history()[-1]
expected_history = "#Sort mixed type column\ndf['A_modin_spreadsheet_sort_column'] = df['A'].map(str)\ndf.sort_values('A_modin_spreadsheet_sort_column', ascending=True, inplace=True)\ndf.drop(columns='A_modin_spreadsheet_sort_column', inplace=True)"
expected_history = "# Sort mixed type column\ndf['A_modin_spreadsheet_sort_column'] = df['A'].map(str)\ndf.sort_values('A_modin_spreadsheet_sort_column', ascending=True, inplace=True)\ndf.drop(columns='A_modin_spreadsheet_sort_column', inplace=True)"
assert last_history == expected_history

# Reset sort
Expand Down Expand Up @@ -948,3 +948,110 @@ def test_apply_history():

# Checks that the history is applied properly
assert changed_df.equals(applied_df)


def test_filter_relevant_history():
    """Check ``filter_relevant_history`` against hand-written histories."""
    widget = SpreadsheetWidget(df=create_df())

    def pruned(history):
        # Install ``history`` on the widget and filter it with the default
        # arguments.
        widget._history = history
        return widget.filter_relevant_history()

    # Entries that appear in more than one scenario
    sort_trip_asc = (
        "# Sort column\ndf.sort_values('trip_id', ascending=True, inplace=True)"
    )
    sort_vendor_asc = (
        "# Sort column\ndf.sort_values('vendor_id', ascending=True, inplace=True)"
    )
    sort_vendor_desc = (
        "# Sort column\ndf.sort_values('vendor_id', ascending=False, inplace=True)"
    )
    sort_index = "# Sort index\ndf.sort_index(ascending=True, inplace=True)"
    reset_sort = "# Reset sort\ndf.sort_index(ascending=True, inplace=True)"
    reset_all_filters = "# Reset all filters\ndf = unfiltered_df.copy()"
    edit_cell = "# Edit cell\ndf.loc[(4, 'trip_id')]=10\nunfiltered_df.loc[(4, 'trip_id')]=10"
    remove_rows = "# Remove rows\ndf.drop([6], inplace=True)\nunfiltered_df.drop([6], inplace=True)"
    add_row = "# Add row\nlast = df.loc[max(df.index)].copy()\ndf.loc[last.name+1] = last.values\nunfiltered_df.loc[last.name+1] = last.values"

    # Initial history
    assert widget.filter_relevant_history() == ["unfiltered_df = df.copy()"]

    # Empty history
    assert pruned([]) == []

    # Multiple filters: only the last one is kept
    widest_filter = "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 5495)&(unfiltered_df['trip_id'] <= 48802)&(unfiltered_df['vendor_id'] >= 2)].copy()"
    assert pruned(
        [
            "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] <= 48802)].copy()",
            "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 5495)&(unfiltered_df['trip_id'] <= 48802)].copy()",
            widest_filter,
        ]
    ) == [widest_filter]

    # Multiple sorts: only the last one is kept
    assert pruned([sort_trip_asc, sort_vendor_asc, sort_vendor_desc]) == [
        sort_vendor_desc
    ]

    # Filter after reset
    filter_after_reset = "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] <= 41873)].copy()"
    assert pruned(
        [
            "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 6485)].copy()",
            "# Reset filter\ndf = unfiltered_df.copy()",
            filter_after_reset,
        ]
    ) == [filter_after_reset]

    # Sort after reset
    assert pruned([sort_trip_asc, reset_sort, sort_vendor_asc]) == [sort_vendor_asc]

    # None after reset
    assert (
        pruned(
            [
                sort_vendor_asc,
                reset_sort,
                "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 6980)].copy()",
                reset_all_filters,
            ]
        )
        == []
    )

    # Mixed transformation
    mixed_history = [
        edit_cell,
        sort_trip_asc,
        "# Sort column\ndf.sort_values('rate_code_id', ascending=True, inplace=True)",
        reset_sort,
        sort_index,
        remove_rows,
        add_row,
        reset_all_filters,
        sort_index,
    ]
    expected_filtered_history = [edit_cell, remove_rows, add_row, sort_index]
    assert pruned(mixed_history) == expected_filtered_history

    # Persist filter history
    widget._history = mixed_history
    widget.filter_relevant_history(persist=True)
    assert widget.get_history() == expected_filtered_history

    # Don't persist filter history
    widget._history = mixed_history
    not_persisted = widget.filter_relevant_history(persist=False)
    assert not_persisted == expected_filtered_history
    assert widget.get_history() == mixed_history

0 comments on commit 8506ca7

Please sign in to comment.