From 8506ca7ddf11661f67a766a5aa9e550da4ae6e62 Mon Sep 17 00:00:00 2001 From: Richard Lin <35508487+richardlin047@users.noreply.github.com> Date: Tue, 30 Mar 2021 10:24:03 -0400 Subject: [PATCH] FEAT-#23: Add filter relevant history functionality (#33) Signed-off-by: Richard Lin --- js/src/spreadsheet.widget.js | 6 ++ modin_spreadsheet/constants.py | 20 +++++ modin_spreadsheet/grid.py | 75 ++++++++++++++---- modin_spreadsheet/tests/test_grid.py | 111 ++++++++++++++++++++++++++- 4 files changed, 197 insertions(+), 15 deletions(-) create mode 100644 modin_spreadsheet/constants.py diff --git a/js/src/spreadsheet.widget.js b/js/src/spreadsheet.widget.js index 62f9b19a..57fe6efd 100644 --- a/js/src/spreadsheet.widget.js +++ b/js/src/spreadsheet.widget.js @@ -117,6 +117,12 @@ class ModinSpreadsheetView extends widgets.DOMWidgetView { text: 'Clear History' }); + append_btn({ + loading_text: 'Filtering...', + event_type: 'filter_history', + text: 'Filter History' + }); + append_btn({ loading_text: 'Resetting...', text: 'Reset Filters', diff --git a/modin_spreadsheet/constants.py b/modin_spreadsheet/constants.py new file mode 100644 index 00000000..2d657f9a --- /dev/null +++ b/modin_spreadsheet/constants.py @@ -0,0 +1,20 @@ +HISTORY_PREFIX = "# ---- spreadsheet transformation history ----\n" + +# Transformation History Prefixes +# Row +ADD_ROW = "# Add row" +REMOVE_ROWS = "# Remove rows" + +# Edit +EDIT_CELL = "# Edit cell" + +# Filter +FILTER_COLUMNS = "# Filter columns" +RESET_FILTER = "# Reset filter" +RESET_ALL_FILTERS = "# Reset all filters" + +# Sort +SORT_COLUMN = "# Sort column" +SORT_MIXED_TYPE_COLUMN = "# Sort mixed type column" +SORT_INDEX = "# Sort index" +RESET_SORT = "# Reset sort" diff --git a/modin_spreadsheet/grid.py b/modin_spreadsheet/grid.py index 21e6a27f..529ea97d 100644 --- a/modin_spreadsheet/grid.py +++ b/modin_spreadsheet/grid.py @@ -24,6 +24,7 @@ from itertools import chain from uuid import uuid4 from six import string_types +from . 
import constants # versions of pandas prior to version 0.20.0 don't support the orient='table' # when calling the 'to_json' function on DataFrames. to get around this we @@ -145,7 +146,6 @@ def get_events(self): defaults = _DefaultSettings() handlers = _EventHandlers() -HISTORY_PREFIX = "# ---- spreadsheet transformation history ----\n" def set_defaults( @@ -1125,7 +1125,7 @@ def _update_sort(self): self._df.sort_index(ascending=self._sort_ascending, inplace=True) # Record sort index self._record_transformation( - f"# Sort index\n" + f"{constants.SORT_INDEX}\n" f"df.sort_index(ascending={self._sort_ascending}, inplace=True)" ) else: @@ -1136,7 +1136,7 @@ def _update_sort(self): # Record sort index # TODO: Fix level=level_index self._record_transformation( - f"# Sort index\n" + f"{constants.SORT_INDEX}\n" f"df.sort_index(level=level_index, ascending={self._sort_ascending}, " f"inplace=True)" ) @@ -1148,7 +1148,7 @@ def _update_sort(self): ) # Record sort column self._record_transformation( - f"# Sort column\n" + f"{constants.SORT_COLUMN}\n" f"df.sort_values('{self._sort_field}', ascending={self._sort_ascending}, " f"inplace=True)" ) @@ -1168,7 +1168,7 @@ def _update_sort(self): helper_col = self._sort_field + self._sort_col_suffix self._record_transformation( ( - f"#Sort mixed type column\n" + f"{constants.SORT_MIXED_TYPE_COLUMN}\n" f"df['{helper_col}'] = df['{self._sort_field}'].map(str)\n" f"df.sort_values('{helper_col}', ascending={self._sort_ascending}, inplace=True)\n" f"df.drop(columns='{helper_col}', inplace=True)" @@ -1496,7 +1496,7 @@ def _handle_change_filter(self, content): # Record reset filter # Other filters and sorts are reapplied after self._record_transformation( - f"# Reset filter\n" f"df = unfiltered_df.copy()" + f"{constants.RESET_FILTER}\n" f"df = unfiltered_df.copy()" ) else: combined_condition = conditions[0] @@ -1509,7 +1509,7 @@ def _handle_change_filter(self, content): ["(" + c + ")" for c in self._filter_conditions] ) 
self._record_transformation( - f"# Filter columns\n" + f"{constants.FILTER_COLUMNS}\n" f"df = unfiltered_df[{record_combined_condition}].copy()" ) # Reset filter conditions @@ -1557,7 +1557,7 @@ def _handle_view_msg_helper(self, content): self._df.loc[location] = val_to_set # Record cell edit self._record_transformation( - f"# Edit cell\n" + f"{constants.EDIT_CELL}\n" f"df.loc[{location}]={repr(val_to_set)}\n" f"unfiltered_df.loc[{location}]={repr(val_to_set)}" ) @@ -1686,7 +1686,7 @@ def _handle_view_msg_helper(self, content): self._resetting_filters = False # Record reset filters before updating sort self._record_transformation( - ("# Reset all filters\n" "df = unfiltered_df.copy()") + (f"{constants.RESET_ALL_FILTERS}\n" "df = unfiltered_df.copy()") ) self._update_sort() self._update_table(triggered_by="reset_filters") @@ -1703,6 +1703,9 @@ def _handle_view_msg_helper(self, content): self._update_history_cell() elif content["type"] == "clear_history": self.clear_history(from_api=False) + elif content["type"] == "filter_history": + self._filter_relevant_history(persist=True) + self._notify_listeners({"name": "history_filtered", "source": "gui"}) def _notify_listeners(self, event): # notify listeners at the module level @@ -1800,7 +1803,7 @@ def _duplicate_last_row(self): ) # Record row add self._record_transformation( - f"# Add row\n" + f"{constants.ADD_ROW}\n" f"last = df.loc[max(df.index)].copy()\n" f"df.loc[last.name+1] = last.values\n" f"unfiltered_df.loc[last.name+1] = last.values" @@ -1930,7 +1933,7 @@ def _remove_rows(self, rows=None): self._update_table(triggered_by="remove_row") # Record remove rows self._record_transformation( - f"# Remove rows\n" + f"{constants.REMOVE_ROWS}\n" f"df.drop({selected_names}, inplace=True)\n" f"unfiltered_df.drop({selected_names}, inplace=True)" ) @@ -2015,7 +2018,7 @@ def _update_history_cell(self): if not self.show_history: return cleaned_history = "\n".join(self._history) - cell_text = HISTORY_PREFIX + cleaned_history + 
def filter_relevant_history(self, persist=True):
    """Return the transformation history with superseded steps removed.

    Public (API) entry point. Delegates to ``_filter_relevant_history`` and
    then notifies listeners with a ``history_filtered`` event whose
    ``source`` is ``"api"``.

    Parameters
    ----------
    persist : bool, default True
        When true, the widget's stored history is replaced by the filtered
        history and the history cell is refreshed.

    Returns
    -------
    list of str
        The filtered history, oldest command first.
    """
    relevant_history = self._filter_relevant_history(persist)
    self._notify_listeners({"name": "history_filtered", "source": "api"})
    return relevant_history


def _filter_relevant_history(self, persist):
    """Drop filter/sort commands that a later command has superseded.

    The history returned by ``get_history()`` is scanned from newest to
    oldest, classifying each command by the marker comment it starts with:

    * filter command: kept only if no newer filter or filter reset was seen;
    * sort command (column, index or mixed-type column): kept only if no
      newer sort or sort reset was seen;
    * reset filter / reset all filters / reset sort: never kept, and they
      block every older command of the same family;
    * anything else (cell edits, row add/remove, initialisation, ...):
      always kept.

    Parameters
    ----------
    persist : bool
        When true, store the filtered history on the widget and refresh the
        history cell via ``_update_history_cell()``.

    Returns
    -------
    list of str
        The filtered history, oldest command first.
    """
    # NOTE(review): the recorded "# Add row" step reads the current ``df``
    # (``df.loc[max(df.index)]``), so removing an older filter that preceded
    # it may change which row is duplicated on replay -- confirm whether
    # that is acceptable.
    sort_prefixes = (
        constants.SORT_COLUMN,
        constants.SORT_INDEX,
        constants.SORT_MIXED_TYPE_COLUMN,
    )
    reset_filter_prefixes = (
        constants.RESET_FILTER,
        constants.RESET_ALL_FILTERS,
    )

    # Whether a filter or sort can still be added
    add_filter = True
    add_sort = True
    # Collected newest-first with O(1) appends and reversed once at the end,
    # instead of an O(n) ``insert(0, ...)`` per kept command.
    newest_first = []
    for cmd in reversed(self.get_history()):
        if cmd.startswith(constants.FILTER_COLUMNS):
            # Only the latest filter after the last filter reset survives
            if add_filter:
                newest_first.append(cmd)
                add_filter = False
        elif cmd.startswith(sort_prefixes):
            # Only the latest sort after the last sort reset survives
            if add_sort:
                newest_first.append(cmd)
                add_sort = False
        elif cmd.startswith(reset_filter_prefixes):
            # A reset makes every older filter irrelevant
            add_filter = False
        elif cmd.startswith(constants.RESET_SORT):
            # A reset makes every older sort irrelevant
            add_sort = False
        else:
            # Include edit cell, remove/add row, initialization, etc.
            newest_first.append(cmd)

    newest_first.reverse()
    relevant_history = newest_first

    # Change internal state if persisting
    if persist:
        # Store a copy so callers mutating the returned list cannot alter
        # the widget's own history.
        self._history = list(relevant_history)
        self._update_history_cell()
    return relevant_history
SpreadsheetWidget(df=create_df()) + + # Initial history + filtered_history = spreadsheet.filter_relevant_history() + expected_history = ["unfiltered_df = df.copy()"] + assert filtered_history == expected_history + + # Empty history + spreadsheet._history = [] + expected_history = [] + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # Multiple filters + spreadsheet._history = [ + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] <= 48802)].copy()", + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 5495)&(unfiltered_df['trip_id'] <= 48802)].copy()", + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 5495)&(unfiltered_df['trip_id'] <= 48802)&(unfiltered_df['vendor_id'] >= 2)].copy()", + ] + expected_history = [ + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 5495)&(unfiltered_df['trip_id'] <= 48802)&(unfiltered_df['vendor_id'] >= 2)].copy()", + ] + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # Multiple sorts + spreadsheet._history = [ + "# Sort column\ndf.sort_values('trip_id', ascending=True, inplace=True)", + "# Sort column\ndf.sort_values('vendor_id', ascending=True, inplace=True)", + "# Sort column\ndf.sort_values('vendor_id', ascending=False, inplace=True)", + ] + expected_history = [ + "# Sort column\ndf.sort_values('vendor_id', ascending=False, inplace=True)", + ] + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # Filter after reset + spreadsheet._history = [ + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 6485)].copy()", + "# Reset filter\ndf = unfiltered_df.copy()", + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] <= 41873)].copy()", + ] + expected_history = [ + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] <= 41873)].copy()" + ] + filtered_history = 
spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # Sort after reset + spreadsheet._history = [ + "# Sort column\ndf.sort_values('trip_id', ascending=True, inplace=True)", + "# Reset sort\ndf.sort_index(ascending=True, inplace=True)", + "# Sort column\ndf.sort_values('vendor_id', ascending=True, inplace=True)", + ] + expected_history = [ + "# Sort column\ndf.sort_values('vendor_id', ascending=True, inplace=True)" + ] + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # None after reset + spreadsheet._history = [ + "# Sort column\ndf.sort_values('vendor_id', ascending=True, inplace=True)", + "# Reset sort\ndf.sort_index(ascending=True, inplace=True)", + "# Filter columns\ndf = unfiltered_df[(unfiltered_df['trip_id'] >= 6980)].copy()", + "# Reset all filters\ndf = unfiltered_df.copy()", + ] + expected_history = [] + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_history + + # Mixed transformation + mixed_history = [ + "# Edit cell\ndf.loc[(4, 'trip_id')]=10\nunfiltered_df.loc[(4, 'trip_id')]=10", + "# Sort column\ndf.sort_values('trip_id', ascending=True, inplace=True)", + "# Sort column\ndf.sort_values('rate_code_id', ascending=True, inplace=True)", + "# Reset sort\ndf.sort_index(ascending=True, inplace=True)", + "# Sort index\ndf.sort_index(ascending=True, inplace=True)", + "# Remove rows\ndf.drop([6], inplace=True)\nunfiltered_df.drop([6], inplace=True)", + "# Add row\nlast = df.loc[max(df.index)].copy()\ndf.loc[last.name+1] = last.values\nunfiltered_df.loc[last.name+1] = last.values", + "# Reset all filters\ndf = unfiltered_df.copy()", + "# Sort index\ndf.sort_index(ascending=True, inplace=True)", + ] + expected_filtered_history = [ + "# Edit cell\ndf.loc[(4, 'trip_id')]=10\nunfiltered_df.loc[(4, 'trip_id')]=10", + "# Remove rows\ndf.drop([6], inplace=True)\nunfiltered_df.drop([6], inplace=True)", + "# Add row\nlast = 
df.loc[max(df.index)].copy()\ndf.loc[last.name+1] = last.values\nunfiltered_df.loc[last.name+1] = last.values", + "# Sort index\ndf.sort_index(ascending=True, inplace=True)", + ] + spreadsheet._history = mixed_history + filtered_history = spreadsheet.filter_relevant_history() + assert filtered_history == expected_filtered_history + + # Persist filter history + spreadsheet._history = mixed_history + spreadsheet.filter_relevant_history(persist=True) + assert spreadsheet.get_history() == expected_filtered_history + + # Don't persist filter history + spreadsheet._history = mixed_history + filtered_history = spreadsheet.filter_relevant_history(persist=False) + assert filtered_history == expected_filtered_history + assert spreadsheet.get_history() == mixed_history