Skip to content

Commit

Permalink
feature: Make lyse handle gracefully shot files that have been delete…
Browse files Browse the repository at this point in the history
…d off disk.

Whenever lyse tries to read a HDF5 file that has been deleted, it
previously would crash. Now, the shot's row
in the filebox is marked with an icon showing it as deleted off disk
and a warning is printed. No more singleshot analysis will be run on that shot
file, but its data will be kept in the dataframe until the user removes it
from the filebox, and thus still be available for multishot analysis.

The use case for this is when an experiment is cycling just to 'keep warm' or
whatever, and shots are being deleted so as not to consume hard drive space.

I removed some of the unused optional ways of calling some of lyse's
internal methods so as to decrease the number of places where
lyse reads a HDF5 file. Previously multiple code paths were allowed - one
which was a bit cleaner (reading HDF5 files at the last minute)
 and one which was a performance optimisation (read them in advance, outside the
 GUI thread, and pass the results to the GUI-related methods). Now only
the performant way of calling these methods is allowed - which is what all
the calling code was using anyway.
  • Loading branch information
chrisjbillington committed Apr 11, 2017
1 parent 3c5f49b commit 0c4fff8
Showing 1 changed file with 94 additions and 31 deletions.
125 changes: 94 additions & 31 deletions __main__.py
Expand Up @@ -53,7 +53,6 @@

from lyse.dataframe_utilities import (concat_with_padding,
get_dataframe_from_shot,
get_dataframe_from_shots,
replace_with_padding)

from qtutils import inmain_decorator, UiLoader, DisconnectContextManager
Expand Down Expand Up @@ -1086,6 +1085,7 @@ class DataFrameModel(QtCore.QObject):
COL_FILEPATH = 1

ROLE_STATUS_PERCENT = QtCore.Qt.UserRole + 1
ROLE_DELETED_OFF_DISK = QtCore.Qt.UserRole + 2

columns_changed = Signal()

Expand Down Expand Up @@ -1194,7 +1194,9 @@ def mark_selection_not_done(self):
selected_rows = set(index.row() for index in selected_indexes)
for row in selected_rows:
status_item = self._model.item(row, self.COL_STATUS)
status_item.setData(0, self.ROLE_STATUS_PERCENT)
# Only mark as not done if it's not deleted off disk:
if not status_item.data(self.ROLE_DELETED_OFF_DISK):
status_item.setData(0, self.ROLE_STATUS_PERCENT)

def on_view_context_menu_requested(self, point):
menu = QtGui.QMenu(self._view)
Expand Down Expand Up @@ -1249,6 +1251,30 @@ def update_column_levels(self):
self.column_indices = column_indices
self.column_names = column_names

@inmain_decorator()
def mark_as_deleted_off_disk(self, filepath):
# Confirm the shot hasn't been removed from lyse (we are in the main
# thread so there is no race condition in checking first)
try:
np.where(self.dataframe['filepath'].values == filepath)[0][0]
except IndexError:
# Shot has been removed from FileBox, nothing to do here:
return

model_row_number = self.get_model_row_by_filepath(filepath)
status_item = self._model.item(model_row_number, self.COL_STATUS)
already_marked_as_deleted = status_item.data(self.ROLE_DELETED_OFF_DISK)
if already_marked_as_deleted:
return
# Icon only displays if percent completion is 100. This is also
# important so that the shot is not picked up as analysis
# incomplete and analysis re-attempted on it.
status_item.setData(True, self.ROLE_DELETED_OFF_DISK)
status_item.setData(100, self.ROLE_STATUS_PERCENT)
status_item.setToolTip("Shot has been deleted off disk or is unreadable")
status_item.setIcon(QtGui.QIcon(':qtutils/fugue/drive--minus'))
app.output_box.output('Warning: Shot deleted from disk or no longer readable %s\n' % filepath, red=True)

@inmain_decorator()
def update_row(self, filepath, dataframe_already_updated=False, status_percent=None, new_row_data=None):
""""Updates a row in the dataframe and Qt model
Expand All @@ -1262,9 +1288,8 @@ def update_row(self, filepath, dataframe_already_updated=False, status_percent=N
return
if not dataframe_already_updated:
if new_row_data is None:
# This can be passed in from the caller as a performace optimisation.
# Opening the file can be slow, better not to do it in the GUI thread.
new_row_data = get_dataframe_from_shot(filepath)
raise ValueError("If dataframe_already_updated is False, then new_row_data, as returned "
"by dataframe_utils.get_dataframe_from_shot(filepath) must be provided.")
self.dataframe = replace_with_padding(self.dataframe, new_row_data, df_row_index)
self.update_column_levels()

Expand Down Expand Up @@ -1376,36 +1401,38 @@ def renumber_rows(self):
vertical_header_item.setText(vert_header_text)

@inmain_decorator()
def add_files(self, filepaths, new_row_data=None):
def add_files(self, filepaths, new_row_data):
"""Add files to the dataframe model. New_row_data should be a
dataframe containing the new rows."""

to_add = []

# Check for duplicates:
for filepath in filepaths:
if filepath in self.dataframe['filepath'].values:
# Ignore duplicates:
if filepath in self.dataframe['filepath'].values or filepath in to_add:
app.output_box.output('Warning: Ignoring duplicate shot %s\n' % filepath, red=True)
if new_row_data is not None:
df_row_index = np.where(new_row_data['filepath'].values == filepath)
new_row_data = new_row_data.drop(df_row_index[0])
new_row_data.index = pandas.Index(range(len(new_row_data)))
else:
to_add.append(filepath)

assert len(new_row_data) == len(to_add)

for filepath in to_add:
# Add the new row to the model:
# Add the new rows to the model:
self._model.appendRow(self.new_row(filepath))
vert_header_item = QtGui.QStandardItem('...loading...')
self._model.setVerticalHeaderItem(self._model.rowCount() - 1, vert_header_item)
self._view.resizeRowToContents(self._model.rowCount() - 1)
# Add the new rows to the dataframe.
if new_row_data is None:
# This can be passed in from the caller as a performace optimisation.
# Opening the file can be slow, better not to do it in the GUI thread:
new_row_data = get_dataframe_from_shots(to_add)
else:
assert len(new_row_data) == len(to_add)
self.dataframe = concat_with_padding(self.dataframe, new_row_data)
self.update_column_levels()
for filepath in to_add:
self.update_row(filepath, dataframe_already_updated=True)
self.renumber_rows()

if to_add:
self.dataframe = concat_with_padding(self.dataframe, new_row_data)
self.update_column_levels()
for filepath in to_add:
self.update_row(filepath, dataframe_already_updated=True)
self.renumber_rows()

@inmain_decorator()
def get_first_incomplete(self):
Expand Down Expand Up @@ -1578,23 +1605,39 @@ def incoming_buffer_loop(self):
filepaths = sorted(set(filepaths), key=filepaths.index) # Inefficient but readable
# We open the HDF5 files here outside the GUI thread so as not to hang the GUI:
dataframes = []
indices_of_files_not_found = []
for i, filepath in enumerate(filepaths):
dataframe = get_dataframe_from_shot(filepath)
dataframes.append(dataframe)
try:
dataframe = get_dataframe_from_shot(filepath)
dataframes.append(dataframe)
except IOError:
app.output_box.output('Warning: Ignoring shot file not found or not readable %s\n' % filepath, red=True)
indices_of_files_not_found.append(i)
n_shots_added += 1
shots_remaining = self.incoming_queue.qsize()
total_shots = n_shots_added + shots_remaining + len(filepaths) - (i + 1)
if i != len(filepaths) - 1:
# Leave the last update until after dataframe concatenation.
# Looks more responsive that way:
self.set_add_shots_progress(n_shots_added, total_shots)
new_row_data = concat_with_padding(*dataframes)
if dataframes:
new_row_data = concat_with_padding(*dataframes)
else:
new_row_data = None
self.set_add_shots_progress(n_shots_added, total_shots)
self.shots_model.add_files(filepaths, new_row_data)

# Do not add the shots that were not found on disk. Reverse
# loop so that removing an item doesn't change the indices of
# subsequent removals:
for i in reversed(indices_of_files_not_found):
del filepaths[i]
if filepaths:
self.shots_model.add_files(filepaths, new_row_data)
# Let the analysis loop know to look for new shots:
self.analysis_pending.set()
if shots_remaining == 0:
n_shots_added = 0 # reset our counter for the next batch
# Let the analysis loop know to look for new shots:
self.analysis_pending.set()

except Exception:
# Keep this incoming loop running at all costs, but make the
# otherwise uncaught exception visible to the user:
Expand Down Expand Up @@ -1640,19 +1683,39 @@ def pause_analysis(self):
self.ui.pushButton_analysis_running.setChecked(True)

def do_singleshot_analysis(self, filepath):
# Check the shot file exists before sending it to the singleshot
# routinebox. This does not guarantee it won't have been deleted by
# the time the routinebox starts running analysis on it, but by
# detecting it now we can most of the time avoid the user code
# coughing exceptions due to the file not existing. Which would also
# not be a problem, but this way we avoid polluting the outputbox with
# more errors than necessary.
if not os.path.exists(filepath):
self.shots_model.mark_as_deleted_off_disk(filepath)
return
self.to_singleshot.put(filepath)
while True:
signal, status_percent = self.from_singleshot.get()
if signal in ['error', 'progress']:
# Do the file reading here outside the GUI thread so as not to hang the GUI:
new_row_data = get_dataframe_from_shot(filepath)
self.shots_model.update_row(filepath, status_percent=status_percent, new_row_data=new_row_data)
try:
new_row_data = get_dataframe_from_shot(filepath)
except IOError:
self.shots_model.mark_as_deleted_off_disk(filepath)
new_row_data = None
else:
self.shots_model.update_row(filepath, status_percent=status_percent, new_row_data=new_row_data)
if signal == 'done':
# No need to update the dataframa again, that should have been done with the last 'progress' signal:
# No need to update the dataframe again, that should have been done with the last 'progress' signal:
self.shots_model.update_row(filepath, status_percent=status_percent, dataframe_already_updated=True)
return
if signal == 'error':
self.pause_analysis()
# If new_row_data is None, that indicates that we got a
# ShotFileNotFound error above. Do not pause analysis in this
# case, as an error is expected given the shot file doesn't
# exist.
if new_row_data is not None:
self.pause_analysis()
return

def do_multishot_analysis(self):
Expand Down

0 comments on commit 0c4fff8

Please sign in to comment.