-
Notifications
You must be signed in to change notification settings - Fork 154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handling exceptions in CMIS SM to prevent xcvrd crash #483
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,7 +121,12 @@ | |
# | ||
# Helper functions ============================================================= | ||
# | ||
|
||
def log_exception_traceback():
    """Log the traceback of the exception currently being handled.

    The active exception is taken from ``sys.exc_info()``, formatted with
    ``traceback.format_exception`` and written through
    ``helper_logger.log_error``, one physical text line per call.
    """
    formatted_chunks = traceback.format_exception(*sys.exc_info())
    # An entry from format_exception may contain embedded newlines, so each
    # one is split before logging to keep every log record on a single line.
    for chunk in formatted_chunks:
        for text_line in chunk.splitlines():
            helper_logger.log_error(text_line)
|
||
def is_cmis_api(api):
    """Return True when *api* is an instance of ``CmisApi``, False otherwise."""
    api_is_cmis = isinstance(api, CmisApi)
    return api_is_cmis
|
@@ -1376,6 +1381,11 @@ def task_worker(self): | |
# Skip if these essential routines are not available | ||
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) | ||
continue | ||
except Exception as e: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @mihirpat1 Why did we make a distinction between AttributeError and other exceptions? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @prgeor The AttributeError handling here seems to be day-1 code. However, I am not sure of the original reason for it, so I decided to leave the existing behavior as is, since with AttributeError we are moving CMIS SM to |
||
self.log_error("{}: Exception in xcvr api: {}".format(lport, e)) | ||
log_exception_traceback() | ||
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) | ||
continue | ||
|
||
# CMIS expiration and retries | ||
# | ||
|
@@ -1608,8 +1618,9 @@ def task_worker(self): | |
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY) | ||
self.post_port_active_apsel_to_db(api, lport, host_lanes_mask) | ||
|
||
except (NotImplementedError, AttributeError) as e: | ||
except Exception as e: | ||
self.log_error("{}: internal errors due to {}".format(lport, e)) | ||
log_exception_traceback() | ||
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) | ||
|
||
self.log_notice("Stopped") | ||
|
@@ -1627,11 +1638,7 @@ def run(self): | |
self.task_worker() | ||
except Exception as e: | ||
helper_logger.log_error("Exception occured at {} thread due to {}".format(threading.current_thread().getName(), repr(e))) | ||
exc_type, exc_value, exc_traceback = sys.exc_info() | ||
msg = traceback.format_exception(exc_type, exc_value, exc_traceback) | ||
for tb_line in msg: | ||
for tb_line_split in tb_line.splitlines(): | ||
helper_logger.log_error(tb_line_split) | ||
log_exception_traceback() | ||
self.exc = e | ||
self.main_thread_stop_event.set() | ||
|
||
|
@@ -1791,11 +1798,7 @@ def run(self): | |
self.task_worker() | ||
except Exception as e: | ||
helper_logger.log_error("Exception occured at {} thread due to {}".format(threading.current_thread().getName(), repr(e))) | ||
exc_type, exc_value, exc_traceback = sys.exc_info() | ||
msg = traceback.format_exception(exc_type, exc_value, exc_traceback) | ||
for tb_line in msg: | ||
for tb_line_split in tb_line.splitlines(): | ||
helper_logger.log_error(tb_line_split) | ||
log_exception_traceback() | ||
self.exc = e | ||
self.main_thread_stop_event.set() | ||
|
||
|
@@ -2216,11 +2219,7 @@ def run(self): | |
self.task_worker(self.task_stopping_event, self.sfp_error_event) | ||
except Exception as e: | ||
helper_logger.log_error("Exception occured at {} thread due to {}".format(threading.current_thread().getName(), repr(e))) | ||
exc_type, exc_value, exc_traceback = sys.exc_info() | ||
msg = traceback.format_exception(exc_type, exc_value, exc_traceback) | ||
for tb_line in msg: | ||
for tb_line_split in tb_line.splitlines(): | ||
helper_logger.log_error(tb_line_split) | ||
log_exception_traceback() | ||
self.exc = e | ||
self.main_thread_stop_event.set() | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to understand when the AttributeError exception happens. Ideally the below line "except Exception as e:" is a superset and should cover all exceptions.
Also, what is the exact error/exception when the EEPROM data is corrupted/bad -- due to bad optics?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@judyjoseph Yes - the below line "except Exception as e:" covers all exceptions apart from AttributeError. However, for AttributeError, the CMIS SM is currently being transitioned to CMIS_STATE_READY and not CMIS_STATE_FAILED.
In case of bad optics, I have seen KeyError and TypeError so far.