diff --git a/src/include/usr/isteps/istep_reasoncodes.H b/src/include/usr/isteps/istep_reasoncodes.H index f00ba068bf4..bdf4c9386f9 100644 --- a/src/include/usr/isteps/istep_reasoncodes.H +++ b/src/include/usr/isteps/istep_reasoncodes.H @@ -61,6 +61,7 @@ namespace ISTEP MOD_HANDLE_SBE_REG_VALUE = 0x19, MOD_SBE_PERFORM_UPDATE_CHECK = 0x1A, MOD_MSS_ATTR_UPDATE = 0x1B, /* @TODO-RTC:149250-Remove */ + MOD_SBE_GET_FFDC_HANDLER = 0x1C, }; /** @@ -119,6 +120,8 @@ namespace ISTEP RC_PROC_EXTRACT_SBE_MAIN_ERROR = ISTEP_COMP_ID | 0x30, RC_SBE_BOOTED_UNEXPECTED_SIDE_UPD = ISTEP_COMP_ID | 0x31, RC_SBE_UPDATE_IN_MPIPL = ISTEP_COMP_ID | 0x32, + RC_NO_FFDC_RETURNED = ISTEP_COMP_ID | 0x33, + RC_RETURNED_FFDC = ISTEP_COMP_ID | 0x34, }; }; diff --git a/src/usr/isteps/istep08/sbe_extract_rc_handler.C b/src/usr/isteps/istep08/sbe_extract_rc_handler.C index 689e1d845aa..dc43c32e650 100644 --- a/src/usr/isteps/istep08/sbe_extract_rc_handler.C +++ b/src/usr/isteps/istep08/sbe_extract_rc_handler.C @@ -23,7 +23,7 @@ /* */ /* IBM_PROLOG_END_TAG */ /** - * @file sbe_extract_rc_handler.H + * @file sbe_extract_rc_handler.C * * Handle a SBE extract rc error. 
We use a switch-case to determine * what action to take, and a finite state machine to control the @@ -53,6 +53,12 @@ #include #include "sbe_extract_rc_handler.H" #include +#include +#include +#include <../../usr/sbeio/sbe_fifodd.H> +#include <../../usr/sbeio/sbe_fifo_buffer.H> +#include +#include using namespace ISTEP; @@ -243,8 +249,8 @@ P9_EXTRACT_SBE_RC::RETURN_ACTION failing_exit_state( errlCommit(l_errl,ISTEP_COMP_ID); } #endif - proc_extract_sbe_handler(i_target, i_orig_error, - P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM); + proc_extract_sbe_handler(i_target, + P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM); } // Gard and callout proc, return back to 8.4 else if(i_orig_error == P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM) @@ -280,7 +286,7 @@ P9_EXTRACT_SBE_RC::RETURN_ACTION failing_exit_state( void proc_extract_sbe_handler( TARGETING::Target * i_target, - uint8_t i_original_error, uint8_t i_current_error) + uint8_t i_current_error) { TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, ENTER_MRK "proc_extract_sbe_handler error: %llx",i_current_error); @@ -295,7 +301,7 @@ void proc_extract_sbe_handler( TARGETING::Target * i_target, * @userdata1 HUID of proc that had the SBE timeout * @userdata2 SBE failing code * - * @devdesc SBE did not start, this funciton is looking at + * @devdesc SBE did not start, this function is looking at * the error to determine next course of action * * @custdesc The SBE did not start, we will attempt a reboot if possible @@ -500,47 +506,60 @@ SBE_REG_RETURN check_sbe_reg(TARGETING::Target * i_target) TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, ENTER_MRK "check_sbe_reg"); - errlHndl_t l_errl = NULL; + errlHndl_t l_errl = nullptr; SBE_REG_RETURN l_ret = SBE_REG_RETURN::SBE_FAILED_TO_BOOT; - const fapi2::Target - l_fapi2_proc_target(i_target); + do + { + const fapi2::Target + l_fapi2_proc_target(i_target); - sbeMsgReg_t l_sbeReg; + sbeMsgReg_t l_sbeReg; - l_errl = sbe_timeout_handler(&l_sbeReg,i_target,&l_ret); + l_errl = 
sbe_timeout_handler(&l_sbeReg,i_target,&l_ret); - if((!l_errl) && (l_sbeReg.currState != SBE_STATE_RUNTIME)) - { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "SBE 0x%.8X never started, l_sbeReg=0x%.8X", - TARGETING::get_huid(i_target),l_sbeReg.reg ); + if((!l_errl) && (l_sbeReg.currState != SBE_STATE_RUNTIME)) + { + // See if async FFDC bit is set in SBE register + if(l_sbeReg.asyncFFDC) + { + bool l_flowCtrl = sbe_get_ffdc_handler(i_target); - l_ret = SBE_REG_RETURN::SBE_FAILED_TO_BOOT; - } - else if (l_errl) - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, - "ERROR: call check_sbe_reg, PLID=0x%x", l_errl->plid() ); + if(l_flowCtrl) + { + break; + } + } - // capture the target data in the elog - ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_errl ); + // Handle that SBE failed to boot in the allowed time + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "SBE 0x%.8X never started, l_sbeReg=0x%.8X", + TARGETING::get_huid(i_target),l_sbeReg.reg ); + } + else if (l_errl) + { + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, + "ERROR: call check_sbe_reg, PLID=0x%x", l_errl->plid() ); - // Commit error log - errlCommit( l_errl, HWPF_COMP_ID ); - } - // No error and still functional - else if(i_target->getAttr().functional) - { - // Set attribute indicating that SBE is started - i_target->setAttr(1); + // capture the target data in the elog + ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_errl ); - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, - "SUCCESS: check_sbe_reg completed okay for proc 0x%.8X", - TARGETING::get_huid(i_target)); - } - //@TODO-RTC:100963 - this should match the logic in - //call_proc_check_slave_sbe_seeprom.C + // Commit error log + errlCommit( l_errl, HWPF_COMP_ID ); + } + // No error and still functional + else if(i_target->getAttr().functional) + { + // Set attribute indicating that SBE is started + i_target->setAttr(1); + + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, + "SUCCESS: check_sbe_reg completed okay for proc 0x%.8X", + 
TARGETING::get_huid(i_target)); + } + //@TODO-RTC:100963 - this should match the logic in + //call_proc_check_slave_sbe_seeprom.C + } while(0); return l_ret; @@ -589,8 +608,8 @@ P9_EXTRACT_SBE_RC::RETURN_ACTION handle_sbe_reg_value( #endif // If we were trying to reipl and hit the error, we need // to start with a new seeprom before hitting the threshold - proc_extract_sbe_handler(i_target, i_current_sbe_error, - P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM); + proc_extract_sbe_handler(i_target, + P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM); return P9_EXTRACT_SBE_RC::ERROR_RECOVERED; } @@ -734,6 +753,15 @@ errlHndl_t sbe_timeout_handler(sbeMsgReg_t * o_sbeReg, (*o_returnAction) = SBE_REG_RETURN::SBE_AT_RUNTIME; break; } + else if ((*o_sbeReg).asyncFFDC) + { + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, + "SBE 0x%.8X has async FFDC bit set, o_sbeReg=0x%.8X", + TARGETING::get_huid(i_target), (*o_sbeReg).reg); + // Async FFDC is indicator that SBE is failing to boot, and if + // in DUMP state, that SBE is done dumping, so leave loop + break; + } else { if( !(l_loops % 10) ) @@ -750,6 +778,163 @@ errlHndl_t sbe_timeout_handler(sbeMsgReg_t * o_sbeReg, return l_errl; } +P9_EXTRACT_SBE_RC::RETURN_ACTION action_for_ffdc_rc(uint32_t i_rc) +{ + P9_EXTRACT_SBE_RC::RETURN_ACTION l_action; + + switch(i_rc) + { + case fapi2::RC_EXTRACT_SBE_RC_RUNNING: + case fapi2::RC_EXTRACT_SBE_RC_NEVER_STARTED: + case fapi2::RC_EXTRACT_SBE_RC_PROGRAM_INTERRUPT: + case fapi2::RC_EXTRACT_SBE_RC_ADDR_NOT_RECOGNIZED: + case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_ECC_ERR_INSECURE_MODE: + case fapi2::RC_EXTRACT_SBE_RC_FI2CM_BIT_RATE_ERR: + case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_ECC_ERR: + + l_action = P9_EXTRACT_SBE_RC::RESTART_SBE; + + break; + + case fapi2::RC_EXTRACT_SBE_RC_MAGIC_NUMBER_MISMATCH: + case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR_INSECURE_MODE: + case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR: + + l_action = P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM; + + break; + + case fapi2::RC_EXTRACT_SBE_RC_FI2C_ERROR: + case 
fapi2::RC_EXTRACT_SBE_RC_FI2C_TIMEOUT: + case fapi2::RC_EXTRACT_SBE_RC_UNKNOWN_ERROR: + + l_action = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM; + + break; + + case fapi2::RC_EXTRACT_SBE_RC_OTP_TIMEOUT: + case fapi2::RC_EXTRACT_SBE_RC_OTP_PIB_ERR: + case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_PIB_ERR: + case fapi2::RC_EXTRACT_SBE_RC_FI2C_SPRM_CFG_ERR: + case fapi2::RC_EXTRACT_SBE_RC_FI2C_PIB_ERR: + + l_action = P9_EXTRACT_SBE_RC::RESTART_CBS; + + break; + + case fapi2::RC_EXTRACT_SBE_RC_OTP_ECC_ERR_INSECURE_MODE: + case fapi2::RC_EXTRACT_SBE_RC_BRANCH_TO_SEEPROM_FAIL: + case fapi2::RC_EXTRACT_SBE_RC_UNEXPECTED_OTPROM_HALT: + case fapi2::RC_EXTRACT_SBE_RC_OTP_ECC_ERR: + default: + + l_action = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION; + + break; + } + + return l_action; +} + +bool sbe_get_ffdc_handler(TARGETING::Target * i_target) +{ + bool l_flowCtrl = false; + errlHndl_t l_errl = nullptr; + uint32_t l_responseSize = SBEIO::SbeFifoRespBuffer::MSG_BUFFER_SIZE; + uint32_t *l_pFifoResponse = + reinterpret_cast(malloc(l_responseSize)); + + l_errl = SBEIO::getFifoSBEFFDC(i_target, + l_pFifoResponse, + l_responseSize); + + // Check if there was an error log created + if(l_errl) + { + // Trace but otherwise silently ignore error + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, + "sbe_get_ffdc_handler: ignoring error PLID=0x%x from " + "get SBE FFDC FIFO request to proc 0x%.8X", + l_errl->plid(), + TARGETING::get_huid(i_target)); + delete l_errl; + l_errl = nullptr; + } + else + { + // Parse the FFDC package(s) in the response + SBEIO::SbeFFDCParser * l_ffdc_parser = + new SBEIO::SbeFFDCParser(); + l_ffdc_parser->parseFFDCData(reinterpret_cast(l_pFifoResponse)); + + uint8_t l_pkgs = l_ffdc_parser->getTotalPackages(); + P9_EXTRACT_SBE_RC::RETURN_ACTION l_action; + + // If there are FFDC packages, make a log for FFDC from SBE + if(l_pkgs > 0) + { + /*@ + * @errortype + * @moduleid MOD_SBE_GET_FFDC_HANDLER + * @reasoncode RC_RETURNED_FFDC + * @userdata1 Processor Target + * @userdata2 Number 
of FFDC packages + * @devdesc FFDC returned by SBE after failing to reach runtime + * @custdesc FFDC associated with boot device failing to boot + */ + l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL, + MOD_SBE_GET_FFDC_HANDLER, + RC_RETURNED_FFDC, + TARGETING::get_huid(i_target), + l_pkgs); + + // Also log the failing proc as FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl); + } + + // Process each FFDC package + for(auto i=0; iaddFFDC( SBEIO_COMP_ID, + l_ffdc_parser->getFFDCPackage(i), + l_ffdc_parser->getPackageLength(i), + 0, + SBEIO::SBEIO_UDT_PARAMETERS, + false ); + + // Get the RC from the FFDC package + uint32_t l_rc = l_ffdc_parser->getPackageRC(i); + + // Determine an action for the RC + l_action = action_for_ffdc_rc(l_rc); + + // Handle that action + proc_extract_sbe_handler(i_target, + l_action); + } + + // If there are FFDC packages, commit the log + if(l_pkgs > 0) + { + l_errl->collectTrace( SBEIO_COMP_NAME, KILOBYTE/4); + l_errl->collectTrace( "ISTEPS_TRACE", KILOBYTE/4); + + errlCommit(l_errl, ISTEP_COMP_ID); + } + + delete l_ffdc_parser; + l_ffdc_parser = nullptr; + + l_flowCtrl = true; + } + + free(l_pFifoResponse); + l_pFifoResponse = nullptr; + + return l_flowCtrl; +} + errlHndl_t switch_sbe_sides(TARGETING::Target * i_target) { errlHndl_t l_errl = NULL; diff --git a/src/usr/isteps/istep08/sbe_extract_rc_handler.H b/src/usr/isteps/istep08/sbe_extract_rc_handler.H index 1e475637ed5..eba029539db 100644 --- a/src/usr/isteps/istep08/sbe_extract_rc_handler.H +++ b/src/usr/isteps/istep08/sbe_extract_rc_handler.H @@ -104,13 +104,12 @@ void sbe_threshold_handler( bool i_procSide, * each output of the proc_extract_sbe_rc HWP. 
* * @param[in] i_target - current proc target - * @param[in] i_previous_error - The previous return value from HWP * @param[in] i_current_error - The most recent return value from HWP * * @return - NULL */ void proc_extract_sbe_handler( TARGETING::Target * i_target, - uint8_t i_previous_error, uint8_t i_current_error); + uint8_t i_current_error); /** * @brief This function handles the call to the p9_get_sbe_msg_handler. @@ -174,6 +173,28 @@ errlHndl_t sbe_timeout_handler(sbeMsgReg_t * o_sbeReg, TARGETING::Target * i_target, SBE_REG_RETURN * o_returnAction); +/** + * @brief Maps the RC value from an SBE FFDC package to the recovery action + * to take for it. + * + * @param[in] i_rc - RC value from SBE FFDC package + * + * @return - action for the RC; NO_RECOVERY_ACTION if RC is unrecognized + */ +P9_EXTRACT_SBE_RC::RETURN_ACTION action_for_ffdc_rc( uint32_t i_rc); + +/** + * @brief This function handles getting the SBE FFDC. + * + * @param[in] i_target - current proc target + * + * @return - true if the FFDC request succeeded and any returned packages + * were processed (caller should leave its normal flow); false if + * the request failed (caller should continue its normal flow) + */ + +bool sbe_get_ffdc_handler(TARGETING::Target * i_target); + /** * @brief This function deals with the mask needed to switch * boot side on the SBE for a given proc