Skip to content

Commit

Permalink
Hook up FSP runtime support for sbe_retry_handler (hreset path)
Browse files Browse the repository at this point in the history
In the sbe_retry_handler code we have two methods we can use to restart
the SBE: restarting the CFAM boot sequence (start_cbs HWP) and
performing a HW reset on the PPE (hreset HWP). We use start_cbs if there
are issues with the initial power-on of the slave proc's SBE, because we will
not lose any state info (the fabric isn't up yet). During runtime we want
to use the hreset HWP to recover the SBE. Hreset is handy because it does
not force a reboot of the entire proc chip, so the fabric can stay up while
we reset the PPE in the SBE. This commit implements the code path for the
hreset HWP in the sbe_retry_handler. In addition, this commit enables calls to
the sbe_retry_handler in rt_fwnotify's sbeAttemptRecovery function, which
handles PHYP requests to recover the SBE.
(Also fixes some small typos in related code.)

Change-Id: I8f85c38a09e8d5ab80b2809e5665c77a54e35bc4
CQ: SW415675
RTC: 180242
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/56276
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Roland Veloz <rveloz@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
crgeddes authored and dcrowell77 committed Apr 9, 2018
1 parent a027c49 commit 5192636
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 65 deletions.
1 change: 0 additions & 1 deletion src/include/usr/sbeio/sbeioreasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ enum sbeioReasonCode
SBEIO_RETURNED_FFDC = SBEIO_COMP_ID | 0x57,
SBEIO_SLAVE_TIMEOUT = SBEIO_COMP_ID | 0x58,
SBEIO_ATTEMPTING_REBOOT = SBEIO_COMP_ID | 0x59,
SBEIO_UNSUPPORTED_REQUEST = SBEIO_COMP_ID | 0x5A,
SBEIO_MORE_FFDC_THAN_EXPECTED = SBEIO_COMP_ID | 0x5B,
SBEIO_EXCEED_MAX_SIDE_SWITCHES = SBEIO_COMP_ID | 0x5C,
SBEIO_EXCEED_MAX_SIDE_BOOTS = SBEIO_COMP_ID | 0x5D,
Expand Down
5 changes: 3 additions & 2 deletions src/usr/sbeio/common/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

#Common .mk files to include
include ${ROOTPATH}/procedure.rules.mk
include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/sbe/p9_get_sbe_msg_register.mk
include ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/p9_start_cbs.mk
include ${PROCEDURES_PATH}/hwp/sbe/p9_get_sbe_msg_register.mk
include ${PROCEDURES_PATH}/hwp/perv/p9_start_cbs.mk
include ${PROCEDURES_PATH}/hwp/perv/p9_sbe_hreset.mk

#Common Include Paths
EXTRAINCDIR += ${PROCEDURES_PATH}/hwp/ffdc
Expand Down
82 changes: 34 additions & 48 deletions src/usr/sbeio/common/sbe_retry_handler.C
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <ipmi/ipmiwatchdog.H>

#include <p9_start_cbs.H>
#include <p9_sbe_hreset.H>
#include <p9_get_sbe_msg_register.H>
#include <p9_perv_scom_addresses.H>
#include <sbe/sbe_update.H>
Expand Down Expand Up @@ -183,7 +184,6 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
else if(!this->iv_sbeRegister.sbeBooted)
{
SBE_TRACF("main_sbe_handler(): SBE reports it was never booted, calling p9_sbe_extract_rc will fail. Setting action to be RESTART_SBE");
//Maybe commit log here saying initial start_cbs didnt run
this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
}

Expand Down Expand Up @@ -326,6 +326,9 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
// switching seeprom sides
}

// Both of the retry methods require a FAPI2 version of the target because they
// are fapi2 HWPs
const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2_proc_target (i_target);
if(this->iv_currentSideBootAttempts >= MAX_SIDE_BOOT_ATTEMPTS)
{
/*@
Expand Down Expand Up @@ -365,8 +368,6 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
else if(this->iv_sbeRestartMethod == SBE_RESTART_METHOD::START_CBS)
{
SBE_TRACF("Invoking p9_start_cbs HWP on processor %.8X", get_huid(i_target));
const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
l_fapi2_proc_target (i_target);

FAPI_INVOKE_HWP(l_errl, p9_start_cbs,
l_fapi2_proc_target, true);
Expand Down Expand Up @@ -400,56 +401,41 @@ void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
// will work so we will break out of the retry loop
break;
}
}else
}
// The only other type of reset method is HRESET
else
{
//@todo RTC:180242 Right now we don't have the support
// to perform an hreset, when we do remove this error
// log and perform the hreset.

//Increment attempt count for this side
this->iv_currentSideBootAttempts++;
/*@
* @errortype
* @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE
* @moduleid SBEIO_EXTRACT_RC_HANDLER
* @reasoncode SBEIO_UNSUPPORTED_REQUEST
* @userdata1 HUID of proc that had the SBE timeout
* @userdata2 SBE failing code
*
* @devdesc SBE did not start, this function is looking at
* the error to determine next course of action
*
* @custdesc The SBE did not start, we will attempt a reboot
* if possible
*/
l_errl = new ERRORLOG::ErrlEntry(
ERRORLOG::ERRL_SEV_UNRECOVERABLE,
SBEIO_EXTRACT_RC_HANDLER,
SBEIO_UNSUPPORTED_REQUEST,
TARGETING::get_huid(i_target),
this->iv_currentAction);

l_errl->collectTrace( SBEIO_COMP_NAME, 256 );

// Gard the proc, when SBE Retry fails
l_errl->addHwCallout(i_target,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::NO_DECONFIG,
HWAS::GARD_Predictive);

// Set the PLID of the error log to caller's PLID,
// if provided
if (iv_callerErrorLogPLID)
// For now we only use HRESET during runtime. The bool param we are
// passing in is supposed to be FALSE if runtime, TRUE if IPL time
FAPI_INVOKE_HWP(l_errl, p9_sbe_hreset,
l_fapi2_proc_target, false);
if(l_errl)
{
l_errl->plid(iv_callerErrorLogPLID);
}
SBE_TRACF("ERROR: call p9_sbe_hreset, PLID=0x%x",
l_errl->plid() );
l_errl->collectTrace( SBEIO_COMP_NAME, 256 );

errlCommit(l_errl, ISTEP_COMP_ID);
// Gard the target, when SBE Retry fails
l_errl->addHwCallout(i_target,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::NO_DECONFIG,
HWAS::GARD_Predictive);

// If we got an errlog while attempting hreset
// we will assume that no future retry actions
// will work so we will exit
break;
// Set the PLID of the error log to caller's PLID,
// if provided
if (iv_callerErrorLogPLID)
{
l_errl->plid(iv_callerErrorLogPLID);
}

errlCommit( l_errl, ISTEP_COMP_ID);
// If we got an errlog while attempting p9_sbe_hreset
// we will assume that no future retry actions
// will work so we will exit
break;
}
}

// We have performed the action, so make sure that ffdcSetAction is set back to 0
Expand Down Expand Up @@ -1193,7 +1179,7 @@ errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
DEVICE_SCOM_ADDRESS(PERV_SB_CS_SCOM) );
if( l_errl )
{
SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device write "
SBE_TRACF( ERR_MRK"switch_sbe_sides: SCOM device write "
"PERV_SB_CS_SCOM (0x%.4X), proc target = %.8X, "
"RC=0x%X, PLID=0x%lX",
PERV_SB_CS_SCOM, // 0x50008
Expand Down
2 changes: 1 addition & 1 deletion src/usr/sbeio/sbe_fifodd.C
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ errlHndl_t SbeFifo::waitDnFifoReady(TARGETING::Target * i_target,
// so the error logs it creates will be linked
uint32_t l_errPlid = errl->plid();

// Commit errlor log now if this is a FSP system because
// Commit error log now if this is a FSP system because
// we will not return from retry handler
if(INITSERVICE::spBaseServicesEnabled())
{
Expand Down
2 changes: 1 addition & 1 deletion src/usr/sbeio/sbe_psudd.C
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
// so the error logs it creates will be linked
uint32_t l_errPlid = l_errl->plid();

// Commit errlor log now if this is a FSP system because
// Commit error log now if this is a FSP system because
// we will not return from retry handler
if(INITSERVICE::spBaseServicesEnabled())
{
Expand Down
20 changes: 8 additions & 12 deletions src/usr/util/runtime/rt_fwnotify.C
Original file line number Diff line number Diff line change
Expand Up @@ -132,19 +132,15 @@ void sbeAttemptRecovery(uint64_t i_data)
break;
}


// Get the recovery results
// TODO SW415675 Need to attempt sbe retry if requested
// Get the SBE Retry Handler, propagating the supplied PLID
// SbeRetryHandler l_SBEobj = SbeRetryHandler(SbeRetryHandler::
// SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
// l_sbeRetryData->plid);
SbeRetryHandler l_SBEobj = SbeRetryHandler(SbeRetryHandler::
SBE_MODE_OF_OPERATION::ATTEMPT_REBOOT,
l_sbeRetryData->plid);

//Attempt to recover the SBE
l_SBEobj.main_sbe_handler(l_target);

// Retry the recovery of the SBE
// l_SBEobj.main_sbe_handler(l_target);
// //bool l_recoverySuccessful = l_SBEobj.getSbeRestart();

bool l_recoverySuccessful = false;
if (nullptr == g_hostInterfaces ||
nullptr == g_hostInterfaces->firmware_request)
{
Expand Down Expand Up @@ -183,8 +179,8 @@ void sbeAttemptRecovery(uint64_t i_data)
l_req_fw_msg.generic_msg.__req = GenericFspMboxMessage_t::REQUEST;
l_req_fw_msg.generic_msg.data = i_data;

// Set msgType based on recovery success or failure
if (l_recoverySuccessful)
// Set msgType based on recovery success or failure (If sbe made it back to runtime)
if (l_SBEobj.isSbeAtRuntime())
{
l_req_fw_msg.generic_msg.msgType =
GenericFspMboxMessage_t::MSG_SBE_RECOVERY_SUCCESS;
Expand Down

0 comments on commit 5192636

Please sign in to comment.