Skip to content

Commit

Permalink
SBE PSU timeout during MBOX init causes task crash/HB TI
Browse files Browse the repository at this point in the history
- Handling a PSU DD Timeout involves invoking the SBE Retry
  handler, which can cause crashes (FAPI library not yet
  initialized) if the timeout happens very early in the IPL.
- Added a check if the FAPI Library was loaded, and added a
  call to handle the early timeout later in the IPL.

Change-Id: I199cf0302af916b3f6ffec598ccab04c031e48e6
RTC:433868
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61730
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Disable-CI: Nicholas E. Bofferding <bofferdn@us.ibm.com>
Reviewed-by: Prachi Gupta <pragupta@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
  • Loading branch information
rward15 authored and wghoffa committed Jul 5, 2018
1 parent adc91be commit 71397fd
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 12 deletions.
32 changes: 32 additions & 0 deletions src/include/usr/sbeio/sbe_psudd.H
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,28 @@ class SbePsu
uint8_t i_reqMsgs,
uint8_t i_rspMsgs);

/**
* @brief save information from an "early" error
*
* If an error occurs before the fapi2 library is loaded, this
* function will record the error state so it can be reported later.
* It sets the early-error flag and stores the plid and target; the
* saved values are consumed by processEarlyError().
*
* @param[in] i_plid Program log id for the error
* @param[in] i_target Proc target for PSU Request that caused an error
*/
void saveEarlyError(uint32_t i_plid,TARGETING::TargetHandle_t i_target);

/**
* @brief Did an error occur before the fapi2 library was loaded?
*/
bool earlyError() {return iv_earlyErrorOccurred;}

/**
* @brief If an "early" error was detected, then record and process it.
*
* When an early error has been saved, the SBE retry handler is run for
* the saved target and plid and the early-error flag is cleared. When
* no early error has been saved, only entry/exit traces are emitted.
*
* @return errlHndl_t nullptr in the current implementation; no error
*         log is assigned to the returned handle
*/
errlHndl_t processEarlyError();

protected:

/**
Expand Down Expand Up @@ -675,6 +697,16 @@ class SbePsu
*/
std::map<TARGETING::Target *, void *> iv_ffdcPackageBuffer;

/**
* @brief Variables needed to record an "early" error
*
* If an error occurs before the fapi2 library is loaded, then
* use these variables to record the error for later reporting.
*
* iv_earlyErrorPlid and iv_earlyErrorTarget are written together with
* the flag by saveEarlyError() and are only read by processEarlyError()
* when the flag is set.
* NOTE(review): only the flag appears in the visible constructor
* initializer list - confirm plid/target need no default value.
*/
bool iv_earlyErrorOccurred; // true while a saved early error is pending
uint32_t iv_earlyErrorPlid; // plid passed to saveEarlyError()
TARGETING::TargetHandle_t iv_earlyErrorTarget; // target passed to saveEarlyError()

/**
* @brief FFDC package needs to be 2 pages
*/
Expand Down
8 changes: 8 additions & 0 deletions src/usr/isteps/istep06/host_init_fsi.C
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
#include <targeting/common/utilFilter.H>
#include <targeting/common/target.H>

// SBE
#include <sbeio/sbeioif.H>
#include <sbeio/sbe_psudd.H>

using namespace TARGETING;
using namespace I2C;
using namespace TRUSTEDBOOT;
Expand Down Expand Up @@ -116,6 +120,10 @@ void* host_init_fsi( void *io_pArgs )
TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "host_init_fsi entry" );
do
{
// process SBE PSU errors that might have occurred before fapi was
// initialized
SBEIO::SbePsu::getTheInstance().processEarlyError();

l_err = FSI::initializeHardware( );
if (l_err)
{
Expand Down
2 changes: 1 addition & 1 deletion src/usr/mbox/mailboxsp.C
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ errlHndl_t MailboxSp::_init()
true); //true=Read-Write
if (err)
{
return err;
errlCommit(err,MBOX_COMP_ID);
}

if(mbxComm)
Expand Down
85 changes: 74 additions & 11 deletions src/usr/sbeio/sbe_psudd.C
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include <errno.h>
#include <sys/time.h>
#include <errl/errludprintk.H>
#include <vfs/vfs.H> // module_is_loaded

trace_desc_t* g_trac_sbeio;
TRAC_INIT(&g_trac_sbeio, SBEIO_COMP_NAME, 6*KILOBYTE, TRACE::BUFFER_SLOW);
Expand Down Expand Up @@ -89,6 +90,7 @@ void * SbePsu::msg_handler(void *unused)
**/
SbePsu::SbePsu()
:
iv_earlyErrorOccurred(false),
iv_psuResponse(nullptr),
iv_responseReady(false),
iv_shutdownInProgress(false)
Expand Down Expand Up @@ -319,6 +321,55 @@ errlHndl_t SbePsu::performPsuChipOp(TARGETING::Target * i_target,
return errl;
}

/**
* @brief record info from an "early" error so it can be reported later
*
* If an error occurs before the fapi2 library is loaded, this function
* can be used to record the details for later reporting.
*
* @param[in] i_plid Program log id for the error
* @param[in] i_target Proc target for PSU Request that caused error
*/
void SbePsu::saveEarlyError(uint32_t i_plid, TARGETING::TargetHandle_t i_target)
{
SBE_TRACD(ENTER_MRK "saveEarlyError");

iv_earlyErrorOccurred = true;
iv_earlyErrorPlid = i_plid;
iv_earlyErrorTarget = i_target;

SBE_TRACD(ENTER_MRK "saveEarlyError");
}

/**
 * @brief Handle a PSU error that was saved before fapi2 was available.
 *
 * If saveEarlyError() has flagged an error, trace it, run the SBE retry
 * handler in INFORMATIONAL_ONLY mode with the saved plid against the
 * saved target, and then clear the flag. With no saved error, only the
 * entry/exit traces are emitted.
 *
 * @return errlHndl_t  Always nullptr; this function never assigns an
 *                     error log to return.
 */
errlHndl_t SbePsu::processEarlyError()
{
    SBE_TRACD(ENTER_MRK "processEarlyError");

    // Same check as earlyError(), written out directly.
    if (iv_earlyErrorOccurred)
    {
        SBE_TRACF(ERR_MRK"processEarlyError: early error occurred"
                  ", plid=0x%X, target huid=0x%X",
                  iv_earlyErrorPlid, TARGETING::get_huid(iv_earlyErrorTarget));

        // Run the handling that was postponed when the error was saved.
        SbeRetryHandler l_retryHandler(
            SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
            iv_earlyErrorPlid);
        l_retryHandler.main_sbe_handler(iv_earlyErrorTarget);

        // The saved error has been consumed; do not process it twice.
        iv_earlyErrorOccurred = false;
        SBE_TRACF(ERR_MRK"processEarlyError: early error processed");
    }

    SBE_TRACD(EXIT_MRK "processEarlyError");

    return nullptr;
}

/**
* @brief write PSU Request message
*/
Expand Down Expand Up @@ -770,17 +821,29 @@ errlHndl_t SbePsu::pollForPsuComplete(TARGETING::Target * i_target,
HWAS::GARD_NULL );
}

// If the FFDC is empty, this error could be because the SBE
// isn't booted correctly. We need to check the state of the
// SBE.
// If we are on a FSP based system we expect this to result in a TI
// If we are on a BMC based system we expect to return from this fail
SbeRetryHandler l_SBEobj = SbeRetryHandler(
SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
l_errPlid);

l_SBEobj.main_sbe_handler(i_target);

if (!VFS::module_is_loaded("fapi2.so"))
{
// If the fapi library hasn't been loaded, we need to save
// the details of this error until it has, so the error can
// be logged.
SBE_TRACF("Timeout error saved until fapi is loaded.");
saveEarlyError(l_errPlid, i_target);
}
else
{
// If the FFDC is empty, this error could be because the SBE
// isn't booted correctly. We need to check the state of the
// SBE.
// If we are on a FSP based system we expect this to result
// in a TI
// If we are on a BMC based system we expect to return from
// this fail
SbeRetryHandler l_SBEobj = SbeRetryHandler(
SbeRetryHandler::SBE_MODE_OF_OPERATION::INFORMATIONAL_ONLY,
l_errPlid);

l_SBEobj.main_sbe_handler(i_target);
}
}
else
{
Expand Down

0 comments on commit 71397fd

Please sign in to comment.