Skip to content

Commit

Permalink
PRD: initializing MemTdCtlr variables for broadcast mode
Browse files Browse the repository at this point in the history
Change-Id: I4982ef4c27f694e35471cad3e3ec57eedfa2632e
CQ: SW418155
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/54548
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
  • Loading branch information
zane131 committed Feb 23, 2018
1 parent a681d51 commit 5324435
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 113 deletions.
58 changes: 4 additions & 54 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )

do
{
#ifdef __HOSTBOOT_RUNTIME

// Make sure the TD controller is initialized.
o_rc = initialize();
if ( SUCCESS != o_rc )
Expand All @@ -96,7 +94,7 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
break;
}

#else // IPL only
#ifndef __HOSTBOOT_RUNTIME // IPL only

// TODO: RTC 179251 asserting here doesn't give us enough FFDC to debug
// why we got this erroneous attention. Eventually, we will want
Expand Down Expand Up @@ -142,36 +140,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
// informational error logs.
if ( !errorsFound ) io_sc.service_data->setDontCommitErrl();
}
else
{
// Make sure iv_stoppedRank still gets updated.
std::vector<ExtensibleChip *> portList;
o_rc = getMcbistMaintPort( iv_chip, portList );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getMcbistMaintPort(0x%08x) failed",
iv_chip->getHuid() );
break;
}

// In broadcast mode, the rank configuration for all ports will be
// the same. In non-broadcast mode, there will only be one MCA in
// the list. Therefore, we can simply use the first MCA in the list
// for all configs.
ExtensibleChip * stopChip = portList.front();

// Get the address in which the command stopped.
MemAddr addr;
o_rc = getMemMaintAddr<T>( iv_chip, addr );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "getMemMaintAddr<T>(0x%08x) failed",
iv_chip->getHuid() );
break;
}

iv_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, addr );
}

// Move onto the next step in the state machine.
o_rc = nextStep( io_sc );
Expand All @@ -188,7 +156,10 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )
// successfully with no errors because the error log will not be
// committed.
if ( !io_sc.service_data->queryDontCommitErrl() )
{
collectStateCaptureData( io_sc, TD_CTLR_DATA::END );
MemCaptureData::addEccData<T>( iv_chip, io_sc );
}

if ( SUCCESS != o_rc )
{
Expand Down Expand Up @@ -219,10 +190,6 @@ uint32_t MemTdCtlr<T>::handleCmdComplete( STEP_CODE_DATA_STRUCT & io_sc )

#endif
}
else
{
collectStateCaptureData( io_sc, TD_CTLR_DATA::END );
}

return o_rc;

Expand All @@ -249,9 +216,6 @@ template<TARGETING::TYPE T>
uint32_t __analyzeCmdComplete( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
#ifndef __HOSTBOOT_RUNTIME
bool & o_broadcastMode,
#endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc );
Expand All @@ -260,9 +224,6 @@ template<>
uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
#ifndef __HOSTBOOT_RUNTIME
bool & o_broadcastMode,
#endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
Expand Down Expand Up @@ -294,11 +255,6 @@ uint32_t __analyzeCmdComplete<TYPE_MCBIST>( ExtensibleChip * i_chip,
// Update iv_stoppedRank.
o_stoppedRank = __getStopRank<TYPE_MCA>( stopChip, i_addr );

#ifndef __HOSTBOOT_RUNTIME
// Update iv_broadcastMode.
o_broadcastMode = ( 1 < portList.size() );
#endif

// Check each MCA for ECC errors.
for ( auto & mcaChip : portList )
{
Expand Down Expand Up @@ -330,9 +286,6 @@ template<>
uint32_t __analyzeCmdComplete<TYPE_MBA>( ExtensibleChip * i_chip,
TdQueue & io_queue,
TdRankListEntry & o_stoppedRank,
#ifndef __HOSTBOOT_RUNTIME
bool & o_broadcastMode,
#endif
const MemAddr & i_addr,
bool & o_errorsFound,
STEP_CODE_DATA_STRUCT & io_sc )
Expand Down Expand Up @@ -371,9 +324,6 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound,

// Then, check for ECC errors, if they exist.
o_rc = __analyzeCmdComplete<T>( iv_chip, iv_queue, iv_stoppedRank,
#ifndef __HOSTBOOT_RUNTIME
iv_broadcastMode,
#endif
addr, o_errorsFound, io_sc );
if ( SUCCESS != o_rc )
{
Expand Down
92 changes: 50 additions & 42 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -57,25 +57,23 @@ class MemTdCtlr
* This constructor will only be called in the MCBIST or MBA data bundle,
* which already checks for a valid type.
*
* Need to initialize iv_stoppedRank to a valid entry in iv_rankList. Use
* the last entry in the list so that the 'next' rank is the first entry
* in the list.
*
* @param i_chip An MCBIST or MBA chip.
*/
explicit MemTdCtlr( ExtensibleChip * i_chip ) :
iv_chip( i_chip ), iv_rankList( i_chip ),
iv_stoppedRank( i_chip, MemRank(0) )
iv_stoppedRank( iv_rankList.getList().back() )
{
PRDF_ASSERT( T == iv_chip->getType() );
if ( TARGETING::TYPE_MCBIST == i_chip->getType() )
{
ExtensibleChip * mcaChip =
PlatServices::getConnectedChild(i_chip, TARGETING::TYPE_MCA, 0);
iv_stoppedRank = TdRankListEntry( mcaChip, MemRank(0) );
}
}

/**
* @brief Determines and executes the next course of action after a
* maintenance command complete attention.
* @note Initializes the TD controller, if needed (runtime only).
* @note Initializes the TD controller, if needed.
* @param io_sc The step code data struct.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
Expand All @@ -97,8 +95,8 @@ class MemTdCtlr
* requests will be ignored. Any chip marks placed during this time will be
* redetected when the runtime TD controller is initialized.
*
* During IPL, this will simply add a new procedure to the queue, since we
* know TD will already be in progress when this is called.
* During MemDiags, this will simply add a new procedure to the queue, since
* we know a TD procedure will already be in progress when this is called.
*
* @note Initializes the TD controller, if needed.
* @param io_sc The step code data struct.
Expand All @@ -118,14 +116,23 @@ class MemTdCtlr

/**
* @brief Handles reset-reload or FO scenario.
* @note This function will check if PRD was unable to restart maintenance
* command before R/R or FO. In that scenario, this function will
* start maintenance command. As during R/R or F/O we do not have any
* mechanism to restore the complete state of TD controller, we will
* not start any interrupted or pending TD procedure. We will only
* start BG scrub. If we found any chip marks during TD state
* machine initialize we will start VCM procedure rather than
* BG scrub.
*
* This does not call initialize() or start any maintenance commands.
* Instead, it checks the hardware's current state and ensures by the end of
* the function that either a command is currently running or there will be
* a command complete attention pending that PRD will handle separately.
*
* If there is already an active command complete attention, this function
* does nothing because PRD will handle the attention soon.
*
* If there is no active command complete attention and there is no command
* currently in progress, it will set the command complete attention and PRD
* will handle that attention soon.
*
* Otherwise, there is a command in progress. So, it will check for any
* unverified chip marks. If any exist, it will force the current command to
* stop, causing a command complete attention that PRD will handle soon.
*
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
uint32_t handleRrFo();
Expand All @@ -134,6 +141,24 @@ class MemTdCtlr

private:

/**
* @brief Initializes the TD controller, if needed.
*
* This should be called at the beginning of every public function to ensure
* the TD controller is initialized.
*
* During MemDiags, this initializes iv_broadcastModeCapable.
*
* At runtime, this is used to query hardware for any unverified chip marks
* that may have occurred after starting background scrubbing, but before
* PRD is up and running. We may also have unverified chip marks if the HBRT
* service is stopped and restarted (PRD is reinitialized and all previous
* state machine data is lost).
*
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
uint32_t initialize();

/**
* @brief This is called when there are no more TD procedures to execute.
*
Expand Down Expand Up @@ -198,8 +223,8 @@ class MemTdCtlr

/**
* @brief This is called when handling a command complete attention for a
* non-TD command to initialize iv_stoppedRank and iv_broadcastMode
* then check for any ECC errors.
* non-TD command to initialize iv_stoppedRank then check for any
* ECC errors.
* @param o_errorsFound True if errors were found and handled. False
* otherwise.
* @param io_sc The step code data struct.
Expand Down Expand Up @@ -237,22 +262,6 @@ class MemTdCtlr
*/
uint32_t unmaskEccAttns();

/**
* @brief Initializes the TD controller, if needed.
*
* This is only supported during runtime. This is mostly useful at runtime
* to query hardware for any unverified chip marks. Those may occur after
* starting background scrubbing, but before PRD is up and running. We may
* also have unverified chip marks if the HBRT service is stopped and
* restarted (PRD is reinitialized and all previous state machine data is
* lost).
*
* @note Should be called at the beginning of every public function to
* ensure the TD controller is initialized.
* @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
*/
uint32_t initialize();

#endif

private: // instance variables
Expand All @@ -276,21 +285,20 @@ class MemTdCtlr
* diagnostics are complete. */
TdRankListEntry iv_stoppedRank;

#ifdef __HOSTBOOT_RUNTIME

/** True if the TD controller has been initialized. False otherwise. */
bool iv_initialized = false;

#ifdef __HOSTBOOT_RUNTIME

/** True if background scrubbing should be resumed after pausing on error.
* False if a TD procedure had been executed and background scrubbing needs
* to be restarted with a new command. */
bool iv_resumeBgScrub = false;

#else // IPL only

/** Combined with iv_stoppedRank. Indicates if the non-TD command that
* stopped was in broadcast mode or not. */
bool iv_broadcastMode = false;
/** Indicates if broadcast mode is capable on iv_chip. */
bool iv_broadcastModeCapable = false;

#endif

Expand Down
30 changes: 28 additions & 2 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -64,6 +64,32 @@ uint32_t MemTdCtlr<T>::handleTdEvent( STEP_CODE_DATA_STRUCT & io_sc,

//------------------------------------------------------------------------------

template <TARGETING::TYPE T>
uint32_t MemTdCtlr<T>::initialize()
{
    #define PRDF_FUNC "[MemTdCtlr::initialize] "

    // One-time setup: every call after the first falls straight through to
    // the return below without touching any state.
    if ( !iv_initialized )
    {
        // Record whether broadcast mode is capable on this chip.
        iv_broadcastModeCapable = isBroadcastModeCapable<T>( iv_chip );

        // Mark the TD controller as initialized so this block is skipped
        // on subsequent calls.
        iv_initialized = true;
    }

    // Nothing in this function can fail, so the result is always SUCCESS.
    return SUCCESS;

    #undef PRDF_FUNC
}

//------------------------------------------------------------------------------

template <TARGETING::TYPE T>
uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc )
{
Expand All @@ -72,7 +98,7 @@ uint32_t MemTdCtlr<T>::defaultStep( STEP_CODE_DATA_STRUCT & io_sc )
uint32_t o_rc = SUCCESS;

TdRankListEntry nextRank = iv_rankList.getNext( iv_stoppedRank,
iv_broadcastMode );
iv_broadcastModeCapable );

do
{
Expand Down

0 comments on commit 5324435

Please sign in to comment.