Skip to content

Commit

Permalink
PRD: support to determine if BG scrub can resume after stop-on-error
Browse files Browse the repository at this point in the history
Change-Id: Ie2179b66bbe77ef1b982a6dfb1750734d6a9cc23
RTC: 192638
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58811
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59012
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
  • Loading branch information
zane131 committed May 18, 2018
1 parent c1c584f commit 1b04e45
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 110 deletions.
136 changes: 46 additions & 90 deletions src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.C
Original file line number Diff line number Diff line change
Expand Up @@ -385,96 +385,6 @@ uint32_t checkEccFirs<TYPE_MBA>( ExtensibleChip * i_chip,

//------------------------------------------------------------------------------

// MCBIST specialization: reports whether the stop conditions currently
// programmed in the MBSTR register look like those of background scrubbing.
template<>
uint32_t isBgScrubConfig<TYPE_MCBIST>( ExtensibleChip * i_chip,
                                       bool & o_isBgScrub )
{
    #define PRDF_FUNC "[isBgScrubConfig] "

    PRDF_ASSERT( nullptr != i_chip );
    PRDF_ASSERT( TYPE_MCBIST == i_chip->getType() );

    // Report "not background scrub" unless the register check below says
    // otherwise. This is also the value returned on a read failure.
    o_isBgScrub = false;

    // There is no direct way to identify a background scrub command: every
    // scrub command looks the same apart from its speed, and the speed can be
    // retuned by the hardware team. For now, rely on the stop conditions,
    // which should be unique to background scrub.

    SCAN_COMM_REGISTER_CLASS * mbstr = i_chip->getRegister( "MBSTR" );

    const uint32_t rc = mbstr->Read();
    if ( SUCCESS != rc )
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: i_chip=0x%08x",
                  i_chip->getHuid() );
        return rc;
    }

    // Kept as a single short-circuit expression so the fields are queried in
    // the same order, and only as far as needed.
    o_isBgScrub = ( 0xf != mbstr->GetBitFieldJustified(0,4) && // NCE int TH
                    0xf != mbstr->GetBitFieldJustified(4,4) && // NCE soft TH
                    0xf != mbstr->GetBitFieldJustified(8,4) && // NCE hard TH
                    mbstr->IsBitSet(34)                     && // pause on MPE
                    mbstr->IsBitSet(35) );                     // pause on UE

    return rc;

    #undef PRDF_FUNC
}

// MCA specialization: forwards the query to the MCBIST specialization using
// the MCBIST chip connected as the parent of this MCA.
template<>
uint32_t isBgScrubConfig<TYPE_MCA>( ExtensibleChip * i_chip,
                                    bool & o_isBgScrub )
{
    PRDF_ASSERT( nullptr != i_chip );
    PRDF_ASSERT( TYPE_MCA == i_chip->getType() );

    // The parent lookup result is handed straight to the MCBIST version,
    // which performs its own sanity checks on the chip it receives.
    return isBgScrubConfig<TYPE_MCBIST>(
                            getConnectedParent( i_chip, TYPE_MCBIST ),
                            o_isBgScrub );
}

// MBA specialization: not implemented yet. It always reports that background
// scrub is not configured and returns SUCCESS.
template<>
uint32_t isBgScrubConfig<TYPE_MBA>( ExtensibleChip * i_chip,
                                    bool & o_isBgScrub )
{
    #define PRDF_FUNC "[isBgScrubConfig] "

    PRDF_ASSERT( nullptr != i_chip );
    PRDF_ASSERT( TYPE_MBA == i_chip->getType() );

    // There is no direct way to identify a background scrub command: every
    // scrub command looks the same apart from its speed, and the speed can be
    // retuned by the hardware team. The intent is to use the stop conditions,
    // which should be unique to background scrub.

    // TODO RTC 157888
    o_isBgScrub = false;

    return SUCCESS;

    #undef PRDF_FUNC
}

//------------------------------------------------------------------------------

template<>
uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip,
const MemRank & i_rank )
Expand Down Expand Up @@ -534,5 +444,51 @@ uint32_t setBgScrubThresholds<TYPE_MBA>( ExtensibleChip * i_chip,

//------------------------------------------------------------------------------

// MBA specialization: compares the current maintenance address against the end
// address of the range (selected by i_rangeType) that contains the current
// rank. o_stoppedOnLastAddr is false whenever a non-SUCCESS code is returned.
template<>
uint32_t didCmdStopOnLastAddr<TYPE_MBA>( ExtensibleChip * i_chip,
                                         AddrRangeType i_rangeType,
                                         bool & o_stoppedOnLastAddr )
{
    #define PRDF_FUNC "[didCmdStopOnLastAddr] "

    // Same precondition checks as the other specializations in this file.
    // Without them a bad chip pointer would be dereferenced below (e.g. in the
    // error traces) instead of being caught up front.
    PRDF_ASSERT( nullptr != i_chip );
    PRDF_ASSERT( TYPE_MBA == i_chip->getType() );

    uint32_t o_rc = SUCCESS;

    o_stoppedOnLastAddr = false;

    do
    {
        // Get the current address.
        MemAddr curAddr;
        o_rc = getMemMaintAddr<TYPE_MBA>( i_chip, curAddr );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
                      i_chip->getHuid() );
            break;
        }

        // Get the end address of the range containing the current rank. The
        // extent of that range is determined by i_rangeType. The start address
        // is a required output of the query but is not needed here.
        MemAddr startAddr, endAddr;
        o_rc = getMemAddrRange<TYPE_MBA>( i_chip, curAddr.getRank(), startAddr,
                                          endAddr, i_rangeType );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "getMemAddrRange(0x%08x,0x%02x) failed",
                      i_chip->getHuid(), curAddr.getRank().getKey() );
            break;
        }

        // The command stopped on the last address only if both match.
        o_stoppedOnLastAddr = ( curAddr == endAddr );

    } while (0);

    return o_rc;

    #undef PRDF_FUNC
}

//------------------------------------------------------------------------------

} // end namespace PRDF

23 changes: 13 additions & 10 deletions src/usr/diag/prdf/plat/mem/prdfMemScrubUtils.H
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,6 @@ enum MaintEccAttns
template<TARGETING::TYPE T>
uint32_t checkEccFirs( ExtensibleChip * i_chip, uint32_t & o_eccAttns );

/**
 * @brief  Checks if the command currently configured in hardware is background
 *         scrubbing.
 * @note   The MCBIST check is based on the stop conditions found in the MBSTR
 *         register. An MCA query is answered by its connected parent MCBIST.
 *         The MBA version is not implemented yet and always reports FALSE.
 * @param  i_chip      MCBIST, MCA, or MBA. Must not be null and must match the
 *                     template type (asserted).
 * @param  o_isBgScrub TRUE if background scrub is configured, FALSE otherwise.
 *                     Also FALSE when a non-SUCCESS code is returned.
 * @return Non-SUCCESS on SCOM failures, SUCCESS otherwise.
 */
template<TARGETING::TYPE T>
uint32_t isBgScrubConfig( ExtensibleChip * i_chip, bool & o_isBgScrub );

/**
* @brief Sets the ETE thresholds needed for background scrubbing.
* @param i_chip An MBA.
Expand All @@ -174,6 +164,19 @@ template<TARGETING::TYPE T>
uint32_t setBgScrubThresholds( ExtensibleChip * i_chip,
const MemRank & i_rank );

/**
 * @brief  Determines whether the current maintenance command stopped on the
 *         last address of the address range containing the current rank.
 * @param  i_chip              MBA.
 * @param  i_rangeType         See enum AddrRangeType.
 * @param  o_stoppedOnLastAddr True, if the current maintenance command stopped
 *                             on the last address of the given rank range.
 *                             False, otherwise (including when a non-SUCCESS
 *                             code is returned).
 * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
 */
template<TARGETING::TYPE T>
uint32_t didCmdStopOnLastAddr( ExtensibleChip * i_chip,
AddrRangeType i_rangeType,
bool & o_stoppedOnLastAddr );

} //end namespace PRDF

#endif // __prdfMemScrubUtils_H
17 changes: 7 additions & 10 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C
Original file line number Diff line number Diff line change
Expand Up @@ -348,21 +348,18 @@ uint32_t MemTdCtlr<T>::analyzeCmdComplete( bool & o_errorsFound,
if ( iv_queue.empty() )
{
// The queue is empty so it is possible that background scrubbing
// only stopped for FFDC. Simply resume the command instead of
// starting a new one. Note that it is possible to get here if we
// were running a TD procedure and the PRD service is reset.
// Therefore, we must check if background scrubbing was actually
// configured.
bool isBgScrub;
o_rc = isBgScrubConfig<T>( iv_chip, isBgScrub );
// only stopped for FFDC. If possible, simply resume the command
// instead of starting a new one. This must be checked here instead
// of in defaultStep() because a TD procedure could have been run
// before defaultStep() and it is possible that canResumeBgScrub()
could give us a false positive in that case.
o_rc = canResumeBgScrub( iv_resumeBgScrub );
if ( SUCCESS != o_rc )
{
PRDF_ERR( PRDF_FUNC "isBgScrubConfig(0x%08x) failed",
PRDF_ERR( PRDF_FUNC "canResumeBgScrub(0x%08x) failed",
iv_chip->getHuid() );
break;
}

if ( isBgScrub ) iv_resumeBgScrub = true;
}
else
{
Expand Down
7 changes: 7 additions & 0 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,13 @@ class MemTdCtlr
*/
uint32_t unmaskEccAttns();

/**
 * @brief  Determines whether background scrubbing can simply be resumed or
 *         whether a new background scrub command must be started.
 * @param  o_canResume True, if background scrubbing can be resumed. False,
 *                     if a new background scrub command must be started.
 *                     Also false when a non-SUCCESS code is returned.
 * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
 */
uint32_t canResumeBgScrub( bool & o_canResume );

#endif

private: // instance variables
Expand Down
73 changes: 73 additions & 0 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C
Original file line number Diff line number Diff line change
Expand Up @@ -1149,6 +1149,79 @@ uint32_t MemTdCtlr<TYPE_MBA>::handleRrFo()

//------------------------------------------------------------------------------

// MCBIST specialization: background scrub can be resumed only if the stop
// conditions in the MBSTR register look like those of background scrubbing.
template<>
uint32_t MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub( bool & o_canResume )
{
    #define PRDF_FUNC "[MemTdCtlr<TYPE_MCBIST>::canResumeBgScrub] "

    // Default to "start a new command". This is also the answer given when
    // the register read fails.
    o_canResume = false;

    // A TD procedure may have been running when the PRD service was reset, so
    // we must confirm that background scrubbing was actually configured. There
    // is no direct way to do that: every scrub command looks the same apart
    // from its speed, and the speed can be retuned by the hardware team. For
    // now, rely on the stop conditions, which should be unique to background
    // scrub.

    SCAN_COMM_REGISTER_CLASS * mbstr = iv_chip->getRegister( "MBSTR" );

    const uint32_t rc = mbstr->Read();
    if ( SUCCESS != rc )
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on MBSTR: iv_chip=0x%08x",
                  iv_chip->getHuid() );
        return rc;
    }

    // Kept as a single short-circuit expression so the fields are queried in
    // the same order, and only as far as needed.
    o_canResume = ( 0xf != mbstr->GetBitFieldJustified(0,4) && // NCE int TH
                    0xf != mbstr->GetBitFieldJustified(4,4) && // NCE soft TH
                    0xf != mbstr->GetBitFieldJustified(8,4) && // NCE hard TH
                    mbstr->IsBitSet(34)                     && // pause on MPE
                    mbstr->IsBitSet(35) );                     // pause on UE

    return rc;

    #undef PRDF_FUNC
}

// MBA specialization: background scrub can be resumed unless the command
// stopped on the last address of the current slave rank.
template<>
uint32_t MemTdCtlr<TYPE_MBA>::canResumeBgScrub( bool & o_canResume )
{
    #define PRDF_FUNC "[MemTdCtlr<TYPE_MBA>::canResumeBgScrub] "

    // Default to "start a new command". This is also the answer given when
    // the address query fails.
    o_canResume = false;

    // A TD procedure may have been running when the PRD service was reset.
    // As long as the command did not stop on the last address of the current
    // slave rank, we simply "resume" from the next address to the end of the
    // rank. Unlike MCBIST, an MBA resume actually starts a new command, so the
    // command can be started blindly without assessing what type of command
    // was actually running.

    bool stoppedOnLast = false;
    const uint32_t rc = didCmdStopOnLastAddr<TYPE_MBA>( iv_chip, SLAVE_RANK,
                                                        stoppedOnLast );
    if ( SUCCESS != rc )
    {
        PRDF_ERR( PRDF_FUNC "didCmdStopOnLastAddr(0x%08x) failed",
                  iv_chip->getHuid() );
        return rc;
    }

    // Nothing is left to resume if the command already reached the end.
    o_canResume = !stoppedOnLast;

    return rc;

    #undef PRDF_FUNC
}

//------------------------------------------------------------------------------

// Avoid linker errors with the template.
template class MemTdCtlr<TYPE_MCBIST>;
Expand Down

0 comments on commit 1b04e45

Please sign in to comment.