Skip to content

Commit

Permalink
PRD: UE callout rank if no dimm callout
Browse files Browse the repository at this point in the history
Change-Id: Ic8254be5e5d33b18f433f2e308aa23cd88687c7d
CQ: SW427493
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58709
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58724
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
  • Loading branch information
cnpalmer authored and zane131 committed May 16, 2018
1 parent b26bb8d commit 3f14f50
Showing 1 changed file with 24 additions and 1 deletion.
25 changes: 24 additions & 1 deletion src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
l_dqBitmap.getCaptureData( io_sc.service_data->GetCaptureData() );

// Add all DIMMs with bad bits to the callout list.
TargetHandleList callouts;
for ( uint8_t ps = 0; ps < DIMMS_PER_RANK::MBA; ps++ )
{
bool badDqs = false;
Expand All @@ -194,7 +195,7 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
rank, ps );
if ( l_dimm == nullptr ) continue;

io_sc.service_data->SetCallout( l_dimm, MRU_HIGH );
callouts.push_back( l_dimm );

if ( isMfgCeCheckingEnabled() )
{
Expand All @@ -204,6 +205,28 @@ uint32_t handleMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemAddr & i_addr,
}
}

if ( 0 == callouts.size() )
{
// It is possible the scrub counters have rolled over to zero
// due to a known DD1.0 hardware bug. In this case, the best
// we can do is call out both DIMMs because, at minimum, we know
// there was a UE; we just don't know where.
// NOTE: If this condition happens because of a DD2.0+ bug, the
// mssIplUeIsolation procedure will call out the Centaur.
callouts = getConnectedDimms( i_chip->getTrgt(), rank );
if ( 0 == callouts.size() )
{
PRDF_ERR( PRDF_FUNC "getConnectedDimms() failed" );
o_rc = FAIL; break;
}
}

// Call out all DIMMs in the list.
for ( auto & dimm : callouts )
{
io_sc.service_data->SetCallout( dimm, MRU_HIGH );
}

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand Down

0 comments on commit 3f14f50

Please sign in to comment.