Skip to content

Commit

Permalink
PRD: Set Signatures during Runtime TPS
Browse files (browse the repository at this point in the history)
Change-Id: I8f25b42e8e940d07a6c8be8a97e2458bcbfc4943
CQ: SW401831
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45976
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46807
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
  • Loading branch information
cnpalmer authored and zane131 committed Sep 29, 2017
1 parent 9d06cbb commit 8f05454
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 2 deletions.
9 changes: 9 additions & 0 deletions src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H
Expand Up @@ -49,6 +49,15 @@ PRDR_ERROR_SIGNATURE(VcmFalseAlarm, 0xffff0021, "", "VCM: false alarm");
PRDR_ERROR_SIGNATURE(VcmFalseAlarmTH, 0xffff0022, "", "VCM: false alarm threshold");
PRDR_ERROR_SIGNATURE(AllDramRepairs, 0xffff002F, "", "all DRAM repairs used");

// Signatures set by the runtime TPS analysis (IDs 0xffff0061 - 0xffff0068).
// Callers reference these with a PRDFSIG_ prefix, e.g.
// setSignature( huid, PRDFSIG_TpsSymbolMark ) in prdfMemTps_rt.C.
// NOTE(review): PRDR_ERROR_SIGNATURE is defined outside this view; the meaning
// of the empty third argument is not visible here -- confirm before changing.
PRDR_ERROR_SIGNATURE(TpsFalseAlarm, 0xffff0061, "", "TPS: false alarm");
PRDR_ERROR_SIGNATURE(TpsFalseAlarmTH, 0xffff0062, "", "TPS: false alarm threshold");
PRDR_ERROR_SIGNATURE(TpsSymbolMark, 0xffff0063, "", "TPS: symbol mark placed");
PRDR_ERROR_SIGNATURE(TpsChipMark, 0xffff0064, "", "TPS: chip mark placed");
PRDR_ERROR_SIGNATURE(TpsSymUeRisk, 0xffff0065, "", "TPS: placing symbol mark risks UE");
PRDR_ERROR_SIGNATURE(TpsChipUeRisk, 0xffff0066, "", "TPS: placing chip mark risks UE");
PRDR_ERROR_SIGNATURE(TpsPotentialUe, 0xffff0067, "", "TPS: potential UE");
PRDR_ERROR_SIGNATURE(TpsDramDisabled, 0xffff0068, "", "TPS: DRAM repairs disabled");

// MNFG IPL signatures (IDs 0xffff0051 - 0xffff0053).
// NOTE(review): these IDs are numerically lower than the TPS group listed
// above, so the entries in this file are not in ascending ID order.
PRDR_ERROR_SIGNATURE(MnfgIplHardCE, 0xffff0051, "", "MNFG IPL hard CE");
PRDR_ERROR_SIGNATURE(MnfgIplDramCTE, 0xffff0052, "", "MNFG IPL DRAM CTE");
PRDR_ERROR_SIGNATURE(MnfgIplRankCTE, 0xffff0053, "", "MNFG IPL rank CTE");
Expand Down
48 changes: 46 additions & 2 deletions src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
Expand Up @@ -531,6 +531,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
break;
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymbolMark );

// Update VPD with the symbol mark.
o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
if ( SUCCESS != o_rc )
Expand All @@ -551,6 +554,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymUeRisk );

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand Down Expand Up @@ -599,6 +605,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
break;
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymbolMark );

// Update VPD with both symbols.
for ( auto sym : i_badDqCount.symList )
{
Expand All @@ -623,6 +632,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymUeRisk );

// Make the error log predictive.
io_sc.service_data->setServiceCall();
}
Expand Down Expand Up @@ -669,11 +681,13 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
newChipMark );
if ( SUCCESS != o_rc )
{
-PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
+PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
"failed", iv_chip->getHuid(), getKey() );
break;
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsChipMark );
// Update VPD with the chip mark.
o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
if ( SUCCESS != o_rc )
Expand All @@ -694,6 +708,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsChipUeRisk );

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand Down Expand Up @@ -738,11 +755,14 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
newChipMark );
if ( SUCCESS != o_rc )
{
-PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
+PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
"failed", iv_chip->getHuid(), getKey() );
break;
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsChipMark );

// Update VPD with the chip mark.
o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
if ( SUCCESS != o_rc )
Expand All @@ -765,6 +785,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsChipUeRisk );

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand Down Expand Up @@ -792,6 +816,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
break;
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymbolMark );

// Update VPD with the symbol mark.
o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
if ( SUCCESS != o_rc )
Expand All @@ -814,6 +841,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsSymUeRisk );

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand All @@ -834,6 +865,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
"_RANK::MCA>() failed." );
}

io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsPotentialUe );

// Make the error log predictive.
io_sc.service_data->setServiceCall();

Expand All @@ -844,11 +879,17 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
// If analysis resulted in a false alarm.
if ( tpsFalseAlarm )
{
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsFalseAlarm );

// Increase false alarm counter.
// If false alarm counter threshold of 3 per day is reached.
if ( __getTpsFalseAlarmCounter<TYPE_MCA>(iv_chip)->inc( iv_rank,
io_sc) )
{
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsFalseAlarmTH );

// Permanently mask mainline NCEs and TCEs
getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;

Expand Down Expand Up @@ -1019,6 +1060,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCe( STEP_CODE_DATA_STRUCT & io_sc )
// abort this procedure.
if ( areDramRepairsDisabled() )
{
io_sc.service_data->setSignature( iv_chip->getHuid(),
PRDFSIG_TpsDramDisabled );

io_sc.service_data->setServiceCall();
break;
}
Expand Down

0 comments on commit 8f05454

Please sign in to comment.