Skip to content

Commit

Permalink
PRD: NVDIMM keep log hidden if no error found during analysis
Browse files Browse the repository at this point in the history
Change-Id: I56d3444b2b9a87e34ce03da9a7a805cdc629d573
CQ: SW476229
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83930
Reviewed-by: Paul Greenwood <paul.greenwood@ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J Stegmiller <bjs@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamen G Tyner <ben.tyner@ibm.com>
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
  • Loading branch information
cnpalmer authored and zane131 committed Sep 19, 2019
1 parent feef038 commit bfe5638
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 18 deletions.
1 change: 1 addition & 0 deletions src/usr/diag/prdf/common/plat/mem/prdfMemExtraSig.H
Expand Up @@ -111,6 +111,7 @@ PRDR_ERROR_SIGNATURE(EsTmpWarnLow, 0xffff0098, "", "NVDIMM Energy Source Temper
PRDR_ERROR_SIGNATURE(BelowWarnTh, 0xffff0099, "", "NVDIMM Below Warning Threshold");
PRDR_ERROR_SIGNATURE(IntNvdimmErr, 0xffff009A, "", "NVDIMM Intermittent error");
PRDR_ERROR_SIGNATURE(NotifStatErr, 0xffff009B, "", "NVDIMM Set Event Notification Status Error");
PRDR_ERROR_SIGNATURE(FirEvntGone, 0xffff009C, "", "NVDIMM Event Triggering the FIR no longer present");

#endif // __prdfMemExtraSig_H

56 changes: 38 additions & 18 deletions src/usr/diag/prdf/plat/mem/prdfP9Mca.C
Expand Up @@ -1072,9 +1072,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 2: ES_TEMP_WARNING
if ( bitList.count(2) )
{
// Make the log predictive and mask the FIR.
io_sc.service_data->SetThresholdMaskId(0);

// Read the ES_TEMP and ES_TEMP_WARNING_HIGH_THRESHOLD values
uint16_t msbEsTempReg = NVDIMM::i2cReg::ES_TEMP1;
uint16_t lsbEsTempReg = NVDIMM::i2cReg::ES_TEMP0;
Expand Down Expand Up @@ -1116,6 +1113,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// Callout NVDIMM low, no gard
io_sc.service_data->SetCallout( i_dimm, MRU_LOW, NO_GARD );

// Make the log predictive and mask the FIR.
io_sc.service_data->SetThresholdMaskId(0);

// Send message to PHYP that save/restore may work
o_rc = PlatServices::nvdimmNotifyProtChange( i_dimm,
NVDIMM::NVDIMM_RISKY_HW_ERROR );
Expand All @@ -1126,9 +1126,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 0: NVM_LIFETIME_WARNING
if ( bitList.count(0) )
{
// Make the log predictive, but do not mask the FIR
io_sc.service_data->setServiceCall();

// Adjust warning threshold.
uint16_t warnThReg = NVDIMM::i2cReg::NVM_LIFETIME_WARNING_THRESHOLD;
uint16_t errThReg = NVDIMM::i2cReg::NVM_LIFETIME_ERROR_THRESHOLD;
Expand All @@ -1138,6 +1135,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
firstWarn, statusErr );
if ( SUCCESS != o_rc ) break;

// Make the log predictive, but do not mask the FIR
io_sc.service_data->setServiceCall();

// If we got a set event notification status error, add the
// signature for that before adding the signature for the warning.
// Also do not take our normal callout action since we already will
Expand Down Expand Up @@ -1174,9 +1174,6 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
// BIT 1: ES_LIFETIME_WARNING
if ( bitList.count(1) )
{
// Make the log predictive, but do not mask the FIR
io_sc.service_data->setServiceCall();

// Adjust warning threshold.
uint16_t warnThReg = NVDIMM::i2cReg::ES_LIFETIME_WARNING_THRESHOLD;
uint16_t errThReg = NVDIMM::i2cReg::ES_LIFETIME_ERROR_THRESHOLD;
Expand All @@ -1186,6 +1183,9 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
firstWarn, statusErr );
if ( SUCCESS != o_rc ) break;

// Make the log predictive, but do not mask the FIR
io_sc.service_data->setServiceCall();

// If we got a set event notification status error, add the
// signature for that before adding the signature for the warning.
// Also do not take our normal callout action since we already will
Expand Down Expand Up @@ -1342,6 +1342,25 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
// BIT 0: Persistency Lost
if ( bitList.count(0) )
{
// Analyze Health Status0 Reg, Health Status1 Reg,
// and Error Threshold Status Reg
l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
l_rc = __analyzeErrorThrStatusReg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;

// If we didn't find any error, then keep the log hidden.
if ( !errFound )
{
io_sc.service_data->setSignature( i_chip->getHuid(),
PRDFSIG_FirEvntGone );
// Callout NVDIMM
io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
continue;
}

// EVENT_N cannot be retriggered on a new PERSISTENCY_LOST_ERROR
// if a previous PERSISTENCY_LOST_ERROR still exists. Meaning, we
// cannot detect/report multiple errors that happen at different
Expand All @@ -1351,23 +1370,24 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,

// Send message to PHYP that save/restore may work
l_rc = PlatServices::nvdimmNotifyProtChange( dimm,
NVDIMM::NVDIMM_RISKY_HW_ERROR );
NVDIMM::NVDIMM_RISKY_HW_ERROR );
if ( SUCCESS != l_rc ) continue;

// Analyze Health Status0 Reg, Health Status1 Reg,
// and Error Threshold Status Reg
l_rc = __analyzeHealthStatus0Reg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
l_rc = __analyzeHealthStatus1Reg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
l_rc = __analyzeErrorThrStatusReg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;
}
// BIT 1: Warning Threshold Exceeded
else if ( bitList.count(1) )
{
l_rc = __analyzeWarningThrStatusReg( io_sc, dimm, errFound );
if ( SUCCESS != l_rc ) continue;

if ( !errFound )
{
io_sc.service_data->setSignature( i_chip->getHuid(),
PRDFSIG_FirEvntGone );
// Callout NVDIMM
io_sc.service_data->SetCallout( dimm, MRU_MED, NO_GARD );
continue;
}
}
// BIT 2: Persistency Restored
else if ( bitList.count(2) )
Expand Down

0 comments on commit bfe5638

Please sign in to comment.