Skip to content

Commit

Permalink
PRD: Axone MemEccAnalysis Updates
Browse files Browse the repository at this point in the history
Change-Id: Id553eb5d4ab2dd716331f7ffbda12b18096bd2bd
RTC: 208211
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/80110
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/81057
Reviewed-by: Zane C Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
  • Loading branch information
cnpalmer authored and zane131 committed Jul 30, 2019
1 parent 81c225b commit 6751459
Show file tree
Hide file tree
Showing 10 changed files with 560 additions and 73 deletions.
4 changes: 2 additions & 2 deletions src/usr/diag/prdf/common/plat/explorer/explorer_ocmb.rule
Expand Up @@ -1004,7 +1004,7 @@ rule rRDFFIR

group gRDFFIR
filter singlebit,
cs_root_cause(14,15,17,35,37,40)
cs_root_cause(14,15,17,35,37)
{
/** RDFFIR[0]
* Mainline read MPE on rank 0
Expand Down Expand Up @@ -1174,7 +1174,7 @@ group gRDFFIR
/** RDFFIR[40]
* RDDATA valid error
*/
(rRDFFIR, bit(40)) ? rdf_rcd_parity_error_UERE;
(rRDFFIR, bit(40)) ? mem_port_th_32perDay;

/** RDFFIR[41]
* SCOM status register parity error
Expand Down
47 changes: 33 additions & 14 deletions src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_actions.rule
Expand Up @@ -299,58 +299,77 @@ actionclass verify_chip_mark_5 { funccall("AnalyzeFetchMpe_5"); };
actionclass verify_chip_mark_6 { funccall("AnalyzeFetchMpe_6"); };
actionclass verify_chip_mark_7 { funccall("AnalyzeFetchMpe_7"); };

# TODO RTC 208211
/**
 * Mainline NCE/TCE handling.
 * Calls the AnalyzeFetchNceTce plugin to analyze the attention.
 * NOTE(review): TBDDefaultCallout is listed next to the plugin call; confirm
 * whether the placeholder callout is still intended here.
 */
actionclass mainline_nce_tce_handling
{
TBDDefaultCallout;
funccall("AnalyzeFetchNceTce");
};

/**
 * Handle Mainline AUEs/IAUEs.
 * Calls the AnalyzeFetchAueIaue plugin, then applies the mem_port_L and
 * threshold1 actions.
 * NOTE(review): TBDDefaultCallout is listed next to the plugin call; confirm
 * whether the placeholder callout is still intended here.
 */
actionclass mainline_aue_iaue_handling
{
TBDDefaultCallout;
funccall("AnalyzeFetchAueIaue");
mem_port_L;
threshold1;
};

/**
 * Mainline UE handling.
 * Applies a threshold of 33 per 30 minutes (rationale in the inline note
 * below) and calls the AnalyzeFetchUe plugin.
 * NOTE(review): TBDDefaultCallout is listed next to the plugin call; confirm
 * whether the placeholder callout is still intended here.
 */
actionclass mainline_ue_handling
{
TBDDefaultCallout;
threshold( field(33 / 30 min ) ); # To prevent flooding. Will be unmasked
# when background scrubbing resumes after
# targeted diagnostics is complete.
funccall("AnalyzeFetchUe");
};

/**
 * Mainline UE handling with the attention additionally flagged via SueSource;
 * the rest of the work is delegated to mainline_ue_handling.
 * NOTE(review): TBDDefaultCallout is also listed; confirm whether the
 * placeholder callout is still intended here.
 */
actionclass mainline_ue_handling_UERE
{
TBDDefaultCallout;
SueSource;
mainline_ue_handling;
};

/**
 * Handle Mainline IUEs.
 * Flags the attention with SueSource (reasoning in the inline note below) and
 * calls the AnalyzeMainlineIue plugin, which owns the thresholding.
 * NOTE(review): TBDDefaultCallout is also listed; confirm whether the
 * placeholder callout is still intended here.
 */
actionclass mainline_iue_handling
{
TBDDefaultCallout;
# An IUE itself is not a SUE source, however, a threshold of IUEs will
# trigger a port failure, which will generate SUEs. The port failure could
# also crash the machine so we want to make sure this bit is flagged as an
# SUE just in case it is needed in the checkstop analysis.
SueSource;
# Thresholding done in the plugin
funccall("AnalyzeMainlineIue");
};

/**
 * Handle Maintenance IUEs.
 * Flags the attention with SueSource (reasoning in the inline note below) and
 * calls the AnalyzeMaintIue plugin, which owns the thresholding.
 * NOTE(review): TBDDefaultCallout is also listed; confirm whether the
 * placeholder callout is still intended here.
 */
actionclass maintenance_iue_handling
{
TBDDefaultCallout;
# An IUE itself is not a SUE source, however, a threshold of IUEs will
# trigger a port failure, which will generate SUEs. The port failure could
# also crash the machine so we want to make sure this bit is flagged as an
# SUE just in case it is needed in the checkstop analysis.
SueSource;
# Thresholding done in the plugin
funccall("AnalyzeMaintIue");
};

/**
 * IMPE handling: calls the AnalyzeImpe plugin.
 * NOTE(review): TBDDefaultCallout is listed next to the plugin call; confirm
 * whether the placeholder callout is still intended here.
 */
actionclass memory_impe_handling
{
TBDDefaultCallout;
funccall("AnalyzeImpe");
};

/**
 * Handle Maintenance AUEs.
 * Calls the AnalyzeMaintAue plugin, then applies the mem_port_L and
 * threshold1 actions.
 * NOTE(review): TBDDefaultCallout is listed next to the plugin call; confirm
 * whether the placeholder callout is still intended here.
 */
actionclass maintenance_aue_handling
{
TBDDefaultCallout;
funccall("AnalyzeMaintAue");
mem_port_L;
threshold1;
};

/**
 * Handle Maintenance IAUEs.
 * No plugin is involved: applies the all_dimm_H_memport_L and threshold1
 * actions.
 * NOTE(review): TBDDefaultCallout is also listed; confirm whether the
 * placeholder callout is still intended here.
 */
actionclass maintenance_iaue_handling
{
TBDDefaultCallout;
all_dimm_H_memport_L;
threshold1;
};

/** RDF RCD Parity Error */
Expand All @@ -369,8 +388,7 @@ actionclass rdf_rcd_parity_error_UERE
/**
 * SRQ RCD Parity Error.
 * Thresholded with threshold32pday.
 * NOTE(review): both the explicit CalloutAttachedDimmsHigh plugin call plus
 * MRU_LOW callout of the connected MEM_PORT and the all_dimm_H_memport_L
 * action are listed; they look like they may describe the same callouts --
 * confirm that both are intended.
 */
actionclass srq_rcd_parity_error
{
funccall("CalloutAttachedDimmsHigh");
callout(connected(TYPE_MEM_PORT,0), MRU_LOW);
all_dimm_H_memport_L;
threshold32pday;
};

Expand All @@ -382,14 +400,15 @@ actionclass srq_rcd_parity_error_UERE

/**
 * Memory port failure: applies the all_dimm_H_memport_L action with a
 * threshold of 1.
 * NOTE(review): TBDDefaultCallout is also listed; confirm whether the
 * placeholder callout is still intended here.
 */
actionclass mem_port_failure
{
TBDDefaultCallout;
all_dimm_H_memport_L;
threshold1; # Threshold 1
};

################################################################################
# Analyze groups
################################################################################

actionclass analyzeOCMB_LFIR { analyze(gOCMB_LFIR); };
actionclass analyzeOCMB_LFIR { analyze(gOCMB_LFIR); };
actionclass analyzeMMIOFIR { analyze(gMMIOFIR); };
actionclass analyzeSRQFIR { analyze(gSRQFIR); };
actionclass analyzeMCBISTFIR { analyze(gMCBISTFIR); };
Expand Down
19 changes: 19 additions & 0 deletions src/usr/diag/prdf/common/plat/explorer/explorer_ocmb_regs.rule
Expand Up @@ -430,3 +430,22 @@
capture group default;
};

############################################################################
# Misc
############################################################################

# FARB0 register, included in the default capture group.
# NOTE(review): nothing visible here reads this register directly; confirm
# which analysis code depends on it.
register FARB0
{
name "MB_SIM.SRQ.MBA_FARB0Q";
scomaddr 0x08011415;
capture group default;
};

# Explorer Mark Shadow Register, included in the default capture group.
# NOTE(review): nothing visible here reads this register directly; confirm
# which analysis code depends on it.
register EXP_MSR
{
name "Explorer Mark Shadow Register";
scomaddr 0x08011C0C;
capture group default;
};


170 changes: 170 additions & 0 deletions src/usr/diag/prdf/common/plat/explorer/prdfExplorerPlugins_common.C
Expand Up @@ -114,6 +114,8 @@ int32_t Ddr4PhyInterrupt( ExtensibleChip * i_chip,
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, Ddr4PhyInterrupt );

//------------------------------------------------------------------------------

/**
* @brief OCMB_LFIR[39:46] - Foxhound Fatal
* @param i_chip An OCMB chip.
Expand Down Expand Up @@ -215,6 +217,8 @@ int32_t CalloutAttachedDimmsHigh( ExtensibleChip * i_chip,
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, CalloutAttachedDimmsHigh );

//------------------------------------------------------------------------------

/**
* @brief RDF RCD Parity Error
* @param i_chip An OCMB chip.
Expand Down Expand Up @@ -275,6 +279,8 @@ int32_t RdfRcdParityError( ExtensibleChip * i_chip,
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, RdfRcdParityError );

//------------------------------------------------------------------------------

/**
* @brief RDFFIR[0:7] - Mainline MPE.
* @param i_chip OCMB chip.
Expand Down Expand Up @@ -302,6 +308,170 @@ PLUGIN_FETCH_MPE_ERROR( 7 )

#undef PLUGIN_FETCH_MPE_ERROR

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[8:9] - Mainline NCE and/or TCE.
 *
 * Thin plugin entry point that hands the attention over to
 * MemEcc::analyzeFetchNceTce<TYPE_OCMB_CHIP>().
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return Always SUCCESS.
 */
int32_t AnalyzeFetchNceTce( ExtensibleChip * i_chip,
                            STEP_CODE_DATA_STRUCT & io_sc )
{
    // Any outcome of the analysis is not inspected: the rule code has no use
    // for a status from this plugin.
    MemEcc::analyzeFetchNceTce<TYPE_OCMB_CHIP>( i_chip, io_sc );

    return SUCCESS;
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchNceTce );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[14] - Mainline UE.
 *
 * Thin plugin entry point that hands the attention over to
 * MemEcc::analyzeFetchUe<TYPE_OCMB_CHIP>().
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return Always SUCCESS.
 */
int32_t AnalyzeFetchUe( ExtensibleChip * i_chip,
                        STEP_CODE_DATA_STRUCT & io_sc )
{
    // Any outcome of the analysis is not inspected: the rule code has no use
    // for a status from this plugin.
    MemEcc::analyzeFetchUe<TYPE_OCMB_CHIP>( i_chip, io_sc );

    return SUCCESS;
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchUe );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[17] - Mainline read IUE.
 *
 * Runs MemEcc::analyzeMainlineIue<TYPE_OCMB_CHIP>() and, in Hostboot builds
 * only (__HOSTBOOT_MODULE), asks MemEcc::queryIueTh<TYPE_OCMB_CHIP>() whether
 * the IUE threshold has been reached.
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return PRD_NO_CLEAR_FIR_BITS if the IUE threshold query reports true
 *         (Hostboot builds only), else SUCCESS.
 */
int32_t AnalyzeMainlineIue( ExtensibleChip * i_chip,
                            STEP_CODE_DATA_STRUCT & io_sc )
{
    MemEcc::analyzeMainlineIue<TYPE_OCMB_CHIP>( i_chip, io_sc );

    #ifdef __HOSTBOOT_MODULE

    // Threshold reached: report that back to the rule code instead of the
    // plain SUCCESS.
    if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) )
    {
        return PRD_NO_CLEAR_FIR_BITS;
    }

    #endif

    return SUCCESS;
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMainlineIue );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[37] - Maint IUE.
 *
 * Runs MemEcc::analyzeMaintIue<TYPE_OCMB_CHIP>() and, in Hostboot builds only
 * (__HOSTBOOT_MODULE), asks MemEcc::queryIueTh<TYPE_OCMB_CHIP>() whether the
 * IUE threshold has been reached.
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return PRD_NO_CLEAR_FIR_BITS if the IUE threshold query reports true
 *         (Hostboot builds only), else SUCCESS.
 */
int32_t AnalyzeMaintIue( ExtensibleChip * i_chip,
                         STEP_CODE_DATA_STRUCT & io_sc )
{
    MemEcc::analyzeMaintIue<TYPE_OCMB_CHIP>( i_chip, io_sc );

    #ifdef __HOSTBOOT_MODULE

    // Threshold reached: report that back to the rule code instead of the
    // plain SUCCESS.
    if ( MemEcc::queryIueTh<TYPE_OCMB_CHIP>(i_chip, io_sc) )
    {
        return PRD_NO_CLEAR_FIR_BITS;
    }

    #endif

    return SUCCESS;
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintIue );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[19,39] - Mainline and Maint IMPE
 *
 * Thin plugin entry point that hands the attention over to
 * MemEcc::analyzeImpe<TYPE_OCMB_CHIP>().
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return Always SUCCESS.
 */
int32_t AnalyzeImpe( ExtensibleChip * i_chip,
                     STEP_CODE_DATA_STRUCT & io_sc )
{
    // Any outcome of the analysis is not inspected: the rule code has no use
    // for a status from this plugin.
    MemEcc::analyzeImpe<TYPE_OCMB_CHIP>( i_chip, io_sc );

    return SUCCESS;
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeImpe );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[13,16] - Mainline AUE and IAUE
 *
 * Reads the mainline AUE address (MemAddr::READ_AUE_ADDR) and adds a
 * rank-level MemoryMru callout at MRU_HIGH for the rank in that address. If
 * the address cannot be read, the failure is traced and no callout is added
 * by this plugin.
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return Always SUCCESS, including when the address read fails.
 */
int32_t AnalyzeFetchAueIaue( ExtensibleChip * i_chip,
                             STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[explorer_ocmb::AnalyzeFetchAueIaue] "

    MemAddr aueAddr;
    if ( SUCCESS != getMemReadAddr<TYPE_OCMB_CHIP>(i_chip,
                                                   MemAddr::READ_AUE_ADDR,
                                                   aueAddr) )
    {
        // Without an address there is no rank to call out; trace and leave.
        PRDF_ERR( PRDF_FUNC "getMemReadAddr(0x%08x,READ_AUE_ADDR) failed",
                  i_chip->getHuid() );
        return SUCCESS;
    }

    // Call out the rank reported in the AUE address at high priority.
    MemRank failedRank = aueAddr.getRank();
    MemoryMru rankMru { i_chip->getTrgt(), failedRank,
                        MemoryMruData::CALLOUT_RANK };
    io_sc.service_data->SetCallout( rankMru, MRU_HIGH );

    return SUCCESS;

    #undef PRDF_FUNC
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeFetchAueIaue );

//------------------------------------------------------------------------------

/**
 * @brief  RDFFIR[33] - Maintenance AUE
 *
 * Reads the maintenance address and adds a rank-level MemoryMru callout at
 * MRU_HIGH for the rank in that address. If the address cannot be read, the
 * failure is traced and no callout is added by this plugin.
 *
 * @param  i_chip OCMB chip.
 * @param  io_sc  The step code data struct.
 * @return Always SUCCESS, including when the address read fails.
 */
int32_t AnalyzeMaintAue( ExtensibleChip * i_chip,
                         STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[explorer_ocmb::AnalyzeMaintAue] "

    MemAddr maintAddr;
    if ( SUCCESS != getMemMaintAddr<TYPE_OCMB_CHIP>(i_chip, maintAddr) )
    {
        // Without an address there is no rank to call out; trace and leave.
        PRDF_ERR( PRDF_FUNC "getMemMaintAddr(0x%08x) failed",
                  i_chip->getHuid() );
        return SUCCESS;
    }

    // Call out the rank reported in the maintenance address at high priority.
    MemRank failedRank = maintAddr.getRank();
    MemoryMru rankMru { i_chip->getTrgt(), failedRank,
                        MemoryMruData::CALLOUT_RANK };
    io_sc.service_data->SetCallout( rankMru, MRU_HIGH );

    return SUCCESS;

    #undef PRDF_FUNC
}
PRDF_PLUGIN_DEFINE( explorer_ocmb, AnalyzeMaintAue );


//##############################################################################
//
// TLXFIR
Expand Down
11 changes: 0 additions & 11 deletions src/usr/diag/prdf/common/plat/mem/prdfMemDbUtils.H
Expand Up @@ -301,17 +301,6 @@ void banTps<TARGETING::TYPE_OCMB_CHIP>( ExtensibleChip * i_chip,
getOcmbDataBundle(i_chip)->iv_maskMainlineNceTce = true;
}

/**
 * @brief MEM_PORT flavor of banTps: looks up the OCMB parent connected to the
 *        given chip and forwards the request to the OCMB specialization.
 * @param i_chip MEM_PORT chip.
 * @param i_rank Rank passed through unchanged to the OCMB specialization.
 */
template<> inline
void banTps<TARGETING::TYPE_MEM_PORT>( ExtensibleChip * i_chip,
                                       const MemRank & i_rank )
{
    banTps<TARGETING::TYPE_OCMB_CHIP>(
        PlatServices::getConnectedParent( i_chip, TARGETING::TYPE_OCMB_CHIP ),
        i_rank );
}


#endif // Hostboot Runtime only

} // end namespace MemDbUtils
Expand Down

0 comments on commit 6751459

Please sign in to comment.