Skip to content

Commit

Permalink
HTMGT support for PGPE/SGPE error logs
Browse files Browse the repository at this point in the history
Change-Id: I4a0d7fa092483cdfa6083a4ca86651c80f548d5c
RTC: 197064
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/68026
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Sheldon Bailey <baileysh@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
cjcain authored and dcrowell77 committed Oct 30, 2018
1 parent 93478ad commit d971186
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 71 deletions.
7 changes: 5 additions & 2 deletions src/usr/htmgt/htmgt_occ.H
Original file line number Diff line number Diff line change
Expand Up @@ -332,15 +332,18 @@ namespace HTMGT


/**
* @brief Collect, Commit and Clear error log from the OCC
* @brief Process elog entry from OCC poll response.
* Collect, Commit and Clear error log from the OCC.
*
* @param[in] i_id OCC elog id to retrieve
* @param[in] i_address SRAM address for elog entry
* @param[in] i_length size of the elog entry
* @param[in] i_source OCC Error Log Source (405, PGPE, etc)
*/
void occProcessElog(const uint8_t i_id,
const uint32_t i_address,
const uint16_t i_length);
const uint16_t i_length,
const uint8_t i_source);


/**
Expand Down
24 changes: 15 additions & 9 deletions src/usr/htmgt/htmgt_poll.C
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2014,2017 */
/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -208,21 +208,23 @@ namespace HTMGT
OCC_POLL_DATA_MIN_SIZE) != 0))
{
TMGT_INF("OCC%d Poll change: Status:%04X Occs:%02X Cfg:%02X "
"State:%02X Error:%06X/%08X",
"State:%02X Error:%08X/%08X",
iv_instance,
(pollRsp->status << 8) | pollRsp->extStatus,
pollRsp->occsPresent,
pollRsp->requestedCfg, pollRsp->state,
(pollRsp->errorId<<16) | pollRsp->errorLength,
((pollRsp->errorId<<24) | (pollRsp->errorLength<<8) |
pollRsp->errorSource),
pollRsp->errorAddress);
#ifdef CONFIG_CONSOLE_OUTPUT_OCC_COMM
TMGT_CONSOLE("OCC%d Poll change: Status:%04X Occs:%02X Cfg:%02X "
"State:%02X Error:%06X/%08X",
"State:%02X Error:%08X/%08X",
iv_instance,
(pollRsp->status << 8) | pollRsp->extStatus,
pollRsp->occsPresent,
pollRsp->requestedCfg, pollRsp->state,
(pollRsp->errorId<<16) | pollRsp->errorLength,
((pollRsp->errorId<<24) | (pollRsp->errorLength<<8) |
pollRsp->errorSource),
pollRsp->errorAddress);
#endif
}
Expand All @@ -241,16 +243,19 @@ namespace HTMGT
if (pollRsp->errorId != 0)
{
if ((pollRsp->errorId != lastPollRsp->errorId) ||
(pollRsp->errorSource != lastPollRsp->errorSource) ||
(L_elog_retry_count < 3))

{
if (pollRsp->errorId == lastPollRsp->errorId)
if ((pollRsp->errorId == lastPollRsp->errorId) &&
(pollRsp->errorSource == lastPollRsp->errorSource))
{
// Only retry same errorId a few times...
L_elog_retry_count++;
TMGT_ERR("pollRspHandler: Requesting elog 0x%02X"
" (retry %d)",
pollRsp->errorId, L_elog_retry_count);
" from source 0x%02X on OCC%d (retry %d)",
pollRsp->errorId, pollRsp->errorSource,
iv_instance, L_elog_retry_count);
}
else
{
Expand All @@ -260,7 +265,8 @@ namespace HTMGT
// Handle a new error log from the OCC
occProcessElog(pollRsp->errorId,
pollRsp->errorAddress,
pollRsp->errorLength);
pollRsp->errorLength,
pollRsp->errorSource);
if (iv_needsReset)
{
// Update state if changed...
Expand Down
4 changes: 2 additions & 2 deletions src/usr/htmgt/htmgt_poll.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2014,2017 */
/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -67,7 +67,7 @@ namespace HTMGT
uint8_t errorId;
uint32_t errorAddress;
uint16_t errorLength;
uint8_t reserved;
uint8_t errorSource;
uint8_t gpuCfg;
uint8_t codeLevel[16];
uint8_t sensor[6];
Expand Down
111 changes: 70 additions & 41 deletions src/usr/htmgt/occError.C
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@

#include <isteps/pm/occAccess.H>
#include <console/consoleif.H>
#include <targeting/targplatutil.H>
#include <targeting/common/commontargeting.H>
#include <targeting/common/utilFilter.H>
#include <variable_buffer.H>
#include "ipmi/ipmisensor.H"


namespace HTMGT
{
Expand Down Expand Up @@ -87,7 +90,8 @@ namespace HTMGT
// Process elog entry from OCC poll response
void Occ::occProcessElog(const uint8_t i_id,
const uint32_t i_address,
const uint16_t i_length)
const uint16_t i_length,
const uint8_t i_source)
{
errlHndl_t l_errlHndl = nullptr;

Expand All @@ -103,18 +107,26 @@ namespace HTMGT
#endif
if (nullptr == l_errlHndl)
{
compId_t l_comp_id = OCCC_COMP_ID;
if (i_source == OCC_ERRSRC_PGPE)
{
l_comp_id = PGPE_COMP_ID;
}
else if (i_source == OCC_ERRSRC_XGPE)
{
l_comp_id = XGPE_COMP_ID;
}

const occErrlEntry_t * l_occElog= reinterpret_cast<occErrlEntry_t*>
(l_buffer.pointer());

TMGT_BIN("OCC ELOG", l_occElog, 256);


// Get user details section
const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *)
((uint8_t*)l_occElog + sizeof(occErrlEntry_t));

const uint32_t l_occSrc = OCCC_COMP_ID | l_occElog->reasonCode;
const uint32_t l_occSrc = l_comp_id | l_occElog->reasonCode;
ERRORLOG::errlSeverity_t severity =
ERRORLOG::ERRL_SEV_INFORMATIONAL;

Expand All @@ -135,8 +147,6 @@ namespace HTMGT
bool l_occReset = false;
elogProcessActions(l_occElog->actions, l_occReset, severity);



// Need to add WOF reason code to OCC object regardless of
// whether WOF resets are disabled.
if( l_occElog->actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED )
Expand Down Expand Up @@ -266,24 +276,39 @@ namespace HTMGT
if ((numCallouts == 0) &&
(severity != ERRORLOG::ERRL_SEV_INFORMATIONAL))
{
TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d"
" elog_id:0x%02X, severity:0x%0X",
iv_instance, i_id, severity);
/*@
* @errortype
* @refcode LIC_REFCODE
* @subsys EPUB_FIRMWARE_SP
* @reasoncode HTMGT_RC_OCC_ERROR_LOG
* @moduleid HTMGT_MOD_MISMATCHING_SEVERITY
* @userdata1[0-15] OCC elog id
* @userdata1[16-31] OCC severity
* @devdesc No FRU callouts found for non-info OCC Error Log
*/
bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY,
HTMGT_RC_OCC_ERROR_LOG,
i_id, severity, 0, 0,
ERRORLOG::ERRL_SEV_INFORMATIONAL);
ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
if (i_source == OCC_ERRSRC_405)
{
TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d"
" elog_id:0x%02X, severity:0x%0X",
iv_instance, i_id, severity);
/*@
* @errortype
* @refcode LIC_REFCODE
* @subsys EPUB_FIRMWARE_SP
* @reasoncode HTMGT_RC_OCC_ERROR_LOG
* @moduleid HTMGT_MOD_MISMATCHING_SEVERITY
* @userdata1[0-15] OCC elog id
* @userdata1[16-31] OCC severity
* @devdesc No FRU callouts found for non-info OCC Error Log
*/
bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY,
HTMGT_RC_OCC_ERROR_LOG,
i_id, severity, 0, 0,
ERRORLOG::ERRL_SEV_INFORMATIONAL);
ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
}
else
{
// Add Processor callout for PGPE/SGPE/XGPE
TMGT_ERR("occProcessElog: Adding processor callout for"
" OCC%d", iv_instance);
TARGETING::ConstTargetHandle_t l_proc_target =
TARGETING::getParentChip(iv_target);
l_errlHndl->addHwCallout(l_proc_target,
HWAS::SRCI_PRIORITY_MED,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);
}
}

if (int_flags_set(FLAG_HALT_ON_OCC_SRC))
Expand Down Expand Up @@ -313,33 +338,37 @@ namespace HTMGT
#endif

// Add full OCC error log data as a User Details section
l_errlHndl->addFFDC(OCCC_COMP_ID,
l_errlHndl->addFFDC(l_comp_id,
l_occElog,
i_length,
1, // version
0); // subsection
ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID);

// Clear elog
const uint8_t l_cmdData[1] = {i_id};
OccCmd l_cmd(this, OCC_CMD_CLEAR_ERROR_LOG,
sizeof(l_cmdData), l_cmdData);
l_errlHndl = l_cmd.sendOccCmd();
if (l_errlHndl != nullptr)
{
TMGT_ERR("occProcessElog: Failed to clear elog id %d to"
" OCC%d (rc=0x%04X)",
i_id, iv_instance, l_errlHndl->reasonCode());
ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID);
}
}
else
{
TMGT_ERR("occProcessElog: Unable to read elog %d from SRAM"
" address (0x%08X) length (0x%04X), rc=0x%04X",
i_id, i_address, i_length, l_errlHndl->reasonCode());
TMGT_ERR("occProcessElog: Unable to read elog %d from source "
"0x%02X on OCC%d, SRAM address (0x%08X) length (0x%04X), "
"rc=0x%04X",
i_id, i_source, iv_instance, i_address, i_length,
l_errlHndl->reasonCode());
ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID);
}

// Clear elog
const uint8_t l_cmdData[] = {
0x01/* version*/, i_id, i_source, 0x00/*reserved*/};
OccCmd l_cmd(this, OCC_CMD_CLEAR_ERROR_LOG,
sizeof(l_cmdData), l_cmdData);
l_errlHndl = l_cmd.sendOccCmd();
if (l_errlHndl != nullptr)
{
TMGT_ERR("occProcessElog: Failed to clear elog id 0x%02X from"
" source 0x%02X on OCC%d (rc=0x%04X)",
i_id, i_source, iv_instance, l_errlHndl->reasonCode());
ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID);
}

} // end Occ::occProcessElog()


Expand Down
26 changes: 9 additions & 17 deletions src/usr/htmgt/occError.H
Original file line number Diff line number Diff line change
Expand Up @@ -199,23 +199,6 @@ namespace HTMGT
};



/**
* @brief Process elog entry from OCC poll response
*
* @param[in] i_occ OCC instance number reporting error
* @param[in] i_id OCC Error Log ID to retrieve (from the poll response)
* @param[in] i_address OCC Error Log Address to read
* @param[in] i_length OCC Error Log Length
*/
void occProcessElog(Occ * i_occ,
const uint8_t i_id,
const uint32_t i_address,
const uint16_t i_length);




struct tmgtSafeModeReasonCode_t
{
uint32_t returnCode;
Expand All @@ -224,5 +207,14 @@ namespace HTMGT
};


// OCC Error Source
// Identifies where an OCC error log entry originated. The value comes from
// the errorSource byte of the OCC poll response and is handed to
// Occ::occProcessElog() as i_source, where it selects the component id used
// for the SRC/FFDC and is echoed back in the clear-error-log command data.
enum occErrorSource_e
{
OCC_ERRSRC_405 = 0x00, // logged with the default OCCC_COMP_ID
OCC_ERRSRC_PGPE = 0x10, // logged with PGPE_COMP_ID
OCC_ERRSRC_XGPE = 0x20 // logged with XGPE_COMP_ID
};


} // end namespace
#endif

0 comments on commit d971186

Please sign in to comment.