Skip to content

Commit

Permalink
Implement an HBRT interface to log a gard event from PHYP/OPAL
Browse files Browse the repository at this point in the history
Added a firmware notify interface, gard_event_t, to accept a
gard event message, from PHYP/OPAL, to log that event.

Change-Id: I9bcf684f0850c9a07ab7d46635aa07a2c1e9917c
RTC: 210201
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/82199
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R Geddes <crgeddes@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
  • Loading branch information
velozr authored and dcrowell77 committed Aug 28, 2019
1 parent f609640 commit 153dcaa
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 14 deletions.
9 changes: 5 additions & 4 deletions src/include/runtime/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
How to create an HBRT to FW request message interface
0) If passing an HBRT message to the FSP via MBOX or receiving a firmware notify message,
then use instruction 'generic_hbrt_fsp_message.H::GenericFspMboxMessage_t'
below.
and/or 'How to create an HBRT Firmware Notify message' below.
1) The biggest part will be defining the interface. Inspect the current
interfaces (req_hcode_update, error_log, etc) for inspiration.
2) Once an interface has been designed, add it to the anonymous
Expand Down Expand Up @@ -35,13 +35,14 @@ How to create an HBRT to FW request message interface
1) The biggest part will be defining the interface. Inspect the current
interfaces (AttributeSetter_t, SingleScomOpHbrtFspData_t,
TargetDeconfigHbrtFspData_t, etc) for inspiration.
2) Once an interface has been designed, add the structure to this file
with the other interfaces.
2) Once an interface has been designed, add the structure to the file,
generic_hbrt_fsp_message.H, among the other interfaces.
3) Create an MBOX message queue enum for the interface and add to:
/hostboot/src/include/usr/mbox/mbox_queues.H::queue_id_t
see current message queues for example
4) Add a new message type for the interface to:
enum generic_hbrt_fsp_message.H::GENERIC_FSP_MBOX_MESSAGE_MSG_TYPE.
enum GenericFspMboxMessage_t::GENERIC_FSP_MBOX_MESSAGE_MSG_TYPE in
file generic_hbrt_fsp_message.H.
5) How to use the new interface to pass a message
a) Make sure g_hostInterfaces and g_hostInterfaces->firmware_request
are not NULL.
Expand Down
58 changes: 48 additions & 10 deletions src/include/runtime/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,17 +552,18 @@ typedef struct hostInterfaces

// Message type identifiers carried in hbrt_fw_msg::io_type. The trailing
// comment on each enumerator names the struct associated with that type.
// NOTE(review): enumerators 0 through 9 appear twice in this listing (two
// renderings of the same lines) - confirm only one set exists in the header.
enum // hbrt_fw_msg::io_type the struct associated with io_type
{
HBRT_FW_MSG_TYPE_REQ_NOP = 0,
HBRT_FW_MSG_TYPE_RESP_NOP = 1, // struct resp_generic
HBRT_FW_MSG_TYPE_RESP_GENERIC = 2, // struct resp_generic
HBRT_FW_MSG_TYPE_REQ_HCODE_UPDATE = 3, // struct req_hcode_update
HBRT_FW_MSG_HBRT_FSP_REQ = 4, // struct GenericFspMboxMessage_t
HBRT_FW_MSG_TYPE_ERROR_LOG = 5, // struct error_log
HBRT_FW_MSG_HBRT_FSP_RESP = 6, // struct GenericFspMboxMessage_t
HBRT_FW_MSG_TYPE_I2C_LOCK = 7, // struct req_i2c_lock
HBRT_FW_MSG_TYPE_SBE_STATE = 8, // struct sbe_state
HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION = 9, // struct nvdimm_protection_state
HBRT_FW_MSG_TYPE_REQ_NOP = 0,
HBRT_FW_MSG_TYPE_RESP_NOP = 1, // struct resp_generic
HBRT_FW_MSG_TYPE_RESP_GENERIC = 2, // struct resp_generic
HBRT_FW_MSG_TYPE_REQ_HCODE_UPDATE = 3, // struct req_hcode_update
HBRT_FW_MSG_HBRT_FSP_REQ = 4, // struct GenericFspMboxMessage_t
HBRT_FW_MSG_TYPE_ERROR_LOG = 5, // struct error_log
HBRT_FW_MSG_HBRT_FSP_RESP = 6, // struct GenericFspMboxMessage_t
HBRT_FW_MSG_TYPE_I2C_LOCK = 7, // struct req_i2c_lock
HBRT_FW_MSG_TYPE_SBE_STATE = 8, // struct sbe_state
HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION = 9, // struct nvdimm_protection_state
HBRT_FW_MSG_TYPE_NVDIMM_OPERATION = 10, // struct nvdimm_operation_t
HBRT_FW_MSG_TYPE_GARD_EVENT = 11, // struct gard_event_t
};

// NVDIMM protection state enum
Expand Down Expand Up @@ -611,6 +612,39 @@ typedef struct hostInterfaces
// see @note associated with NVDIMM_Op_t above
} __attribute__ ((packed));

// Error types that can accompany a gard event.
// @note Must be kept in sync with the FSP Mailbox specification for
//       command : Gard-able Error Detected - cmd 0xCE, s/c 0x63, mod 01
enum GARD_ERROR_t : uint32_t
{
    HBRT_GARD_ERROR_UNKNOWN                  = 0x00000000,
    HBRT_GARD_ERROR_COMPUTATION_TEST_FAILURE = 0x00000001,
    HBRT_GARD_ERROR_SLB                      = 0x00000002,
    HBRT_GARD_ERROR_CHIP_TOD_FAILURE         = 0x00000003,
    HBRT_GARD_ERROR_TIMEFAC_FAILURE          = 0x00000004,
    HBRT_GARD_ERROR_PROC_RECOVERY_THRESHOLD  = 0x00000005,

    // Values 0x0006 and 0x0007 are not assigned in this enum

    HBRT_GARD_ERROR_NX                       = 0x00000008,
    HBRT_GARD_ERROR_SLW                      = 0x00000009,
    HBRT_GARD_ERROR_CAPP_UNIT                = 0x0000000A,

    // End-of-list marker only, not a valid error type.
    // Takes the value following the last enumerator above.
    HBRT_GARD_ERROR_LAST,
};

// Payload of a gard event notification (PHYP/OPAL -> HBRT)
struct gard_event_t
{
    // Gard event error type enum
    GARD_ERROR_t i_error_type;

    // Processor ID for error types 0x0001 to 0x0005,
    // Chip ID for error types 0x0008 to 0x000A
    uint32_t i_procId;

    // Platform log identifier
    uint32_t i_plid;

    // Currently not being used
    uint16_t i_sub_unit_mask;

    // Currently not being used
    uint16_t i_recovery_level;
} __attribute__ ((packed));

struct hbrt_fw_msg // define struct hbrt_fw_msg
{
hbrt_fw_msg() { req_hcode_update = { 0 }; }; // ctor
Expand Down Expand Up @@ -685,6 +719,10 @@ typedef struct hostInterfaces
// io_type set to HBRT_FW_MSG_TYPE_NVDIMM_OPERATION
struct nvdimm_operation_t nvdimm_operation;

// This struct is sent from PHYP/OPAL to HBRT with
// io_type set to HBRT_FW_MSG_TYPE_GARD_EVENT
struct gard_event_t gard_event;

// This struct is sent from HBRT with
// io_type set to HBRT_FW_MSG_HBRT_FSP_REQ or
// HBRT_FW_MSG_HBRT_FSP_RESP
Expand Down
2 changes: 2 additions & 0 deletions src/include/usr/runtime/runtime_reasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ namespace RUNTIME
RC_NO_SPACE_FOR_ATTRIBUTE_SERIALIZATION = RUNTIME_COMP_ID | 0x47,
RC_CANNOT_MAKE_ATTRIBUTE = RUNTIME_COMP_ID | 0x48,
RT_NO_OMI_TARGET_FOUND = RUNTIME_COMP_ID | 0x49,
RC_LOG_GARD_EVENT_UNKNOWN_ERROR_TYPE = RUNTIME_COMP_ID | 0x4A,
RC_LOG_GARD_EVENT = RUNTIME_COMP_ID | 0x4B,
};

enum UserDetailsTypes
Expand Down
136 changes: 136 additions & 0 deletions src/usr/util/runtime/rt_fwnotify.C
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,117 @@ int doNvDimmOperation(const hostInterfaces::nvdimm_operation_t& i_nvDimmOp)
return rc;
}

/**
 *  @brief Log the gard event from PHYP/OPAL
 *
 *  @details Traces the incoming event data, validates the error type,
 *           resolves the processor ID to an HB target and then creates and
 *           commits a predictive error log that carries a hardware callout
 *           for that target.  If the error type is not one of the defined
 *           GARD_ERROR_t values, a predictive error log with a software
 *           callout is committed instead.  If the target lookup fails, the
 *           error returned by the lookup is committed.
 *
 *  @param[in] i_gardEvent - The details of the gard event
 *  @see hostInterfaces::gard_event_t for more info
 *
 **/
void logGardEvent(const hostInterfaces::gard_event_t& i_gardEvent)
{
    // Trace input components
    TRACFCOMP(g_trac_runtime,
              ENTER_MRK"logGardEvent: Gard Event Data: "
              "error type(0x%.8X), processor ID(0x%.8X), "
              "PLID(0x%.8X), sub unit mask(0x%.4X), "
              "recovery level(0x%.4X)",
              i_gardEvent.i_error_type,
              i_gardEvent.i_procId,
              i_gardEvent.i_plid,
              i_gardEvent.i_sub_unit_mask,
              i_gardEvent.i_recovery_level);

    errlHndl_t l_err{nullptr};

    // Only the explicitly defined error types are valid.  A plain range
    // check against HBRT_GARD_ERROR_LAST is not sufficient because the
    // enum has unassigned values (0x0006, 0x0007) inside that range.
    bool l_validErrorType = false;
    switch (i_gardEvent.i_error_type)
    {
        case hostInterfaces::HBRT_GARD_ERROR_COMPUTATION_TEST_FAILURE:
        case hostInterfaces::HBRT_GARD_ERROR_SLB:
        case hostInterfaces::HBRT_GARD_ERROR_CHIP_TOD_FAILURE:
        case hostInterfaces::HBRT_GARD_ERROR_TIMEFAC_FAILURE:
        case hostInterfaces::HBRT_GARD_ERROR_PROC_RECOVERY_THRESHOLD:
        case hostInterfaces::HBRT_GARD_ERROR_NX:
        case hostInterfaces::HBRT_GARD_ERROR_SLW:
        case hostInterfaces::HBRT_GARD_ERROR_CAPP_UNIT:
            l_validErrorType = true;
            break;

        // HBRT_GARD_ERROR_UNKNOWN, HBRT_GARD_ERROR_LAST and every value
        // that is not a defined enumerator
        default:
            break;
    }

    do
    {
        // Make sure the error type is valid, if not, log it
        if (!l_validErrorType)
        {
            TRACFCOMP(g_trac_runtime, "logGardEvent: ERROR: unknown/invalid "
                      "error type 0x%.8X",
                      i_gardEvent.i_error_type);

            /* @
             * @errortype
             * @severity         ERRL_SEV_PREDICTIVE
             * @moduleid         MOD_RT_FIRMWARE_NOTIFY
             * @reasoncode       RC_LOG_GARD_EVENT_UNKNOWN_ERROR_TYPE
             * @userdata1[0:31]  GARD error type
             * @userdata1[32:63] Processor ID
             * @userdata2[0:31]  Sub unit mask
             * @userdata2[32:63] Recovery level
             * @devdesc          Unknown/invalid error type
             * @custdesc         Internal firmware error
             */
            l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
                                   MOD_RT_FIRMWARE_NOTIFY,
                                   RC_LOG_GARD_EVENT_UNKNOWN_ERROR_TYPE,
                                   TWO_UINT32_TO_UINT64(
                                       i_gardEvent.i_error_type,
                                       i_gardEvent.i_procId),
                                   TWO_UINT32_TO_UINT64(
                                       i_gardEvent.i_sub_unit_mask,
                                       i_gardEvent.i_recovery_level),
                                   ErrlEntry::ADD_SW_CALLOUT);
            break;
        }

        // Get the Target associated with processor ID
        TARGETING::TargetHandle_t l_procTarget{nullptr};
        l_err = RT_TARG::getHbTarget(i_gardEvent.i_procId, l_procTarget);
        if (l_err)
        {
            // The lookup error itself is committed below
            TRACFCOMP(g_trac_runtime, "logGardEvent: Error getting "
                      "HB Target from processor ID 0x%.8X, "
                      "exiting ...",
                      i_gardEvent.i_procId);
            break;
        }

        // Log the GARD event
        /* @
         * @errortype
         * @severity         ERRL_SEV_PREDICTIVE
         * @moduleid         MOD_RT_FIRMWARE_NOTIFY
         * @reasoncode       RC_LOG_GARD_EVENT
         * @userdata1[0:31]  GARD error type
         * @userdata1[32:63] Processor ID
         * @userdata2[0:31]  Sub unit mask
         * @userdata2[32:63] Recovery level
         * @devdesc          Gard event from Opal/Phyp
         * @custdesc         Hardware error detected at runtime
         */
        l_err = new ErrlEntry( ERRL_SEV_PREDICTIVE,
                               MOD_RT_FIRMWARE_NOTIFY,
                               RC_LOG_GARD_EVENT,
                               TWO_UINT32_TO_UINT64(
                                   i_gardEvent.i_error_type,
                                   i_gardEvent.i_procId),
                               TWO_UINT32_TO_UINT64(
                                   i_gardEvent.i_sub_unit_mask,
                                   i_gardEvent.i_recovery_level));

        // Set the PLID to the given gard event PLID if it exists
        // (a PLID of 0 is treated as "not provided")
        if (i_gardEvent.i_plid)
        {
            l_err->plid(i_gardEvent.i_plid);
        }

        // Do the actual gard: add a hardware callout for the target with
        // the NO_DECONFIG and GARD_PHYP options
        l_err->addHwCallout( l_procTarget, HWAS::SRCI_PRIORITY_MED,
                             HWAS::NO_DECONFIG, HWAS::GARD_PHYP);
    } while(0);

    // Commit any error log that occurred.
    errlCommit(l_err, RUNTIME_COMP_ID);

    TRACFCOMP(g_trac_runtime, EXIT_MRK"logGardEvent");
}

/**
* @see src/include/runtime/interface.h for definition of call
*
Expand Down Expand Up @@ -764,6 +875,31 @@ void firmware_notify( uint64_t i_len, void *i_data )
} // END case hostInterfaces::HBRT_FW_MSG_TYPE_NVDIMM_OPERATION:
break;

case hostInterfaces::HBRT_FW_MSG_TYPE_GARD_EVENT:
{
uint64_t l_minMsgSize = hostInterfaces::HBRT_FW_MSG_BASE_SIZE +
sizeof(hostInterfaces::hbrt_fw_msg::gard_event);
if (i_len < l_minMsgSize)
{
l_badMessage = true;

TRACFCOMP(g_trac_runtime, ERR_MRK"firmware_notify: "
"Received message HBRT_FW_MSG_TYPE_GARD_EVENT, "
"but size of message data(%d) is not adequate for a "
"complete message of this type, with size requirement of "
"%d", i_len, l_minMsgSize );

// Pack user data 1 with the message input type, the only
// data that can be safely retrieved
l_userData1 = l_hbrt_fw_msg->io_type;

break;
}

logGardEvent(l_hbrt_fw_msg->gard_event);
} // END case hostInterfaces::HBRT_FW_MSG_TYPE_GARD_EVENT:
break;

default:
{
l_badMessage = true;
Expand Down

0 comments on commit 153dcaa

Please sign in to comment.