Skip to content

Commit

Permalink
Send down OCC info logs to BMC for call-home
Browse files Browse the repository at this point in the history
This creates a new eSEL type (dd) to display
informational callhome logs.  The OCC will send
down telemetry information to track the general health
of the system using this new log.

Change-Id: I0319798554c4e48c287953bd2d0de8352bfc4909
RTC:180324
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/48776
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian E. Bakke <bbakke@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
  • Loading branch information
mderkse1 authored and wghoffa committed Nov 9, 2017
1 parent 9fec69b commit 2b4e231
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 34 deletions.
37 changes: 37 additions & 0 deletions src/include/usr/errl/errlentry.H
Expand Up @@ -612,6 +612,24 @@ public:
*/
std::vector<void*> getUDSections(compId_t i_compId, uint8_t i_subSect);

/**
* @brief set iv_eselCallhomeInfoEvent
*
* When true, send this error as a special callhome
* type of eSEL to the BMC
*
* @param[in] i_valid - value stored into iv_eselCallhomeInfoEvent
*
* @return nothing
*/
void setEselCallhomeInfoEvent(bool i_valid);

/**
* @brief get iv_eselCallhomeInfoEvent
*
* @return true if this log should result in a callhome event type eSEL,
* false otherwise
*/
bool getEselCallhomeInfoEvent();

private:

/**
Expand Down Expand Up @@ -788,6 +806,7 @@ private:
*/
bool getSkipShowingLog();


/**
* @brief Sets internal flag to indicate if this log should be
* saved to PNOR and sent to the BMC
Expand Down Expand Up @@ -838,6 +857,9 @@ private:
//BMC, or displayed in the console
bool iv_skipShowingLog;

// when true, send this special type of eSEL to the BMC
// This is used to send OCC informational errors up to BMC
bool iv_eselCallhomeInfoEvent;
};


Expand Down Expand Up @@ -984,6 +1006,21 @@ inline bool ErrlEntry::getSkipShowingLog()
return iv_skipShowingLog;
}

////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
inline void ErrlEntry::setEselCallhomeInfoEvent(bool i_valid)
{
    // Remember whether this log is to go to the BMC as a callhome eSEL
    iv_eselCallhomeInfoEvent = i_valid;
}

////////////////////////////////////////////////////////////////////////////
// Accessor for iv_eselCallhomeInfoEvent (true => callhome event type eSEL)
////////////////////////////////////////////////////////////////////////////
inline bool ErrlEntry::getEselCallhomeInfoEvent()
{
return iv_eselCallhomeInfoEvent;
}

} // End namespace


Expand Down
12 changes: 12 additions & 0 deletions src/include/usr/errl/errlmanager.H
Expand Up @@ -607,6 +607,18 @@ private:
* @param[in/out] io_err - errorlog that's being checked and updated
*/
void setErrlSkipFlag(errlHndl_t io_err);

#ifdef CONFIG_BMC_IPMI
/**
* @brief Helper function to grab the value of
* ATTR_ALLOW_CALLHOME_ESELS_TO_BMC and
* return it as a boolean value
*
* @return true if these ESELs are allowed, false otherwise
*/
bool allowCallHomeEselsToBmc(void);
#endif

};


Expand Down
23 changes: 19 additions & 4 deletions src/include/usr/ipmi/ipmisel.H
Expand Up @@ -76,15 +76,28 @@ namespace IPMISEL
* @param[in] size of eSEL data
* @param[in] eid of errorlog for this eSEL (for ack)
* @param[in] callout list,which has sel event details
* @param[in] is eSEL for informational call home error
*/
void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
uint32_t i_eid,std::vector<sel_info_t*>&i_calloutList);
uint32_t i_eid,std::vector<sel_info_t*>&i_calloutList,
bool i_infoCallHome);

// per IPMI Spec, section 32.1 SEL Event Records
// Values placed in the record_type field of a SEL record
enum sel_record_type
{
// Standard system event record
record_type_system_event = 0x02,

// Used to send callhome informational eSEL to BMC
// currently used to send OCC telemetry information to the BMC
// (send_esel writes this type when i_infoCallHome is true)
record_type_oem_call_home_info_event = 0xDD,

// This is a procedure callout
// byte 0 = procedure ID
// bytes 4,5 = record ID of associated eSEL
record_type_oem_sel_for_procedure_callout = 0xDE,

// Normal flattened PEL, often just called the eSEL
// bytes 4-6 = 040020
// (send_esel writes this type when i_infoCallHome is false)
record_type_ami_esel = 0xDF,
};

Expand Down Expand Up @@ -301,17 +314,19 @@ namespace IPMISEL
* @brief parse the msg and call send_esel to send the esel (handles if
* the SEL reservation is lost)
* @param[in] i_msg
* @param[in] i_infoCallHome - informational call-home log
*/
void process_esel(msg_t *i_msg);
void process_esel(msg_t *i_msg, bool i_infoCallHome);

/**
* @brief do the actual ipmi calls to send the esel data to the bmc
* @param[in] i_data esel data
* @param[out] o_err any error generated during the send
* @param[out] o_cc ipmi completion code from last sendrecv
* @param[in] i_infoCallHome - informational call-home log
*/
void send_esel(IPMISEL::eselInitData * i_data,
errlHndl_t &o_err, IPMI::completion_code &o_cc);
void send_esel(IPMISEL::eselInitData * i_data, errlHndl_t &o_err,
IPMI::completion_code &o_cc, bool i_infoCallHome);

/**
* @brief read the SEL time
Expand Down
9 changes: 7 additions & 2 deletions src/usr/errl/errlentry.C
Expand Up @@ -87,7 +87,8 @@ ErrlEntry::ErrlEntry(const errlSeverity_t i_sev,
iv_Src( SRC_ERR_INFO, i_modId, i_reasonCode, i_user1, i_user2 ),
iv_termState(TERM_STATE_UNKNOWN),
iv_sevFinal(false),
iv_skipShowingLog(true)
iv_skipShowingLog(true),
iv_eselCallhomeInfoEvent(false)
{
#ifdef CONFIG_ERRL_ENTRY_TRACE
TRACFCOMP( g_trac_errl, ERR_MRK"Error created : PLID=%.8X, RC=%.4X, Mod=%.2X, Userdata=%.16X %.16X", plid(), i_reasonCode, i_modId, i_user1, i_user2 );
Expand Down Expand Up @@ -780,7 +781,11 @@ void ErrlEntry::commit( compId_t i_committerComponent )
// User header contains the component ID of the committer.
iv_User.setComponentId( i_committerComponent );

setSubSystemIdBasedOnCallouts();
// Avoid adding a callout to informational callhome "error"
if (!getEselCallhomeInfoEvent())
{
setSubSystemIdBasedOnCallouts();
}

// Add the captured backtrace to the error log
if (iv_pBackTrace)
Expand Down
43 changes: 37 additions & 6 deletions src/usr/errl/errlmanager_common.C
Expand Up @@ -530,6 +530,24 @@ inline bool SensorModifier::modifySensor(uint8_t i_sensorType,
return l_retval;
}

// Retrieve if informational/call-home eSELs are allowed to the BMC
bool ErrlManager::allowCallHomeEselsToBmc(void)
{
bool l_allowed = false;
uint8_t flag = 0;
TARGETING::Target* sys = nullptr;
TARGETING::targetService().getTopLevelTarget(sys);
if (sys)
{
flag = sys->getAttr<TARGETING::ATTR_ALLOW_CALLHOME_ESELS_TO_BMC>();
}
if (flag)
{
l_allowed = true;
}

return l_allowed;
}

///////////////////////////////////////////////////////////////////////////////
// ErrlManager::sendErrLogToBmc()
Expand All @@ -540,12 +558,24 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels)
ENTER_MRK
"sendErrLogToBmc errlogId 0x%.8x, i_sendSels %d",
io_err->eid(), i_sendSels);

bool l_send_eSel_only = !i_sendSels; // don't send callout sensor SEL
bool l_callhome_type = false; // Is this a callhome type eSEL?
if (io_err->getEselCallhomeInfoEvent() && allowCallHomeEselsToBmc())
{
TRACFCOMP( g_trac_errl, INFO_MRK
"sendErrLogToBmc: setting l_callhome_type" );
l_callhome_type = true;
l_send_eSel_only = true; // just send eSEL without any callout SELs
}

do {

// keep track of procedure callouts that modify hardware callouts
SensorModifier l_modifier;

// Decide whether we want to skip the error log
if( io_err->getSkipShowingLog() )
if( io_err->getSkipShowingLog() && !l_callhome_type )
{
TRACFCOMP( g_trac_errl, INFO_MRK
"sendErrLogToBmc: %.8X is INFORMATIONAL/RECOVERED; skipping",
Expand All @@ -558,7 +588,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels)
std::vector< HWAS::callout_ud_t* > l_callouts;
HWAS::callout_ud_t l_calloutToAdd; // used for EIBUS error
HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE;
if (i_sendSels)
if (!l_send_eSel_only)
{
bool l_busCalloutEncountered = false; // flag bus callout

Expand Down Expand Up @@ -663,7 +693,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels)
// bool default constructor initializes to false as per C++ standard
std::map<uint8_t, bool> l_sensorNumberEncountered;

if (i_sendSels)
if (!l_send_eSel_only)
{
l_selEventList.clear();
std::vector<HWAS::callout_ud_t*>::const_iterator i;
Expand Down Expand Up @@ -775,7 +805,8 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels)
{
IPMISEL::sendESEL(l_pelData, l_pelSize,
io_err->eid(),
l_selEventList);
l_selEventList,
l_callhome_type);
TRACFCOMP(g_trac_errl, INFO_MRK
"sendErrLogToBmc callout size %d",
l_selEventList.size());
Expand All @@ -799,8 +830,8 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels)

l_selEventList.push_back(l_selEvent);

IPMISEL::sendESEL(l_pelData, l_pelSize,
io_err->eid(), l_selEventList);
IPMISEL::sendESEL(l_pelData, l_pelSize, io_err->eid(),
l_selEventList, l_callhome_type);
}

// free the buffer
Expand Down
44 changes: 28 additions & 16 deletions src/usr/ipmi/ipmisel.C
Expand Up @@ -89,7 +89,8 @@ enum esel_retry
namespace IPMISEL
{
void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
uint32_t i_eid, std::vector<sel_info_t*>&i_selEventList)
uint32_t i_eid, std::vector<sel_info_t*>&i_selEventList,
bool i_infoCallHome)
{
IPMI_TRAC(ENTER_MRK "sendESEL() %d",i_selEventList.size());

Expand All @@ -103,13 +104,13 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
#endif
msg->type = MSG_SEND_ESEL;
msg->data[0] = i_eid;
eselInitData *eselData =
eselInitData *eselData =
new eselInitData(i_selEventList, i_eselData, i_dataSize);

msg->extra_data = eselData;

#ifdef __HOSTBOOT_RUNTIME
process_esel(msg);
process_esel(msg, i_infoCallHome);
#else
// one message queue to the SEL thread
static msg_q_t mq = Singleton<IpmiSEL>::instance().msgQueue();
Expand All @@ -129,7 +130,7 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
/*
* @brief process esel msg
*/
void process_esel(msg_t *i_msg)
void process_esel(msg_t *i_msg, bool i_infoCallHome)
{
errlHndl_t l_err = NULL;
IPMI::completion_code l_cc = IPMI::CC_UNKBAD;
Expand All @@ -144,7 +145,7 @@ void process_esel(msg_t *i_msg)
{
IPMI_TRAC(ENTER_MRK"sel list size %d", l_data->selInfoList.size());
std::vector<sel_info_t*>::iterator it;
for (it = l_data->selInfoList.begin(); it != l_data->selInfoList.end();
for (it = l_data->selInfoList.begin(); it != l_data->selInfoList.end();
++it)
{
sel_info_t *l_sel = *it;
Expand All @@ -153,12 +154,12 @@ void process_esel(msg_t *i_msg)
l_data->selEvent = true;

//If sensor type is sys event then need to send the oem sel
//to handle procedure callout
//to handle procedure callout
if (l_sel->sensorType == TARGETING::SENSOR_TYPE_SYS_EVENT)
{
//oem sel data
l_data->selEvent = false;
l_oemSel.record_type =
l_oemSel.record_type =
record_type_oem_sel_for_procedure_callout;
l_oemSel.event_data1 = l_sel->eventOffset;
l_sel->eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE;
Expand All @@ -174,13 +175,13 @@ void process_esel(msg_t *i_msg)
l_eSel.event_dir_type = l_sel->eventDirType;
l_eSel.event_data1 = l_sel->eventOffset;
memcpy(l_data->eSel,&l_eSel,sizeof(selRecord));


uint32_t l_send_count = MAX_SEND_COUNT;
while (l_send_count > 0)
{
// try to send the eles to the bmc
send_esel(l_data, l_err, l_cc);
// try to send the esel to the bmc
send_esel(l_data, l_err, l_cc, i_infoCallHome);

// if no error but last completion code was:
if ((l_err == NULL) &&
Expand Down Expand Up @@ -244,7 +245,8 @@ void process_esel(msg_t *i_msg)
* @brief Send esel data to bmc
*/
void send_esel(eselInitData * i_data,
errlHndl_t &o_err, IPMI::completion_code &o_cc)
errlHndl_t &o_err, IPMI::completion_code &o_cc,
bool i_infoCallHome)
{
IPMI_TRAC(ENTER_MRK "send_esel");
uint8_t* data = NULL;
Expand Down Expand Up @@ -305,8 +307,18 @@ void send_esel(eselInitData * i_data,
memcpy(&data[PARTIAL_ADD_ESEL_REQ], i_data->eSel,
sizeof(selRecord));
// update to make this what AMI eSEL wants
data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = record_type_ami_esel;
data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] = event_data1_ami;
if (i_infoCallHome)
{
data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] =
record_type_oem_call_home_info_event;
}
else
{
data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] =
record_type_ami_esel;
}
data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] =
event_data1_ami;

o_cc = IPMI::CC_UNKBAD;
TRACFBIN( g_trac_ipmi, INFO_MRK"1st partial_add_esel:", data, len);
Expand Down Expand Up @@ -394,8 +406,8 @@ void send_esel(eselInitData * i_data,
}
}while(0);

// if eSEL wasn't created due to an error, we don't want to continue
if ((o_err == NULL) && (o_cc == IPMI::CC_OK))
// if eSEL wasn't created due to an error or callhome, we don't want to continue
if ((o_err == NULL) && (o_cc == IPMI::CC_OK) && (!i_infoCallHome))
{
// caller wants us to NOT create sensor SEL
if ((i_data->eSel[offsetof(selRecord,sensor_type)] == SENSOR::INVALID_TYPE) &&
Expand Down Expand Up @@ -541,7 +553,7 @@ void IpmiSEL::execute(void)
switch(msg_type)
{
case IPMISEL::MSG_SEND_ESEL:
IPMISEL::process_esel(msg);
IPMISEL::process_esel(msg, false);
//done with msg
msg_free(msg);
break;
Expand Down
12 changes: 12 additions & 0 deletions src/usr/targeting/common/xmltohb/attribute_types_openpower.xml
Expand Up @@ -1367,5 +1367,17 @@ ID for the sensor number returned with the elog. -->
<writeable/>
</attribute>

<attribute>
<id>ALLOW_CALLHOME_ESELS_TO_BMC</id>
<description>Flag used to allow sending informational call-home errors as ESELS to the BMC</description>
<simpleType>
<uint8_t>
<default>0</default>
</uint8_t>
</simpleType>
<persistency>non-volatile</persistency>
<readable/>
</attribute>


</attributes>

0 comments on commit 2b4e231

Please sign in to comment.