Skip to content

Commit

Permalink
Send Attn chip list for monitoring
Browse files Browse the repository at this point in the history
Need to let ATTN know that they need to start monitoring non-primary
processors and centaurs after we determine they are functional.

Change-Id: Ia33ddc87a94c0ca972accb2d6078c2f9f2e7bd2c
CQ: SW446697
CMVC-Prereq: 1070543
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/66717
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R Geddes <crgeddes@us.ibm.com>
Reviewed-by: Corey V. Swenson <cswenson@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
mderkse1 authored and dcrowell77 committed Oct 23, 2018
1 parent c9d3c11 commit a45ace1
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 5 deletions.
4 changes: 3 additions & 1 deletion src/include/usr/initservice/initsvcreasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -84,6 +84,8 @@ enum InitServiceReasonCode
SHUTDOWN_MFG_TERM = INITSVC_COMP_ID | 0x14,
//termination_rc
SHUTDOWN_KEY_TRANSITION = INITSVC_COMP_ID | 0x15,

ISTEP_ATTN_MONITOR_MSG_FAILED = INITSVC_COMP_ID | 0x16,
};

enum InitServiceUserDetailDataSubSection
Expand Down
16 changes: 16 additions & 0 deletions src/include/usr/initservice/istepdispatcherif.H
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#ifndef __INITSERVICE_ISTEPDISPATCHERIF_H
#define __INITSERVICE_ISTEPDISPATCHERIF_H

#include <vector>

namespace INITSERVICE
{

Expand Down Expand Up @@ -70,6 +72,20 @@ void sendProgressCode(bool i_needsLock = true);
*/
errlHndl_t sendIstepCompleteMsg ( void );


/**
* @brief This function is to be used by external code to tell ATTN code on
* the FSP to start monitoring these chips
*
* @note No message is sent, and NULL is returned, when SP base services are
* not enabled or when i_huid_list is empty.
*
* @param[in] i_huid_list - HUID list of chips for ATTN to watch
*
* @return errlHndl_t - NULL if successful, otherwise a pointer to the error
* log.
*/
errlHndl_t sendAttnMonitorChipIdMsg(
const std::vector<TARGETING::ATTR_HUID_type> & i_huid_list);


/**
* @brief This function is to be used by external code to check whether
* a shutdown has been requested
Expand Down
127 changes: 127 additions & 0 deletions src/usr/initservice/istepdispatcher/istepdispatcher.C
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,126 @@ errlHndl_t IStepDispatcher::sendSyncPoint()
return err;
}

// ----------------------------------------------------------------------------
// IStepDispatcher::sendAttnMonitorChipIdMsg()
//
// Builds an ATTN_MONITOR_CHIPID_LIST message whose extra_data holds an
// attn_chipid_msg header followed by the HUIDs from i_huid_list, then sends
// it with MBOX::sendrecv on HWSVRQ.
//
// Nothing is sent (and nullptr is returned) when SP base services are not
// enabled or when the HUID list is empty.
//
// Returns the error from MBOX::sendrecv if there is one, a new
// ISTEP_ATTN_MONITOR_MSG_FAILED log if the response's data[0] is not
// HWSVR_MSG_SUCCESS, otherwise nullptr.
// ----------------------------------------------------------------------------
errlHndl_t IStepDispatcher::sendAttnMonitorChipIdMsg(
    const std::vector<TARGETING::ATTR_HUID_type> & i_huid_list )
{
    errlHndl_t l_errl = nullptr;

    TRACFCOMP(g_trac_initsvc,
              ENTER_MRK"IStepDispatcher::sendAttnMonitorChipIdMsg");

    do
    {
        // Guard: no SP base services, so there is nobody to send to
        if( !iv_spBaseServicesEnabled )
        {
            TRACFCOMP( g_trac_initsvc,
                INFO_MRK"sendAttnMonitorChipIdMsg: The ATTN service runs on the "
                "FSP and no FSP was found so we are skipping sending message to "
                "ATTN service.");
            break;
        }

        // Guard: nothing to report
        if( i_huid_list.empty() )
        {
            TRACFCOMP( g_trac_initsvc, INFO_MRK"sendAttnMonitorChipIdMsg: empty huid list" );
            break;
        }

        // Only used inside sizeof() until it is pointed at the payload below
        INITSERVICE::attn_chipid_msg * l_payload = nullptr;

        // Bytes occupied by the HUID entries alone
        const size_t l_huidBytes =
            sizeof(TARGETING::ATTR_HUID_type) * i_huid_list.size();

        // Full size of extra_data: the attn_chipid_msg header plus the HUID
        // list. attn_chipid_msg.data marks the start of the list, so its own
        // size is subtracted from the header size.
        // NOTE(review): a uint16_t total would wrap for a very large list --
        // confirm callers keep the list small enough.
        const uint16_t l_msgBytes =
            (sizeof(INITSERVICE::attn_chipid_msg) - sizeof(l_payload->data)) +
            l_huidBytes;

        msg_t * l_msg = msg_allocate();
        l_msg->type       = INITSERVICE::ATTN_MONITOR_CHIPID_LIST;
        l_msg->data[0]    = 0;
        l_msg->data[1]    = l_msgBytes;
        l_msg->extra_data = MBOX::allocate(l_msgBytes);

        l_payload = reinterpret_cast<INITSERVICE::attn_chipid_msg *>
                        (l_msg->extra_data);

        // Header: number of HUIDs and the byte length of the list
        l_payload->chipIdCount = i_huid_list.size();
        l_payload->size        = l_huidBytes;

        // Body: the HUIDs themselves, starting at the 'data' member
        std::copy(i_huid_list.begin(), i_huid_list.end(), &(l_payload->data));

        TRACFCOMP( g_trac_initsvc,
                   "sendAttnMonitorChipIdMsg: Sending ATTN_MONITOR_CHIPID_LIST"
                   " (0x%.8X) msg", l_msg->type );
        TRACFBIN(g_trac_initsvc, "msg data", l_msg->extra_data, l_msg->data[1]);

        // Alert ATTN to start monitoring these chips
        l_errl = MBOX::sendrecv(HWSVRQ, l_msg);

        if( l_errl )
        {
            TRACFCOMP(g_trac_initsvc,
                ERR_MRK"sendAttnMonitorChipIdMsg: error 0x%.8X from msg send",
                l_errl->reasonCode() );
            l_errl->collectTrace("INITSVC", 1024);

            // The send failed, so release the payload buffer here.
            // NOTE(review): the buffer came from MBOX::allocate but is
            // released with free() -- confirm the two are compatible.
            if( l_msg && l_msg->extra_data )
            {
                free( l_msg->extra_data );
                l_msg->extra_data = nullptr;
            }
        }
        else if( l_msg->data[0] != HWSVR_MSG_SUCCESS )
        {
            // The message was delivered, but data[0] of the response is
            // non-zero, which implies something went wrong at the FSP level.
            TRACFCOMP(g_trac_initsvc, ERR_MRK"sendAttnMonitorChipIdMsg: "
                      "msg failed at HWSV/ATTN level, see plid %.8X",
                      l_msg->data[0] );

            /*@
             * @errortype
             * @reasoncode ISTEP_ATTN_MONITOR_MSG_FAILED
             * @severity ERRORLOG::ERRL_SEV_PREDICTIVE
             * @moduleid ISTEP_INITSVC_MOD_ID
             * @userdata1 PLID of failure on FSP
             * @userdata2 Number of huids in msg
             * @devdesc sendAttnMonitorChipIdMsg failed at
             * the FSP level. Potential checkstops
             * may not be properly handled.
             * @custdesc Firmware error during boot
             */
            l_errl = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_PREDICTIVE,
                            ISTEP_INITSVC_MOD_ID,
                            ISTEP_ATTN_MONITOR_MSG_FAILED,
                            l_msg->data[0],
                            i_huid_list.size(),
                            ERRORLOG::ErrlEntry::ADD_SW_CALLOUT);
            l_errl->collectTrace("INITSVC", 1024);

            // Use the same plid as the HWSV/ATTN error
            l_errl->plid(l_msg->data[0]);
        }

        // msg cleanup
        // NOTE: extra_data is cleaned up by the receiver
        msg_free(l_msg);
        l_msg = nullptr;

    } while(0);

    TRACFCOMP( g_trac_initsvc,
               EXIT_MRK"IStepDispatcher::sendAttnMonitorChipIdMsg");

    return l_errl;
}

// ----------------------------------------------------------------------------
// IStepDispatcher::sendIstepCompleteMsg()
// ----------------------------------------------------------------------------
Expand Down Expand Up @@ -2544,6 +2664,13 @@ errlHndl_t sendSyncPoint()
return IStepDispatcher::getTheInstance().sendSyncPoint();
}

// Namespace-level entry point: forwards the HUID list to the
// IStepDispatcher instance and returns whatever it returns.
errlHndl_t sendAttnMonitorChipIdMsg(
    const std::vector<TARGETING::ATTR_HUID_type> & i_huid_list)
{
    IStepDispatcher & l_dispatcher = IStepDispatcher::getTheInstance();
    return l_dispatcher.sendAttnMonitorChipIdMsg(i_huid_list);
}

void sendProgressCode(bool i_needsLock)
{
errlHndl_t err = NULL;
Expand Down
13 changes: 13 additions & 0 deletions src/usr/initservice/istepdispatcher/istepdispatcher.H
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
/******************************************************************************/
#include <stdint.h>
#include <time.h>
#include <vector>
#include <util/singleton.H>
#include <sys/msg.h>
#include <sys/sync.h>
Expand Down Expand Up @@ -254,6 +255,18 @@ public:
*/
int getNextIStep(uint8_t& io_istep, uint8_t& io_substep);

/**
* @brief Send attn_chipid_msg to alert ATTN code on the FSP to
* start monitoring these chips
*
* @param[in] i_huid_list - HUID list of chips for ATTN to watch
*
* @return errlHndl_t - NULL if successful,
* otherwise a pointer to the error log.
*/
errlHndl_t sendAttnMonitorChipIdMsg(
const std::vector<TARGETING::ATTR_HUID_type> & i_huid_list );

protected:

/**
Expand Down
37 changes: 36 additions & 1 deletion src/usr/isteps/istep08/call_host_attnlisten_proc.C
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <errl/errlentry.H>
#include <initservice/isteps_trace.H>
#include <initservice/initserviceif.H>
#include <initservice/istepdispatcherif.H>
#include <initservice/initsvcreasoncodes.H>
#include <sys/time.h>
#include <devicefw/userif.H>
Expand All @@ -62,21 +63,55 @@ using namespace TARGETING;
namespace ISTEP_08
{

/**
* @brief Send a list of functional procs that ATTN can start monitoring
* for checkstop analysis
*/
void send_analyzable_procs(void)
{
errlHndl_t l_err = nullptr;
std::vector<TARGETING::ATTR_HUID_type> l_chipHuids;

// get all functional Proc targets
TARGETING::TargetHandleList l_procsList;
getAllChips(l_procsList, TYPE_PROC);

// now fill in the list with proc huids
for (const auto & l_cpu_target : l_procsList)
{
l_chipHuids.push_back(TARGETING::get_huid(l_cpu_target));
}

// send the message to alert ATTN to start monitoring these chips
l_err = INITSERVICE::sendAttnMonitorChipIdMsg(l_chipHuids);
if (l_err)
{
errlCommit(l_err, ISTEP_COMP_ID);
}
}


//******************************************************************************
// call_host_attnlisten_proc()
//******************************************************************************
void* call_host_attnlisten_proc(void *io_pArgs)
{
IStepError l_stepError;


TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"call_host_attnlisten_proc entry" );

// Other than the ATTN notification below, this function is a NOOP because
// with security enabled, PRD is unable to write FIRs due to blacklist
// violations. All of the slave processor attentions will be ignored until
// the SMP comes up.

// Send list of functional procs that ATTN
// can start monitoring for checkstop analysis
if( INITSERVICE::spBaseServicesEnabled() )
{
send_analyzable_procs();
}

TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"call_host_attnlisten_proc exit" );
return l_stepError.getErrorHandle();
Expand Down
54 changes: 51 additions & 3 deletions src/usr/isteps/istep12/call_host_attnlisten_memb.C
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2015,2017 */
/* Contributors Listed Below - COPYRIGHT 2015,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand All @@ -25,14 +25,18 @@
#include <errl/errlentry.H>

#include <initservice/isteps_trace.H>

#include <isteps/hwpisteperror.H>
#include <errl/errludtarget.H>
#include <errl/errlmanager.H>

// targeting support.
#include <targeting/common/commontargeting.H>
#include <targeting/common/utilFilter.H>

// to send chipId list for ATTN monitoring
#include <initservice/istepdispatcherif.H>
#include <initservice/initserviceif.H>

using namespace ISTEP;
using namespace ISTEP_ERROR;
using namespace ERRORLOG;
Expand All @@ -41,10 +45,54 @@ using namespace TARGETING;

namespace ISTEP_12
{
/**
* @brief Send a list of functional procs and centaurs that ATTN
* can start monitoring for checkstop analysis
*/
void send_analyzable_procs_and_centaurs()
{
errlHndl_t l_err = nullptr;
std::vector<TARGETING::ATTR_HUID_type> l_chipHuids;

// Get all functional Centaur targets
TARGETING::TargetHandleList l_membufTargetList;
getAllChips(l_membufTargetList, TYPE_MEMBUF);

// Get all functional Proc targets
TARGETING::TargetHandleList l_procsList;
getAllChips(l_procsList, TYPE_PROC);

// now fill in the list with proc huids
for (const auto & l_cpu_target : l_procsList)
{
l_chipHuids.push_back(TARGETING::get_huid(l_cpu_target));
}

// now fill in the list with Centaur huids
for (const auto & l_membuf_target : l_membufTargetList)
{
l_chipHuids.push_back(TARGETING::get_huid(l_membuf_target));
}

// send the message to alert ATTN to start monitoring these chips
l_err = INITSERVICE::sendAttnMonitorChipIdMsg(l_chipHuids);
if (l_err)
{
errlCommit(l_err, ISTEP_COMP_ID);
}
}

void* call_host_attnlisten_memb (void *io_pArgs)
{
IStepError l_StepError;


// Send list of functional procs and centaurs that ATTN
// can start monitoring for checkstop analysis
if( INITSERVICE::spBaseServicesEnabled() )
{
send_analyzable_procs_and_centaurs();
}

// end task, returning any errorlogs to IStepDisp
return l_StepError.getErrorHandle();
}
Expand Down

0 comments on commit a45ace1

Please sign in to comment.