Skip to content

Commit

Permalink
Monitor threads for HB TI to work
Browse files Browse the repository at this point in the history
Change-Id: I13c1717c650e24ee361e355ccaf5784d001a5b02
CQ:SW405958
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/52398
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
  • Loading branch information
Brian Stegmiller authored and zane131 committed Mar 1, 2018
1 parent 2993c5b commit 8cf2925
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 19 deletions.
10 changes: 7 additions & 3 deletions src/include/usr/diag/attn/attnreasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2014,2017 */
/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -47,7 +47,9 @@ enum ModuleId
{
ATTN_INVALID_MODULE = 0x00,
ATTN_TEST_FAKE_CALL_PRD = 0x01, // this is used in test code only.
ATTN_CHK_IPL_ATTNS_MODULE = 0x02
ATTN_CHK_IPL_ATTNS_MODULE = 0x02,
ATTN_PRD_TASK_MODULE = 0x03,
ATTN_INTR_TASK_MODULE = 0x04
};

/**
Expand All @@ -61,8 +63,10 @@ enum ReasonCode
ATTN_INVALID_REASONCODE = ATTN_COMP_ID | 0x00, // Invalid Reasoncode
ATTN_TEST_ATTN_FAIL = ATTN_COMP_ID | 0x01, // this is used in
// test code only.
ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02 // HW err with no gard
ATTN_SEE_HW_ERROR = ATTN_COMP_ID | 0x02, // HW err with no gard
// so PLID still set
ATTN_PRD_TASK_CRASHED = ATTN_COMP_ID | 0x03, // prd thread crashed
ATTN_INTR_TASK_CRASHED = ATTN_COMP_ID | 0x04 // intr thread crashed
};

}
Expand Down
4 changes: 3 additions & 1 deletion src/include/usr/diag/mdia/mdiareasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2012,2016 */
/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -47,6 +47,7 @@ enum ModuleId
{
INVALID_MODULE = 0x00,
PROCESS_COMMAND_TIMEOUT = 0x01,
MONITOR_MAIN_THREAD = 0x02
};

/**
Expand All @@ -60,6 +61,7 @@ enum ReasonCode
INVALID_REASONCODE = MDIA_COMP_ID | 0x00, // Invalid Reasoncode
MAINT_COMMAND_HW_TIMED_OUT = MDIA_COMP_ID | 0x01, // timeout due to HW
MAINT_COMMAND_SW_TIMED_OUT = MDIA_COMP_ID | 0x02, // timeout due to SW
MONITOR_THREAD_CRASHED = MDIA_COMP_ID | 0x03 // thread crashed
};

}
Expand Down
138 changes: 133 additions & 5 deletions src/usr/diag/attn/ipl/attnsvc.C
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2014,2017 */
/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -37,6 +37,8 @@
#include "common/attnmem.H"
#include "common/attntarget.H"
#include "arch/pirformat.H"
#include "diag/attn/attnreasoncodes.H"
#include <initservice/initserviceif.H> // for hostboot TI

// Custom compile configs
#include <config.h>
Expand Down Expand Up @@ -97,7 +99,8 @@ errlHndl_t Service::configureInterrupts(
return err;
}

void * Service::intrTask(void * i_svc)

void* Service::intrTaskWorker(void * i_svc)
{
// interrupt task loop
Service & svc = *static_cast<Service *>(i_svc);
Expand All @@ -118,10 +121,75 @@ void * Service::intrTask(void * i_svc)
// got an interrupt. process it

svc.processIntrQMsg(*msg);


}

return NULL;
}

} // end intrTaskWorker


/**
 * @brief Monitor wrapper around intrTaskWorker.
 *
 * Starts intrTaskWorker as its own task, blocks until that task ends and,
 * when the reported status is TASK_STATUS_CRASHED, commits a
 * critical error log, unregisters the INTR::LSI_LCL_FIR message queue and
 * requests a shutdown using the PLID of that log.
 *
 * @param[in] i_svc service object; handed unchanged to intrTaskWorker
 *
 * @return always NULL
 */
void * Service::intrTask(void * i_svc)
{
    // We need to create the actual thread that will do the work
    // and then monitor it for completion.
    tid_t l_tid = task_create(&intrTaskWorker, i_svc);
    assert( l_tid > 0 );

    int l_status = 0;
    void * l_Rc = NULL;

    // Blocks here until the worker task has ended.
    tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);

    if (l_status == TASK_STATUS_CRASHED)
    {
        /*@ errorlog tag
         * @errortype ERRL_SEV_CRITICAL_SYS_TERM
         * @moduleid ATTN_INTR_TASK_MODULE
         * @reasoncode ATTN_INTR_TASK_CRASHED
         * @userdata1 tidRc
         * @userdata2 Task Id that crashed
         *
         * @devdesc INTR task crashed
         * @custdesc Task analyzing HW errors has failed.
         */
        errlHndl_t l_err = new ERRORLOG::ErrlEntry
            (
                ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
                ATTN_INTR_TASK_MODULE,                // moduleid
                ATTN_INTR_TASK_CRASHED,               // reason Code
                static_cast<uint64_t>(l_tidRc),       // tid rc
                static_cast<uint64_t>(l_tid)          // task that crashed
            );

        l_err->collectTrace("ATTN_FAST" , 512 );
        l_err->collectTrace("PRDF" , 512 );
        l_err->collectTrace("MDIA_FAST" , 512 );

        // Ensure we are not on the interrupt service list.
        // or we'll get hung during TI waiting for this code
        // that crashed to shutdown.
        INTR::unRegisterMsgQ(INTR::LSI_LCL_FIR);

        // Save PLID for TI purposes; it is read before errlCommit() is
        // handed the log.
        uint32_t l_fatalPlid = l_err->plid();

        // Commit the elog
        ATTN_ERR("Committing INTR task crash elog");
        errlCommit(l_err, ATTN_COMP_ID);

        // Crash now
        INITSERVICE::doShutdown(l_fatalPlid, true);

    } // end if crashed


    // On Normal shutdown of thread, we will get here
    // and exit normally
    return NULL;

} // end intrTask


bool Service::intrTaskWait(msg_t * & o_msg)
{
Expand Down Expand Up @@ -280,7 +348,8 @@ errlHndl_t Service::processCheckstop()

#endif // CONFIG_ENABLE_CHECKSTOP_ANALYSIS

void* Service::prdTask(void * i_svc)

void* Service::prdTaskWorker(void * i_svc)
{
// prd task loop
Service & svc = *static_cast<Service *>(i_svc);
Expand All @@ -306,10 +375,69 @@ void* Service::prdTask(void * i_svc)
// new attentions for prd to handle

svc.processAttentions(procs);

}

return NULL;
}

} // end prdTaskWorker


void* Service::prdTask(void * i_svc)
{
// We need to create the actual thread that will do the work
// and then monitor it for completion.
tid_t l_tid = task_create(&prdTaskWorker, i_svc);
assert( l_tid > 0 );

int l_status = 0;
void * l_Rc = NULL;

tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);

if (l_status == TASK_STATUS_CRASHED)
{
/*@ errorlog tag
* @errortype ERRL_SEV_CRITICAL_SYS_TERM
* @moduleid ATTN_PRD_TASK_MODULE
* @reasoncode ATTN_PRD_TASK_CRASHED
* @userdata1 tidRc
* @userdata2 Task Id that crashed
*
* @devdesc PRD task crashed
* @custdesc Task analyzing HW errors has failed.
*/
errlHndl_t l_err = new ERRORLOG::ErrlEntry
(
ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
ATTN_PRD_TASK_MODULE, // moduleid
ATTN_PRD_TASK_CRASHED, // reason Code
(uint64_t)l_tidRc, // tid rc
(uint64_t)l_tid // task that crashed
);

l_err->collectTrace("PRDF" , 512 );
l_err->collectTrace("MDIA_FAST" , 512 );
l_err->collectTrace("ATTN_FAST" , 512 );

// Save PLID for TI purposes
uint32_t l_fatalPlid = l_err->plid();

// Commit the elog
ATTN_ERR("Committing PRD task crash elog");
errlCommit(l_err, ATTN_COMP_ID);
// Crash now
INITSERVICE::doShutdown(l_fatalPlid, true);

} // end if crashed


// On Normal shutdown of thread, we will get here
// and exit normally
return NULL;

} // end prdTask


bool Service::prdTaskWait()
{
Expand Down
26 changes: 21 additions & 5 deletions src/usr/diag/attn/ipl/attnsvc.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2014,2015 */
/* Contributors Listed Below - COPYRIGHT 2014,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -130,22 +130,38 @@ class Service : public ServiceCommon
ConfigureMode i_mode);

/**
* @brief intrTask infinite wait-for-interrupt loop
* @brief intrTask will start the intrTaskWorker
* and monitor if it crashes.
*
* @param[in] i_svc service object associated with task
*/
static void* intrTask(void * i_svc);

/**
* @brief prdTask will start the prdTaskWorker
* and monitor if it crashes.
*
* @param[in] i_svc service object associated with task
*/
static void* prdTask(void * i_svc);

/**
* @brief intrTaskWorker infinite wait-for-interrupt loop
*
* repeatedly call intrTaskWait and processIntrQMsg
*
* @param[in] i_svc service object associated with task
*/
static void* intrTask(void * i_svc);
static void* intrTaskWorker(void * i_svc);

/**
* @brief prdTask infinite wait-for-attention loop
* @brief prdTaskWorker infinite wait-for-attention loop
*
* repeatedly call prdTaskWait and processAttentions
*
* @param[in] i_svc service object associated with task
*/
static void* prdTask(void * i_svc);
static void* prdTaskWorker(void * i_svc);

/**
* @brief startIntrTask start task helper function
Expand Down
63 changes: 59 additions & 4 deletions src/usr/diag/mdia/mdiamonitor.C
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2012,2016 */
/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand All @@ -29,6 +29,8 @@
#include "mdiamonitor.H"
#include "mdiasm.H"
#include "mdiatrace.H"
#include <errl/errlmanager.H>
#include <initservice/initserviceif.H>

using namespace TARGETING;

Expand Down Expand Up @@ -149,8 +151,7 @@ void CommandMonitor::threadMain(StateMachine & i_sm)
}

// istep finished...shutdown

if(shutdown)
if (shutdown)
{
MDIA_FAST("cm: CommandMonitor will be shutdown");
break;
Expand Down Expand Up @@ -216,7 +217,8 @@ void CommandMonitor::shutdown()
task_wait_tid(tid, 0, 0);
}

void* CommandMonitor::staticMain(void * i_args)

void* CommandMonitor::staticMainWorker(void * i_args)
{
using namespace CommandMonitorImpl;

Expand All @@ -232,6 +234,59 @@ void* CommandMonitor::staticMain(void * i_args)
return NULL;
}


/**
 * @brief Monitor wrapper around staticMainWorker.
 *
 * Starts staticMainWorker as its own task, blocks until that task ends and,
 * when the reported status is TASK_STATUS_CRASHED, commits a
 * critical error log and requests a shutdown using the PLID of that log.
 *
 * @param[in] i_args argument pointer; handed unchanged to staticMainWorker
 *
 * @return always NULL
 */
void* CommandMonitor::staticMain(void * i_args)
{
    // We need to create the actual thread that will do the work
    // and then monitor it for completion.
    tid_t l_tid = task_create(&staticMainWorker, i_args);
    assert( l_tid > 0 );

    int l_status = 0;
    void * l_Rc = NULL;

    // Blocks here until the worker task has ended.
    tid_t l_tidRc = task_wait_tid( l_tid, &l_status, &l_Rc);

    if (l_status == TASK_STATUS_CRASHED)
    {
        /*@ errorlog tag
         * @errortype ERRL_SEV_CRITICAL_SYS_TERM
         * @moduleid MONITOR_MAIN_THREAD
         * @reasoncode MONITOR_THREAD_CRASHED
         * @userdata1 tidRc
         * @userdata2 Task Id that crashed
         *
         * @devdesc MDIA monitor task crashed
         * @custdesc Task handling mainstore init crashed
         */
        errlHndl_t l_err = new ERRORLOG::ErrlEntry
            (
                ERRORLOG::ERRL_SEV_CRITICAL_SYS_TERM, // severity
                MONITOR_MAIN_THREAD,                  // moduleid
                MONITOR_THREAD_CRASHED,               // reason Code
                static_cast<uint64_t>(l_tidRc),       // tid rc
                static_cast<uint64_t>(l_tid)          // task that crashed
            );

        l_err->collectTrace("MDIA_FAST" , 512 );
        l_err->collectTrace("PRDF" , 512 );

        // Save PLID for TI purposes; it is read before errlCommit() is
        // handed the log.
        uint32_t l_fatalPlid = l_err->plid();

        // Commit the elog. The trace is written first so that the
        // "Committing" message really precedes the commit, matching the
        // order used by the ATTN task monitors.
        MDIA_FAST("Committing task crash elog");
        errlCommit(l_err, MDIA_COMP_ID);

        // Crash now
        INITSERVICE::doShutdown(l_fatalPlid, true);
    } // end if crashed


    // On Normal shutdown of thread, we will get here
    // and exit normally
    return NULL;
}

CommandMonitor::CommandMonitor() :
iv_shutdown(false),
iv_tid(0),
Expand Down

0 comments on commit 8cf2925

Please sign in to comment.