Skip to content

Commit

Permalink
Print out MBOX/INTR state info on DMA request hang
Browse files Browse the repository at this point in the history
We have been stuck on a hang that occurs during memdiags on
our multi-node p9 systems. It appears that Hostboot never
receives the response to its request to reclaim DMA buffers
from the FSP. From debugging we know the FSP thinks it has sent
the message over the FSI mbox, but Hostboot isn't seeing it. With
this change in the code, the next time the hang happens we should
be able to get a better idea of what is going on.

Change-Id: I6b702e4094da3576ba454b5cdf0660841961baff
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/60977
Reviewed-by: Richard Ward <rward15@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Roland Veloz <rveloz@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
  • Loading branch information
crgeddes authored and wghoffa committed Jul 5, 2018
1 parent 1759af7 commit 50e7279
Show file tree
Hide file tree
Showing 8 changed files with 420 additions and 11 deletions.
28 changes: 28 additions & 0 deletions src/include/usr/intr/interrupt.H
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ namespace INTR
MSG_INTR_DRAIN_QUEUE, //!< Allow intrp to drain Q of EOI
MSG_INTR_COALESCE, //!< Pending interrupt to be handled
MSG_INTR_ENABLE_PSI_INTR, //!< Enable PSIHB Interrupts
MSG_INTR_DUMP, //!< Print out interrupt info to slow buffer
};


Expand Down Expand Up @@ -144,6 +145,14 @@ namespace INTR
uint64_t phbsecure; //PSI Host Bridge Secure Control reg - 0x90
};

/**
* ESB state values understood by esbStateToString().
* IntrRp::printEsbStates() passes the value it reads at ESB_QUERY_OFFSET
* for each source to esbStateToString(), which matches it against these.
* NOTE(review): presumably these are the hardware-defined ESB state
* encodings -- confirm against the interrupt unit documentation.
*/
enum esbStates
{
ESB_STATE_RESET = 0,   //!< Reported as "RESET" by esbStateToString
ESB_STATE_OFF = 1,     //!< Reported as "OFF" by esbStateToString
ESB_STATE_PENDING = 2, //!< Reported as "PENDING" by esbStateToString
ESB_STATE_QUEUED = 3,  //!< Reported as "QUEUED" by esbStateToString
};


/**
* Register a message queue for an interrupt type
Expand Down Expand Up @@ -191,6 +200,18 @@ namespace INTR
*/
void sendEOI(msg_q_t i_q, msg_t* i_msg);

/**
* Sends an asynchronous MSG_INTR_DUMP message to the interrupt resource
* provider (intrrp) message queue, asking it to print the current state
* of interrupts, from hostboot's perspective, to the slow trace buffer.
*
* The request is sent with msg_send, so this function does not wait for
* the dump to complete; a NULL return only means the request was queued.
*
* On receipt the intrrp calls its private functions:
* printEsbStates
* printLSIInfo
* printPSIHBInfo
*
* @return errlHndl_t NULL if the request was sent; otherwise an error log
*         (informational if the intrrp message queue could not be
*         resolved, unrecoverable if msg_send failed).
*/
errlHndl_t printInterruptInfo();

/**
* Un register a message queue from the interrupt handler
* @param[in] i_type the type of interrupt (ISN value)
Expand Down Expand Up @@ -241,6 +262,13 @@ namespace INTR
* drain */
void drainQueue(void);

/**
* Convert an ESB state value to a human readable string
* @param[in] i_esbState ESB state value (compared against the esbStates
*            enum values)
* @param[out] o_esbStateString Set to point at a string literal naming the
*             state: "RESET", "OFF", "PENDING" or "QUEUED"; set to
*             "INVALID" if i_esbState cannot be translated. The caller
*             must pass a valid pointer and must not free the result.
* @return void
*/
void esbStateToString(uint64_t i_esbState, const char** o_esbStateString);

};

#endif
1 change: 1 addition & 0 deletions src/include/usr/intr/intr_reasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ namespace INTR
MOD_INTRRP_RESETINTUNIT = 0x11, /**< intrrp.C : IntrRp::resetIntUnit */
MOD_INTRRP_XIVE_SENDEOI = 0x12,
MOD_INTRRP_IPC = 0x13,
MOD_INTR_DUMP = 0x14, /**< intrrp.C : INTR::printInterruptInfo */
};

enum IntrReasonCode
Expand Down
160 changes: 160 additions & 0 deletions src/usr/intr/intrrp.C
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "intrrp.H"
#include <trace/interface.H>
#include <errno.h>
#include <string.h>
#include <initservice/taskargs.H>
#include <initservice/initserviceif.H>
#include <util/singleton.H>
Expand Down Expand Up @@ -1331,6 +1332,16 @@ void IntrRp::msgHandler()
msg_respond(iv_msgQ,msg);
}
break;
case MSG_INTR_DUMP:
{
// Run the functions that dump out
// interrupt info to slow buffer
printEsbStates();
printLSIInfo();
printPSIHBInfo();
msg_free(msg); // async message
}
break;

default:
msg->data[1] = -EINVAL;
Expand Down Expand Up @@ -3392,3 +3403,152 @@ errlHndl_t INTR::IntrRp::enableSlaveProcInterrupts(TARGETING::Target * i_target)
return l_err;
}

/**
 * @brief Translate an ESB state value into a human readable name.
 *
 * @param[in]  i_esbState        ESB state value to translate; compared
 *                               against the esbStates enum values.
 * @param[out] o_esbStateString  Receives a pointer to a string literal:
 *                               "RESET", "OFF", "PENDING", "QUEUED", or
 *                               "INVALID" when the value is not recognized.
 *                               If this pointer is null the function does
 *                               nothing.
 */
void INTR::esbStateToString(uint64_t i_esbState, const char** o_esbStateString)
{
    // This helper is only used on diagnostic paths; silently ignore a
    // missing output location rather than dereferencing a null pointer.
    if (o_esbStateString == nullptr)
    {
        return;
    }

    // Start from the fallback so every path leaves the output assigned.
    const char* l_stateName = "INVALID";

    switch(i_esbState)
    {
        case ESB_STATE_RESET:
            l_stateName = "RESET";
            break;
        case ESB_STATE_OFF:
            l_stateName = "OFF";
            break;
        case ESB_STATE_PENDING:
            l_stateName = "PENDING";
            break;
        case ESB_STATE_QUEUED:
            l_stateName = "QUEUED";
            break;
        default:
            // Unknown value, keep "INVALID"
            break;
    }

    *o_esbStateString = l_stateName;
}

/**
 * @brief Ask the interrupt resource provider to dump its interrupt state.
 *
 * Resolves the interrupt resource provider message queue and sends it an
 * asynchronous MSG_INTR_DUMP message. The message handler frees the message
 * once it has been processed, so on success nothing is freed here.
 *
 * @return nullptr if the message was sent; an informational error log if the
 *         message queue could not be resolved; an unrecoverable error log if
 *         msg_send returned a non-zero rc. The caller owns the returned log.
 */
errlHndl_t INTR::printInterruptInfo()
{
    errlHndl_t err = nullptr;
    msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR);
    if(intr_msgQ)
    {
        msg_t * msg = msg_allocate();
        msg->type = MSG_INTR_DUMP;
        int send_rc = msg_send(intr_msgQ, msg);
        if (send_rc != 0)
        {
            TRACFCOMP(g_trac_intr, ERR_MRK"IntrRp::printInterruptInfo error "
            "sending print intr info message");
            /*@ errorlog tag
             * @errortype       ERRL_SEV_UNRECOVERABLE
             * @moduleid        INTR::MOD_INTR_DUMP
             * @reasoncode      INTR::RC_MESSAGE_SEND_ERROR
             * @userdata1       RC from msg_send command
             * @userdata2       0
             * @devdesc         Error encountered sending print intr info
             *                  message to INTRP
             * @custdesc        Error encountered gathering diagnostic info
             */
            err = new ERRORLOG::ErrlEntry
            (
                ERRORLOG::ERRL_SEV_UNRECOVERABLE,    // severity
                INTR::MOD_INTR_DUMP,                 // moduleid
                INTR::RC_MESSAGE_SEND_ERROR,         // reason code
                send_rc,
                0
            );

            // The message never reached the handler that would normally
            // free it, so release it here to avoid leaking it.
            msg_free(msg);
            msg = nullptr;
        }
    }
    else
    {
        /*@ errorlog tag
         * @errortype       ERRL_SEV_INFORMATIONAL
         * @moduleid        INTR::MOD_INTR_DUMP
         * @reasoncode      INTR::RC_RP_NOT_INITIALIZED
         * @userdata1       MSG_INTR_DUMP
         * @userdata2       0
         * @devdesc         Interrupt resource provider not initialized yet.
         * @custdesc        Error encountered gathering diagnostic info
         */
        err = new ERRORLOG::ErrlEntry
        (
            ERRORLOG::ERRL_SEV_INFORMATIONAL,        // severity
            INTR::MOD_INTR_DUMP,                     // moduleid
            INTR::RC_RP_NOT_INITIALIZED,             // reason code
            static_cast<uint64_t>(MSG_INTR_DUMP),
            0
        );
    }
    return err;
}

void INTR::IntrRp::printLSIInfo() const
{
TRACFCOMP(g_trac_intr, "---LSI Sources---");

//Read LSI Interrupt Status register from each enabled
// proc chip to see which caused the interrupt
for(auto targ_itr = iv_chipList.begin();
targ_itr != iv_chipList.end(); ++targ_itr)
{
uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintstatus;
uint32_t l_huid = get_huid((*targ_itr)->proc);
TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid);
TRACFCOMP(g_trac_intr, " lsiIntStatus : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintstatus , l_mmioRead);
l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintlevel;
TRACFCOMP(g_trac_intr, " lsiIntLevel : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintlevel, l_mmioRead);
}
}

void INTR::IntrRp::printPSIHBInfo() const
{
TRACFCOMP(g_trac_intr, "---PSIHB Info---");
//Read LSI Interrupt Status register from each enabled
// proc chip to see which caused the interrupt
for(auto targ_itr = iv_chipList.begin();
targ_itr != iv_chipList.end(); ++targ_itr)
{
uint32_t l_huid = get_huid((*targ_itr)->proc);
uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->psihbcr;

TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid);

TRACFCOMP(g_trac_intr, " PSIHB Ctrl/Status Reg : vAddr=0x%016lx Value=0x%016lx",
&(*targ_itr)->psiHbBaseAddr->psihbcr, l_mmioRead);

l_mmioRead = (*targ_itr)->psiHbBaseAddr->psisemr;
TRACFCOMP(g_trac_intr, " PSIHB Error/Status Reg : vAddr=0x%016lx Value=0x%016lx",
&(*targ_itr)->psiHbBaseAddr->psisemr, l_mmioRead);

l_mmioRead = (*targ_itr)->psiHbBaseAddr->phbdsr;
TRACFCOMP(g_trac_intr, " PSIHB Dbg Setting Reg : vAddr=0x%016lx Value=0x%016lx",
&(*targ_itr)->psiHbBaseAddr->phbdsr, l_mmioRead);

l_mmioRead = (*targ_itr)->psiHbBaseAddr->icr;
TRACFCOMP(g_trac_intr, " PSIHB Interrupt Control Reg : vAddr=0x%016lx Value=0x%016lx",
&(*targ_itr)->psiHbBaseAddr->icr, l_mmioRead);
}
}

void INTR::IntrRp::printEsbStates() const
{
TRACFCOMP(g_trac_intr, "---ESB States---");
for(auto targ_itr = iv_chipList.begin();
targ_itr != iv_chipList.end(); ++targ_itr)
{
TRACFCOMP(g_trac_intr, "Processor 0x%lx", get_huid((*targ_itr)->proc));
for (uint8_t i = 0; i < LSI_LAST_SOURCE; i++)
{
// Ready from the ESB_QUERY_OFFSET to ensure the read doesn't
// affect the state
uint64_t * l_psiHbEsbptr = (*targ_itr)->psiHbEsbBaseAddr +
(((i*PAGE_SIZE)+ESB_QUERY_OFFSET) /sizeof(uint64_t));

volatile uint64_t l_esbState = *l_psiHbEsbptr;
const char* l_esbStateString = nullptr;

// Use toString method to look up human readable string
esbStateToString(l_esbState, &l_esbStateString);

TRACFCOMP(g_trac_intr, " SRC: %02d State: %s", i , l_esbStateString );
}
}
}
28 changes: 24 additions & 4 deletions src/usr/intr/intrrp.H
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,6 @@ namespace INTR
ESB_QUERY_OFFSET = 0x800,
ESB_OFF_OFFSET = 0xD00,
ESB_RESET_OFFSET = 0XC00,
ESB_STATE_RESET = 0,
ESB_STATE_OFF = 1,
ESB_STATE_PENDING = 2,
ESB_STATE_QUEUED = 3,
};

enum INTR_ROUTING_t
Expand Down Expand Up @@ -889,6 +885,30 @@ namespace INTR
*/
errlHndl_t resetIntpForMpipl(void);

/**
* Print out the ESB state for every source on all processors
* that the intrrp is aware of (every entry in iv_chipList).
* Output goes to the interrupt trace buffer; each state is shown
* as the string produced by esbStateToString().
*
* @return void
*/
void printEsbStates() const;

/**
* Print out the PSIHB info for all processors
* that the intrrp is aware of (every entry in iv_chipList).
* For each processor the virtual address and value of the psihbcr,
* psisemr, phbdsr and icr registers are written to the interrupt
* trace buffer.
*
* @return void
*/
void printPSIHBInfo() const;

/**
* Print out the LSI info for all processors
* that the intrrp is aware of (every entry in iv_chipList).
* For each processor the virtual address and value of the
* lsiintstatus and lsiintlevel registers are written to the
* interrupt trace buffer.
*
* @return void
*/
void printLSIInfo() const;

};
}; // INTR namespace

Expand Down

0 comments on commit 50e7279

Please sign in to comment.