Skip to content

Commit

Permalink
Save HRMOR in mbox scratch reg for IPC messaging
Browse files Browse the repository at this point in the history
In order to know where the IPC message of a given node is,
we save off the HRMOR of every node in a hw register.
Originally, we were saving this information in the core
scratch register. Because the core scratch registers are
wiped when the cores go into the winkle state, we were
writing to the register after coming out of winkle.
At that point, however, we ran into race conditions because
other nodes could be ahead and try to access the register
on a node that was not yet fully out of winkle.

This fixes the problem by using the mbox scratch registers
rather than the core scratch register, because the mbox scratch
registers are preserved even when the cores go into the winkle
state. Because the registers are preserved, we can set the value
before the cores come out of winkle, so we don't run into the race
condition where one node is trying to read a value before
the other one has written it.

Change-Id: I822bfc8defe09cbb418edc5f36a99b7cd41eec88
CQ:SW435271
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61093
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
Prachi Gupta authored and dcrowell77 committed Jun 26, 2018
1 parent d406ad3 commit cfc5fb7
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 71 deletions.
6 changes: 3 additions & 3 deletions src/include/usr/xscom/xscomif.H
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ uint64_t generate_mmio_addr( TARGETING::Target* i_proc,
uint64_t i_scomAddr );

/**
* @brief Multicast Read of core XSCOM register on remote Node
* @brief Read of XSCOM register on remote Node
* @param[in] i_node - logical Node Number
* @param[in] i_scomAddr - Physical scom address to read
* @return uint64_t - Register value
*/
uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
uint64_t i_scomAddr );
uint64_t readRemoteScom( uint64_t i_node,
uint64_t i_scomAddr );

}; // namespace XSCOM

Expand Down
1 change: 1 addition & 0 deletions src/kernel/ipc.C
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ int KernelIpc::updateRemoteIpcAddr(uint64_t i_Node, uint64_t i_RemoteAddr)
{
// update local array entry
rc = 0;
printk("IPC ADDR %d = 0x%lx\n", (int)i_Node, i_RemoteAddr);
ipc_data_area.remote_ipc_data_addr[i_Node] =
reinterpret_cast<ipc_data_area_t*>(i_RemoteAddr);
}
Expand Down
16 changes: 5 additions & 11 deletions src/usr/initservice/istepdispatcher/istepdispatcher.C
Original file line number Diff line number Diff line change
Expand Up @@ -2139,6 +2139,11 @@ void IStepDispatcher::handleProcFabIovalidMsg(msg_t * & io_pMsg)
msg_respond(iv_msgQ, io_pMsg);
io_pMsg = NULL;

//Setup for IPC messages to continue after we come out of winkle
//This stores the location of IPC messages of current drawer in the mbox scratch
//register.
IPC::IpcSp::distributeLocalNodeAddr();

// call to suspend the MBOX so that all messages are flushed
err = MBOX::suspend();
if (err)
Expand Down Expand Up @@ -2182,17 +2187,6 @@ void IStepDispatcher::handleProcFabIovalidMsg(msg_t * & io_pMsg)
"Returned from cpu_all_winkle." );
}

//Temporary hack to sleep for 15 seconds to avoid race condition
//where other nodes are trying to read the core scratch registers
//while this node might not be out of winkle. To make it worse,
//if we fail in this case, we don't TI gracefully.
nanosleep(15,0);

// identify IPC msg address to remote node(s)
// this must be after the winkle call because the data is stored
// in a core scom which will get lost
IPC::IpcSp::distributeLocalNodeAddr();

err = MBOX::resume();
if (err)
{
Expand Down
57 changes: 22 additions & 35 deletions src/usr/mbox/ipcSp.C
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@
#include <mbox/mbox_reasoncodes.H>
#include <intr/interrupt.H>
#include <initservice/initserviceif.H>
#include <initservice/mboxRegs.H>
#include <sbeio/sbeioif.H>
#include <util/utiltce.H>
#include <util/utilmbox_scratch.H>
#include <targeting/targplatutil.H>
#include <targeting/common/targetservice.H>
#include <targeting/common/attributes.H>
#include <p9_quad_scom_addresses.H>
#include <sys/internode.h>
#include <sys/mmio.h>
#include <xscom/xscomif.H>
Expand All @@ -52,7 +53,7 @@ namespace ISTEP_21
};

trace_desc_t* g_trac_ipc = NULL;
TRAC_INIT(&g_trac_ipc, IPC_TRACE_NAME, KILOBYTE);
TRAC_INIT(&g_trac_ipc, IPC_TRACE_NAME, 4*KILOBYTE);

using namespace IPC;
using namespace ERRORLOG;
Expand All @@ -77,41 +78,16 @@ void IpcSp::init(errlHndl_t & o_errl)

void IpcSp::distributeLocalNodeAddr( void )
{
// Store IPC address for local node in core scratch registers
// Store IPC address for local node in mbox scratch register 7
// to identify IPC msg address to remote node(s)
uint64_t l_localNode;
uint64_t l_remoteAddr;
qryLocalIpcInfo( l_localNode, l_remoteAddr );

TARGETING::Target * l_pSys = NULL;
TARGETING::targetService().getTopLevelTarget( l_pSys );
TARGETING::TargetHandleList l_coreTargetList;
TARGETING::getChildChiplets( l_coreTargetList,
l_pSys,
TARGETING::TYPE_CORE,
true );

// Store IPC address into scom reg for each core
// Every core on this node needs to have the value stored
// in it's scratch register in case any cores get deconfigured
for(const auto & l_core_target : l_coreTargetList)
{
uint64_t l_remoteAddrSize = sizeof(l_remoteAddr);
errlHndl_t l_err = deviceWrite( l_core_target,
&l_remoteAddr,
l_remoteAddrSize,
DEVICE_SCOM_ADDRESS(C_SCR2) );

if (l_err)
{
TRACFCOMP( g_trac_ipc,
"ERROR: distributeLocalNodeAddr == failed to scom Addr=0x%x"
" Target=0x%x", C_SCR2, get_huid(l_core_target));
errlCommit(l_err, IPC_COMP_ID);
}
}

return;
Util::writeScratchReg (INITSERVICE::SPLESS::MBOX_SCRATCH_REG7,
l_remoteAddr>>32);
Util::writeScratchReg (INITSERVICE::SPLESS::MBOX_SCRATCH_REG8,
l_remoteAddr);
}

void IpcSp::acquireRemoteNodeAddrs( void )
Expand Down Expand Up @@ -681,9 +657,20 @@ void IpcSp::_acquireRemoteNodeAddrs( void )
( (validNodeBitMap & (0x80 >> i)) != 0 )
{
// read scoms for remote node
l_RemoteAddr =
XSCOM::readRemoteCoreScomMultiCast(i,
C_SCR2);
uint64_t l_remoteAddrHighBits =
XSCOM::readRemoteScom(i,
INITSERVICE::SPLESS::MBOX_SCRATCH_REG7);

uint64_t l_remoteAddrLowBits =
XSCOM::readRemoteScom(i,
INITSERVICE::SPLESS::MBOX_SCRATCH_REG8);

l_RemoteAddr = (l_remoteAddrHighBits ) |
(l_remoteAddrLowBits >> 32);

TRACFCOMP( g_trac_ipc,"readRemoteScom"
" node=%d, remoteAddr=0x%x",
i, l_RemoteAddr);
} // end valid node
else
{
Expand Down
34 changes: 12 additions & 22 deletions src/usr/xscom/xscom.C
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,7 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
uint64_t l_data = 0;

// retry counter.
uint32_t l_retryCtr = 0;
uint32_t l_retryTraceCtr = 128;
uint32_t l_retryCtr = 1;

errlHndl_t l_err = NULL;

Expand All @@ -378,10 +377,9 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
// Check for error or done
io_hmer = waitForHMERStatus();

l_retryCtr++;

// If the retry counter is a multiple of 128,256,512,etc.
if (l_retryCtr % l_retryTraceCtr*2 == 0)
if (l_retryCtr % 100000 == 0)
{
// print a trace message.. for debug purposes
// incase we are stuck in a retry loop.
Expand All @@ -395,6 +393,7 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
break;
}
}
l_retryCtr++;
} while (io_hmer.mXSComStatus == PIB::PIB_RESOURCE_OCCUPIED);


Expand Down Expand Up @@ -822,10 +821,10 @@ uint64_t generate_mmio_addr( TARGETING::Target* i_proc,


/**
* @brief Multicast Read of core XSCOM register on remote Node
* @brief Read of XSCOM register on remote Node
*/
uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
uint64_t i_scomAddr )
uint64_t readRemoteScom( uint64_t i_node,
uint64_t i_scomAddr )
{
// definitions of 64 bit xscom address contents that are
// useful for this function
Expand Down Expand Up @@ -853,10 +852,6 @@ uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
// - rsvd 38
// multicast group 0x0000_0000__0700_0000
// relative scomAddr field 0x0000_0000__00FF_FFFF
constexpr uint64_t XSCOM_MULTICAST = 0x0000000040000000;
constexpr uint64_t XSCOM_MULTICAST_OP_READ_OR = 0x0000000000000000;
constexpr uint64_t XSCOM_MULTICAST_GROUP_CORE = 0x0000000001000000;
constexpr uint64_t XSCOM_MULTICAST_REL_ADDR_MASK = 0x0000000000FFFFFF;

// Symmetry between nodes is enforced so we know the remote
// node contains this chip
Expand All @@ -866,17 +861,15 @@ uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
uint8_t l_chipId =
l_MasterProcTarget->getAttr<TARGETING::ATTR_FABRIC_CHIP_ID>();

// compute xscom address & control, then map into processor space
// compute xscom address & control
// This will return xscom base of the remote node
uint64_t l_xscomBaseAddr =
computeMemoryMapOffset( MMIO_GROUP0_CHIP0_XSCOM_BASE_ADDR,
i_node,
l_chipId );

uint64_t l_xscomAddr = ( (i_scomAddr & XSCOM_MULTICAST_REL_ADDR_MASK) |
XSCOM_MULTICAST |
XSCOM_MULTICAST_OP_READ_OR |
XSCOM_MULTICAST_GROUP_CORE );

//Map xscom base into processor space
uint64_t * l_virtAddr =
static_cast<uint64_t*>
(mmio_dev_map(reinterpret_cast<void*>(l_xscomBaseAddr),
Expand All @@ -892,7 +885,7 @@ uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
{
errlHndl_t l_err = xScomDoOp( DeviceFW::READ,
l_virtAddr,
l_xscomAddr,
i_scomAddr,
&l_rv,
l_rvSize,
l_hmer );
Expand All @@ -904,17 +897,14 @@ uint64_t readRemoteCoreScomMultiCast( uint64_t i_node,
l_err = nullptr;

TRACFCOMP( g_trac_xscom,
ERR_MRK "readRemoteCoreScomMultiCast() Read xScom Failed: "
ERR_MRK "readRemoteScom() Read xScom Failed: "
"XscomAddr = %.16llx, VAddr=%llx",
l_xscomAddr, l_virtAddr );
i_scomAddr, l_virtAddr );

// re-seed return value in case changed before error detected
l_rv = IPC_INVALID_REMOTE_ADDR | i_node;
break;
}
else
{
}

// regs not yet populated
if (l_rv == 0 )
Expand Down

0 comments on commit cfc5fb7

Please sign in to comment.