Skip to content

Commit

Permalink
Tweaks to multicast scom workaround
Browse files Browse the repository at this point in the history
Added a mode that skips the bulk of FFDC collection for cases
where we expect that errors could happen.

Also added a couple of new flags to control which chiplets get
accessed as part of the workaround: one to avoid access to
slave cores before they are running, and the other to avoid
access to the memory chiplets before they have clocks enabled.

Without these changes, the code is still functionally correct
but the traces get filled with pointless non-errors which makes
real problems harder to find and also increases boot time.

Change-Id: I834781261d16748cbbfd8511d9649ec19de53d81
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/50503
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Corey V. Swenson <cswenson@us.ibm.com>
Reviewed-by: Prachi Gupta <pragupta@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
dcrowell77 committed Jan 9, 2018
1 parent 9ea9546 commit 7f8aa4b
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 13 deletions.
5 changes: 3 additions & 2 deletions src/build/tools/listdeps.pl
Expand Up @@ -6,7 +6,7 @@
#
# OpenPOWER HostBoot Project
#
# Contributors Listed Below - COPYRIGHT 2013,2016
# Contributors Listed Below - COPYRIGHT 2013,2017
# [+] Google Inc.
# [+] International Business Machines Corp.
#
Expand Down Expand Up @@ -221,7 +221,8 @@
"libsbeio.so" => '1',
"libvpd.so" => '1',
"libsecureboot_trusted.so" => '1',
"libsecureboot_base.so" => '1',
"libsecureboot_base.so" => '1',
"libscom.so" => '1',
};

# A list of the dependent libraries in each istep.
Expand Down
9 changes: 8 additions & 1 deletion src/include/usr/devicefw/driverif.H
Expand Up @@ -93,7 +93,14 @@ namespace DeviceFW
* @param[in] i_address - XSCom address to operate on.
*/
#define DEVICE_XSCOM_ADDRESS(i_address) \
DeviceFW::XSCOM, static_cast<uint64_t>((i_address))
DeviceFW::XSCOM, static_cast<uint64_t>((i_address)), 0ull

/** Construct the device addressing parameters for XSCOM device ops
* when the caller expects failures and does not want them reported.
*
* Expands to the XSCOM access type, the address, and a trailing flag
* of 1ull. The XSCOM handler (xscomPerformOp) reads that flag as its
* second variadic argument; when it is non-zero and the operation
* fails, the handler deletes the error log, returns an all-zero
* buffer, and still resets the scom engine, instead of collecting
* FFDC. DEVICE_XSCOM_ADDRESS passes 0ull in the same position.
*
* @param[in] i_address - XSCom address to operate on.
*/
#define DEVICE_XSCOM_ADDRESS_NO_ERROR(i_address) \
DeviceFW::XSCOM, static_cast<uint64_t>((i_address)), 1ull

/** Construct the device addressing parameters for IBSCOM (inband scom)
* device ops.
Expand Down
11 changes: 11 additions & 0 deletions src/include/usr/scom/scomif.H
Expand Up @@ -43,6 +43,17 @@ errlHndl_t scomTranslate(TARGETING::Target * &i_target,
bool & o_needsWakeup,
uint64_t i_opMode = 0);

// These controls are only compiled when __HOSTBOOT_RUNTIME is not defined.
#ifndef __HOSTBOOT_RUNTIME
/**
* @brief Enable scoms to all cores for multicast workaround
*
* Until this is called, the multicast workaround filters out core (EC)
* and quad (EQ) chiplets that are not running yet. It is called from
* call_host_activate_slave_cores once the slave cores are running.
* Takes no arguments and returns nothing.
*/
void enableSlaveCoreMulticast( void );

/**
* @brief Enable scoms to the memory chiplets for multicast workaround
*
* Until this is called, the multicast workaround may skip the memory
* (MC) chiplets, depending on ATTR_MC_SYNC_MODE of the target. It is
* called from call_mem_startclocks once the memory chiplets are turned
* on. Takes no arguments and returns nothing.
*/
void enableMemChipletMulticast( void );
#endif // __HOSTBOOT_RUNTIME

}; // end namespace SCOM

Expand Down
7 changes: 6 additions & 1 deletion src/usr/isteps/istep13/call_mem_startclocks.C
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2015,2016 */
/* Contributors Listed Below - COPYRIGHT 2015,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -42,6 +42,7 @@
//From Import Directory (EKB Repository)
#include <p9_mem_startclocks.H>

#include <scom/scomif.H>

using namespace ERRORLOG;
using namespace ISTEP;
Expand Down Expand Up @@ -100,6 +101,10 @@ void* call_mem_startclocks (void *io_pArgs)
}
}

// Now that the memory chiplets are turned on, we need to include them in
// multicast scom operations
SCOM::enableMemChipletMulticast();

TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"call_mem_startclocks exit" );

Expand Down
6 changes: 6 additions & 0 deletions src/usr/isteps/istep16/call_host_activate_slave_cores.C
Expand Up @@ -49,6 +49,8 @@
#include <isteps/pm/occCheckstop.H>
#endif

#include <scom/scomif.H>

using namespace ERRORLOG;
using namespace TARGETING;
using namespace ISTEP;
Expand Down Expand Up @@ -256,6 +258,10 @@ void* call_host_activate_slave_cores (void *io_pArgs)
}
#endif

// Now that the slave cores are running, we need to include them in
// multicast scom operations
SCOM::enableSlaveCoreMulticast();

TRACDCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"call_host_activate_slave_cores exit" );

Expand Down
79 changes: 77 additions & 2 deletions src/usr/scom/scom.C
Expand Up @@ -48,7 +48,7 @@
#include <hw_access_def.H>
#include <p9_scom_addr.H>
#include <targeting/common/utilFilter.H>

#include <targeting/namedtarget.H>


// Trace definition
Expand All @@ -58,6 +58,31 @@ TRAC_INIT(&g_trac_scom, SCOM_COMP_NAME, KILOBYTE, TRACE::BUFFER_SLOW); //1K

namespace SCOM
{
#ifndef __HOSTBOOT_RUNTIME
/**
* Keep track of system state to handle the multicast workaround
* more cleanly.
*
* Both flags start out false and are only ever set to true by the
* enable functions below; nothing in this block clears them again.
* The multicast workaround consults them to decide which chiplets
* to leave out of its per-chiplet scom loop.
*
* NOTE(review): these are plain bools with no synchronization visible
* here -- confirm the setters and the readers cannot race.
*/
bool g_useSlaveCores = false;   // true once slave cores may be scommed
bool g_useMemChiplets = false;  // true once mem chiplets may be scommed

/**
* @brief Enable scoms to all cores for multicast workaround
*
* Sets g_useSlaveCores so that core/quad chiplets are no longer
* filtered out of the multicast workaround. Calling it more than once
* has no further effect.
*/
void enableSlaveCoreMulticast( void )
{
    g_useSlaveCores = true;
}

/**
* @brief Enable scoms to the memory chiplets for multicast workaround
*
* Sets g_useMemChiplets so that memory chiplets are no longer
* filtered out of the multicast workaround. Calling it more than once
* has no further effect.
*/
void enableMemChipletMulticast( void )
{
    g_useMemChiplets = true;
}
#endif // __HOSTBOOT_RUNTIME

/**
* @brief Add any additional FFDC for this specific type of scom
*
Expand Down Expand Up @@ -1187,6 +1212,16 @@ errlHndl_t doMulticastWorkaround( DeviceFW::OperationType i_opType,
constexpr uint64_t MULTICAST_OP = 0x38000000;
constexpr uint64_t MULTICAST_OP_BITWISE = 0x10000000;

#ifndef __HOSTBOOT_RUNTIME
// Some P9-specific chiplet values to make things more efficient
constexpr uint64_t P9_FIRST_MC = 0x07;
constexpr uint64_t P9_LAST_MC = 0x08;
constexpr uint64_t P9_FIRST_EQ = 0x10;
constexpr uint64_t P9_LAST_EQ = 0x1F;
constexpr uint64_t P9_FIRST_EC = 0x20;
constexpr uint64_t P9_LAST_EC = 0x2F;
#endif

// Skip calls to the SENTINEL since we don't have the
// ability to find its children
if( TARGETING::MASTER_PROCESSOR_CHIP_TARGET_SENTINEL
Expand Down Expand Up @@ -1224,13 +1259,53 @@ errlHndl_t doMulticastWorkaround( DeviceFW::OperationType i_opType,
uint64_t l_data = 0;
uint64_t l_addr = (i_addr & ~CHIPLET_BYTE);
uint64_t l_unit = l_chiplet->getAttr<TARGETING::ATTR_CHIP_UNIT>();

#ifndef __HOSTBOOT_RUNTIME
// filter out some chiplets that aren't running yet
if( !g_useSlaveCores
&& (((l_unit >= P9_FIRST_EQ) && (l_unit <= P9_LAST_EQ))
|| ((l_unit >= P9_FIRST_EC) && (l_unit <= P9_LAST_EC))
)
)
{
// Only access the master ec/eq
static const TARGETING::Target* l_masterCore =
TARGETING::getMasterCore();
uint64_t l_ecNum =
l_masterCore->getAttr<TARGETING::ATTR_CHIP_UNIT>();
bool l_fused = TARGETING::is_fused_mode();
if( !((l_unit == l_ecNum) //master
|| (l_fused && (l_unit == l_ecNum+1))) ) //fused-pair
{
continue;
}
auto l_eqNum = 0x10 + l_ecNum/4;
if( l_unit == l_eqNum )
{
continue;
}
}
if( !g_useMemChiplets
&& ((l_unit >= P9_FIRST_MC) && (l_unit <= P9_LAST_MC)) )
{
// Only access the mem chiplets if we're not in async mode
// because we don't start clocks until later on in that case
auto l_syncMode =
i_target->getAttr<TARGETING::ATTR_MC_SYNC_MODE>();
if( l_syncMode )
{
continue;
}
}
#endif

l_addr |= (l_unit << 24);
io_buflen = sizeof(uint64_t);
l_err = deviceOp(i_opType,
i_target,
&l_data,
io_buflen,
DEVICE_XSCOM_ADDRESS(l_addr));
DEVICE_XSCOM_ADDRESS_NO_ERROR(l_addr));
// just ignore any errors, we expect they will happen
if( l_err )
{
Expand Down
29 changes: 22 additions & 7 deletions src/usr/xscom/xscom.C
Expand Up @@ -458,7 +458,7 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
{
// Print a trace message for debug purposes
// in case we are stuck in a retry loop.
TRACFCOMP(g_trac_xscom,"xscomPerformOp - RESOURCE OCCUPIED LOOP Cntr = %d: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX, HMER=%.16X", l_retryCtr, static_cast<uint64_t>(i_opType), i_xscomAddr, static_cast<uint64_t>(l_mmioAddr), io_hmer.mRegister );
TRACFCOMP(g_trac_xscom,"xscomDoOp - RESOURCE OCCUPIED LOOP Cntr = %d: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX, HMER=%.16X", l_retryCtr, static_cast<uint64_t>(i_opType), i_xscomAddr, static_cast<uint64_t>(l_mmioAddr), io_hmer.mRegister );

// we don't want to hang forever so break out after
// an obscene amount of time
Expand All @@ -471,17 +471,17 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
} while (io_hmer.mXSComStatus == PIB::PIB_RESOURCE_OCCUPIED);


TRACDCOMP(g_trac_xscom,"xscomPerformOp: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX", static_cast<uint64_t>(i_opType),i_xscomAddr,static_cast<uint64_t>(l_mmioAddr));
TRACDCOMP(g_trac_xscom,"xscomDoOp: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX", static_cast<uint64_t>(i_opType),i_xscomAddr,static_cast<uint64_t>(l_mmioAddr));

TRACDCOMP(g_trac_xscom, "xscomPerformOp: l_offset 0x%.16llX; VirtAddr %p; i_virtAddr+l_offset %p",l_offset,i_virtAddr,i_virtAddr + l_offset);
TRACDCOMP(g_trac_xscom, "xscomDoOp: l_offset 0x%.16llX; VirtAddr %p; i_virtAddr+l_offset %p",l_offset,i_virtAddr,i_virtAddr + l_offset);

if (i_opType == DeviceFW::READ)
{
TRACDCOMP(g_trac_xscom, "xscomPerformOp: Read data: %.16llx", l_data);
TRACDCOMP(g_trac_xscom, "xscomDoOp: Read data: %.16llx", l_data);
}
else
{
TRACDCOMP(g_trac_xscom, "xscomPerformOp: Write data: %.16llx", l_data);
TRACDCOMP(g_trac_xscom, "xscomDoOp: Write data: %.16llx", l_data);
}

do
Expand All @@ -490,6 +490,7 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
if (io_hmer.mXSComStatus != PIB::PIB_NO_ERROR)
{
uint64_t l_hmerVal = io_hmer;
uint64_t l_fullAddr = mm_virt_to_phys(i_virtAddr + l_offset);

TRACFCOMP(g_trac_xscom,ERR_MRK "XSCOM status error HMER: %.16llx ,XSComStatus = %llx, Addr=%llx",l_hmerVal,io_hmer.mXSComStatus, i_xscomAddr );
/*@
Expand All @@ -504,7 +505,7 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType,
XSCOM_DO_OP,
XSCOM_STATUS_ERR,
io_hmer,
l_mmioAddr);
l_fullAddr);
//Note: Callouts are added by the caller if needed
}
}
Expand Down Expand Up @@ -746,6 +747,7 @@ errlHndl_t xscomPerformOp(DeviceFW::OperationType i_opType,
HMER l_hmer;
mutex_t* l_XSComMutex = NULL;
uint64_t l_addr = va_arg(i_args,uint64_t);
uint64_t l_noErrors = va_arg(i_args,uint64_t);

do
{
Expand Down Expand Up @@ -793,7 +795,20 @@ errlHndl_t xscomPerformOp(DeviceFW::OperationType i_opType,
l_hmer);

// If we got a scom error.
if (l_err)
if (l_err && l_noErrors)
{
// ignoring errors because the caller doesn't care
// just return all zero data
delete l_err;
l_err = nullptr;
io_buflen = XSCOM_BUFFER_SIZE;
memset( io_buffer, 0, io_buflen );

// still need to reset the scomEngine.
resetScomEngine(i_target,
l_virtAddr);
}
else if (l_err)
{
// Call XscomCollectFFDC..
collectXscomFFDC(i_target,
Expand Down

0 comments on commit 7f8aa4b

Please sign in to comment.