Skip to content

Commit

Permalink
PRD: Query for active attentions when channel fail detected
Browse files Browse the repository at this point in the history
The HWP used for querying channel failure does not take into account
that a FIR bit may be masked, or configured as UNIT_CS (which is a
bug). Therefore, we must check for active attentions after calling
the HWP.

Change-Id: I46bd9413d8f17198b6c466be00dfbcfc487c2229
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61221
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Matt Derksen <mderkse1@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/61532
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
  • Loading branch information
zane131 committed Jun 28, 2018
1 parent b983851 commit 6fd60cf
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 25 deletions.
115 changes: 90 additions & 25 deletions src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C
Original file line number Diff line number Diff line change
Expand Up @@ -521,10 +521,92 @@ uint32_t __queryChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip,
/**
 * @brief  Queries whether the given DMI target has an active channel failure
 *         attention.
 *
 * The platform query (PlatServices::queryChnlFail) is called first. If it
 * reports a channel failure, that result is only trusted when an active
 * attention is also found in either the DMI's CHIFIR or in this DMI's 8-bit
 * field of the parent MC's IOMCFIR. An attention is treated as active when the
 * FIR bit is set, the matching mask bit is clear, and the matching bits in
 * both ACT0 and ACT1 are set.
 *
 * @param  i_chip     A DMI chip. Must not be nullptr (asserted).
 * @param  o_chnlFail Set to true only if the platform query reports a channel
 *                    failure and an active attention was found. Otherwise,
 *                    false.
 * @return SUCCESS if the query and all required register reads succeeded,
 *         non-SUCCESS otherwise (o_chnlFail is left false in that case).
 */
template<>
uint32_t __queryChnlFail<TYPE_DMI>( ExtensibleChip * i_chip, bool & o_chnlFail )
{
    #define PRDF_FUNC "[MemUtils::__queryChnlFail] "

    PRDF_ASSERT( nullptr != i_chip );
    PRDF_ASSERT( TYPE_DMI == i_chip->getType() );

    uint32_t o_rc = SUCCESS;

    o_chnlFail = false;

    SCAN_COMM_REGISTER_CLASS * fir  = nullptr;
    SCAN_COMM_REGISTER_CLASS * mask = nullptr;
    SCAN_COMM_REGISTER_CLASS * act0 = nullptr;
    SCAN_COMM_REGISTER_CLASS * act1 = nullptr;

    do
    {
        // There is a HWP on the processor side that will query if this channel
        // has failed. Unfortunately, it does not check for an active channel
        // fail attention (i.e. not masked). That will need to be done
        // afterwards. Note that the HWP result must NOT be returned directly
        // to the caller; it is only a first-pass filter.
        bool tmpChnlFail = false;
        o_rc = PlatServices::queryChnlFail<TYPE_DMI>( i_chip, tmpChnlFail );
        if ( SUCCESS != o_rc )
        {
            // No register is read directly at this point, so report the
            // failing call rather than a register name.
            PRDF_ERR( PRDF_FUNC "PlatServices::queryChnlFail() failed on "
                      "0x%08x", i_chip->getHuid() );
            break;
        }
        if ( !tmpChnlFail ) break; // nothing more to do.

        // Check for an active attention on the CHIFIR.
        fir  = i_chip->getRegister( "CHIFIR"      );
        mask = i_chip->getRegister( "CHIFIR_MASK" );
        act0 = i_chip->getRegister( "CHIFIR_ACT0" );
        act1 = i_chip->getRegister( "CHIFIR_ACT1" );

        // The return codes are OR'd together, so any single failed read makes
        // the combined value non-SUCCESS.
        o_rc = fir->Read() | mask->Read() | act0->Read() | act1->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Failed to read CHIFIRs on 0x%08x",
                      i_chip->getHuid() );
            break;
        }

        // Any bit that is on, unmasked, and has both action bits set.
        if ( 0 != (  fir->GetBitFieldJustified( 0,64) &
                    ~mask->GetBitFieldJustified(0,64) &
                     act0->GetBitFieldJustified(0,64) &
                     act1->GetBitFieldJustified(0,64) ) )
        {
            o_chnlFail = true;
            break; // nothing more to do.
        }

        // Check for an active attention on the IOMCFIR. This register lives on
        // the parent MC. Only the 8-bit field belonging to this DMI is
        // examined: it starts at bit 8 and advances 8 bits per DMI position
        // relative to the MC.
        ExtensibleChip * mcChip = getConnectedParent( i_chip, TYPE_MC );
        uint32_t dmiPos = i_chip->getPos() % MAX_DMI_PER_MC;
        uint32_t bitPos = 8 + dmiPos * 8;

        fir  = mcChip->getRegister( "IOMCFIR"      );
        mask = mcChip->getRegister( "IOMCFIR_MASK" );
        act0 = mcChip->getRegister( "IOMCFIR_ACT0" );
        act1 = mcChip->getRegister( "IOMCFIR_ACT1" );

        o_rc = fir->Read() | mask->Read() | act0->Read() | act1->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Failed to read IOMCFIRs on 0x%08x",
                      mcChip->getHuid() );
            break;
        }

        if ( 0 != (  fir->GetBitFieldJustified( bitPos,8) &
                    ~mask->GetBitFieldJustified(bitPos,8) &
                     act0->GetBitFieldJustified(bitPos,8) &
                     act1->GetBitFieldJustified(bitPos,8) ) )
        {
            o_chnlFail = true;
            break; // nothing more to do.
        }

        // The HWP reported a failed channel, but nothing is actively
        // reporting it. o_chnlFail stays false.
        PRDF_INF( PRDF_FUNC "Failed channel detected on 0x%08x, but no active "
                  "attentions found", i_chip->getHuid() );

    } while (0);

    return o_rc;

    #undef PRDF_FUNC
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -666,27 +748,10 @@ void __cleanupChnlFail<TYPE_DMI,TYPE_MEMBUF>( ExtensibleChip * i_dmiChip,
ExtensibleChip * mcChip = getConnectedParent( i_dmiChip, TYPE_MC );
uint32_t dmiPos = i_dmiChip->getPos() % MAX_DMI_PER_MC;

// Mask off all attentions from the DMI target in the chiplet FIRs.
reg = mcChip->getRegister( "MC_CHIPLET_FIR_MASK" );
if ( SUCCESS == reg->Read() )
{
reg->SetBit( 4 + (dmiPos * 2) ); // 4, 6, 8, 10
reg->Write();
}

reg = mcChip->getRegister( "MC_CHIPLET_UCS_FIR_MASK" );
if ( SUCCESS == reg->Read() )
{
reg->SetBit( 0 + (dmiPos * 2) ); // 0, 2, 4, 6 (masks 1, 3, 5, 7)
reg->Write();
}

reg = mcChip->getRegister( "MC_CHIPLET_HA_FIR_MASK" );
if ( SUCCESS == reg->Read() )
{
reg->SetBit( 0 + (dmiPos * 2) ); // 0, 2, 4, 6 (masks 1, 3, 5, 7)
reg->Write();
}
// Mask off all attentions from the DMI target in the CHIFIR.
reg = i_dmiChip->getRegister( "CHIFIR_MASK_OR" );
reg->setAllBits();
reg->Write();

// Mask off all attentions from the DMI target in the IOMCFIR.
reg = mcChip->getRegister( "IOMCFIR_MASK_OR" );
Expand Down
8 changes: 8 additions & 0 deletions src/usr/diag/prdf/common/plat/p9/p9_dmi_regs.rule
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@
access write_only;
};

# Write-only register definition for the "atomic OR" access to the DMI target's
# CHIFIR_MASK (per the name string below), at SCOM address 0x07010905.
# NOTE(review): presumably any bit written as 1 here is OR'd into CHIFIR_MASK
# by hardware, i.e. it masks the corresponding CHIFIR bit without a
# read-modify-write -- confirm against the register specification.
register CHIFIR_MASK_OR
{
name "P9 DMI target CHIFIR_MASK atomic OR";
scomaddr 0x07010905;
# NOTE(review): "never" presumably keeps this register out of all capture
# data, which fits a register that cannot be read -- confirm.
capture group never;
access write_only;
};

register MCICFG0
{
name "MCI Configuration Register 0";
Expand Down

0 comments on commit 6fd60cf

Please sign in to comment.