From 3890040afa1dc93d58476d68df35cb44d49c57b2 Mon Sep 17 00:00:00 2001 From: Stephen Glancy Date: Thu, 21 Sep 2017 10:14:04 -0500 Subject: [PATCH] Updates error paths for PRD FIR checking FIR's could cause errors within hardware procedures. PRD has the capability to retrigger a procedure if it sees an error. We might be able to avoid IPL issues with this, so if a FIR has been hit during hardware enabled code (CCS or calibration), then log the error and let PRD find the "new" FIR that could have caused the hardware engine to have an issue. If there is some other problem, the retriggered HWP will find it. Change-Id: I81599d1d0c4b4c256b79820b4a7e2eafc09e206b Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46571 Tested-by: FSP CI Jenkins Reviewed-by: JACOB L. HARVEY Reviewed-by: Louis Stermole Tested-by: Jenkins Server Tested-by: HWSV CI Reviewed-by: ANDRE A. MARIN Tested-by: Hostboot CI Reviewed-by: Jennifer A. Stofer Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46584 Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Reviewed-by: Daniel M. 
Crowell --- .../p9/procedures/hwp/memory/lib/ccs/ccs.C | 7 +- .../hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C | 2 +- .../p9/procedures/hwp/memory/lib/dimm/rank.C | 16 +- .../p9/procedures/hwp/memory/lib/dimm/rank.H | 4 +- .../p9/procedures/hwp/memory/lib/fir/check.C | 202 ++++++++++++++++++ .../p9/procedures/hwp/memory/lib/fir/check.H | 147 ++++++++++++- .../p9/procedures/hwp/memory/lib/mc/port.H | 2 +- .../p9/procedures/hwp/memory/lib/mc/xlate.C | 4 +- .../procedures/hwp/memory/lib/phy/ddr_phy.C | 11 +- .../p9/procedures/hwp/memory/lib/phy/dp16.C | 42 +++- .../hwp/memory/lib/shared/mss_const.H | 1 + .../memory/lib/workarounds/dp16_workarounds.C | 18 +- .../hwp/memory/p9_mss_draminit_training.C | 2 +- 13 files changed, 432 insertions(+), 26 deletions(-) diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C index b8295122127..6690485137d 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C @@ -37,6 +37,7 @@ #include #include +#include using fapi2::TARGET_TYPE_MCBIST; using fapi2::TARGET_TYPE_MCA; @@ -83,6 +84,7 @@ fapi2::ReturnCode fail_type( const fapi2::Target& i_target, const uint64_t& i_type, const fapi2::Target& i_mca ) { + fapi2::ReturnCode l_failing_rc(fapi2::FAPI2_RC_SUCCESS); // Including the MCA_TARGET here and below at CAL_TIMEOUT since these problems likely lie at the MCA level // So we disable the PORT and hopefully that's it // If the problem lies with the MCBIST, it'll just have to loop @@ -112,7 +114,10 @@ fapi2::ReturnCode fail_type( const fapi2::Target& i_target, fapi2::MSS_CCS_HUNG().set_MCBIST_TARGET(i_target), "%s CCS appears hung", mss::c_str(i_target)); fapi_try_exit: - return fapi2::current_err; + // Due to the PRD update, we need to check for FIR's + // If any FIR's have lit up, this CCS fail could have been caused by the FIR + // So, let PRD retrigger this step to see if we can resolve the issue 
+ return mss::check::fir_or_pll_fail(i_target, fapi2::current_err); } /// diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C index 43694ac2d96..cec455f6a6d 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C @@ -64,7 +64,7 @@ fapi2::ReturnCode mrs_engine( const fapi2::Target& i_ta const uint64_t i_rank, std::vector< ccs::instruction_t >& io_inst ) { - FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) ); + FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) ); fapi_try_exit: return fapi2::current_err; diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C index 6404adf0bc9..f2edb7873ce 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C @@ -233,7 +233,6 @@ fapi_try_exit: /// /// @brief Return a vector of rank numbers which represent the primary rank pairs for this port -/// @tparam T the target type /// @param[in] i_target TARGET_TYPE_MCA /// @param[out] o_rps a vector of rank_pairs /// @return FAPI2_RC_SUCCESS iff all is ok @@ -251,7 +250,7 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target& i_target, FAPI_TRY( mss::eff_num_master_ranks_per_dimm(d, l_rank_count[mss::index(d)]) ); } - FAPI_DBG("ranks: %d, %d", l_rank_count[0], l_rank_count[1]); + FAPI_DBG("%s ranks: %d, %d", mss::c_str(i_target), l_rank_count[0], l_rank_count[1]); // Walk through rank pair table and skip empty pairs o_rps.clear(); @@ -264,13 +263,15 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target& i_target, } } + // Returning success in case no DIMM's are configured + return fapi2::FAPI2_RC_SUCCESS; + fapi_try_exit: return fapi2::current_err; } /// /// @brief Return a 
vector of rank numbers which represent the primary rank pairs for this dimm -/// @tparam T the target type /// @param[in] i_target TARGET_TYPE_DIMM /// @param[out] o_rps a vector of rank_pairs /// @return FAPI2_RC_SUCCESS iff all is ok @@ -344,7 +345,6 @@ fapi_try_exit: /// /// @brief Given a target, get the rank pair assignments, based on DIMMs -/// @tparam T the fapi2::TargetType /// @param[in] i_target the target (MCA or MBA?) /// @param[out] o_registers the regiter settings for the appropriate rank pairs /// @return FAPI2_RC_SUCCESS if and only if ok @@ -382,8 +382,7 @@ fapi_try_exit: /// /// @brief Setup the rank information in the port -/// @tparam T the fapi2::TargetType -/// @param[in] i_target the target (MCA or MBA?) +/// @param[in] i_target the target (MCA) /// @return FAPI2_RC_SUCCESS if and only if ok /// template<> @@ -485,7 +484,6 @@ fapi_try_exit: /// /// @brief Get a vector of configured rank pairs. /// Returns a vector of ordinal values of the configured rank pairs. e.g., for a 2R DIMM, {0, 1} -/// @tparam T the fapi2::TargetType /// @param[in]i_target the target (MCA or MBA?) /// @param[out] o_pairs std::vector of rank pairs configured /// @return FAPI2_RC_SUCCESS if and only if ok @@ -565,7 +563,6 @@ fapi_try_exit: /// /// @brief Get a rank-pair id from a physical rank /// Returns a number representing which rank-pair this rank is a part of -/// @tparam T the fapi2::TargetType /// @param[in] i_target the target (MCA or MBA?) /// @param[in] i_rank the physical rank number /// @param[out] o_pairs the rank pair @@ -573,7 +570,8 @@ fapi_try_exit: /// template<> fapi2::ReturnCode get_pair_from_rank(const fapi2::Target& i_target, - uint64_t i_rank, uint64_t& o_pair) + uint64_t i_rank, + uint64_t& o_pair) { // Sort of brute-force, but no real good other way to do it. 
Given the // rank-pair configuration we walk the config looking for our rank, and diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H index 34310cc560c..e5b3b9041fb 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H @@ -1060,7 +1060,7 @@ inline fapi2::ReturnCode set_pair_valid( const fapi2::Target& i_target, fapi2::MSS_INVALID_RANK() .set_RANK(i_rank) .set_MCA_TARGET(i_target) - .set_FUNCTION(GET_RANKS_IN_PAIR), + .set_FUNCTION(SET_PAIR_VALID), "%s Invalid rank (%d) in get_ranks_in_pair", mss::c_str(i_target), i_rank); @@ -1231,7 +1231,7 @@ fapi2::ReturnCode get_ranks_in_pair( const fapi2::Target& i_target, // Get data for (uint64_t l_ordinal = 0; l_ordinal < TT::NUM_RANKS_IN_PAIR; ++l_ordinal) { - // Check to make sure rank is vlaid + // Check to make sure rank is valid FAPI_ASSERT( l_ordinal < MAX_RANK_PER_DIMM, fapi2::MSS_INVALID_RANK() .set_RANK(l_ordinal) diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C index 58f1f0d94f9..7a329aaed20 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -205,6 +207,9 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target l_phyfir_data; fapi2::buffer l_phyfir_masked; + // If we have a FIR that is lit up, we want to see if it could have been caused by a more drastic FIR + bool l_check_fir = false; + FAPI_TRY( mss::getScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG, l_phyfir_data) ); l_phyfir_masked = l_phyfir_data & l_phyfir_mask; @@ -213,6 +218,8 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target> MCBIST_FIR_REGS = +{ + // MCBIST FIR + {MCBIST_MCBISTFIRQ, MCBIST_MCBISTFIRMASK}, +}; + +static const 
std::vector> MCA_FIR_REGS = +{ + // MCA ECC FIR + {MCA_FIR, MCA_MASK}, + // MCA CAL FIR + {MCA_MBACALFIRQ, MCA_MBACALFIR_MASK}, + // DDRPHY FIR + {MCA_IOM_PHY0_DDRPHY_FIR_REG, MCA_IOM_PHY0_DDRPHY_FIR_MASK_REG}, +}; + +/// +/// @brief Checks whether any of the PLL unlock values are set +/// @param[in] i_local_fir - the overall FIR register +/// @param[in] i_perv_fir - the pervasive PLL FIR +/// @param[in] i_mc_fir - the memory controller FIR +/// @return true iff the overall local FIR bit is set and a PLL unlock bit is on in the MC or TP error register, false otherwise +/// +bool pll_unlock( const fapi2::buffer& i_local_fir, + const fapi2::buffer& i_perv_fir, + const fapi2::buffer& i_mc_fir ) +{ + // Note: the following registers did not have the scom fields defined, so we're constexpr'ing them here + constexpr uint64_t PERV_TP_ERROR_START = 25; + constexpr uint64_t PERV_TP_ERROR_LEN = 4; + constexpr uint64_t PERV_MC_ERROR_START = 25; + + // No overall FIR (bit 21) was set, so just exit + if(!i_local_fir.getBit()) + { + FAPI_INF("Did not have the PERV_LOCAL_FIR bit set. No PLL error, exiting"); + return false; + } + + // Now, identify whether a PLL unlock caused the FIR bit to fail + FAPI_INF("PERV_TP_ERROR_REG %s PERV_MC01_ERROR_REG %s", + i_perv_fir.getBit() ? "PLL lock fail" : "PLL ok", + i_mc_fir.getBit() ? 
"PLL lock fail" : "PLL ok"); + + // We have a PLL unlock if the MC PLL unlock FIR bit is on or any of the TP PLL unlock bits are on + return (i_mc_fir.getBit()) || (i_perv_fir.getBit()); +} + +/// +/// @brief Checks whether any PLL FIRs have been set on a target +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +fapi2::ReturnCode pll_fir( const fapi2::Target& i_target, bool& o_fir_error ) +{ + // Sets o_fir_error to false to begin with, just in case we have scom issues + o_fir_error = false; + + // Gets the processor target + const auto& l_proc = mss::find_target(i_target); + + // Gets the register data + fapi2::buffer l_local_fir; + fapi2::buffer l_perv_fir; + fapi2::buffer l_mc_fir; + + FAPI_TRY(mss::getScom(l_proc, PERV_TP_LOCAL_FIR, l_local_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_TP_LOCAL_FIR); + FAPI_TRY(mss::getScom(l_proc, PERV_TP_ERROR_REG, l_perv_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_TP_ERROR_REG); + FAPI_TRY(mss::getScom(i_target, PERV_MC01_ERROR_REG, l_mc_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_MC01_ERROR_REG); + + // Checks the data + o_fir_error = pll_unlock(l_local_fir, l_perv_fir, l_mc_fir); + fapi_try_exit: return fapi2::current_err; } +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - MCBIST specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ) +{ + // Start by assuming we do not have a FIR + o_fir_error = false; + + // Loop, check the scoms, and check the FIR + // Note: we return out if any FIR is bad + for(const auto& l_fir_reg : MCBIST_FIR_REGS) + { + FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error)); + + // Exit 
if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Loop through all MCA's and all MCA FIR's + for(const auto& l_mca : mss::find_targets(i_target)) + { + for(const auto& l_fir_reg : MCA_FIR_REGS) + { + FAPI_TRY(fir_with_mask(l_mca, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + } + + // Lastly, check for PLL unlocks + FAPI_TRY(pll_fir(i_target, o_fir_error)); + +fapi_try_exit: + return fapi2::current_err; +} + + +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - MCA specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ) +{ + const auto& l_mcbist = mss::find_target(i_target); + // Start by assuming we do not have a FIR + o_fir_error = false; + + // Loop, check the scoms, and check the FIR + // Note: we return out if any FIR is bad + for(const auto& l_fir_reg : MCBIST_FIR_REGS) + { + FAPI_TRY(fir_with_mask(l_mcbist, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Loop through all MCA FIR's + for(const auto& l_fir_reg : MCA_FIR_REGS) + { + FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Lastly, check for PLL unlocks + FAPI_TRY(pll_fir(l_mcbist, o_fir_error)); + +fapi_try_exit: + return fapi2::current_err; +} + + +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - DIMM specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& 
o_fir_error ) +{ + const auto l_mca = mss::find_target(i_target); + return bad_fir_bits(l_mca, o_fir_error); +} + } } diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H index ded638e4999..fc82aaed191 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H @@ -27,7 +27,7 @@ /// @file check.H /// @brief Subroutines for checking MSS FIR /// -// *HWP HWP Owner: Brian Silver +// *HWP HWP Owner: Andre Marin // *HWP HWP Backup: Marc Gollub // *HWP Team: Memory // *HWP Level: 2 @@ -37,6 +37,7 @@ #define _MSS_CHECK_FIR_H_ #include +#include namespace mss { @@ -58,6 +59,7 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target& i_target ); /// /// @brief Check FIR bits during draminit training +/// @tparam T the fapi2::TargetType which hold the FIR bits /// @param[in] i_target the dimm that was trained /// @note We check for fir errors after training each rank /// to see if there was a problem with the engine @@ -69,6 +71,149 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target& i_target ); template< fapi2::TargetType T > fapi2::ReturnCode during_draminit_training( const fapi2::Target& i_target ); +/// +/// @brief Checks whether any of the PLL unlock values are set +/// @param[in] i_local_fir - the overall FIR register +/// @param[in] i_perv_fir - the pervasive PLL FIR +/// @param[in] i_mc_fir - the memory controller FIR +/// @return true iff the overall local FIR bit is set and a PLL unlock bit is on in the MC or TP error register, false otherwise +/// +bool pll_unlock( const fapi2::buffer& i_local_fir, + const fapi2::buffer& i_perv_fir, + const fapi2::buffer& i_mc_fir ); + +/// +/// @brief Checks whether any PLL FIRs have been set on a target +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +fapi2::ReturnCode pll_fir( const fapi2::Target& i_target, bool& 
o_fir_error ); + +/// +/// @brief Checks whether any FIRs have lit up on a target +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ); + +/// +/// @brief Checks whether the passed in FIRs have any un-masked errors set +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_fir_regs - FIR register and mask register +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +inline fapi2::ReturnCode fir_with_mask( const fapi2::Target& i_target, + const std::pair& i_fir_regs, + bool& o_fir_error ) +{ + // Temporary variables to make the code a bit more readable + const auto FIR_REG = i_fir_regs.first; + const auto FIR_MASK = i_fir_regs.second; + + fapi2::buffer l_fir; + fapi2::buffer l_fir_mask; + + // Read the registers + FAPI_TRY(mss::getScom(i_target, FIR_REG, l_fir)); + FAPI_TRY(mss::getScom(i_target, FIR_MASK, l_fir_mask)); + + + // The mask register will need to be inverted as a 0 in the mask register means the FIR is legit + // A bitwise and works the opposite way + l_fir_mask.invert(); + + // If we have any unmasked bit, set that we have a FIR error and exit out with success + // Note: we want to set success here as PRD will find the FIR as "new" and retrigger the procedure this way + o_fir_error = ((l_fir & l_fir_mask) != 0); + + // And print the information for debuggability + FAPI_INF("%s %s on reg 0x%016lx value 0x%016lx and mask 0x%016lx value 0x%016lx", mss::c_str(i_target), + o_fir_error ? 
"has FIR's set" : "has no FIR's set", FIR_REG, l_fir, FIR_MASK, l_fir_mask.invert()); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// @note This is a helper function to enable unit testing +/// +template< fapi2::TargetType T > +fapi2::ReturnCode hostboot_fir_or_pll_fail( const fapi2::Target& i_target, fapi2::ReturnCode& i_rc) +{ + // We didn't have an error, so return success + if(i_rc == fapi2::FAPI2_RC_SUCCESS) + { + FAPI_INF("%s has a good return code, returning success", mss::c_str(i_target)); + return fapi2::FAPI2_RC_SUCCESS; + } + + fapi2::ReturnCode l_fircheck_scom_err(fapi2::FAPI2_RC_SUCCESS); + bool l_fir_error = false; + + FAPI_ERR("%s has a bad return code, time to check some firs!", mss::c_str(i_target)); + + l_fircheck_scom_err = bad_fir_bits(i_target, l_fir_error); + + FAPI_ERR("%s took a fail. FIR was %s", mss::c_str(i_target), + l_fir_error ? 
"set - returning FIR RC" : "unset - returning inputted RC"); + + // If we had a FIR error, log the original error and return success + // PRD will handle the original error + if(l_fir_error) + { + fapi2::log_related_error(i_target, i_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED); + fapi2::current_err = fapi2::FAPI2_RC_SUCCESS; + } + else + { + fapi2::current_err = i_rc; + } + + return fapi2::current_err; +} + +/// +/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure, if a check fir boolean is passed in +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue +/// @param[in] i_check_fir - true IFF the FIR needs to be checked - defaults to true +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode fir_or_pll_fail( const fapi2::Target& i_target, fapi2::ReturnCode& i_rc, + const bool i_check_fir = true) +{ +#ifdef __HOSTBOOT_MODULE + + fapi2::ReturnCode l_rc(i_rc); + + // If need be, check the FIR below + if(i_check_fir) + { + // Handle any issues according to PRD FIR scheme, as a FIR could have caused this issue + l_rc = hostboot_fir_or_pll_fail(i_target, l_rc); + } + + return l_rc; + +#else + return i_rc; +#endif +} + } } #endif diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H index 876a83909a5..b6c2ece0144 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H @@ -964,7 +964,7 @@ fapi2::ReturnCode reset_zqcal_config( const fapi2::Target& i_target ) for (const auto r : l_ranks) { - l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r)); + FAPI_TRY(l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + 
rank::map_rank_ordinal_to_phy(i_target, r))); } // Write the ZQCAL periodic config diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C index 17563fc8379..bdee48e3c06 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C @@ -936,7 +936,7 @@ fapi2::ReturnCode xlate_dimm_2R2T8Gbx4( const dimm::kind& i_kind, // We're basically a 2R 4Gbx4 with an extra row. So lets setup like we're one of those, // add row 16 and shift the D bit as needed. - xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2); + FAPI_TRY(xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2)); // Tell the MC which of the row bits are valid, and map the DIMM selector // We're a 17 row DIMM, so ROW16 is valid. @@ -1941,7 +1941,7 @@ fapi2::ReturnCode setup_xlate_map_helper( std::vector& io_dimm_kinds set_DIMM_TYPE(k.iv_dimm_type). set_ROWS(k.iv_rows). 
set_SIZE(k.iv_size), - "no address translation funtion for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d", + "no address translation function for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d", mss::c_str(k.iv_target), k.iv_master_ranks, k.iv_total_ranks, diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C index 86a8621fa1c..e1e63fec591 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C @@ -521,6 +521,11 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target(i_target); fapi2::buffer l_err_data; @@ -550,6 +555,9 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target #include +#include #include using fapi2::TARGET_TYPE_MCS; @@ -3260,6 +3261,22 @@ fapi_try_exit: /// fapi2::ReturnCode record_bad_bits( const fapi2::Target& i_target ) { + // If we have a FIR set that could have caused our training fail, then skip checking bad bits in FW + // PRD will handle the FIR and retrigger the procedure +#ifdef __HOSTBOOT_MODULE + bool l_fir_error = false; + FAPI_TRY(mss::check::bad_fir_bits(i_target, l_fir_error), "%s took an error while checking FIR's", + mss::c_str(i_target)); + + // Exit if we took a FIR error - PRD will handle bad bits + if(l_fir_error) + { + FAPI_INF("%s has FIR's set, exiting to let PRD handle it", mss::c_str(i_target)); + return fapi2::FAPI2_RC_SUCCESS; + } + +#endif + for( const auto& d : mss::find_targets(i_target) ) { uint8_t l_data[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = {}; @@ -3367,11 +3384,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target> l_data; + // Boolean to keep track of if a fail was calibration related, or scom related + bool l_cal_fail = false; + // Suck all the cal error bits out ... 
FAPI_TRY( mss::scom_suckah(l_mca, TT::RD_VREF_CAL_ERROR_REG, l_data) ); FAPI_INF("%s Processing RD_VREF_CAL_ERROR", mss::c_str(i_target)); + // From here on out, the FIR's are all cal fails + l_cal_fail = true; + for (const auto& v : l_data) { // They should all be 0's. If they're not, we have a problem. @@ -3383,14 +3406,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target, fapi2::buffer>> l_data; std::vector, fapi2::buffer>> l_mask; + // Boolean to keep track of if a fail was calibration related, or scom related + bool l_cal_fail = false; + // Suck all the cal error bits out ... FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_REG, l_data) ); FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_MASK_REG, l_mask) ); + // From here on out, the FIR's are all cal fails + l_cal_fail = true; + // Loop through both data and mask { // Note: ideally these would be cbegin/cend, but HB doesn't support constant iterators for vectors @@ -3480,11 +3512,13 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target #include #include +#include namespace mss { @@ -547,10 +548,13 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target& i_target, const uint64_t i_rp, +fapi2::ReturnCode record_passing_values( const fapi2::Target& i_target, + const uint64_t i_rp, std::map& io_passing_values) { // Traits declaration diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C index 533a53905e2..b4de8bd9031 100644 --- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C +++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C @@ -68,7 +68,7 @@ extern "C" std::vector l_fails; - FAPI_INF("Start draminit training"); + FAPI_INF("%s Start draminit training", mss::c_str(i_target)); // If there are no DIMM we don't need to bother. In fact, we can't as we didn't setup // attributes for the PHY, etc.