diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C index b8295122127..6690485137d 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/ccs/ccs.C @@ -37,6 +37,7 @@ #include #include +#include using fapi2::TARGET_TYPE_MCBIST; using fapi2::TARGET_TYPE_MCA; @@ -83,6 +84,7 @@ fapi2::ReturnCode fail_type( const fapi2::Target& i_target, const uint64_t& i_type, const fapi2::Target& i_mca ) { + fapi2::ReturnCode l_failing_rc(fapi2::FAPI2_RC_SUCCESS); // Including the MCA_TARGET here and below at CAL_TIMEOUT since these problems likely lie at the MCA level // So we disable the PORT and hopefully that's it // If the problem lies with the MCBIST, it'll just have to loop @@ -112,7 +114,10 @@ fapi2::ReturnCode fail_type( const fapi2::Target& i_target, fapi2::MSS_CCS_HUNG().set_MCBIST_TARGET(i_target), "%s CCS appears hung", mss::c_str(i_target)); fapi_try_exit: - return fapi2::current_err; + // Due to the PRD update, we need to check for FIR's + // If any FIR's have lit up, this CCS fail could have been caused by the FIR + // So, let PRD retrigger this step to see if we can resolve the issue + return mss::check::fir_or_pll_fail(i_target, fapi2::current_err); } /// diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C index 43694ac2d96..cec455f6a6d 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ddr4/mrs_load_ddr4.C @@ -64,7 +64,7 @@ fapi2::ReturnCode mrs_engine( const fapi2::Target& i_ta const uint64_t i_rank, std::vector< ccs::instruction_t >& io_inst ) { - FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) ); + FAPI_TRY( mrs_engine(i_target, i_data, i_rank, i_data.iv_delay, io_inst) ); fapi_try_exit: return fapi2::current_err; diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C index 6404adf0bc9..f2edb7873ce 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.C @@ -233,7 +233,6 @@ fapi_try_exit: /// /// @brief Return a vector of rank numbers which represent the primary rank pairs for this port -/// @tparam T the target type /// @param[in] i_target TARGET_TYPE_MCA /// @param[out] o_rps a vector of rank_pairs /// @return FAPI2_RC_SUCCESS iff all is ok @@ -251,7 +250,7 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target& i_target, FAPI_TRY( mss::eff_num_master_ranks_per_dimm(d, l_rank_count[mss::index(d)]) ); } - FAPI_DBG("ranks: %d, %d", l_rank_count[0], l_rank_count[1]); + FAPI_DBG("%s ranks: %d, %d", mss::c_str(i_target), l_rank_count[0], l_rank_count[1]); // Walk through rank pair table and skip empty pairs o_rps.clear(); @@ -264,13 +263,15 @@ fapi2::ReturnCode primary_ranks( const fapi2::Target& i_target, } } + // Returning success in case no DIMM's are configured + return fapi2::FAPI2_RC_SUCCESS; + fapi_try_exit: return fapi2::current_err; } /// /// @brief Return a vector of rank numbers which represent the primary rank pairs for this dimm -/// @tparam T the target type /// @param[in] i_target TARGET_TYPE_DIMM /// @param[out] o_rps a vector of rank_pairs /// @return FAPI2_RC_SUCCESS iff all is ok @@ -344,7 +345,6 @@ fapi_try_exit: /// /// @brief Given a target, get the rank pair assignments, based on DIMMs -/// @tparam T the fapi2::TargetType /// @param[in] i_target the target (MCA or MBA?) /// @param[out] o_registers the regiter settings for the appropriate rank pairs /// @return FAPI2_RC_SUCCESS if and only if ok @@ -382,8 +382,7 @@ fapi_try_exit: /// /// @brief Setup the rank information in the port -/// @tparam T the fapi2::TargetType -/// @param[in] i_target the target (MCA or MBA?) +/// @param[in] i_target the target (MCA) /// @return FAPI2_RC_SUCCESS if and only if ok /// template<> @@ -485,7 +484,6 @@ fapi_try_exit: /// /// @brief Get a vector of configured rank pairs. /// Returns a vector of ordinal values of the configured rank pairs. e.g., for a 2R DIMM, {0, 1} -/// @tparam T the fapi2::TargetType /// @param[in]i_target the target (MCA or MBA?) /// @param[out] o_pairs std::vector of rank pairs configured /// @return FAPI2_RC_SUCCESS if and only if ok @@ -565,7 +563,6 @@ fapi_try_exit: /// /// @brief Get a rank-pair id from a physical rank /// Returns a number representing which rank-pair this rank is a part of -/// @tparam T the fapi2::TargetType /// @param[in] i_target the target (MCA or MBA?) /// @param[in] i_rank the physical rank number /// @param[out] o_pairs the rank pair @@ -573,7 +570,8 @@ fapi_try_exit: /// template<> fapi2::ReturnCode get_pair_from_rank(const fapi2::Target& i_target, - uint64_t i_rank, uint64_t& o_pair) + uint64_t i_rank, + uint64_t& o_pair) { // Sort of brute-force, but no real good other way to do it. Given the // rank-pair configuration we walk the config looking for our rank, and diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H index 34310cc560c..e5b3b9041fb 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/dimm/rank.H @@ -1060,7 +1060,7 @@ inline fapi2::ReturnCode set_pair_valid( const fapi2::Target& i_target, fapi2::MSS_INVALID_RANK() .set_RANK(i_rank) .set_MCA_TARGET(i_target) - .set_FUNCTION(GET_RANKS_IN_PAIR), + .set_FUNCTION(SET_PAIR_VALID), "%s Invalid rank (%d) in get_ranks_in_pair", mss::c_str(i_target), i_rank); @@ -1231,7 +1231,7 @@ fapi2::ReturnCode get_ranks_in_pair( const fapi2::Target& i_target, // Get data for (uint64_t l_ordinal = 0; l_ordinal < TT::NUM_RANKS_IN_PAIR; ++l_ordinal) { - // Check to make sure rank is vlaid + // Check to make sure rank is valid FAPI_ASSERT( l_ordinal < MAX_RANK_PER_DIMM, fapi2::MSS_INVALID_RANK() .set_RANK(l_ordinal) diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C index 58f1f0d94f9..7a329aaed20 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.C @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -205,6 +207,9 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target l_phyfir_data; fapi2::buffer l_phyfir_masked; + // If we have a FIR that is lit up, we want to see if it could have been caused by a more drastic FIR + bool l_check_fir = false; + FAPI_TRY( mss::getScom(l_mca, MCA_IOM_PHY0_DDRPHY_FIR_REG, l_phyfir_data) ); l_phyfir_masked = l_phyfir_data & l_phyfir_mask; @@ -213,6 +218,8 @@ fapi2::ReturnCode during_draminit_training( const fapi2::Target> MCBIST_FIR_REGS = +{ + // MCBIST FIR + {MCBIST_MCBISTFIRQ, MCBIST_MCBISTFIRMASK}, +}; + +static const std::vector> MCA_FIR_REGS = +{ + // MCA ECC FIR + {MCA_FIR, MCA_MASK}, + // MCA CAL FIR + {MCA_MBACALFIRQ, MCA_MBACALFIR_MASK}, + // DDRPHY FIR + {MCA_IOM_PHY0_DDRPHY_FIR_REG, MCA_IOM_PHY0_DDRPHY_FIR_MASK_REG}, +}; + +/// +/// @brief Checks whether any of the PLL unlock values are set +/// @param[in] i_local_fir - the overall FIR register +/// @param[in] i_perv_fir - the pervasive PLL FIR +/// @param[in] i_mc_fir - the memory controller FIR +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +bool pll_unlock( const fapi2::buffer& i_local_fir, + const fapi2::buffer& i_perv_fir, + const fapi2::buffer& i_mc_fir ) +{ + // Note: the following registers did not have the scom fields defined, so we're constexpr'ing them here + constexpr uint64_t PERV_TP_ERROR_START = 25; + constexpr uint64_t PERV_TP_ERROR_LEN = 4; + constexpr uint64_t PERV_MC_ERROR_START = 25; + + // No overall FIR (bit 21) was set, so just exit + if(!i_local_fir.getBit()) + { + FAPI_INF("Did not have the PERV_LOCAL_FIR bit set. No PLL error, exiting"); + return false; + } + + // Now, identify whether a PLL unlock caused the FIR bit to fail + FAPI_INF("PERV_TP_ERROR_REG %s PERV_MC01_ERROR_REG %s", + i_perv_fir.getBit() ? "PLL lock fail" : "PLL ok", + i_mc_fir.getBit() ? "PLL lock fail" : "PLL ok"); + + // We have a PLL unlock if the MC PLL unlock FIR bit is on or any of the TP PLL unlock bits are on + return (i_mc_fir.getBit()) || (i_perv_fir.getBit()); +} + +/// +/// @brief Checks whether any PLL FIRs have been set on a target +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +fapi2::ReturnCode pll_fir( const fapi2::Target& i_target, bool& o_fir_error ) +{ + // Sets o_fir_error to false to begin with, just in case we have scom issues + o_fir_error = false; + + // Gets the processor target + const auto& l_proc = mss::find_target(i_target); + + // Gets the register data + fapi2::buffer l_local_fir; + fapi2::buffer l_perv_fir; + fapi2::buffer l_mc_fir; + + FAPI_TRY(mss::getScom(l_proc, PERV_TP_LOCAL_FIR, l_local_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_TP_LOCAL_FIR); + FAPI_TRY(mss::getScom(l_proc, PERV_TP_ERROR_REG, l_perv_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_TP_ERROR_REG); + FAPI_TRY(mss::getScom(i_target, PERV_MC01_ERROR_REG, l_mc_fir), "%s failed to get 0x%016llx", mss::c_str(i_target), + PERV_MC01_ERROR_REG); + + // Checks the data + o_fir_error = pll_unlock(l_local_fir, l_perv_fir, l_mc_fir); + fapi_try_exit: return fapi2::current_err; } +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - MCBIST specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ) +{ + // Start by assuming we do not have a FIR + o_fir_error = false; + + // Loop, check the scoms, and check the FIR + // Note: we return out if any FIR is bad + for(const auto& l_fir_reg : MCBIST_FIR_REGS) + { + FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Loop through all MCA's and all MCA FIR's + for(const auto& l_mca : mss::find_targets(i_target)) + { + for(const auto& l_fir_reg : MCA_FIR_REGS) + { + FAPI_TRY(fir_with_mask(l_mca, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + } + + // Lastly, check for PLL unlocks + FAPI_TRY(pll_fir(i_target, o_fir_error)); + +fapi_try_exit: + return fapi2::current_err; +} + + +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - MCA specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ) +{ + const auto& l_mcbist = mss::find_target(i_target); + // Start by assuming we do not have a FIR + o_fir_error = false; + + // Loop, check the scoms, and check the FIR + // Note: we return out if any FIR is bad + for(const auto& l_fir_reg : MCBIST_FIR_REGS) + { + FAPI_TRY(fir_with_mask(l_mcbist, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Loop through all MCA FIR's + for(const auto& l_fir_reg : MCA_FIR_REGS) + { + FAPI_TRY(fir_with_mask(i_target, l_fir_reg, o_fir_error)); + + // Exit if we found a FIR + if(o_fir_error) + { + return fapi2::FAPI2_RC_SUCCESS; + } + } + + // Lastly, check for PLL unlocks + FAPI_TRY(pll_fir(l_mcbist, o_fir_error)); + +fapi_try_exit: + return fapi2::current_err; +} + + +/// +/// @brief Checks whether any FIR have lit up +/// @param[in] i_target - the target on which to operate - DIMM specialization +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ) +{ + const auto l_mca = mss::find_target(i_target); + return bad_fir_bits(l_mca, o_fir_error); +} + } } diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H index ded638e4999..fc82aaed191 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/fir/check.H @@ -27,7 +27,7 @@ /// @file check.H /// @brief Subroutines for checking MSS FIR /// -// *HWP HWP Owner: Brian Silver +// *HWP HWP Owner: Andre Marin // *HWP HWP Backup: Marc Gollub // *HWP Team: Memory // *HWP Level: 2 @@ -37,6 +37,7 @@ #define _MSS_CHECK_FIR_H_ #include +#include namespace mss { @@ -58,6 +59,7 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target& i_target ); /// /// @brief Check FIR bits during draminit training +/// @tparam T the fapi2::TargetType which hold the FIR bits /// @param[in] i_target the dimm that was trained /// @note We check for fir errors after training each rank /// to see if there was a problem with the engine @@ -69,6 +71,149 @@ fapi2::ReturnCode during_phy_reset( const fapi2::Target& i_target ); template< fapi2::TargetType T > fapi2::ReturnCode during_draminit_training( const fapi2::Target& i_target ); +/// +/// @brief Checks whether any of the PLL unlock values are set +/// @param[in] i_local_fir - the overall FIR register +/// @param[in] i_perv_fir - the pervasive PLL FIR +/// @param[in] i_mc_fir - the memory controller FIR +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +bool pll_unlock( const fapi2::buffer& i_local_fir, + const fapi2::buffer& i_perv_fir, + const fapi2::buffer& i_mc_fir ); + +/// +/// @brief Checks whether any PLL FIRs have been set on a target +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +fapi2::ReturnCode pll_fir( const fapi2::Target& i_target, bool& o_fir_error ); + +/// +/// @brief Checks whether any FIRs have lit up on a target +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode bad_fir_bits( const fapi2::Target& i_target, bool& o_fir_error ); + +/// +/// @brief Checks whether the passed in FIRs have any un-masked errors set +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_fir_regs - FIR register and mask register +/// @param[out] o_fir_error - true iff a FIR was hit +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +inline fapi2::ReturnCode fir_with_mask( const fapi2::Target& i_target, + const std::pair& i_fir_regs, + bool& o_fir_error ) +{ + // Temporary variables to make the code a bit more readable + const auto FIR_REG = i_fir_regs.first; + const auto FIR_MASK = i_fir_regs.second; + + fapi2::buffer l_fir; + fapi2::buffer l_fir_mask; + + // Read the registers + FAPI_TRY(mss::getScom(i_target, FIR_REG, l_fir)); + FAPI_TRY(mss::getScom(i_target, FIR_MASK, l_fir_mask)); + + + // The mask register will need to be inverted as a 0 in the mask register means the FIR is legit + // A bitwise and works the opposite way + l_fir_mask.invert(); + + // If we have any unmasked bit, set that we have a FIR error and exit out with success + // Note: we want to set success here as PRD will find the FIR as "new" and retrigger the procedure this way + o_fir_error = ((l_fir & l_fir_mask) != 0); + + // And print the information for debuggability + FAPI_INF("%s %s on reg 0x%016lx value 0x%016lx and mask 0x%016lx value 0x%016lx", mss::c_str(i_target), + o_fir_error ? "has FIR's set" : "has no FIR's set", FIR_REG, l_fir, FIR_MASK, l_fir_mask.invert()); + +fapi_try_exit: + return fapi2::current_err; +} + +/// +/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// @note This is a helper function to enable unit testing +/// +template< fapi2::TargetType T > +fapi2::ReturnCode hostboot_fir_or_pll_fail( const fapi2::Target& i_target, fapi2::ReturnCode& i_rc) +{ + // We didn't have an error, so return success + if(i_rc == fapi2::FAPI2_RC_SUCCESS) + { + FAPI_INF("%s has a good return code, returning success", mss::c_str(i_target)); + return fapi2::FAPI2_RC_SUCCESS; + } + + fapi2::ReturnCode l_fircheck_scom_err(fapi2::FAPI2_RC_SUCCESS); + bool l_fir_error = false; + + FAPI_ERR("%s has a bad return code, time to check some firs!", mss::c_str(i_target)); + + l_fircheck_scom_err = bad_fir_bits(i_target, l_fir_error); + + FAPI_ERR("%s took a fail. FIR was %s", mss::c_str(i_target), + l_fir_error ? "set - returning FIR RC" : "unset - returning inputted RC"); + + // If we had a FIR error, log the original error and return success + // PRD will handle the original error + if(l_fir_error) + { + fapi2::log_related_error(i_target, i_rc, fapi2::FAPI2_ERRL_SEV_RECOVERED); + fapi2::current_err = fapi2::FAPI2_RC_SUCCESS; + } + else + { + fapi2::current_err = i_rc; + } + + return fapi2::current_err; +} + +/// +/// @brief Checks whether a FIR or unlocked PLL could be the root cause of another failure, if a check fir boolean is passed in +/// @tparam T the fapi2::TargetType which hold the FIR bits +/// @param[in] i_target - the target on which to operate +/// @param[in] i_rc - the return code for the function - cannot be const due to a HB compile issue +/// @param[in] i_check_fir - true IFF the FIR needs to be checked - defaults to true +/// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok +/// +template< fapi2::TargetType T > +fapi2::ReturnCode fir_or_pll_fail( const fapi2::Target& i_target, fapi2::ReturnCode& i_rc, + const bool i_check_fir = true) +{ +#ifdef __HOSTBOOT_MODULE + + fapi2::ReturnCode l_rc(i_rc); + + // If need be, check the FIR below + if(i_check_fir) + { + // Handle any issues according to PRD FIR scheme, as a FIR could have caused this issue + l_rc = hostboot_fir_or_pll_fail(i_target, l_rc); + } + + return l_rc; + +#else + return i_rc; +#endif +} + } } #endif diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H index 876a83909a5..b6c2ece0144 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H @@ -964,7 +964,7 @@ fapi2::ReturnCode reset_zqcal_config( const fapi2::Target& i_target ) for (const auto r : l_ranks) { - l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r)); + FAPI_TRY(l_phy_zqcal_config.setBit(TT::PER_ZCAL_ENA_RANK + rank::map_rank_ordinal_to_phy(i_target, r))); } // Write the ZQCAL periodic config diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C index 17563fc8379..bdee48e3c06 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C @@ -936,7 +936,7 @@ fapi2::ReturnCode xlate_dimm_2R2T8Gbx4( const dimm::kind& i_kind, // We're basically a 2R 4Gbx4 with an extra row. So lets setup like we're one of those, // add row 16 and shift the D bit as needed. - xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2); + FAPI_TRY(xlate_dimm_2R2T4Gbx4(i_kind, i_offset, i_largest, io_xlate0, io_xlate1, io_xlate2)); // Tell the MC which of the row bits are valid, and map the DIMM selector // We're a 17 row DIMM, so ROW16 is valid. @@ -1941,7 +1941,7 @@ fapi2::ReturnCode setup_xlate_map_helper( std::vector& io_dimm_kinds set_DIMM_TYPE(k.iv_dimm_type). set_ROWS(k.iv_rows). set_SIZE(k.iv_size), - "no address translation funtion for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d", + "no address translation function for DIMM %s %dMR (%d total ranks) %dGbx%d (%dGB) %d rows in slot %d", mss::c_str(k.iv_target), k.iv_master_ranks, k.iv_total_ranks, diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C index 86a8621fa1c..e1e63fec591 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/phy/ddr_phy.C @@ -521,6 +521,11 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target(i_target); fapi2::buffer l_err_data; @@ -550,6 +555,9 @@ fapi2::ReturnCode process_initial_cal_errors( const fapi2::Target #include +#include #include using fapi2::TARGET_TYPE_MCS; @@ -3260,6 +3261,22 @@ fapi_try_exit: /// fapi2::ReturnCode record_bad_bits( const fapi2::Target& i_target ) { + // If we have a FIR set that could have caused our training fail, then skip checking bad bits in FW + // PRD will handle the FIR and retrigger the procedure +#ifdef __HOSTBOOT_MODULE + bool l_fir_error = false; + FAPI_TRY(mss::check::bad_fir_bits(i_target, l_fir_error), "%s took an error while checking FIR's", + mss::c_str(i_target)); + + // Exit if we took a FIR error - PRD will handle bad bits + if(l_fir_error) + { + FAPI_INF("%s has FIR's set, exiting to let PRD handle it", mss::c_str(i_target)); + return fapi2::FAPI2_RC_SUCCESS; + } + +#endif + for( const auto& d : mss::find_targets(i_target) ) { uint8_t l_data[MAX_RANK_PER_DIMM][BAD_DQ_BYTE_COUNT] = {}; @@ -3367,11 +3384,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target> l_data; + // Boolean to keep track of if a fail was calibration related, or scom related + bool l_cal_fail = false; + // Suck all the cal error bits out ... FAPI_TRY( mss::scom_suckah(l_mca, TT::RD_VREF_CAL_ERROR_REG, l_data) ); FAPI_INF("%s Processing RD_VREF_CAL_ERROR", mss::c_str(i_target)); + // From here on out, the FIR's are all cal fails + l_cal_fail = true; + for (const auto& v : l_data) { // They should all be 0's. If they're not, we have a problem. @@ -3383,14 +3406,17 @@ fapi2::ReturnCode process_rdvref_cal_errors( const fapi2::Target, fapi2::buffer>> l_data; std::vector, fapi2::buffer>> l_mask; + // Boolean to keep track of if a fail was calibration related, or scom related + bool l_cal_fail = false; + // Suck all the cal error bits out ... FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_REG, l_data) ); FAPI_TRY( mss::scom_suckah(l_mca, TT::WR_VREF_ERROR_MASK_REG, l_mask) ); + // From here on out, the FIR's are all cal fails + l_cal_fail = true; + // Loop through both data and mask { // Note: ideally these would be cbegin/cend, but HB doesn't support constant iterators for vectors @@ -3480,11 +3512,13 @@ fapi2::ReturnCode process_wrvref_cal_errors( const fapi2::Target #include #include +#include namespace mss { @@ -547,10 +548,13 @@ fapi2::ReturnCode dqs_align_workaround(const fapi2::Target& i_target, const uint64_t i_rp, +fapi2::ReturnCode record_passing_values( const fapi2::Target& i_target, + const uint64_t i_rp, std::map& io_passing_values) { // Traits declaration diff --git a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C index 533a53905e2..b4de8bd9031 100644 --- a/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C +++ b/src/import/chips/p9/procedures/hwp/memory/p9_mss_draminit_training.C @@ -68,7 +68,7 @@ extern "C" std::vector l_fails; - FAPI_INF("Start draminit training"); + FAPI_INF("%s Start draminit training", mss::c_str(i_target)); // If there are no DIMM we don't need to bother. In fact, we can't as we didn't setup // attributes for the PHY, etc.