Skip to content

Commit

Permalink
Handle PGPE timeouts as workaround for prolonged droop events
Browse files Browse the repository at this point in the history
Add "CLIP" information to poll response

Fix incorrect throttling due to power when all cores are in stop 2 or greater

Change-Id: I502cc65ad8c4cffd7f9a1442fd4de185f3cac6e2
RTC: 183700
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/51741
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
  • Loading branch information
marthabroyles committed Jan 15, 2018
1 parent d868b77 commit 2397cb6
Show file tree
Hide file tree
Showing 18 changed files with 682 additions and 394 deletions.
6 changes: 4 additions & 2 deletions src/include/registers/ocb_firmware_registers.h
Expand Up @@ -1414,15 +1414,17 @@ typedef union ocb_occflg {
uint32_t gpu0_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu2_reset_status : 1;
uint32_t reserved_occ : 3;
uint32_t reserved_occ : 2;
uint32_t pm_reset_suppress : 1;
uint32_t wof_hcode_mode : 2;
uint32_t active_quad_update : 1;
uint32_t request_occ_safe : 1;
#else
uint32_t request_occ_safe : 1;
uint32_t active_quad_update : 1;
uint32_t wof_hcode_mode : 2;
uint32_t reserved_occ : 3;
uint32_t pm_reset_suppress : 1;
uint32_t reserved_occ : 2;
uint32_t gpu2_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu0_reset_status : 1;
Expand Down
29 changes: 19 additions & 10 deletions src/occ_405/amec/amec_controller.c
Expand Up @@ -457,16 +457,25 @@ uint16_t amec_controller_speed2freq (const uint16_t i_speed, const uint16_t i_fm
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
l_temp16 = i_fmax;
l_tempreg = (uint16_t)i_speed;
l_temp32 = ((uint32_t)l_tempreg)*((uint32_t)l_temp16);
l_temp16 = (uint16_t)1000;
l_divide32[1] = (uint32_t)l_temp16;
l_divide32[0] = (uint32_t)l_temp32;
l_divide32[0] /= l_divide32[1];
l_temp32 = l_divide32[0];
l_freq = (uint16_t)l_temp32; /* freq will always fit in 16 bits */

// To handle the max frequency changing (i.e. a mode change) between now and when
// amec_slv_proc_voting_box runs: if the speed is unconstrained, set the frequency to
// unconstrained so the voting box will use the most recent maximum frequency.
if(i_speed >= g_amec->sys.max_speed)
{
l_freq = 0xFFFF;
}
else
{
l_temp16 = i_fmax;
l_tempreg = (uint16_t)i_speed;
l_temp32 = ((uint32_t)l_tempreg)*((uint32_t)l_temp16);
l_temp16 = (uint16_t)1000;
l_divide32[1] = (uint32_t)l_temp16;
l_divide32[0] = (uint32_t)l_temp32;
l_divide32[0] /= l_divide32[1];
l_temp32 = l_divide32[0];
l_freq = (uint16_t)l_temp32; /* freq will always fit in 16 bits */
}
return l_freq;
}

Expand Down
56 changes: 51 additions & 5 deletions src/occ_405/amec/amec_freq.c
Expand Up @@ -282,6 +282,10 @@ void amec_slv_proc_voting_box(void)
uint16_t k = 0;
uint16_t l_chip_fmax = g_amec->sys.fmax;
uint16_t l_core_freq = 0;
uint16_t l_core_freq_max = 0; // max freq across all cores
uint16_t l_core_freq_min = g_amec->sys.fmax; // min freq across all cores
uint32_t l_current_reason = 0; // used for debug purposes
static uint32_t L_last_reason = 0; // used for debug purposes
uint32_t l_chip_reason = 0;
uint32_t l_core_reason = 0;
amec_proc_voting_reason_t l_kvm_throt_reason = NO_THROTTLE;
Expand All @@ -306,9 +310,6 @@ void amec_slv_proc_voting_box(void)
// This function implements the voting box to decide which input gets the right
// to actuate the system.

//Reset the maximum core frequency requested prior to recalculation.
g_amec->proc[0].core_max_freq = 0;

// PPB_FMAX
if(g_amec->proc[0].pwr_votes.ppb_fmax < l_chip_fmax)
{
Expand Down Expand Up @@ -516,6 +517,12 @@ void amec_slv_proc_voting_box(void)
//STORE core frequency and reason
g_amec->proc[0].core[k].f_request = l_core_freq;
g_amec->proc[0].core[k].f_reason = l_core_reason;
if(l_core_freq < l_core_freq_min)
{
// store the new lowest frequency and reason to be used after all cores checked
l_core_freq_min = l_core_freq;
l_current_reason = l_core_reason;
}

// Update the Amester parameter telling us the reason. Needed for
// parameter array.
Expand Down Expand Up @@ -557,9 +564,9 @@ void amec_slv_proc_voting_box(void)
}
#endif

if(l_core_freq > g_amec->proc[0].core_max_freq)
if(l_core_freq > l_core_freq_max)
{
g_amec->proc[0].core_max_freq = l_core_freq;
l_core_freq_max = l_core_freq;
}
} // if core present and not offline
else
Expand All @@ -570,6 +577,15 @@ void amec_slv_proc_voting_box(void)
}
}//End of for loop

// Update the max core frequency only when it is non-zero; 0 means all cores are offline (stop 2 or greater).
// This value is used by the power capping algorithm, and updating it to 0 would cause power throttling when not needed.
if(l_core_freq_max)
{
g_amec->proc[0].core_max_freq = l_core_freq_max;
// update the overall reason driving frequency across all cores
g_amec->proc[0].f_reason = l_current_reason;
}

//check if there was a throttle reason change
if(l_kvm_throt_reason != G_amec_opal_proc_throt_reason)
{
Expand All @@ -582,6 +598,36 @@ void amec_slv_proc_voting_box(void)
ssx_semaphore_post(&G_dcomThreadWakeupSem);
}
}
// For debug: if the lowest core frequency is below the max, update the variables returned in the poll response to give the clipping reason
g_amec->proc[0].core_min_freq = l_core_freq_min;
if(l_core_freq_min < g_amec->sys.fmax)
{
if(l_current_reason == L_last_reason)
{
// same reason INC counter
if(g_amec->proc[0].current_clip_count != 0xFF)
{
g_amec->proc[0].current_clip_count++;
}
}
else
{
// new reason update history and set counter to 1
L_last_reason = l_current_reason;
g_amec->proc[0].current_clip_count = 1;
if( (g_amec->proc[0].chip_f_reason_history & l_current_reason) == 0)
{
g_amec->proc[0].chip_f_reason_history |= l_current_reason;
TRAC_IMP("First time throttling for reason[0x%08X] History[0x%08X] freq = %d",
l_current_reason, g_amec->proc[0].chip_f_reason_history, l_core_freq_min);
}
}
}
else // no active clipping
{
L_last_reason = 0;
g_amec->proc[0].current_clip_count = 0;
}
}

// Function Specification
Expand Down
202 changes: 107 additions & 95 deletions src/occ_405/amec/amec_master_smh.c
Expand Up @@ -40,6 +40,7 @@
#include "dcom.h"
#include <amec_sensors_power.h>
#include <cmdh_fsp_cmds.h> // For G_apss_ch_to_function
#include "common.h" // For ignore_pgpe_error()

//*************************************************************************/
// Externs
Expand Down Expand Up @@ -399,110 +400,121 @@ void amec_mst_check_under_pcap(void)
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/

// Check if done everything possible to shed power and power still above a hard power cap
// ppb_fmax = Fmin and PWRSYS > Node power cap and
// Node power cap >= hard_min_pcap AND memory is throttled
if((g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
(AMECSENSOR_PTR(PWRSYS)->sample > g_amec->pcap.active_node_pcap) &&
(g_amec->pcap.active_node_pcap >= G_sysConfigData.pcap.hard_min_pcap) &&
(g_amec->pcap.active_mem_level != 0) )
do
{

G_over_cap_count++;

// GPUs take longer for a power limit to take effect, so if GPUs are present we need to use
// a longer wait time before logging an error and resetting
if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) ||
( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) )
// Check if done everything possible to shed power and power still above a hard power cap
// ppb_fmax = Fmin and PWRSYS > Node power cap and
// Node power cap >= hard_min_pcap AND memory is throttled
if((g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
(AMECSENSOR_PTR(PWRSYS)->sample > g_amec->pcap.active_node_pcap) &&
(g_amec->pcap.active_node_pcap >= G_sysConfigData.pcap.hard_min_pcap) &&
(g_amec->pcap.active_mem_level != 0) )
{
TRAC_ERR("Failure to maintain power cap: Power Cap = %d ,"
"PWRSYS = %d",g_amec->pcap.active_node_pcap,
AMECSENSOR_PTR(PWRSYS)->sample);
// Check whether PGPE errors are being ignored, meaning the PGPE cannot set the frequency, which could
// cause this over-power event. This does not cover the case where a different OCC is unable to shed
// power due to the PGPE; that would require adding this status to the OCC-to-OCC communication.
if(ignore_pgpe_error())
{
// make sure count is cleared to give time for frequency to be set once PGPE can set it
G_over_cap_count = 0;
INCREMENT_ERR_HISTORY(ERRH_OVER_PCAP_IGNORED);
break;
}

// Trace power per APSS channel to have the best breakdown for debug
// compress traces to 4 max to save space on OP systems
for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
G_over_cap_count++;

// GPUs take longer for a power limit to take effect, so if GPUs are present we need to use
// a longer wait time before logging an error and resetting
if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) ||
( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) )
{
l_apss_func_id = G_apss_ch_to_function[i];
TRAC_ERR("Failure to maintain power cap: Power Cap = %d ,"
"PWRSYS = %d",g_amec->pcap.active_node_pcap,
AMECSENSOR_PTR(PWRSYS)->sample);

if((l_apss_func_id != ADC_RESERVED) &&
(l_apss_func_id != ADC_12V_SENSE) &&
(l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
(l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
// Trace power per APSS channel to have the best breakdown for debug
// compress traces to 4 max to save space on OP systems
for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
{
l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
l_trace_idx++;
l_apss_func_id = G_apss_ch_to_function[i];

if((l_apss_func_id != ADC_RESERVED) &&
(l_apss_func_id != ADC_12V_SENSE) &&
(l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
(l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
{
l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
l_trace_idx++;
}
}
while(l_trace_idx != 0)
{
if(l_trace_idx >=4)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]);
l_trace_idx -= 4;
}
else if(l_trace_idx == 3)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]);
l_trace_idx = 0;
}
else if(l_trace_idx == 2)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]);
l_trace_idx = 0;
}
else // l_trace_idx == 1
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X]",
l_trace[l_trace_idx-1]);
l_trace_idx = 0;
}
}
}
while(l_trace_idx != 0)
{
if(l_trace_idx >=4)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]);
l_trace_idx -= 4;
}
else if(l_trace_idx == 3)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]);
l_trace_idx = 0;
}
else if(l_trace_idx == 2)
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]",
l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]);
l_trace_idx = 0;
}
else // l_trace_idx == 1
{
TRAC_ERR("APSS channel/FuncID/Power: [%08X]",
l_trace[l_trace_idx-1]);
l_trace_idx = 0;
}
}

/* @
* @errortype
* @moduleid AMEC_MST_CHECK_UNDER_PCAP
* @reasoncode POWER_CAP_FAILURE
* @userdata1 Power Cap
* @userdata2 PWRSYS (Node Power)
* @devdesc Failure to maintain max power limits
*
*/
l_err = createErrl( AMEC_MST_CHECK_UNDER_PCAP,
POWER_CAP_FAILURE,
ERC_AMEC_UNDER_PCAP_FAILURE,
ERRL_SEV_PREDICTIVE,
NULL,
DEFAULT_TRACE_SIZE,
g_amec->pcap.active_node_pcap,
AMECSENSOR_PTR(PWRSYS)->sample);

//Callout to firmware
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_COMPONENT_ID,
ERRL_COMPONENT_ID_FIRMWARE,
ERRL_CALLOUT_PRIORITY_HIGH);

//Callout to APSS
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_HUID,
G_sysConfigData.apss_huid,
ERRL_CALLOUT_PRIORITY_HIGH);

//Reset OCC
REQUEST_RESET(l_err);
/* @
* @errortype
* @moduleid AMEC_MST_CHECK_UNDER_PCAP
* @reasoncode POWER_CAP_FAILURE
* @userdata1 Power Cap
* @userdata2 PWRSYS (Node Power)
* @devdesc Failure to maintain max power limits
*
*/
l_err = createErrl( AMEC_MST_CHECK_UNDER_PCAP,
POWER_CAP_FAILURE,
ERC_AMEC_UNDER_PCAP_FAILURE,
ERRL_SEV_PREDICTIVE,
NULL,
DEFAULT_TRACE_SIZE,
g_amec->pcap.active_node_pcap,
AMECSENSOR_PTR(PWRSYS)->sample);

//Callout to firmware
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_COMPONENT_ID,
ERRL_COMPONENT_ID_FIRMWARE,
ERRL_CALLOUT_PRIORITY_HIGH);

//Callout to APSS
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_HUID,
G_sysConfigData.apss_huid,
ERRL_CALLOUT_PRIORITY_HIGH);

//Reset OCC
REQUEST_RESET(l_err);
}
}
}
else
{
// Clear counter
G_over_cap_count = 0;
}

else
{
// Clear counter
G_over_cap_count = 0;
}
}while(0);
return;
}

Expand Down
4 changes: 4 additions & 0 deletions src/occ_405/amec/amec_sys.h
Expand Up @@ -535,6 +535,10 @@ typedef struct

// Calculations & Interim Data
uint16_t core_max_freq; // Maximum requested freq for all cores on chip.
uint16_t core_min_freq; // for debug. Minimum requested freq for all cores on chip.
uint8_t current_clip_count; // for debug. #consecutive ticks core_min_freq is below max possible for same reason
uint32_t chip_f_reason_history; // for debug. bit mask history of all frequency reason(s) for the chip
uint32_t f_reason; // for debug. current reason across all cores driving the lowest f request

// Parameters used through Amester interface
// Note: keep core arrays here, not in per-cores structure so one parameter
Expand Down

0 comments on commit 2397cb6

Please sign in to comment.