Skip to content

Commit

Permalink
Support for PGPE error handling
Browse files Browse the repository at this point in the history
Change-Id: I979f699eb9f72c0a4087e5f5af533ee3d221a4c5
RTC: 197062
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/68569
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: AMIT J. TENDOLKAR <amit.tendolkar@in.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
  • Loading branch information
cjcain committed Nov 27, 2018
1 parent 1904821 commit 98ccba6
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 143 deletions.
127 changes: 40 additions & 87 deletions src/include/pstate_pgpe_occ_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* Contributors Listed Below - COPYRIGHT 2016,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -61,14 +61,15 @@ enum MESSAGE_ID_IPI2HI
//
#define PGPE_RC_SUCCESS 0x01
#define PGPE_WOF_RC_NOT_ENABLED 0x10
#define PGPE_RC_PSTATES_DISABLED 0x11
#define PGPE_RC_REQ_PSTATE_ALREADY_STARTED 0x12
#define PGPE_RC_REQ_PSTATE_ALREADY_SUSPENDED 0x13
#define PGPE_RC_PSTATES_NOT_STARTED 0x11
#define PGPE_RC_OCC_NOT_PMCR_OWNER 0x14
#define PGPE_RC_PM_COMPLEX_SUSPEND_SAFE_MODE 0x15
// Active quad mismatch with requested active quads. PGPE did not switch
// to using the new VFRT. The original VFRT is still being used.
#define PGPE_WOF_RC_VFRT_QUAD_MISMATCH 0x20
#define PGPE_RC_REQ_WHILE_PENDING_ACK 0x21
#define PGPE_RC_NULL_VFRT_POINTER 0x22
#define PGPE_RC_INVALID_PMCR_OWNER 0x23

//
// PMCR Owner
Expand Down Expand Up @@ -138,7 +139,7 @@ typedef struct ipcmsg_wof_vfrt
uint8_t active_quads; // OCC updated with the Active Quads that it
// is using for its Ceff calculations
uint8_t pad;
HomerVFRTLayout_t* homer_vfrt_ptr; // Voltage Frequency Ratio Table
HomerVFRTLayout_t* homer_vfrt_ptr;
} ipcmsg_wof_vfrt_t;


Expand Down Expand Up @@ -167,9 +168,6 @@ typedef struct

typedef struct
{
/// Number of Pstate Table entries
uint32_t entries;

/// Internal VDD voltage ID at the output of the PFET header
OCCPstateTable_entry_t table[MAX_OCC_PSTATE_TABLE_ENTRIES];

Expand All @@ -178,83 +176,6 @@ typedef struct
// End Pstate Table
// -----------------------------------------------------------------------------

// -----------------------------------------------------------------------------
// Start FFDC

/// Scopes of the First Failure Data Capture (FFDC) registers.
/// The scope selects how a listed register address is expanded into the
/// address(es) that are actually collected; the expansion rule for each scope
/// is given on the enumerator.
enum scope_type
{
FFDC_CHIP = 0, // Address is chip scope (i.e. absolute, used as-is)
FFDC_QUAD = 1, // Address + 0x01000000*quad for good quads from 0 to 5
FFDC_CORE = 2, // Address + 0x01000000*core for good cores from 0 to 23
FFDC_CME = 3 // Address if EX is even; Address + 0x400*EX for EX odd, for good EXs from 0 to 11
};

/// Address types of First Failure Data Capture (FFDC) register addresses.
/// NOTE(review): despite the tag name `scope_type1`, these values describe the
/// kind of address (OCI vs. SCOM), not a scope.
enum scope_type1
{
FFDC_OCI = 0, // Address is an OCI address
FFDC_SCOM = 1 // Address is a SCOM address
};

/// Capacity (number of entries) of the Hcode FFDC register list
#define MAX_FFDC_REG_LIST 12
/// Register definition of the Hcode FFDC register list (one list entry).
/// Only the register address is stored; the per-entry attribute union
/// (address type and scope, 16 bits each) is commented out below.
typedef struct
{
/// FFDC register address
uint32_t address;
/* union address_attribute
{
uint32_t value;
struct
{
uint32_t address_type : 16;
uint32_t scope : 16;
} attr;
}*/
} Hcode_FFDC_entry_t;

/// Hcode FFDC register list: an entry count followed by a fixed-capacity
/// array of MAX_FFDC_REG_LIST register entries.
typedef struct
{
/// Number of FFDC address list entries
/// (presumably how many leading elements of list[] are valid -- TODO confirm)
uint32_t list_entries;

/// FFDC Address list
Hcode_FFDC_entry_t list[MAX_FFDC_REG_LIST];
} Hcode_FFDC_list_t;



/// Hcode FFDC register list
/// @todo RTC: 161183 Fill out the rest of this FFDC list
/// @note The reserved FFDC space for registers and traces set aside in the
/// OCC is 1KB. On the register side, the following list will generate
/// 12B of content (4B address, 8B data) x the good entries per scope.
/// CHIP scope entries are not dependent on partial good or currently active
/// state and will take 12B x 8 = 96B. CME scope entries will, at maximum,
/// generate 12B x 12 CMEs x 4 SCOMs = 576B. The overall total for registers
/// is 96B + 576B = 672B.
///
/*typedef struct Hcode_FFDC_list
{
{PERV_TP_OCC_SCOM_OCCLFIR, FFDC_SCOM, FFDC_CHIP }, // OCC LFIR
{PU_PBAFIR, FFDC_SCOM, FFDC_CHIP }, // PBA LFIR
{EX_CME_SCOM_LFIR, FFDC_SCOM, FFDC_CME }, // CME LFIR
{PU_GPE3_GPEDBG_OCI, FFDC_OCI, FFDC_CHIP }, // SGPE XSR, SPRG0
{PU_GPE3_GPEDDR_OCI, FFDC_OCI, FFDC_CHIP }, // SGPE IR, EDR
{PU_GPE3_PPE_XIDBGPRO, FFDC_OCI, FFDC_CHIP }, // SGPE XSR, IAR
{PU_GPE2_GPEDBG_OCI, FFDC_OCI, FFDC_CHIP }, // PGPE XSR, SPRG0
{PU_GPE2_GPEDDR_OCI, FFDC_OCI, FFDC_CHIP }, // PGPE IR, EDR
{PU_GPE2_PPE_XIDBGPRO, FFDC_OCI, FFDC_CHIP }, // PGPE XSR, IAR
{EX_PPE_XIRAMDBG, FFDC_SCOM, FFDC_CME }, // CME XSR, SPRG0
{EX_PPE_XIRAMEDR, FFDC_SCOM, FFDC_CME }, // CME IR, EDR
{EX_PPE_XIDBGPRO, FFDC_SCOM, FFDC_CME }, // CME XSR, IAR
};*/

// End FFDC
// -----------------------------------------------------------------------------

// -----------------------------------------------------------------------------
// Start Quad State

Expand Down Expand Up @@ -335,6 +256,32 @@ typedef union requested_active_quads
// End Quad State
// -----------------------------------------------------------------------------

// -----------------------------------------------------------------------------
// Start Error Log Table

// Number of entries in the elog[] array of pgpe_error_table_t
#define MAX_HCODE_ELOG_ENTRIES 16

// One entry of the hcode error log table, accessible either as a single
// 64-bit word (value) or through its individual fields.
// The OCC poll and clear-elog handlers read an entry as one 64-bit word,
// treat value == 0 as "no error log in this slot", and write 0 back to
// release a slot so hcode can reuse it.
typedef union hcode_elog_entry
{
// Whole entry as one 64-bit word
uint64_t value;
struct {
// Error log ID (reported in the poll response as the error log ID)
uint8_t id;
// Error log source (compared against ERRL_SOURCE_405 by the poll handler)
uint8_t source;
// Error log length (reported in the poll response as the error log length)
uint16_t length;
// Error log address (reported in the poll response as the error log address)
uint32_t address;
} fields;
} hcode_elog_entry_t;

// Error log table: an 8-byte header (magic + slot count + reserved bytes)
// followed by MAX_HCODE_ELOG_ENTRIES 64-bit error log entries.
typedef struct pgpe_error_table
{
uint32_t magic; // "ELTC" (Error Log Table of Contents)
// Slot count for the table
// (presumably the number of elog[] slots that are in use -- TODO confirm)
uint8_t total_log_slots;
// Reserved; together with magic and total_log_slots this makes the
// header 8 bytes long
uint8_t reserved[3];
// Error log entries, one hcode_elog_entry_t per slot
hcode_elog_entry_t elog[MAX_HCODE_ELOG_ENTRIES];
} pgpe_error_table_t;

// End Error Log Table
// -----------------------------------------------------------------------------

typedef struct
{
Expand All @@ -356,8 +303,14 @@ typedef struct
///Requested Active Quads
requested_active_quads_t req_active_quads;

/// FFDC Address list
Hcode_FFDC_list_t ffdc_list;
// PGPE Produced WOF Values
uint64_t pgpe_produced_wof_values[2];

// Reserved
uint64_t reserved;

// Error Log Table
pgpe_error_table_t pgpe_error_table;

/// Pstate Table
OCCPstateTable_t pstate_table;
Expand Down
160 changes: 110 additions & 50 deletions src/occ_405/cmdh/cmdh_fsp_cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ extern GpeRequest G_epow_gpio_detected_req;

extern opal_proc_voting_reason_t G_amec_opal_proc_throt_reason;

extern bool G_htmgt_notified_of_error;

// This table contains tunable parameter information that can be exposed to
// customers (only Master OCC should access/control this table)
Expand Down Expand Up @@ -104,6 +105,7 @@ uint8_t G_apss_ch_to_function[MAX_APSS_ADC_CHANNELS] = {0};

ERRL_RC cmdh_poll_v20 (cmdh_fsp_rsp_t * i_rsp_ptr);

#define MAX_CONSECUTIVE_HCODE_ELOGS 2

// Function Specification
//
Expand Down Expand Up @@ -144,6 +146,7 @@ errlHndl_t cmdh_tmgt_poll (const cmdh_fsp_cmd_t * i_cmd_ptr,
return l_errlHndl;
}


// Function Specification
//
// Name: cmdh_poll_v20
Expand All @@ -157,6 +160,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
int k = 0, l_max_sensors = 0;
int l_err_hist_idx = 0, l_sens_list_idx = 0;
cmdh_poll_sensor_db_t l_sensorHeader;
static unsigned int L_num_hcode_elogs = 0;

// Set pointer to start of o_rsp_ptr
cmdh_poll_resp_v20_fixed_t * l_poll_rsp = (cmdh_poll_resp_v20_fixed_t *) o_rsp_ptr;
Expand Down Expand Up @@ -224,30 +228,71 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
l_poll_rsp->ips_status.word = 0;
l_poll_rsp->ips_status.ips_enabled = G_ips_config_data.iv_ipsEnabled;
l_poll_rsp->ips_status.ips_active = AMEC_mst_get_ips_active_status();
// Byte 8:
l_poll_rsp->errl_id = getOldestErrlID();
// Byte 9 - 12:
l_poll_rsp->errl_address = getErrlOCIAddrByID(l_poll_rsp->errl_id);
// Byte 13 - 14:
l_poll_rsp->errl_length = getErrlLengthByID(l_poll_rsp->errl_id);

//If errl_id is not 0, then neither address or length should be zero.
//This should not happen, but if it does tmgt will create an error log that
//includes the data at the errl slot address given that can be used for debug.
//NOTE: One cause for a false errlog id is corruption of data in one errl slot
// due to writing data greater than the size of the previous slot. For
// example writing the CallHome errorlog (3kb) into a regular sized (2kb) slot.
// Make sure to verify the order of the memory allocation for the errl slots.
// Error Log:
bool check_405_elogs = true;
// if (405 has no elogs) OR (have not hit max consecutive hcode elogs)
if ((getOldestErrlID() == 0) || (L_num_hcode_elogs < MAX_CONSECUTIVE_HCODE_ELOGS))
{
// Check for HCODE errors
hcode_elog_entry_t elog_entry;
unsigned int index = 0;
for (; index < G_hcode_elog_table_slots; ++index)
{
elog_entry.value = in64(&G_hcode_elog_table[index]);
if (elog_entry.value != 0)
{ // Found HCODE elog
if (elog_entry.fields.source != ERRL_SOURCE_405)
{
++L_num_hcode_elogs;
// Byte 8:
l_poll_rsp->errl_id = elog_entry.fields.id;
// Byte 9 - 12:
l_poll_rsp->errl_address = elog_entry.fields.address;
// Byte 13 - 14:
l_poll_rsp->errl_length = elog_entry.fields.length;
// Byte 15:
l_poll_rsp->errl_source = elog_entry.fields.source;
check_405_elogs = false;
break;
}
else
{
TRAC_ERR("cmdh_poll_v20: ignoring HCODE error with 405 source (id:0x%02X, len:0x%04X, address:0x%08X)",
elog_entry.fields.id, elog_entry.fields.length, elog_entry.fields.address);
// Zero out error log entry in list so hcode can reuse
out64(&G_hcode_elog_table[index], 0);
G_htmgt_notified_of_error = false;
}
}
}
}
if (check_405_elogs)
{ // No, HCODE errors, check/add any 405 elog
L_num_hcode_elogs = 0;
// Byte 8:
l_poll_rsp->errl_id = getOldestErrlID();
// Byte 9 - 12:
l_poll_rsp->errl_address = getErrlOCIAddrByID(l_poll_rsp->errl_id);
// Byte 13 - 14:
l_poll_rsp->errl_length = getErrlLengthByID(l_poll_rsp->errl_id);
// Byte 15:
l_poll_rsp->errl_source = ERRL_SOURCE_405;
}
//If errl_id is not 0, then neither address or length should be zero.
//This should not happen, but if it does TMGT will create an error log that
//includes the data at the errl slot address given that can be used for debug.
//NOTE: One cause for a false errlog id is corruption of data in one errl slot
// due to writing data greater than the size of the previous slot. For
// example writing the CallHome errorlog (3kb) into a regular sized (2kb) slot.
// Make sure to verify the order of the memory allocation for the errl slots.
if ( (l_poll_rsp->errl_id != 0) &&
((l_poll_rsp->errl_address == 0) || (l_poll_rsp->errl_length == 0)))
{
TRAC_ERR("An error ID has been sent via poll but the address or size is 0. "
"ErrlId:0x%X, sz:0x%X, address:0x%X.",
l_poll_rsp->errl_id, l_poll_rsp->errl_length, l_poll_rsp->errl_address);
TRAC_ERR("cmdh_poll_v20: error log sent with bad data "
"(id:0x%02X, source:0x%02X, len:0x%04X, address:0x%08X)",
l_poll_rsp->errl_id, l_poll_rsp->errl_source, l_poll_rsp->errl_length, l_poll_rsp->errl_address);
}

// Byte 15: reserved.

// Byte 16: GPU Configuration
l_poll_rsp->gpu_presence = (uint8_t)G_first_proc_gpu_config;

Expand Down Expand Up @@ -1043,37 +1088,52 @@ errlHndl_t cmdh_clear_elog (const cmdh_fsp_cmd_t * i_cmd_ptr,
switch(l_elog_source)
{
case ERRL_SOURCE_405:
// Get Errl Array index
l_SlotNum = getErrSlotNumByErrId(l_elog_id);

// Get ERRL address
l_oci_address = (errlHndl_t)getErrSlotOCIAddr(l_SlotNum);

if ((l_oci_address != NULL) &&
(l_oci_address != INVALID_ERR_HNDL))
{
// clear only one Errl by ID
l_err = deleteErrl(&l_oci_address);
}
else
{
CMDH_TRAC_ERR("cmdh_clear_elog: 405 error log ID[0x%02X] not found", l_elog_id);
l_rc = ERRL_RC_INVALID_DATA;
}

break;

case ERRL_SOURCE_PGPE:
case ERRL_SOURCE_XGPE:
// TBD
CMDH_TRAC_INFO("cmdh_clear_elog: Cleared PM Hcode elog id 0x%02X from source 0x%02X",
l_elog_id, l_elog_source);
break;

default:
CMDH_TRAC_ERR("cmdh_clear_elog: Invalid error log source 0x%02X", l_elog_source);
l_rc = ERRL_RC_INVALID_DATA;
break;
// Get Errl Array index
l_SlotNum = getErrSlotNumByErrId(l_elog_id);

// Get ERRL address
l_oci_address = (errlHndl_t)getErrSlotOCIAddr(l_SlotNum);

if ((l_oci_address != NULL) &&
(l_oci_address != INVALID_ERR_HNDL))
{
// clear only one Errl by ID
l_err = deleteErrl(&l_oci_address);
}
else
{
CMDH_TRAC_ERR("cmdh_clear_elog: 405 error log ID[0x%02X] not found", l_elog_id);
l_rc = ERRL_RC_INVALID_DATA;
}
break;

default: // non-405 error log
{
unsigned int index = 0;
for (; index < G_hcode_elog_table_slots; ++index)
{
hcode_elog_entry_t elog_entry;
elog_entry.value = in64(&G_hcode_elog_table[index]);
if ((elog_entry.fields.id == l_elog_id) && (elog_entry.fields.source == l_elog_source))
{
CMDH_TRAC_INFO("cmdh_clear_elog: Clearing HCODE elog id 0x%02X from source 0x%02X",
l_elog_id, l_elog_source);

// Zero out error log entry in list so hcode can reuse
out64(&G_hcode_elog_table[index], 0);
break;
}
}
if (index == G_hcode_elog_table_slots)
{
// Did not find matching entry in hcode table for non-405 error
CMDH_TRAC_ERR("cmdh_clear_elog: Could not find elog id 0x%02X with source 0x%02X",
l_elog_id, l_elog_source);
l_rc = ERRL_RC_INVALID_DATA;
}
G_htmgt_notified_of_error = false;
}
break;
}
}while(0);

Expand Down
4 changes: 2 additions & 2 deletions src/occ_405/cmdh/cmdh_fsp_cmds.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ typedef struct __attribute__ ((packed)) cmdh_poll_resp_v20
uint32_t errl_address;
// BYTES 13 - 14: Error Log Length
uint16_t errl_length;
// BYTE 15: Reserved
uint8_t _reserved_15;
// BYTE 15: Error Log Source
uint8_t errl_source;
// BYTE 16: GPU Configuration
uint8_t gpu_presence;
// BYTES 17 - 32 (16 bytes): OCC Code Level - ASCII string of OCC build level currently running.
Expand Down

0 comments on commit 98ccba6

Please sign in to comment.