Skip to content

Commit

Permalink
Added Error log support for new GPU sensors
Browse files Browse the repository at this point in the history
Change-Id: I8a0de390516fd02df07860b960db506899b13f14
RTC:178218
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45116
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
mderkse1 authored and dcrowell77 committed Sep 1, 2017
1 parent 7b2191a commit 3d4caf2
Show file tree
Hide file tree
Showing 14 changed files with 384 additions and 4 deletions.
24 changes: 24 additions & 0 deletions src/include/usr/errl/errlentry.H
Expand Up @@ -573,6 +573,22 @@ public:
void addProcedureCallout(const HWAS::epubProcedureID i_procedure,
const HWAS::callOutPriority i_priority);


/**
 * @brief Add a sensor callout
 *        Builds a sensor callout user-details section from the given
 *        sensor ID, sensor type and priority, and adds it to this log.
 *
 * @param[in]  i_sensorID      Sensor ID
 * @param[in]  i_sensorType    Type of sensor being added
 * @param[in]  i_priority      Priority of the callout
 *
 * @return void
 */
void addSensorCallout(const uint32_t i_sensorID,
const HWAS::sensorTypeEnum i_sensorType,
const HWAS::callOutPriority i_priority);


/**
* @brief Import flattened error log
*
Expand Down Expand Up @@ -650,6 +666,14 @@ private:
*/
epubSubSystem_t getSubSystem( HWAS::partTypeEnum i_partType ) const;

/**
 * @brief Maps a sensor type to a subsystem ID
 *
 * @param[in] i_sensorType   Sensor type to look up.
 *
 * @return Subsystem ID for the sensor type, or EPUB_MISC_UNKNOWN when
 *         the sensor type has no entry in SENSOR_TO_SUBSYS_TABLE
 */
epubSubSystem_t getSubSystem( HWAS::sensorTypeEnum i_sensorType ) const;

/**
* @brief The ErrlManager will call here to ask the
Expand Down
18 changes: 17 additions & 1 deletion src/include/usr/errl/errludcallout.H
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2012,2015 */
/* Contributors Listed Below - COPYRIGHT 2012,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -159,6 +159,22 @@ private:
const HWAS::callOutPriority i_priority);


/**
 * @brief Construct a sensor callout
 *        Records the given sensor ID, sensor type and priority in a
 *        HWAS::callout_ud_t entry whose type is HWAS::SENSOR_CALLOUT.
 *
 * @param[in]  i_sensorID      Sensor ID
 * @param[in]  i_sensorType    Type of sensor being added
 * @param[in]  i_priority      Priority of the callout
 */
ErrlUserDetailsCallout(const uint32_t i_sensorID,
const HWAS::sensorTypeEnum i_sensorType,
const HWAS::callOutPriority i_priority );



// Disabled
ErrlUserDetailsCallout(const ErrlUserDetailsCallout &);
ErrlUserDetailsCallout & operator=(const ErrlUserDetailsCallout &);
Expand Down
14 changes: 12 additions & 2 deletions src/include/usr/hwas/common/hwasCallout.H
Expand Up @@ -145,15 +145,21 @@ enum partTypeEnum
PCI_REF_CLOCK = 11,
};



// Sensor types that can be recorded in a SENSOR_CALLOUT entry.
// The numeric values are stored in the callout user data, so existing
// values must not be renumbered.
enum sensorTypeEnum
{
    UNKNOWN_SENSOR          = 0,
    GPU_FUNC_SENSOR         = 1,
    GPU_TEMPERATURE_SENSOR  = 2,
    GPU_MEMORY_TEMP_SENSOR  = 3,
};

// Values for the 'type' field of a callout_ud_t; each one selects which
// member of the callout union is in use.
const uint8_t HW_CALLOUT        = 0x01;
const uint8_t PROCEDURE_CALLOUT = 0x02;
const uint8_t BUS_CALLOUT       = 0x03;
const uint8_t CLOCK_CALLOUT     = 0x04;
const uint8_t PART_CALLOUT      = 0x05;
const uint8_t SENSOR_CALLOUT    = 0x06;

const uint8_t TARGET_IS_SENTINEL = 0xF0;

Expand Down Expand Up @@ -188,6 +194,10 @@ typedef struct callout_ud
GARD_ErrorType partGardErrorType; // uint32_t
// one Target will follow
};
struct { // type == SENSOR_CALLOUT
uint32_t sensorId;
sensorTypeEnum sensorType;
};
}; // union
} callout_ud_t;

Expand Down
50 changes: 50 additions & 0 deletions src/include/usr/ipmi/ipmiconfiglookup.H
Expand Up @@ -210,6 +210,28 @@ class IpmiConfigLookup
return i_tgt->tryGetAttr<TARGETING::ATTR_IPMI_SENSORS>(l_ipmiArray);
}

/**
 * @brief Reports whether the passed in target has the GPU_SENSORS
 *        attribute.
 *
 * @param[in] i_tgt  The target to test. May be NULL.
 *
 * @return True if the target has the GPU_SENSORS attribute, false
 *         otherwise (including when i_tgt is NULL).
 *
 */
inline static bool doesTargetHaveGPUSensorsAttr(TARGETING::Target * i_tgt)
{
    // Scratch storage only: tryGetAttr needs a destination, but the
    // attribute value itself is never read here.
    TARGETING::AttributeTraits<TARGETING::ATTR_GPU_SENSORS>::Type
        l_sensorData;

    bool l_hasAttr = false;

    if (i_tgt)
    {
        l_hasAttr =
            i_tgt->tryGetAttr<TARGETING::ATTR_GPU_SENSORS>(l_sensorData);
    }

    return l_hasAttr;
}


/**
* @brief Given a passed in target, looks up IPMI_SENSOR data based upon
* the passed in sensor number.
Expand Down Expand Up @@ -237,6 +259,34 @@ class IpmiConfigLookup
uint8_t& o_entityId,
TARGETING::SENSOR_NAME& o_sensorName
);

/**
 * @brief Given a passed in target, looks up GPU_SENSOR data based upon
 *        the passed in sensor number.
 *
 * @param[in] i_target        The target whose GPU_SENSORS attribute will
 *                            be searched for information based upon the
 *                            passed in sensor number.
 *
 * @param[in] i_sensorNumber  The GPU sensor whose information we wish
 *                            to gather.
 *
 * @param[out] o_sensorType   The sensor type as read from the
 *                            GPU_SENSOR_ARRAY.
 *
 * @param[out] o_entityId     The entity id associated with the sensor
 *                            as read from the GPU_SENSOR_ARRAY.
 *
 * @param[out] o_sensorName   The sensor name as read from the
 *                            GPU_SENSOR_ARRAY.
 *
 * @return NOTE(review): the meaning of the bool result is not stated in
 *         this header; presumably true when the sensor number was found
 *         and the outputs were filled in - confirm against the
 *         implementation.
 *
 */
static bool lookupGPUSensorInfo(TARGETING::Target * i_target,
uint32_t i_sensorNumber,
uint8_t& o_sensorType,
uint8_t& o_entityId,
TARGETING::SENSOR_NAME& o_sensorName
);
};

}
Expand Down
49 changes: 49 additions & 0 deletions src/usr/errl/errlentry.C
Expand Up @@ -320,6 +320,17 @@ void ErrlEntry::addClockCallout(const TARGETING::Target *i_target,

} // addClockCallout


void ErrlEntry::addSensorCallout(const uint32_t i_sensorID,
                                 const HWAS::sensorTypeEnum i_sensorType,
                                 const HWAS::callOutPriority i_priority)
{
    TRACFCOMP(g_trac_errl, ENTER_MRK"addSensorCallout(0x%X, %d, 0x%x)",
              i_sensorID, i_sensorType, i_priority);

    // Build the sensor callout user-details section, then attach it to
    // this error log.
    ErrlUserDetailsCallout l_sensorCallout(i_sensorID,
                                           i_sensorType,
                                           i_priority);
    l_sensorCallout.addToLog(this);
}

////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
void ErrlEntry::addPartCallout(const TARGETING::Target *i_target,
Expand Down Expand Up @@ -947,6 +958,13 @@ void ErrlEntry::setSubSystemIdBasedOnCallouts()
"callout to determine SSID", pData->partType);
iv_User.setSubSys(getSubSystem(pData->partType));
}
else if ( pData->type == HWAS::SENSOR_CALLOUT )
{
TRACFCOMP(g_trac_errl, INFO_MRK
"mapping highest priority sensor type 0x%x "
"callout to determine SSID", pData->sensorType);
iv_User.setSubSys(getSubSystem(pData->sensorType));
}
else
{
TRACFCOMP(g_trac_errl, ERR_MRK
Expand Down Expand Up @@ -1153,6 +1171,37 @@ epubSubSystem_t ErrlEntry::getSubSystem( HWAS::clockTypeEnum i_clockType ) const
return subsystem;
}

///////////////////////////////////////////////////////////////////////////////
// Map a Sensor type to a subsystem ID.
// Returns EPUB_MISC_UNKNOWN (and traces a warning) when the sensor type has
// no entry in SENSOR_TO_SUBSYS_TABLE.
epubSubSystem_t ErrlEntry::getSubSystem(HWAS::sensorTypeEnum i_sensorType) const
{
    TRACDCOMP(g_trac_errl, ENTER_MRK"getSubSystem() from sensor type 0x%x",
              i_sensorType);

    const uint32_t l_tableSize =
        sizeof(SENSOR_TO_SUBSYS_TABLE)/sizeof(SENSOR_TO_SUBSYS_TABLE[0]);

    // Advance to the first entry matching the requested sensor type;
    // l_idx ends up equal to l_tableSize when there is no match.
    uint32_t l_idx = 0;
    while ((l_idx < l_tableSize) &&
           (SENSOR_TO_SUBSYS_TABLE[l_idx].xType != i_sensorType))
    {
        ++l_idx;
    }

    epubSubSystem_t l_ssid = EPUB_MISC_UNKNOWN;
    if (l_idx < l_tableSize)
    {
        l_ssid = SENSOR_TO_SUBSYS_TABLE[l_idx].xSubSys;
    }

    if (EPUB_MISC_UNKNOWN == l_ssid)
    {
        TRACFCOMP(g_trac_errl,"WRN>> Failed to find subsystem ID for sensor type 0x%x",
                  i_sensorType);
    }

    TRACDCOMP(g_trac_errl, EXIT_MRK"getSubSystem() ssid 0x%x", l_ssid);
    return l_ssid;
}

///////////////////////////////////////////////////////////////////////////////
// Map a Part type to a subsystem ID
epubSubSystem_t ErrlEntry::getSubSystem( HWAS::partTypeEnum i_partType ) const
Expand Down
13 changes: 13 additions & 0 deletions src/usr/errl/errlentry_consts.H
Expand Up @@ -149,6 +149,19 @@ const epubPartTypeToSub_t PART_TO_SUBSYS_TABLE[] =
{ HWAS::SPIVID_SLAVE_PART_TYPE , EPUB_POWER_SUBSYS },
};

// One row of the sensor-type to subsystem-ID mapping table
struct epubSensorTypeToSub_t
{
    HWAS::sensorTypeEnum    xType;      // sensor type used as the lookup key
    epubSubSystem_t         xSubSys;    // subsystem ID mapped to that type
};

struct epubSensorTypeToSub_t SENSOR_TO_SUBSYS_TABLE[] =
{
{ HWAS::GPU_FUNC_SENSOR , EPUB_IO_SUBSYS },
{ HWAS::GPU_TEMPERATURE_SENSOR , EPUB_IO_SUBSYS },
{ HWAS::GPU_MEMORY_TEMP_SENSOR , EPUB_IO_SUBSYS },
};

} //end namespace

#endif //#ifndef ERRLENTRY_CONSTS_H
4 changes: 4 additions & 0 deletions src/usr/errl/errlmanager_common.C
Expand Up @@ -842,6 +842,10 @@ uint8_t getSensorInfo(HWAS::callout_ud_t *i_ud,
{
*o_sensorNumber = SENSOR::getBackPlaneFaultSensor();
}
else if (i_ud->type == HWAS::SENSOR_CALLOUT )
{
*o_sensorNumber = static_cast<uint8_t>(i_ud->sensorId);
}
else
{
// for all other types there will be at least
Expand Down
27 changes: 26 additions & 1 deletion src/usr/errl/errludcallout.C
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2012,2014 */
/* Contributors Listed Below - COPYRIGHT 2012,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -202,4 +202,29 @@ ErrlUserDetailsCallout::ErrlUserDetailsCallout(

} // Procedure callout

//------------------------------------------------------------------------------
// Sensor callout
// Fills a callout_ud_t user-data buffer with the sensor ID, sensor type and
// priority, tagged as HWAS::SENSOR_CALLOUT.
ErrlUserDetailsCallout::ErrlUserDetailsCallout(
        const uint32_t i_sensorID,
        const HWAS::sensorTypeEnum i_sensorType,
        const HWAS::callOutPriority i_priority)
{
    TRACDCOMP(g_trac_errl, "Sensor Callout");

    // Identify this user-details section
    iv_CompId     = ERRL_COMP_ID;
    iv_Version    = 1;
    iv_SubSection = ERRL_UDT_CALLOUT;

    // Size the user buffer to hold exactly one callout entry
    HWAS::callout_ud_t * const l_pCallout =
        reinterpret_cast<HWAS::callout_ud_t *>(
            reallocUsrBuf(sizeof(HWAS::callout_ud_t)));

    // Common header fields, then the sensor-specific union members
    l_pCallout->type       = HWAS::SENSOR_CALLOUT;
    l_pCallout->priority   = i_priority;
    l_pCallout->sensorId   = i_sensorID;
    l_pCallout->sensorType = i_sensorType;

    TRACDCOMP(g_trac_errl, "Sensor Callout exit");
} // Sensor callout

}
24 changes: 24 additions & 0 deletions src/usr/errl/plugins/errludcallout.H
Expand Up @@ -297,6 +297,30 @@ case HWAS::_type: i_parser.PrintString( "Procedure", #_type); break;
#undef case_PROCEDURE
break; // PROCEDURE_CALLOUT
}

case HWAS::SENSOR_CALLOUT:
{
i_parser.PrintString( "Callout type", "Sensor Callout");

i_parser.PrintNumber( "Sensor ID", "0x%X",
ntohl(pData->sensorId));

switch (ntohl(pData->sensorType))
{
#define case_SENSOR_TYPE(_type) \
case HWAS::_type: i_parser.PrintString( "Sensor Type", #_type); break;
case_SENSOR_TYPE(GPU_FUNC_SENSOR)
case_SENSOR_TYPE(GPU_TEMPERATURE_SENSOR)
case_SENSOR_TYPE(GPU_MEMORY_TEMP_SENSOR)
case_SENSOR_TYPE(UNKNOWN_SENSOR)
default:
i_parser.PrintNumber( "Sensor Type", "UNKNOWN: 0x%X",
ntohl(pData->sensorType) );
break;
} // switch sensorType
#undef case_SENSOR_TYPE
break;
}
default:
i_parser.PrintNumber( "Callout type", "UNKNOWN: 0x%X",
ntohl(pData->type) );
Expand Down
19 changes: 19 additions & 0 deletions src/usr/errldisplay/errldisplay.C
Expand Up @@ -380,6 +380,25 @@ case HWAS::_type: CONSOLE::displayf(NULL, " Procedure : %s", #_t
} // switch procedure
#undef case_PROCEDURE
break;

case HWAS::SENSOR_CALLOUT:
CONSOLE::displayf(NULL, " Sensor ID : 0x%x", callout->sensorId);

switch (callout->sensorType)
{
#define case_SENSOR_TYPE(_type) \
case HWAS::_type: CONSOLE::displayf(NULL, " Sensor Type : %s", #_type); break;
case_SENSOR_TYPE(UNKNOWN_SENSOR)
case_SENSOR_TYPE(GPU_FUNC_SENSOR)
case_SENSOR_TYPE(GPU_TEMPERATURE_SENSOR)
case_SENSOR_TYPE(GPU_MEMORY_TEMP_SENSOR)
default:
CONSOLE::displayf(NULL, " Sensor Type : UNKNOWN 0x%X",
callout->sensorType);
} // switch sensorType
#undef case_SENSOR_TYPE
break;

default:
CONSOLE::displayf(NULL, " Callout type : UNKNOWN: 0x%X",
callout->type);
Expand Down

0 comments on commit 3d4caf2

Please sign in to comment.