Skip to content

Commit

Permalink
New OCC/HTMGT interfaces for GPU sensor support
Browse files Browse the repository at this point in the history
getGpuSensors() and updateGpuSensorStatus()

Depends-on: I8a0de390516fd02df07860b960db506899b13f14
Change-Id: I290876d0e5f4889e6f2b1a45b5f81172acb28caf
RTC:178218
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45196
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
mderkse1 authored and dcrowell77 committed Sep 5, 2017
1 parent 9a2410a commit 14187f3
Show file tree
Hide file tree
Showing 4 changed files with 477 additions and 9 deletions.
104 changes: 96 additions & 8 deletions src/include/usr/ipmi/ipmisensor.H
Expand Up @@ -139,7 +139,7 @@ namespace SENSOR
* @enum discrete09_Offsets
*
* Offsets specific to IPMI sensor reading type 09
* digital discrete senosrs. These offsets result in
* digital discrete sensors. These offsets result in
* Device Enabled or Device Disabled events in the
* BMC event log.
*
Expand All @@ -157,7 +157,7 @@ namespace SENSOR
* @enum discrete03_Offsets
*
* Offsets specific to IPMI sensor reading type 03
* digital discrete senosrs. These offsets result in generic
* digital discrete sensors. These offsets result in generic
* State Asserted or State Deasserted events in the
* BMC event log.
*
Expand Down Expand Up @@ -386,9 +386,9 @@ namespace SENSOR
setSensorReadingRequest * i_data);

/**
* @brief helper function to get the sensor nubmer
* @brief helper function to get the sensor number
*/
inline uint32_t getSensorNumber( )
virtual uint32_t getSensorNumber( )
{
return TARGETING::UTIL::getSensorNumber(iv_target, iv_name );
};
Expand Down Expand Up @@ -688,15 +688,15 @@ namespace SENSOR

errlHndl_t setStatus( statusEnum status );

private:
// disable the default constructor
StatusSensor();

protected:
// internal offset data which is configured based on target type
// used in construction.
uint8_t iv_presentOffset;
uint8_t iv_functionalOffset;

private:
// disable the default constructor
StatusSensor();
};

/**
Expand Down Expand Up @@ -791,6 +791,57 @@ namespace SENSOR
FaultSensor(const FaultSensor& i_rhs);
};


/**
* @class GpuSensor
*
* @brief Specialized class to handle GPU sensor status.
*
* @par Detailed Description:
* Provides the functionality needed to set the status of GPU sensors.
*/
class GpuSensor : public StatusSensor
{

public:

    /**
     * @brief Constructor for a gpu sensor
     *
     * The status sensor is used for GPUs attached to Procs. Hostboot
     * will update the present and functional state for the status
     * sensor associated with each instance of these target types.
     *
     * @param[in] - i_name - Sensor name enum
     * @param[in] - i_num - number identifying particular sensor
     * @param[in] - i_target - Processor target that contains the sensors
     */
    GpuSensor(TARGETING::SENSOR_NAME i_name, uint16_t i_num,
              TARGETING::ConstTargetHandle_t i_target );

    /**
     * @brief return the unique number identifying this sensor
     *
     * Overrides the inherited virtual getSensorNumber() so that the
     * number held in iv_sensorNumber is reported for this sensor.
     *
     * @return iv_sensorNumber, widened to uint32_t
     */
    uint32_t getSensorNumber() override
    {
        return iv_sensorNumber;
    }

    /**
     * @brief Destructor for the GpuSensor
     */
    ~GpuSensor();

private:
    /**
     * @brief unique number identifying this particular gpu sensor
     */
    uint16_t iv_sensorNumber;

    // disable the default constructor; deleting it turns any accidental
    // use into a compile-time error instead of an unresolved symbol
    GpuSensor() = delete;
};

/**
* @brief Updates initial state of Hostboot relevant fault sensors on the
* BMC
Expand Down Expand Up @@ -950,6 +1001,43 @@ namespace SENSOR
*/
uint8_t getBackPlaneFaultSensor();

/**
* @brief Maximum GPU sensors on a single processor
*
* Also the declared length of the o_ids array taken by getGpuSensors().
*/
static const uint8_t MAX_GPU_SENSORS_PER_PROCESSOR = 3;

/**
* @brief Grab the GPU sensor type IDs for a particular processor target
*
* Will return all sensor ids that match the type for a given target.
*
* @param[in] - i_proc - processor target
* @param[in] - i_type - Functional/state, gpucoretemp, gpumemtemp
* @param[out] - o_num_ids - number of valid IDs returned in o_ids
* @param[out] - o_ids - ordered list of sensor IDs. The array bound in the
*                       signature documents the expected length only; an
*                       array parameter is adjusted to a pointer, so the
*                       caller must supply at least
*                       MAX_GPU_SENSORS_PER_PROCESSOR entries.
*                       NOTE(review): the visible caller compares each
*                       entry against TARGETING::UTIL::INVALID_IPMI_SENSOR,
*                       so presumably slots without a sensor hold that
*                       value - confirm against the implementation.
*
* @return Errorlog handle; the visible caller treats a null handle as
*         success and only then reads o_ids
*/
errlHndl_t getGpuSensors( TARGETING::Target* i_proc,
HWAS::sensorTypeEnum i_type,
uint8_t & o_num_ids,
uint32_t o_ids[MAX_GPU_SENSORS_PER_PROCESSOR] );

/**
* @brief Maximum number of GPUs associated with a processor
*
* Also the declared length of the i_gpu_status array taken by
* updateGpuSensorStatus().
* NOTE(review): the visible caller in ipmifruinv.C uses one index for both
* a MAX_GPU_SENSORS_PER_PROCESSOR-sized array and a MAX_PROCESSOR_GPUS-sized
* array, so the two constants are expected to stay equal.
*/
static const uint8_t MAX_PROCESSOR_GPUS = 3;

/**
* @brief Updates GPU sensor status for GPUs on this
* particular processor target
*
* @param[in] - i_proc - processor target
* @param[in] - i_gpu_status - status of GPU0, GPU1 and GPU2, one entry per
*                             GPU. The array bound in the signature
*                             documents the expected length only; an array
*                             parameter is adjusted to a pointer, so the
*                             caller must supply at least
*                             MAX_PROCESSOR_GPUS entries.
*/
void updateGpuSensorStatus( TARGETING::Target* i_proc,
StatusSensor::statusEnum i_gpu_status[MAX_PROCESSOR_GPUS] );


}; // end namespace

Expand Down
55 changes: 55 additions & 0 deletions src/usr/ipmi/ipmifruinv.C
Expand Up @@ -34,6 +34,7 @@
#include <targeting/common/utilFilter.H>
#include <errl/errlmanager.H>
#include <ipmi/ipmifruinv.H>
#include <ipmi/ipmisensor.H>
#include "ipmifru.H"
#include "ipmifruinvprvt.H"
#include <stdio.h>
Expand Down Expand Up @@ -1685,6 +1686,60 @@ void IPMIFRUINV::setData(bool i_updateData)
IPMIFRUINV::clearData(it->first);
}
}

// Only send GPU sensor PRESENT status one time (no update),
// then allow HTMGT to update

// Go through processors and send GPU sensor status
// Get all Proc targets
TARGETING::TargetHandleList l_procTargetList;
getAllChips(l_procTargetList, TARGETING::TYPE_PROC);

uint32_t gpu_sensors[SENSOR::MAX_GPU_SENSORS_PER_PROCESSOR];
uint8_t num_valid_sensors = 0;
for (const auto & l_procChip: l_procTargetList)
{
// report present GPU sensors
l_errl = SENSOR::getGpuSensors( l_procChip,
HWAS::GPU_FUNC_SENSOR,
num_valid_sensors,
gpu_sensors );
if (!l_errl)
{
// build up present GPUs based on sensor data returned
SENSOR::StatusSensor::statusEnum
gpu_status[SENSOR::MAX_PROCESSOR_GPUS];

// initialize to NOT PRESENT
for (uint8_t j = 0; j < SENSOR::MAX_PROCESSOR_GPUS; j++)
{
gpu_status[j] =
SENSOR::StatusSensor::statusEnum::NOT_PRESENT;
}

// now change the PRESENT ones
for (uint8_t i = 0;
i < SENSOR::MAX_GPU_SENSORS_PER_PROCESSOR; i++)
{
if (i < SENSOR::MAX_PROCESSOR_GPUS)
{
if (gpu_sensors[i] !=
TARGETING::UTIL::INVALID_IPMI_SENSOR)
{
gpu_status[i] =
SENSOR::StatusSensor::statusEnum::PRESENT;
}
}
else
{
break;
}
}

// Send the present/non-present GPU sensors
SENSOR::updateGpuSensorStatus( l_procChip, gpu_status);
}
}
}

} while(0);
Expand Down

0 comments on commit 14187f3

Please sign in to comment.