-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix issue: thermal algorithm does not work for optical modules (#128)
Fix issue: a rise in optical module temperature does not result in a rise in fan speed. The kernel thermal algorithm cannot detect optical module temperature changes. The ASIC and gearbox have no issue; only optical modules are affected. (0058) Enable the kernel to detect the hottest thermal zone and perform the thermal algorithm based on that zone. (0059)
- Loading branch information
1 parent
8f778f1
commit 97c6686
Showing
3 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
49 changes: 49 additions & 0 deletions
49
patch/0058-mlxsw-core-thermal-Set-default-thermal-trips-at-init.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
From a50b84e93f743c13f2568396927c7744ff706e29 Mon Sep 17 00:00:00 2001 | ||
From: Vadim Pasternak <vadimp@mellanox.com> | ||
Date: Mon, 23 Mar 2020 12:57:16 +0200 | ||
Subject: [thermal_emul 1/2] mlxsw: core: thermal: Set default thermal trips at | ||
initialization | ||
|
||
Set default thermal trip temperatures during thermal zone | ||
initialization. Otherwise the polling time for thermal control | ||
could stay zero and such thermal zones will not be triggered by | ||
the thermal algorithm. | ||
|
||
Signed-off-by: Vadim Pasternak <vadimp@mellanox.com> | ||
--- | ||
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 12 ++++++++---- | ||
1 file changed, 8 insertions(+), 4 deletions(-) | ||
|
||
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
index 690ae0f1820e..53198e260633 100644 | ||
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
@@ -20,6 +20,10 @@ | ||
#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ | ||
#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ | ||
#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ | ||
+#define MLXSW_THERMAL_MODULE_TEMP_NORM 60000 /* 60C */ | ||
+#define MLXSW_THERMAL_MODULE_TEMP_HIGH 70000 /* 70C */ | ||
+#define MLXSW_THERMAL_MODULE_TEMP_HOT 80000 /* 80C */ | ||
+#define MLXSW_THERMAL_MODULE_TEMP_CRIT 90000 /* 90C */ | ||
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ | ||
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) | ||
#define MLXSW_THERMAL_ZONE_MAX_NAME 16 | ||
@@ -154,10 +158,10 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, | ||
static void | ||
mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) | ||
{ | ||
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; | ||
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; | ||
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; | ||
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0; | ||
+ tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = MLXSW_THERMAL_MODULE_TEMP_NORM; | ||
+ tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = MLXSW_THERMAL_MODULE_TEMP_HIGH; | ||
+ tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = MLXSW_THERMAL_MODULE_TEMP_HOT; | ||
+ tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = MLXSW_THERMAL_MODULE_TEMP_CRIT; | ||
} | ||
|
||
static int | ||
-- | ||
2.11.0 | ||
|
165 changes: 165 additions & 0 deletions
165
patch/0059-mlxsw-core-Add-the-hottest-thermal-zone-detection.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
From a975f22abc2b75614f5a58ea8af843043ade782f Mon Sep 17 00:00:00 2001 | ||
From: junchao <junchao@contoso.com> | ||
Date: Mon, 30 Mar 2020 10:05:25 +0000 | ||
Subject: [hwmon-next 04/14] mlxsw: core: Add the hottest thermal zone | ||
detection | ||
|
||
When multiple sensors are mapped to the same cooling device, the | ||
cooling device should be set according to the worst sensor among the | ||
sensors associated with this cooling device. | ||
|
||
Provide the hottest thermal zone detection and enforce cooling device | ||
to follow the temperature trends of the hottest zone only. | ||
Prevent competition for the cooling device control from other zones, | ||
by "stable trend" indication. A cooling device will not perform any | ||
actions associated with a zone with a "stable trend". | ||
|
||
When another thermal zone is detected as the hottest, the cooling device | ||
is switched to following the temperature trends of the new hottest zone. | ||
|
||
Thermal zone score is represented by a 32-bit unsigned integer and | ||
calculated according to the following formula: | ||
For T < TZ<t><i>, where t from {normal trip = 0, high trip = 1, hot | ||
trip = 2, critical = 3}: | ||
TZ<i> score = (T + (TZ<t><i> - T) / 2) / (TZ<t><i> - T) * 256 ** t; | ||
Highest thermal zone score s is set as MAX(TZ<i> score); | ||
Following this formula, if TZ<i> is in a trip point higher than TZ<k>, | ||
the higher score is always assigned to TZ<i>. | ||
|
||
For two thermal zones located at the same kind of trip point, the higher | ||
score will be assigned to the zone which is closer to the next trip | ||
point. Thus, the highest score will always be assigned objectively to | ||
the hottest thermal zone. | ||
|
||
All the thermal zones initially are to be configured with mode | ||
"enabled" with the "step_wise" governor. | ||
|
||
--- | ||
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 55 ++++++++++++++++++++++ | ||
1 file changed, 55 insertions(+) | ||
|
||
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
index e1e18ea..53198e2 100644 | ||
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | ||
@@ -27,6 +27,7 @@ | ||
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ | ||
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) | ||
#define MLXSW_THERMAL_ZONE_MAX_NAME 16 | ||
+#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) | ||
#define MLXSW_THERMAL_MAX_STATE 10 | ||
#define MLXSW_THERMAL_MAX_DUTY 255 | ||
/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values | ||
@@ -205,6 +206,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, | ||
return 0; | ||
} | ||
|
||
+static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, | ||
+ struct thermal_zone_device *tzdev, | ||
+ struct mlxsw_thermal_trip *trips, | ||
+ int temp) | ||
+{ | ||
+ struct mlxsw_thermal_trip *trip = trips; | ||
+ unsigned int score, delta, i, shift = 1; | ||
+ | ||
+ /* Calculate thermal zone score, if temperature is above the critical | ||
+ * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. | ||
+ */ | ||
+ score = MLXSW_THERMAL_TEMP_SCORE_MAX; | ||
+ for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; | ||
+ i++, trip++) { | ||
+ if (temp < trip->temp) { | ||
+ delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); | ||
+ score = delta * shift; | ||
+ break; | ||
+ } | ||
+ shift *= 256; | ||
+ } | ||
+ | ||
+ if (score > thermal->tz_highest_score) { | ||
+ thermal->tz_highest_score = score; | ||
+ thermal->tz_highest_dev = tzdev; | ||
+ } | ||
+} | ||
+ | ||
static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, | ||
struct thermal_cooling_device *cdev) | ||
{ | ||
@@ -306,6 +335,9 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, | ||
return err; | ||
} | ||
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); | ||
+ if (temp > 0) | ||
+ mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, | ||
+ temp); | ||
|
||
*p_temp = temp; | ||
return 0; | ||
@@ -367,6 +399,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, | ||
return 0; | ||
} | ||
|
||
+static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, | ||
+ int trip, enum thermal_trend *trend) | ||
+{ | ||
+ struct mlxsw_thermal_module *tz = tzdev->devdata; | ||
+ struct mlxsw_thermal *thermal = tz->parent; | ||
+ | ||
+ if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) | ||
+ return -EINVAL; | ||
+ | ||
+ if (tzdev == thermal->tz_highest_dev) | ||
+ return 1; | ||
+ | ||
+ *trend = THERMAL_TREND_STABLE; | ||
+ return 0; | ||
+} | ||
+ | ||
struct thermal_zone_params mlxsw_thermal_params = { | ||
.no_hwmon = true, | ||
}; | ||
@@ -382,6 +430,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { | ||
.set_trip_temp = mlxsw_thermal_set_trip_temp, | ||
.get_trip_hyst = mlxsw_thermal_get_trip_hyst, | ||
.set_trip_hyst = mlxsw_thermal_set_trip_hyst, | ||
+ .get_trend = mlxsw_thermal_trend_get, | ||
}; | ||
|
||
static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, | ||
@@ -499,6 +548,8 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, | ||
|
||
/* Update trip points. */ | ||
err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); | ||
+ if (!err && temp > 0) | ||
+ mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); | ||
|
||
return 0; | ||
} | ||
@@ -574,6 +625,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { | ||
.set_trip_temp = mlxsw_thermal_module_trip_temp_set, | ||
.get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, | ||
.set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, | ||
+ .get_trend = mlxsw_thermal_trend_get, | ||
}; | ||
|
||
static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, | ||
@@ -600,6 +652,8 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, | ||
return err; | ||
|
||
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); | ||
+ if (temp > 0) | ||
+ mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); | ||
|
||
*p_temp = temp; | ||
return 0; | ||
@@ -616,6 +670,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { | ||
.set_trip_temp = mlxsw_thermal_module_trip_temp_set, | ||
.get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, | ||
.set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, | ||
+ .get_trend = mlxsw_thermal_trend_get, | ||
}; | ||
|
||
static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, | ||
-- | ||
2.11.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters