Skip to content

Commit

Permalink
ath9k: add stability fixes for long standing hang issues (FS#13, #34, #…
Browse files Browse the repository at this point in the history
…373, #383)

The radio would stop communicating completely. This issue was easiest to
trigger on AR913x devices, e.g. the TP-Link TL-WR1043ND, but other
hardware was occasionally affected as well.

The most critical issue was a race condition in disabling/enabling IRQs
between the IRQ handler and the IRQ processing tasklet

Signed-off-by: Felix Fietkau <nbd@nbd.name>
  • Loading branch information
nbd168 committed Jan 25, 2017
1 parent 4cacc1c commit b94177e
Show file tree
Hide file tree
Showing 16 changed files with 523 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Jan 2017 12:57:05 +0100
Subject: [PATCH] ath9k: rename tx_complete_work to hw_check_work

Also include common MAC alive check. This should make the hang checks
more reliable for modes where beacons are not sent and is used as a
starting point for further hang check improvements

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -108,7 +108,7 @@ int ath_descdma_setup(struct ath_softc *
#define ATH_AGGR_MIN_QDEPTH 2
/* minimum h/w qdepth for non-aggregated traffic */
#define ATH_NON_AGGR_MIN_QDEPTH 8
-#define ATH_TX_COMPLETE_POLL_INT 1000
+#define ATH_HW_CHECK_POLL_INT 1000
#define ATH_TXFIFO_DEPTH 8
#define ATH_TX_ERROR 0x01

@@ -745,7 +745,7 @@ void ath9k_csa_update(struct ath_softc *
#define ATH_PAPRD_TIMEOUT 100 /* msecs */
#define ATH_PLL_WORK_INTERVAL 100

-void ath_tx_complete_poll_work(struct work_struct *work);
+void ath_hw_check_work(struct work_struct *work);
void ath_reset_work(struct work_struct *work);
bool ath_hw_check(struct ath_softc *sc);
void ath_hw_pll_work(struct work_struct *work);
@@ -1053,7 +1053,7 @@ struct ath_softc {
#ifdef CPTCFG_ATH9K_DEBUGFS
struct ath9k_debug debug;
#endif
- struct delayed_work tx_complete_work;
+ struct delayed_work hw_check_work;
struct delayed_work hw_pll_work;
struct timer_list sleep_timer;

--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -681,6 +681,7 @@ static int ath9k_init_softc(u16 devid, s
INIT_WORK(&sc->hw_reset_work, ath_reset_work);
INIT_WORK(&sc->paprd_work, ath_paprd_calibrate);
INIT_DELAYED_WORK(&sc->hw_pll_work, ath_hw_pll_work);
+ INIT_DELAYED_WORK(&sc->hw_check_work, ath_hw_check_work);

ath9k_init_channel_context(sc);

--- a/drivers/net/wireless/ath/ath9k/link.c
+++ b/drivers/net/wireless/ath/ath9k/link.c
@@ -20,20 +20,13 @@
* TX polling - checks if the TX engine is stuck somewhere
* and issues a chip reset if so.
*/
-void ath_tx_complete_poll_work(struct work_struct *work)
+static bool ath_tx_complete_check(struct ath_softc *sc)
{
- struct ath_softc *sc = container_of(work, struct ath_softc,
- tx_complete_work.work);
struct ath_txq *txq;
int i;
- bool needreset = false;
-

- if (sc->tx99_state) {
- ath_dbg(ath9k_hw_common(sc->sc_ah), RESET,
- "skip tx hung detection on tx99\n");
- return;
- }
+ if (sc->tx99_state)
+ return true;

for (i = 0; i < IEEE80211_NUM_ACS; i++) {
txq = sc->tx.txq_map[i];
@@ -41,25 +34,36 @@ void ath_tx_complete_poll_work(struct wo
ath_txq_lock(sc, txq);
if (txq->axq_depth) {
if (txq->axq_tx_inprogress) {
- needreset = true;
ath_txq_unlock(sc, txq);
- break;
- } else {
- txq->axq_tx_inprogress = true;
+ goto reset;
}
+
+ txq->axq_tx_inprogress = true;
}
ath_txq_unlock(sc, txq);
}

- if (needreset) {
- ath_dbg(ath9k_hw_common(sc->sc_ah), RESET,
- "tx hung, resetting the chip\n");
- ath9k_queue_reset(sc, RESET_TYPE_TX_HANG);
+ return true;
+
+reset:
+ ath_dbg(ath9k_hw_common(sc->sc_ah), RESET,
+ "tx hung, resetting the chip\n");
+ ath9k_queue_reset(sc, RESET_TYPE_TX_HANG);
+ return false;
+
+}
+
+void ath_hw_check_work(struct work_struct *work)
+{
+ struct ath_softc *sc = container_of(work, struct ath_softc,
+ hw_check_work.work);
+
+ if (!ath_hw_check(sc) ||
+ !ath_tx_complete_check(sc))
return;
- }

- ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work,
- msecs_to_jiffies(ATH_TX_COMPLETE_POLL_INT));
+ ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work,
+ msecs_to_jiffies(ATH_HW_CHECK_POLL_INT));
}

/*
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -181,7 +181,7 @@ void ath9k_ps_restore(struct ath_softc *
static void __ath_cancel_work(struct ath_softc *sc)
{
cancel_work_sync(&sc->paprd_work);
- cancel_delayed_work_sync(&sc->tx_complete_work);
+ cancel_delayed_work_sync(&sc->hw_check_work);
cancel_delayed_work_sync(&sc->hw_pll_work);

#ifdef CPTCFG_ATH9K_BTCOEX_SUPPORT
@@ -198,7 +198,8 @@ void ath_cancel_work(struct ath_softc *s

void ath_restart_work(struct ath_softc *sc)
{
- ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, 0);
+ ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work,
+ ATH_HW_CHECK_POLL_INT);

if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9330(sc->sc_ah))
ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work,
@@ -2091,7 +2092,7 @@ void __ath9k_flush(struct ieee80211_hw *
int timeout;
bool drain_txq;

- cancel_delayed_work_sync(&sc->tx_complete_work);
+ cancel_delayed_work_sync(&sc->hw_check_work);

if (ah->ah_flags & AH_UNPLUGGED) {
ath_dbg(common, ANY, "Device has been unplugged!\n");
@@ -2129,7 +2130,8 @@ void __ath9k_flush(struct ieee80211_hw *
ath9k_ps_restore(sc);
}

- ieee80211_queue_delayed_work(hw, &sc->tx_complete_work, 0);
+ ieee80211_queue_delayed_work(hw, &sc->hw_check_work,
+ ATH_HW_CHECK_POLL_INT);
}

static bool ath9k_tx_frames_pending(struct ieee80211_hw *hw)
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -2915,8 +2915,6 @@ int ath_tx_init(struct ath_softc *sc, in
return error;
}

- INIT_DELAYED_WORK(&sc->tx_complete_work, ath_tx_complete_poll_work);
-
if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA)
error = ath_tx_edma_init(sc);

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Jan 2017 12:58:17 +0100
Subject: [PATCH] ath9k_hw: check if the chip failed to wake up

In an RFC patch, Sven Eckelmann and Simon Wunderlich reported:

"QCA 802.11n chips (especially AR9330/AR9340) sometimes end up in a
state in which a read of AR_CFG always returns 0xdeadbeef.
This should not happen when when the power_mode of the device is
ATH9K_PM_AWAKE."

Include the check for the default register state in the existing MAC
hang check.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -1624,6 +1624,10 @@ bool ath9k_hw_check_alive(struct ath_hw
int count = 50;
u32 reg, last_val;

+ /* Check if chip failed to wake up */
+ if (REG_READ(ah, AR_CFG) == 0xdeadbeef)
+ return false;
+
if (AR_SREV_9300(ah))
return !ath9k_hw_detect_mac_hang(ah);

Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Jan 2017 13:00:58 +0100
Subject: [PATCH] ath9k: check for deaf rx path state

Various chips occasionally run into a state where the tx path still
appears to be working normally, but the rx path is deaf.

There is no known register signature to check for this state explicitly,
so use the lack of rx interrupts as an indicator.

This detection is prone to false positives, since a device could also
simply be in an environment where there are no frames on the air.
However, in this case doing a reset should be harmless since it's
obviously not interrupting any real activity. To avoid confusion, call
the reset counters in this case "Rx path inactive" instead of something
like "Rx path deaf", since it may not be an indication of a real
hardware failure.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -1027,6 +1027,7 @@ struct ath_softc {

u8 gtt_cnt;
u32 intrstatus;
+ u32 rx_active;
u16 ps_flags; /* PS_* */
bool ps_enabled;
bool ps_idle;
--- a/drivers/net/wireless/ath/ath9k/debug.c
+++ b/drivers/net/wireless/ath/ath9k/debug.c
@@ -763,6 +763,7 @@ static int read_file_reset(struct seq_fi
[RESET_TYPE_BEACON_STUCK] = "Stuck Beacon",
[RESET_TYPE_MCI] = "MCI Reset",
[RESET_TYPE_CALIBRATION] = "Calibration error",
+ [RESET_TYPE_RX_INACTIVE] = "Rx path inactive",
[RESET_TX_DMA_ERROR] = "Tx DMA stop error",
[RESET_RX_DMA_ERROR] = "Rx DMA stop error",
};
--- a/drivers/net/wireless/ath/ath9k/debug.h
+++ b/drivers/net/wireless/ath/ath9k/debug.h
@@ -50,6 +50,7 @@ enum ath_reset_type {
RESET_TYPE_BEACON_STUCK,
RESET_TYPE_MCI,
RESET_TYPE_CALIBRATION,
+ RESET_TYPE_RX_INACTIVE,
RESET_TX_DMA_ERROR,
RESET_RX_DMA_ERROR,
__RESET_TYPE_MAX
--- a/drivers/net/wireless/ath/ath9k/link.c
+++ b/drivers/net/wireless/ath/ath9k/link.c
@@ -53,13 +53,27 @@ reset:

}

+static bool ath_rx_active_check(struct ath_softc *sc)
+{
+ if (sc->rx_active) {
+ sc->rx_active = 0;
+ return true;
+ }
+
+ ath_dbg(ath9k_hw_common(sc->sc_ah), RESET,
+ "rx path inactive, resetting the chip\n");
+ ath9k_queue_reset(sc, RESET_TYPE_RX_INACTIVE);
+ return false;
+}
+
void ath_hw_check_work(struct work_struct *work)
{
struct ath_softc *sc = container_of(work, struct ath_softc,
hw_check_work.work);

if (!ath_hw_check(sc) ||
- !ath_tx_complete_check(sc))
+ !ath_tx_complete_check(sc) ||
+ !ath_rx_active_check(sc))
return;

ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work,
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -269,6 +269,7 @@ static bool ath_complete_reset(struct at
}

sc->gtt_cnt = 0;
+ sc->rx_active = 1;

ath9k_hw_set_interrupts(ah);
ath9k_hw_enable_interrupts(ah);
@@ -452,6 +453,7 @@ void ath9k_tasklet(unsigned long data)
ath_rx_tasklet(sc, 0, true);

ath_rx_tasklet(sc, 0, false);
+ sc->rx_active = 1;
}

if (status & ATH9K_INT_TX) {
Loading

0 comments on commit b94177e

Please sign in to comment.