Skip to content

Commit

Permalink
kernel: improve mtk ppe flow accounting
Browse files Browse the repository at this point in the history
Properly track L2 flows, and ensure that stale data gets cleared

Signed-off-by: Felix Fietkau <nbd@nbd.name>
  • Loading branch information
nbd168 committed Mar 23, 2023
1 parent d0a0696 commit aa27771
Show file tree
Hide file tree
Showing 3 changed files with 655 additions and 0 deletions.
@@ -0,0 +1,314 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 23 Mar 2023 10:24:11 +0100
Subject: [PATCH] net: ethernet: mtk_eth_soc: improve keeping track of
offloaded flows

Unify tracking of L2 and L3 flows. Use the generic list field in struct
mtk_foe_entry for tracking L2 subflows. Preparation for improving
flow accounting support.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -466,26 +466,30 @@ int mtk_foe_entry_set_queue(struct mtk_e
return 0;
}

+static int
+mtk_flow_entry_match_len(struct mtk_eth *eth, struct mtk_foe_entry *entry)
+{
+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
+
+ if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return offsetof(struct mtk_foe_entry, ipv6._rsv);
+ else
+ return offsetof(struct mtk_foe_entry, ipv4.ib2);
+}
+
static bool
mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry,
- struct mtk_foe_entry *data)
+ struct mtk_foe_entry *data, int len)
{
- int type, len;
-
if ((data->ib1 ^ entry->data.ib1) & MTK_FOE_IB1_UDP)
return false;

- type = mtk_get_ib1_pkt_type(eth, entry->data.ib1);
- if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE)
- len = offsetof(struct mtk_foe_entry, ipv6._rsv);
- else
- len = offsetof(struct mtk_foe_entry, ipv4.ib2);
-
return !memcmp(&entry->data.data, &data->data, len - 4);
}

static void
-__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
+ bool set_state)
{
struct hlist_head *head;
struct hlist_node *tmp;
@@ -495,13 +499,12 @@ __mtk_foe_entry_clear(struct mtk_ppe *pp
mtk_flow_l2_ht_params);

head = &entry->l2_flows;
- hlist_for_each_entry_safe(entry, tmp, head, l2_data.list)
- __mtk_foe_entry_clear(ppe, entry);
+ hlist_for_each_entry_safe(entry, tmp, head, list)
+ __mtk_foe_entry_clear(ppe, entry, set_state);
return;
}

- hlist_del_init(&entry->list);
- if (entry->hash != 0xffff) {
+ if (entry->hash != 0xffff && set_state) {
struct mtk_foe_entry *hwe = mtk_foe_get_entry(ppe, entry->hash);

hwe->ib1 &= ~MTK_FOE_IB1_STATE;
@@ -520,7 +523,7 @@ __mtk_foe_entry_clear(struct mtk_ppe *pp
if (entry->type != MTK_FLOW_TYPE_L2_SUBFLOW)
return;

- hlist_del_init(&entry->l2_data.list);
+ hlist_del_init(&entry->list);
kfree(entry);
}

@@ -536,66 +539,55 @@ static int __mtk_foe_entry_idle_time(str
return now - timestamp;
}

+static bool
+mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ struct mtk_foe_entry foe = {};
+ struct mtk_foe_entry *hwe;
+ u16 hash = entry->hash;
+ int len;
+
+ if (hash == 0xffff)
+ return false;
+
+ hwe = mtk_foe_get_entry(ppe, hash);
+ len = mtk_flow_entry_match_len(ppe->eth, &entry->data);
+ memcpy(&foe, hwe, len);
+
+ if (!mtk_flow_entry_match(ppe->eth, entry, &foe, len) ||
+ FIELD_GET(MTK_FOE_IB1_STATE, foe.ib1) != MTK_FOE_STATE_BIND)
+ return false;
+
+ entry->data.ib1 = foe.ib1;
+
+ return true;
+}
+
static void
mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
{
u32 ib1_ts_mask = mtk_get_ib1_ts_mask(ppe->eth);
struct mtk_flow_entry *cur;
- struct mtk_foe_entry *hwe;
struct hlist_node *tmp;
int idle;

idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
- hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, l2_data.list) {
+ hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, list) {
int cur_idle;
- u32 ib1;
-
- hwe = mtk_foe_get_entry(ppe, cur->hash);
- ib1 = READ_ONCE(hwe->ib1);

- if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) {
- cur->hash = 0xffff;
- __mtk_foe_entry_clear(ppe, cur);
+ if (!mtk_flow_entry_update(ppe, cur)) {
+ __mtk_foe_entry_clear(ppe, entry, false);
continue;
}

- cur_idle = __mtk_foe_entry_idle_time(ppe, ib1);
+ cur_idle = __mtk_foe_entry_idle_time(ppe, cur->data.ib1);
if (cur_idle >= idle)
continue;

idle = cur_idle;
entry->data.ib1 &= ~ib1_ts_mask;
- entry->data.ib1 |= hwe->ib1 & ib1_ts_mask;
- }
-}
-
-static void
-mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
-{
- struct mtk_foe_entry foe = {};
- struct mtk_foe_entry *hwe;
-
- spin_lock_bh(&ppe_lock);
-
- if (entry->type == MTK_FLOW_TYPE_L2) {
- mtk_flow_entry_update_l2(ppe, entry);
- goto out;
+ entry->data.ib1 |= cur->data.ib1 & ib1_ts_mask;
}
-
- if (entry->hash == 0xffff)
- goto out;
-
- hwe = mtk_foe_get_entry(ppe, entry->hash);
- memcpy(&foe, hwe, ppe->eth->soc->foe_entry_size);
- if (!mtk_flow_entry_match(ppe->eth, entry, &foe)) {
- entry->hash = 0xffff;
- goto out;
- }
-
- entry->data.ib1 = foe.ib1;
-
-out:
- spin_unlock_bh(&ppe_lock);
}

static void
@@ -632,7 +624,8 @@ __mtk_foe_entry_commit(struct mtk_ppe *p
void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
{
spin_lock_bh(&ppe_lock);
- __mtk_foe_entry_clear(ppe, entry);
+ __mtk_foe_entry_clear(ppe, entry, true);
+ hlist_del_init(&entry->list);
spin_unlock_bh(&ppe_lock);
}

@@ -679,8 +672,8 @@ mtk_foe_entry_commit_subflow(struct mtk_
{
const struct mtk_soc_data *soc = ppe->eth->soc;
struct mtk_flow_entry *flow_info;
- struct mtk_foe_entry foe = {}, *hwe;
struct mtk_foe_mac_info *l2;
+ struct mtk_foe_entry *hwe;
u32 ib1_mask = mtk_get_ib1_pkt_type_mask(ppe->eth) | MTK_FOE_IB1_UDP;
int type;

@@ -688,30 +681,30 @@ mtk_foe_entry_commit_subflow(struct mtk_
if (!flow_info)
return;

- flow_info->l2_data.base_flow = entry;
flow_info->type = MTK_FLOW_TYPE_L2_SUBFLOW;
flow_info->hash = hash;
hlist_add_head(&flow_info->list,
&ppe->foe_flow[hash / soc->hash_offset]);
- hlist_add_head(&flow_info->l2_data.list, &entry->l2_flows);
+ hlist_add_head(&flow_info->list, &entry->l2_flows);

hwe = mtk_foe_get_entry(ppe, hash);
- memcpy(&foe, hwe, soc->foe_entry_size);
- foe.ib1 &= ib1_mask;
- foe.ib1 |= entry->data.ib1 & ~ib1_mask;
+ memcpy(&flow_info->data, hwe, soc->foe_entry_size);
+ flow_info->data.ib1 &= ib1_mask;
+ flow_info->data.ib1 |= entry->data.ib1 & ~ib1_mask;

- l2 = mtk_foe_entry_l2(ppe->eth, &foe);
+ l2 = mtk_foe_entry_l2(ppe->eth, &flow_info->data);
memcpy(l2, &entry->data.bridge.l2, sizeof(*l2));

- type = mtk_get_ib1_pkt_type(ppe->eth, foe.ib1);
+ type = mtk_get_ib1_pkt_type(ppe->eth, flow_info->data.ib1);
if (type == MTK_PPE_PKT_TYPE_IPV4_HNAPT)
- memcpy(&foe.ipv4.new, &foe.ipv4.orig, sizeof(foe.ipv4.new));
+ memcpy(&flow_info->data.ipv4.new, &flow_info->data.ipv4.orig,
+ sizeof(flow_info->data.ipv4.new));
else if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T && l2->etype == ETH_P_IP)
l2->etype = ETH_P_IPV6;

- *mtk_foe_entry_ib2(ppe->eth, &foe) = entry->data.bridge.ib2;
+ *mtk_foe_entry_ib2(ppe->eth, &flow_info->data) = entry->data.bridge.ib2;

- __mtk_foe_entry_commit(ppe, &foe, hash);
+ __mtk_foe_entry_commit(ppe, &flow_info->data, hash);
}

void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
@@ -721,9 +714,11 @@ void __mtk_ppe_check_skb(struct mtk_ppe
struct mtk_foe_entry *hwe = mtk_foe_get_entry(ppe, hash);
struct mtk_flow_entry *entry;
struct mtk_foe_bridge key = {};
+ struct mtk_foe_entry foe = {};
struct hlist_node *n;
struct ethhdr *eh;
bool found = false;
+ int entry_len;
u8 *tag;

spin_lock_bh(&ppe_lock);
@@ -731,20 +726,14 @@ void __mtk_ppe_check_skb(struct mtk_ppe
if (FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == MTK_FOE_STATE_BIND)
goto out;

- hlist_for_each_entry_safe(entry, n, head, list) {
- if (entry->type == MTK_FLOW_TYPE_L2_SUBFLOW) {
- if (unlikely(FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) ==
- MTK_FOE_STATE_BIND))
- continue;
-
- entry->hash = 0xffff;
- __mtk_foe_entry_clear(ppe, entry);
- continue;
- }
+ entry_len = mtk_flow_entry_match_len(ppe->eth, hwe);
+ memcpy(&foe, hwe, entry_len);

- if (found || !mtk_flow_entry_match(ppe->eth, entry, hwe)) {
+ hlist_for_each_entry_safe(entry, n, head, list) {
+ if (found ||
+ !mtk_flow_entry_match(ppe->eth, entry, &foe, entry_len)) {
if (entry->hash != 0xffff)
- entry->hash = 0xffff;
+ __mtk_foe_entry_clear(ppe, entry, false);
continue;
}

@@ -795,9 +784,17 @@ out:

int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
{
- mtk_flow_entry_update(ppe, entry);
+ int idle;
+
+ spin_lock_bh(&ppe_lock);
+ if (entry->type == MTK_FLOW_TYPE_L2)
+ mtk_flow_entry_update_l2(ppe, entry);
+ else
+ mtk_flow_entry_update(ppe, entry);
+ idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
+ spin_unlock_bh(&ppe_lock);

- return __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
+ return idle;
}

int mtk_ppe_prepare_reset(struct mtk_ppe *ppe)
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -275,13 +275,7 @@ struct mtk_flow_entry {
s8 wed_index;
u8 ppe_index;
u16 hash;
- union {
- struct mtk_foe_entry data;
- struct {
- struct mtk_flow_entry *base_flow;
- struct hlist_node list;
- } l2_data;
- };
+ struct mtk_foe_entry data;
struct rhash_head node;
unsigned long cookie;
};

3 comments on commit aa27771

@Fail-Safe
Copy link
Contributor

@Fail-Safe Fail-Safe commented on aa27771 Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nbd168 I started experiencing a regression after four of the commits you merged yesterday. I tested each of those commits in order, bisecting to find the culprit:

I'm confident this commit (aa27771) is the one that caused the regression I'm experiencing. On my RT3200 with WED enabled, I can consistently cause it to crash when offloaded flows go into the dozens.

My test for this is to log into the Netflix website, then move my mouse from one artwork tile to the next. Netflix begins pre-loading each show/movie's trailer to display in the pop-up, which opens multiple connections to many different IP addresses. As the flow count keeps increasing, my RT3200 locks up and crashes. It does not reboot into recovery, so I am unable to capture a stack trace, but it happens consistently whenever I "stress test" it in the fashion described.

I'm happy to troubleshoot/test any potential patches if you have ideas.

@Fail-Safe
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nbd168 I was able to score a crash dump for you after this issue happened again:

<1>[   84.515285] Unable to handle kernel paging request at virtual address 00010102464c457f
<1>[   84.523230] Mem abort info:
<1>[   84.526015]   ESR = 0x0000000096000004
<1>[   84.529766]   EC = 0x25: DABT (current EL), IL = 32 bits
<1>[   84.535072]   SET = 0, FnV = 0
<1>[   84.538136]   EA = 0, S1PTW = 0
<1>[   84.541274]   FSC = 0x04: level 0 translation fault
<1>[   84.546143] Data abort info:
<1>[   84.549033]   ISV = 0, ISS = 0x00000004
<1>[   84.552863]   CM = 0, WnR = 0
<1>[   84.555822] [00010102464c457f] address between user and kernel address ranges
<0>[   84.562967] Internal error: Oops: 96000004 [#1] SMP
<7>[   84.567843] Modules linked in: nft_fib_inet nf_flow_table_ipv6 nf_flow_table_ipv4 nf_flow_table_inet nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_objref nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_counter nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack mt7915e mt7615e mt7615_common mt76_connac_lib mt76 mac80211 cfg80211 nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c hwmon compat cls_flower act_vlan cls_bpf act_bpf sch_tbf sch_ingress sch_htb sch_hfsc em_u32 cls_u32 cls_route cls_matchall cls_fw cls_flow cls_basic act_skbedit act_mirred act_gact cryptodev autofs4 seqiv authencesn authenc leds_gpio gpio_button_hotplug
<7>[   84.637105] CPU: 1 PID: 343 Comm: napi/mtk_eth-4 Tainted: G S                5.15.102 #0
<7>[   84.645192] Hardware name: Linksys E8450 (UBI) (DT)
<7>[   84.650060] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
<7>[   84.657015] pc : __mtk_ppe_check_skb+0x10c/0x55c
<7>[   84.661634] lr : __mtk_ppe_check_skb+0x154/0x55c
<7>[   84.666244] sp : ffffffc00942bb30
<7>[   84.669549] x29: ffffffc00942bb30 x28: 0000000000000000 x27: ffffff80065ffd00
<7>[   84.676682] x26: 000000000000ffff x25: 0000000000000028 x24: ffffff80056b4780
<7>[   84.683814] x23: ffffffc0085cf7e0 x22: 00010102464c457f x21: ffffffc008c05000
<7>[   84.690944] x20: 00000000000008f4 x19: ffffff8000d00080 x18: 0000000000000001
<7>[   84.698075] x17: 0000000000003300 x16: ffffffc0091f1000 x15: ffffff80027f0886
<7>[   84.705205] x14: 00000000428684c4 x13: 01bbed2234865a94 x12: 1c3d0b440994d43c
<7>[   84.712335] x11: 2600170000000186 x10: 0000000020632063 x9 : 1c3d0b440994d43c
<7>[   84.719466] x8 : 2600170000000186 x7 : 0000000020632063 x6 : 2a0086c01a001e53
<7>[   84.726597] x5 : c086002a63206320 x4 : 0000000000068808 x3 : 000000000000ffff
<7>[   84.733727] x2 : 0000000000000001 x1 : 0000000000000003 x0 : 00000000ffffffff
<7>[   84.740859] Call trace:
<7>[   84.743296]  __mtk_ppe_check_skb+0x10c/0x55c
<7>[   84.747560]  mtk_poll_rx+0x93c/0xa5c
<7>[   84.751128]  mtk_napi_rx+0x90/0x180
<7>[   84.754610]  __napi_poll+0x54/0x1b0
<7>[   84.758096]  napi_threaded_poll+0x84/0xe4
<7>[   84.762098]  kthread+0x11c/0x130
<7>[   84.765320]  ret_from_fork+0x10/0x20
<0>[   84.768893] Code: f8767b16 529ffffa b40002d6 d503201f (f94002d8)
<4>[   84.774978] ---[ end trace 8222ee3554dbe808 ]---
<0>[   84.786025] Kernel panic - not syncing: Oops: Fatal exception in interrupt
<2>[   84.792896] SMP: stopping secondary CPUs
<0>[   84.796814] Kernel Offset: disabled
<0>[   84.800291] CPU features: 0x0,00006000,00000802
<0>[   84.804814] Memory Limit: none

@nbd168
Copy link
Member Author

@nbd168 nbd168 commented on aa27771 Mar 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Fail-Safe Based on your crash log, I found a bug in the code that seems very much related.
Please try the latest version and let me know if it works now.
Thanks!

Please sign in to comment.