From f75159e9936143177b442afc78150b7a7ad8aa07 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 20 Sep 2011 01:25:41 +0000 Subject: [PATCH 01/15] ptp: fix L2 event message recognition The IEEE 1588 standard defines two kinds of messages, event and general messages. Event messages require time stamping, and general do not. When using UDP transport, two separate ports are used for the two message types. The BPF designed to recognize event messages incorrectly classifies L2 general messages as event messages. This commit fixes the issue by extending the filter to check the message type field for L2 PTP packets. Event messages are be distinguished from general messages by testing the "general" bit. Signed-off-by: Richard Cochran Cc: Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index e07e2742a865a4..1dc420ba213a52 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -51,6 +51,7 @@ #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) #define PTP_EV_PORT 319 +#define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_ETYPE 12 #define OFF_IHL 14 @@ -116,14 +117,20 @@ static inline int ptp_filter_init(struct sock_filter *f, int len) {OP_OR, 0, 0, PTP_CLASS_IPV6 }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ /*L3x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L40*/ {OP_JEQ, 0, 6, ETH_P_8021Q }, /* f goto L50 */ \ +/*L40*/ {OP_JEQ, 0, 9, ETH_P_8021Q }, /* f goto L50 */ \ {OP_LDH, 0, 0, OFF_ETYPE + 4 }, /* */ \ - {OP_JEQ, 0, 9, ETH_P_1588 }, /* f goto L60 */ \ + {OP_JEQ, 0, 15, ETH_P_1588 }, /* f goto L60 */ \ + {OP_LDB, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 12, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_VLAN }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ -/*L50*/ {OP_JEQ, 0, 4, ETH_P_1588 }, /* f goto L61 */ \ +/*L50*/ {OP_JEQ, 0, 7, ETH_P_1588 }, /* f goto L61 */ \ + {OP_LDB, 0, 0, ETH_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 4, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_L2 }, /* */ \ From 02715ed2e7b71569ed6e61f0b23b0736a509913d Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 20 Sep 2011 01:25:42 +0000 Subject: [PATCH 02/15] dp83640: reduce driver noise The driver has two warning messages that might be triggered by normal use cases. When they appear, the messages give the impression of a never ending series of errors. This commit changes them to debug messages instead. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index cb6e0b486b1e22..edd7304773eb87 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -589,7 +589,7 @@ static void decode_rxts(struct dp83640_private *dp83640, prune_rx_ts(dp83640); if (list_empty(&dp83640->rxpool)) { - pr_warning("dp83640: rx timestamp pool is empty\n"); + pr_debug("dp83640: rx timestamp pool is empty\n"); goto out; } rxts = list_first_entry(&dp83640->rxpool, struct rxts, list); @@ -612,7 +612,7 @@ static void decode_txts(struct dp83640_private *dp83640, skb = skb_dequeue(&dp83640->tx_queue); if (!skb) { - pr_warning("dp83640: have timestamp but tx_queue empty\n"); + pr_debug("dp83640: have timestamp but tx_queue empty\n"); return; } ns = phy2txts(phy_txts); From d3c52173be57ec920deda28923d80b68e5630594 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 21 Sep 2011 22:08:26 +0000 Subject: [PATCH 03/15] MAINTAINERS: tehuti: Alexander Indenbaum's address bounces I got: Generating server: Tehuti.onmicrosoft.com baum@tehutinetworks.net #< #5.1.1 smtp;550 5.1.1 RESOLVER.ADR.RecipNotFound; not found> #SMTP# Signed-off-by: Ian Campbell Cc: Alexander Indenbaum Cc: Andy Gospodarek Cc: Andrew Morton Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ae8820e173a217..ace8f9c81b96a1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6374,7 +6374,6 @@ S: Supported F: arch/arm/mach-tegra TEHUTI ETHERNET DRIVER -M: Alexander Indenbaum M: Andy Gospodarek L: netdev@vger.kernel.org S: Supported From aabdcb0b553b9c9547b1a506b34d55a764745870 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 23 Sep 2011 08:23:47 +0000 Subject: [PATCH 04/15] can bcm: fix tx_setup off-by-one errors This patch fixes two off-by-one errors that canceled each other out. Checking for the same condition two times in bcm_tx_timeout_tsklet() reduced the count of frames to be sent by one. This did not show up the first time tx_setup is invoked as an additional frame is sent due to TX_ANNONCE. Invoking a second tx_setup on the same item led to a reduced (by 1) number of sent frames. Reported-by: Andre Naujoks Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index d6c8ae5b2e6a33..c9cdb8d78e7088 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -365,9 +365,6 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - } - - if (op->kt_ival1.tv64 && (op->count > 0)) { /* send (next) frame */ bcm_can_tx(op); @@ -970,8 +967,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - if (op->kt_ival1.tv64 && (op->count > 0)) { - /* op->count-- is done in bcm_tx_timeout_handler */ + /* only start timer when having more frames than sent below */ + if (op->kt_ival1.tv64 && (op->count > 1)) { + /* op->count-- is done in bcm_tx_timeout_tsklet */ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } else @@ -979,8 +977,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, HRTIMER_MODE_REL); } - if (op->flags & TX_ANNOUNCE) + if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); + if (op->kt_ival1.tv64 && (op->count > 0)) + op->count--; + } return msg_head->nframes * CFSIZ + MHSIZ; } From 6482aa7c120447858da1869197b48eff66435a31 Mon Sep 17 00:00:00 2001 From: Divy Le Ray Date: Sat, 24 Sep 2011 06:11:31 +0000 Subject: [PATCH 05/15] cxgb4: Fix EEH on IBM P7IOC Fix EEH recovery on new P Series platform by requesting fundamental reset. Signed-off-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb4/cxgb4_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c index c9957b7f17b57f..b4efa292fd6fe7 100644 --- a/drivers/net/cxgb4/cxgb4_main.c +++ b/drivers/net/cxgb4/cxgb4_main.c @@ -3712,6 +3712,9 @@ static int __devinit init_one(struct pci_dev *pdev, setup_debugfs(adapter); } + /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ + pdev->needs_freset = 1; + if (is_offload(adapter)) attach_ulds(adapter); From 676a1184e8afd4fed7948232df1ff91517400859 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 25 Sep 2011 02:21:30 +0000 Subject: [PATCH 06/15] ipv6: nullify ipv6_ac_list and ipv6_fl_list when creating new socket ipv6_ac_list and ipv6_fl_list from listening socket are inadvertently shared with new socket created for connection. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3c9fa618b69daa..79cc6469508d36 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1383,6 +1383,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1447,6 +1449,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ From 95de86cf5162f78bc5aea80d1a9e5a248196ffaf Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 28 Sep 2011 05:33:43 +0000 Subject: [PATCH 07/15] ibmveth: Fix oops on request_irq failure If request_irq fails, the ibmveth driver will overwrite the rc and end up returning a successful rc on its open function, resulting in an oops later when a packet gets sent and buffers are not allocated due to the failed open. Signed-off-by: Brian King Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 8dd5fccef72529..d393f1e764eda8 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -636,8 +636,8 @@ static int ibmveth_open(struct net_device *netdev) netdev_err(netdev, "unable to request irq 0x%x, rc %d\n", netdev->irq, rc); do { - rc = h_free_logical_lan(adapter->vdev->unit_address); - } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); goto err_out; } From 605b91c8f619047ed6d88e24c531d4bdddad46f4 Mon Sep 17 00:00:00 2001 From: "Roy.Li" Date: Wed, 28 Sep 2011 19:51:54 +0000 Subject: [PATCH 08/15] net: Documentation: Fix type of variables Signed-off-by: Roy.Li Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 81546990f41ca1..ca5cdcd0f0e368 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1042,7 +1042,7 @@ conf/interface/*: The functional behaviour for certain settings is different depending on whether local forwarding is enabled or not. -accept_ra - BOOLEAN +accept_ra - INTEGER Accept Router Advertisements; autoconfigure using them. Possible values are: @@ -1106,7 +1106,7 @@ dad_transmits - INTEGER The amount of Duplicate Address Detection probes to send. Default: 1 -forwarding - BOOLEAN +forwarding - INTEGER Configure interface-specific Host/Router behaviour. Note: It is recommended to have the same setting on all From 85a64889492b45f931ddac87ec09d84aa7347ee1 Mon Sep 17 00:00:00 2001 From: Jonathan Lallinger Date: Thu, 29 Sep 2011 07:58:41 +0000 Subject: [PATCH 09/15] RDSRDMA: Fix cleanup of rds_iw_mr_pool In the rds_iw_mr_pool struct the free_pinned field keeps track of memory pinned by free MRs. While this field is incremented properly upon allocation, it is never decremented upon unmapping. This would cause the rds_rdma module to crash the kernel upon unloading, by triggering the BUG_ON in the rds_iw_destroy_mr_pool function. This change keeps track of the MRs that become unpinned, so that free_pinned can be decremented appropriately. Signed-off-by: Jonathan Lallinger Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- net/rds/iw_rdma.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 8b77edbab27228..4e1de171866c7a 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list); + struct list_head *kill_list, + int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) @@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) * will be destroyed by the unmap function. */ if (!list_empty(&unmap_list)) { - ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, + &kill_list, &unpinned); /* If we've been asked to destroy all MRs, move those * that were simply cleaned to the kill list */ if (free_all) @@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) spin_unlock_irqrestore(&pool->list_lock, flags); } + atomic_sub(unpinned, &pool->free_pinned); atomic_sub(ncleaned, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); @@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list) + struct list_head *kill_list, + int *unpinned) { struct rds_iw_mapping *mapping, *next; unsigned int ncleaned = 0; @@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, spin_lock_irqsave(&pool->list_lock, flags); list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + *unpinned += mapping->m_sg.len; list_move(&mapping->m_list, &laundered); ncleaned++; } From 12d0d0d3a7349daa95dbfd5d7df8146255bc7c67 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 29 Sep 2011 15:33:47 -0400 Subject: [PATCH 10/15] can bcm: fix incomplete tx_setup fix The commit aabdcb0b553b9c9547b1a506b34d55a764745870 ("can bcm: fix tx_setup off-by-one errors") fixed only a part of the original problem reported by Andre Naujoks. It turned out that the original code needed to be re-ordered to reduce complexity and to finally fix the reported frame counting issues. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index c9cdb8d78e7088..c84963d2dee695 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_start_timer(struct bcm_op *op) +{ + if (op->kt_ival1.tv64 && op->count) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); + else if (op->kt_ival2.tv64) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); +} + static void bcm_tx_timeout_tsklet(unsigned long data) { struct bcm_op *op = (struct bcm_op *)data; @@ -365,23 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - - /* send (next) frame */ bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); - } else { - if (op->kt_ival2.tv64) { + } else if (op->kt_ival2.tv64) + bcm_can_tx(op); - /* send (next) frame */ - bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); - } - } + bcm_tx_start_timer(op); } /* @@ -961,28 +962,21 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_cancel(&op->timer); } - if ((op->flags & STARTTIMER) && - ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { - + if (op->flags & STARTTIMER) { + hrtimer_cancel(&op->timer); /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - - /* only start timer when having more frames than sent below */ - if (op->kt_ival1.tv64 && (op->count > 1)) { - /* op->count-- is done in bcm_tx_timeout_tsklet */ - hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); - } else - hrtimer_start(&op->timer, op->kt_ival2, - HRTIMER_MODE_REL); } if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); - if (op->kt_ival1.tv64 && (op->count > 0)) + if (op->count) op->count--; } + if (op->flags & STARTTIMER) + bcm_tx_start_timer(op); + return msg_head->nframes * CFSIZ + MHSIZ; } From a0db2dad0935e798973bb79676e722b82f177206 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Fri, 23 Sep 2011 10:53:34 +0000 Subject: [PATCH 11/15] bonding: properly stop queuing work when requested During a test where a pair of bonding interfaces using ARP monitoring were both brought up and torn down (with an rmmod) repeatedly, a panic in the timer code was noticed. I tracked this down and determined that any of the bonding functions that ran as workqueue handlers and requeued more work might not properly exit when the module was removed. There was a flag protected by the bond lock called kill_timers that is set when the interface goes down or the module is removed, but many of the functions that monitor link status now unlock the bond lock to take rtnl first. There is a chance that another CPU running the rmmod could get the lock and set kill_timers after the first check has passed. This patch does not allow any function to queue work that will make itself run unless kill_timers is not set. I also noticed while doing this work that bond_resend_igmp_join_requests did not have a check for kill_timers, so I added the needed call there as well. Signed-off-by: Andy Gospodarek Reported-by: Liang Zheng Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 3 ++- drivers/net/bonding/bond_alb.c | 3 ++- drivers/net/bonding/bond_main.c | 13 ++++++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a047eb973e3bc6..47b928ed08f8a0 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2168,7 +2168,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } re_arm: - queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); + if (!bond->kill_timers) + queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); out: read_unlock(&bond->lock); } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7f8b20a34ee344..d4fbd2e62616cf 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1440,7 +1440,8 @@ void bond_alb_monitor(struct work_struct *work) } re_arm: - queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); + if (!bond->kill_timers) + queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); out: read_unlock(&bond->lock); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 43f2ea5410884a..6d79b78cfc7559 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -777,6 +777,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) read_lock(&bond->lock); + if (bond->kill_timers) + goto out; + /* rejoin all groups on bond device */ __bond_resend_igmp_join_requests(bond->dev); @@ -790,9 +793,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) __bond_resend_igmp_join_requests(vlan_dev); } - if (--bond->igmp_retrans > 0) + if ((--bond->igmp_retrans > 0) && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - +out: read_unlock(&bond->lock); } @@ -2538,7 +2541,7 @@ void bond_mii_monitor(struct work_struct *work) } re_arm: - if (bond->params.miimon) + if (bond->params.miimon && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->mii_work, msecs_to_jiffies(bond->params.miimon)); out: @@ -2886,7 +2889,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) } re_arm: - if (bond->params.arp_interval) + if (bond->params.arp_interval && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); out: read_unlock(&bond->lock); @@ -3154,7 +3157,7 @@ void bond_activebackup_arp_mon(struct work_struct *work) bond_ab_arp_probe(bond); re_arm: - if (bond->params.arp_interval) + if (bond->params.arp_interval && !bond->kill_timers) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); out: read_unlock(&bond->lock); From d0e5d83284dac15c015bb48115b6780f5a6413cd Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 30 Sep 2011 06:37:51 +0000 Subject: [PATCH 12/15] net: xen-netback: correctly restart Tx after a VM restore/migrate If a VM is saved and restored (or migrated) the netback driver will no longer process any Tx packets from the frontend. xenvif_up() does not schedule the processing of any pending Tx requests from the front end because the carrier is off. Without this initial kick the frontend just adds Tx requests to the ring without raising an event (until the ring is full). This was caused by 47103041e91794acdbc6165da0ae288d844c820b (net: xen-netback: convert to hw_features) which reordered the calls to xenvif_up() and netif_carrier_on() in xenvif_connect(). Signed-off-by: David Vrabel Cc: Ian Campbell Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 0ca86f9ec4ed38..182562952c792a 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -327,12 +327,12 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, xenvif_get(vif); rtnl_lock(); - if (netif_running(vif->dev)) - xenvif_up(vif); if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) dev_set_mtu(vif->dev, ETH_DATA_LEN); netdev_update_features(vif->dev); netif_carrier_on(vif->dev); + if (netif_running(vif->dev)) + xenvif_up(vif); rtnl_unlock(); return 0; From 7091fbd82cd5686444ffe9935ed6a8190101fe9d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Sep 2011 10:38:28 +0000 Subject: [PATCH 13/15] make PACKET_STATISTICS getsockopt report consistently between ring and non-ring This is a minor change. Up until kernel 2.6.32, getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, ...) would return total and dropped packets since its last invocation. The introduction of socket queue overflow reporting [1] changed drop rate calculation in the normal packet socket path, but not when using a packet ring. As a result, the getsockopt now returns different statistics depending on the reception method used. With a ring, it still returns the count since the last call, as counts are incremented in tpacket_rcv and reset in getsockopt. Without a ring, it returns 0 if no drops occurred since the last getsockopt and the total drops over the lifespan of the socket otherwise. The culprit is this line in packet_rcv, executed on a drop: drop_n_acct: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); As it shows, the new drop number it taken from the socket drop counter, which is not reset at getsockopt. I put together a small example that demonstrates the issue [2]. It runs for 10 seconds and overflows the queue/ring on every odd second. The reported drop rates are: ring: 16, 0, 16, 0, 16, ... non-ring: 0, 15, 0, 30, 0, 46, 0, 60, 0 , 74. Note how the even ring counts monotonically increase. Because the getsockopt adds tp_drops to tp_packets, total counts are similarly reported cumulatively. Long story short, reinstating the original code, as the below patch does, fixes the issue at the cost of additional per-packet cycles. Another solution that does not introduce per-packet overhead is be to keep the current data path, record the value of sk_drops at getsockopt() at call N in a new field in struct packetsock and subtract that when reporting at call N+1. I'll be happy to code that, instead, it's just more messy. [1] http://patchwork.ozlabs.org/patch/35665/ [2] http://kernel.googlecode.com/files/test-packetsock-getstatistics.c Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c698cec0a44541..fabb4fafa281ce 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -961,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); + spin_lock(&sk->sk_receive_queue.lock); + po->stats.tp_drops++; + atomic_inc(&sk->sk_drops); + spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { From 5f3a11419099d5cc010cfbfc524ca10d8fb81f89 Mon Sep 17 00:00:00 2001 From: Toshiharu Okada Date: Sun, 25 Sep 2011 21:27:42 +0000 Subject: [PATCH 14/15] pch_gbe: Fixed the issue on which PC was frozen when link was downed. When a link was downed during network use, there is an issue on which PC freezes. This patch fixed this issue. Signed-off-by: Toshiharu Okada Signed-off-by: David S. Miller --- drivers/net/pch_gbe/pch_gbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 567ff10889be62..5474189d320be9 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -2195,7 +2195,7 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) /* If no Tx and not enough Rx work done, * exit the polling mode */ - if ((work_done < budget) || !netif_running(netdev)) + if (work_done < budget) poll_end_flag = true; } From 805e969f6151eda7bc1a57e9c737054230acc3cc Mon Sep 17 00:00:00 2001 From: Toshiharu Okada Date: Sun, 25 Sep 2011 21:27:43 +0000 Subject: [PATCH 15/15] pch_gbe: Fixed the issue on which a network freezes The pch_gbe driver has an issue which a network stops, when receiving traffic is high. In the case, The link down and up are necessary to return a network. This patch fixed this issue. Signed-off-by: Toshiharu Okada Signed-off-by: David S. Miller --- drivers/net/pch_gbe/pch_gbe_main.c | 56 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 5474189d320be9..b8b4ba27b0e7ca 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -1199,6 +1199,8 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) iowrite32((int_en & ~PCH_GBE_INT_RX_FIFO_ERR), &hw->reg->INT_EN); pch_gbe_stop_receive(adapter); + int_st |= ioread32(&hw->reg->INT_ST); + int_st = int_st & ioread32(&hw->reg->INT_EN); } if (int_st & PCH_GBE_INT_RX_DMA_ERR) adapter->stats.intr_rx_dma_err_count++; @@ -1218,14 +1220,11 @@ static irqreturn_t pch_gbe_intr(int irq, void *data) /* Set Pause packet */ pch_gbe_mac_set_pause_packet(hw); } - if ((int_en & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT)) - == 0) { - return IRQ_HANDLED; - } } /* When request status is Receive interruption */ - if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT))) { + if ((int_st & (PCH_GBE_INT_RX_DMA_CMPLT | PCH_GBE_INT_TX_CMPLT)) || + (adapter->rx_stop_flag == true)) { if (likely(napi_schedule_prep(&adapter->napi))) { /* Enable only Rx Descriptor empty */ atomic_inc(&adapter->irq_sem); @@ -1385,7 +1384,7 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, struct sk_buff *skb; unsigned int i; unsigned int cleaned_count = 0; - bool cleaned = false; + bool cleaned = true; pr_debug("next_to_clean : %d\n", tx_ring->next_to_clean); @@ -1396,7 +1395,6 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, while ((tx_desc->gbec_status & DSC_INIT16) == 0x0000) { pr_debug("gbec_status:0x%04x\n", tx_desc->gbec_status); - cleaned = true; buffer_info = &tx_ring->buffer_info[i]; skb = buffer_info->skb; @@ -1439,8 +1437,10 @@ pch_gbe_clean_tx(struct pch_gbe_adapter *adapter, tx_desc = PCH_GBE_TX_DESC(*tx_ring, i); /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (cleaned_count++ == PCH_GBE_TX_WEIGHT) + if (cleaned_count++ == PCH_GBE_TX_WEIGHT) { + cleaned = false; break; + } } pr_debug("called pch_gbe_unmap_and_free_tx_resource() %d count\n", cleaned_count); @@ -2168,7 +2168,6 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) { struct pch_gbe_adapter *adapter = container_of(napi, struct pch_gbe_adapter, napi); - struct net_device *netdev = adapter->netdev; int work_done = 0; bool poll_end_flag = false; bool cleaned = false; @@ -2176,33 +2175,32 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget) pr_debug("budget : %d\n", budget); - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(netdev)) { + pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); + cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); + + if (!cleaned) + work_done = budget; + /* If no Tx and not enough Rx work done, + * exit the polling mode + */ + if (work_done < budget) poll_end_flag = true; - } else { - pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget); + + if (poll_end_flag) { + napi_complete(napi); + if (adapter->rx_stop_flag) { + adapter->rx_stop_flag = false; + pch_gbe_start_receive(&adapter->hw); + } + pch_gbe_irq_enable(adapter); + } else if (adapter->rx_stop_flag) { adapter->rx_stop_flag = false; pch_gbe_start_receive(&adapter->hw); int_en = ioread32(&adapter->hw.reg->INT_EN); iowrite32((int_en | PCH_GBE_INT_RX_FIFO_ERR), - &adapter->hw.reg->INT_EN); + &adapter->hw.reg->INT_EN); } - cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring); - - if (cleaned) - work_done = budget; - /* If no Tx and not enough Rx work done, - * exit the polling mode - */ - if (work_done < budget) - poll_end_flag = true; - } - - if (poll_end_flag) { - napi_complete(napi); - pch_gbe_irq_enable(adapter); - } pr_debug("poll_end_flag : %d work_done : %d budget : %d\n", poll_end_flag, work_done, budget);