Update OFED to Linux 3.7 and update Mellanox drivers.

Update the OFED InfiniBand core to the version supplied with Linux 3.7.

The update to OFED consists almost entirely of new defines and functions;
the only interface changes are additional parameters to
ib_register_device() and to the reg_user_mr callback.
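
For reference, the two interface changes look roughly like this (a sketch
assembled from the hunks below, using the parameter names seen in the
iw_cxgb and uverbs hunks; not the full declarations):

    /*
     * ib_register_device() now takes a per-port sysfs callback; drivers
     * with no extra per-port files simply pass NULL, e.g.
     * ib_register_device(&dev->ibdev, NULL).
     */
    int ib_register_device(struct ib_device *device,
                           int (*port_callback)(struct ib_device *,
                                                u8, struct kobject *));

    /*
     * The reg_user_mr provider callback gains a trailing mr_id argument;
     * the uverbs caller below passes 0 for it.
     */
    struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
                                 u64 virt, int acc, struct ib_udata *udata,
                                 int mr_id);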

In addition, ibcore (the InfiniBand core) and ipoib (IP over InfiniBand)
have both been made fully loadable modules to facilitate testing of the
OFED stack on FreeBSD.
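
The conversion adds standard FreeBSD module glue (visible in the device.c
hunk below, which introduces a stub event handler, DECLARE_MODULE() and
MODULE_VERSION() for ibcore); once built, the modules can be loaded for
testing with kldload(8). A minimal sketch of that mechanism, using
hypothetical "example" names and a handler that actually dispatches on
load/unload, would look like this:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/module.h>
    #include <sys/errno.h>

    /* Hypothetical event handler; the one added to device.c is a stub. */
    static int
    example_evhand(module_t mod, int event, void *arg)
    {
            switch (event) {
            case MOD_LOAD:
                    /* driver initialization would go here */
                    return (0);
            case MOD_UNLOAD:
                    /* teardown would go here */
                    return (0);
            default:
                    return (EOPNOTSUPP);
            }
    }

    static moduledata_t example_mod = {
            .name = "example",
            .evhand = example_evhand,
    };

    DECLARE_MODULE(example, example_mod, SI_SUB_SMP, SI_ORDER_ANY);
    MODULE_VERSION(example, 1);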

Finally, the Mellanox InfiniBand drivers are updated to the latest
versions shipped with Linux 3.7.

Submitted by: Mellanox FreeBSD driver team:
                Oded Shanoon (odeds mellanox.com),
                Meny Yossefi (menyy mellanox.com),
                Orit Moskovich (oritm mellanox.com)

Approved by: re
commit 91eb2b78a7e56a727c14039b1a0e05969398f2b5 (parent 7e9b42c)
Authored by Alfred Perlstein (splbio)
Showing 25,094 additions and 3,534 deletions across 104 changed files.
  1. +16 −1 sys/conf/files
  2. +59 −15 sys/contrib/rdma/ib_umem.h
  3. +3 −2 sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
  4. +14 −0 sys/modules/Makefile
  5. +23 −0 sys/modules/ibcore/Makefile
  6. +31 −0 sys/modules/ipoib/Makefile
  7. +3 −3 sys/modules/mlx4/Makefile
  8. +8 −1 sys/modules/mlx4ib/Makefile
  9. +1 −1  sys/ofed/drivers/infiniband/core/addr.c
  10. +1 −1  sys/ofed/drivers/infiniband/core/cma.c
  11. +2 −1  sys/ofed/drivers/infiniband/core/core_priv.h
  12. +20 −4 sys/ofed/drivers/infiniband/core/device.c
  13. +21 −0 sys/ofed/drivers/infiniband/core/sa_query.c
  14. +180 −111 sys/ofed/drivers/infiniband/core/sysfs.c
  15. +15 −15 sys/ofed/drivers/infiniband/core/uverbs_cmd.c
  16. +4 −5 sys/ofed/drivers/infiniband/core/uverbs_main.c
  17. +14 −14 sys/ofed/drivers/infiniband/core/verbs.c
  18. +2 −0  sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
  19. +30 −3 sys/ofed/drivers/infiniband/hw/mlx4/Makefile
  20. +25 −27 sys/ofed/drivers/infiniband/hw/mlx4/ah.c
  21. +688 −0 sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
  22. +440 −0 sys/ofed/drivers/infiniband/hw/mlx4/cm.c
  23. +98 −62 sys/ofed/drivers/infiniband/hw/mlx4/cq.c
  24. +1,928 −86 sys/ofed/drivers/infiniband/hw/mlx4/mad.c
  25. +1,350 −478 sys/ofed/drivers/infiniband/hw/mlx4/main.c
  26. +1,254 −0 sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
  27. +441 −48 sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
  28. +422 −93 sys/ofed/drivers/infiniband/hw/mlx4/mr.c
  29. +1,506 −706 sys/ofed/drivers/infiniband/hw/mlx4/qp.c
  30. +18 −51 sys/ofed/drivers/infiniband/hw/mlx4/srq.c
  31. +800 −0 sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
  32. +11 −1 sys/ofed/drivers/infiniband/hw/mlx4/user.h
  33. +0 −1  sys/ofed/drivers/infiniband/hw/mlx4/wc.c
  34. +1 −1  sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
  35. +1 −1  sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
  36. +1 −0  sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
  37. +2 −2 sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
  38. +2 −1  sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
  39. +17 −0 sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
  40. +31 −6 sys/ofed/drivers/net/mlx4/Makefile
  41. +21 −24 sys/ofed/drivers/net/mlx4/alloc.c
  42. +31 −19 sys/ofed/drivers/net/mlx4/catas.c
  43. +1,747 −36 sys/ofed/drivers/net/mlx4/cmd.c
  44. +152 −68 sys/ofed/drivers/net/mlx4/cq.c
  45. +4 −2 sys/ofed/drivers/net/mlx4/en_cq.c
  46. +13 −9 sys/ofed/drivers/net/mlx4/en_main.c
  47. +6 −4 sys/ofed/drivers/net/mlx4/en_netdev.c
  48. +23 −10 sys/ofed/drivers/net/mlx4/en_port.c
  49. +2 −5 sys/ofed/drivers/net/mlx4/en_port.h
  50. +2 −4 sys/ofed/drivers/net/mlx4/en_rx.c
  51. +2 −2 sys/ofed/drivers/net/mlx4/en_tx.c
  52. +768 −125 sys/ofed/drivers/net/mlx4/eq.c
  53. +884 −118 sys/ofed/drivers/net/mlx4/fw.c
  54. +45 −5 sys/ofed/drivers/net/mlx4/fw.h
  55. +76 −75 sys/ofed/drivers/net/mlx4/icm.c
  56. +8 −12 sys/ofed/drivers/net/mlx4/icm.h
  57. +14 −39 sys/ofed/drivers/net/mlx4/intf.c
  58. +1,624 −448 sys/ofed/drivers/net/mlx4/main.c
  59. +1,124 −64 sys/ofed/drivers/net/mlx4/mcg.c
  60. +926 −34 sys/ofed/drivers/net/mlx4/mlx4.h
  61. +6 −5 sys/ofed/drivers/net/mlx4/mlx4_en.h
  62. +323 −197 sys/ofed/drivers/net/mlx4/mr.c
  63. +99 −8 sys/ofed/drivers/net/mlx4/pd.c
  64. +750 −59 sys/ofed/drivers/net/mlx4/port.c
  65. +24 −16 sys/ofed/drivers/net/mlx4/profile.c
  66. +281 −83 sys/ofed/drivers/net/mlx4/qp.c
  67. +4 −3 sys/ofed/drivers/net/mlx4/reset.c
  68. +4,315 −0 sys/ofed/drivers/net/mlx4/resource_tracker.c
  69. +17 −29 sys/ofed/drivers/net/mlx4/sense.c
  70. +93 −67 sys/ofed/drivers/net/mlx4/srq.c
  71. +325 −0 sys/ofed/drivers/net/mlx4/sys_tune.c
  72. +22 −0 sys/ofed/include/asm/atomic.h
  73. +1 −0  sys/ofed/include/asm/byteorder.h
  74. +53 −0 sys/ofed/include/linux/atomic.h
  75. +166 −0 sys/ofed/include/linux/bitops.h
  76. +17 −0 sys/ofed/include/linux/clocksource.h
  77. +3 −0  sys/ofed/include/linux/compat.h
  78. +6 −0 sys/ofed/include/linux/device.h
  79. +7 −0 sys/ofed/include/linux/dma-mapping.h
  80. +4 −0 sys/ofed/include/linux/gfp.h
  81. +4 −0 sys/ofed/include/linux/idr.h
  82. +5 −0 sys/ofed/include/linux/if_ether.h
  83. +2 −0  sys/ofed/include/linux/in6.h
  84. +70 −0 sys/ofed/include/linux/kernel.h
  85. +2 −0  sys/ofed/include/linux/linux_compat.c
  86. +3 −0  sys/ofed/include/linux/list.h
  87. +112 −3 sys/ofed/include/linux/log2.h
  88. +59 −11 sys/ofed/include/linux/mlx4/cmd.h
  89. +17 −1 sys/ofed/include/linux/mlx4/cq.h
  90. +628 −116 sys/ofed/include/linux/mlx4/device.h
  91. +27 −13 sys/ofed/include/linux/mlx4/driver.h
  92. +52 −19 sys/ofed/include/linux/mlx4/qp.h
  93. +0 −12 sys/ofed/include/linux/mlx4/srq.h
  94. +3 −0  sys/ofed/include/linux/moduleparam.h
  95. +107 −0 sys/ofed/include/linux/pci.h
  96. +6 −0 sys/ofed/include/linux/sysfs.h
  97. +2 −0  sys/ofed/include/linux/types.h
  98. +20 −1 sys/ofed/include/linux/workqueue.h
  99. +15 −0 sys/ofed/include/rdma/ib_cm.h
  100. +1 −1  sys/ofed/include/rdma/ib_mad.h
  101. +156 −0 sys/ofed/include/rdma/ib_pma.h
  102. +34 −0 sys/ofed/include/rdma/ib_sa.h
  103. +86 −5 sys/ofed/include/rdma/ib_user_verbs.h
  104. +212 −41 sys/ofed/include/rdma/ib_verbs.h
17 sys/conf/files
@@ -3542,6 +3542,18 @@ ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
+ofed/drivers/infiniband/hw/mlx4/alias_GUID.c optional mlx4ib \
+ no-depend obj-prefix "mlx4ib_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
+ofed/drivers/infiniband/hw/mlx4/mcg.c optional mlx4ib \
+ no-depend obj-prefix "mlx4ib_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
+ofed/drivers/infiniband/hw/mlx4/sysfs.c optional mlx4ib \
+ no-depend obj-prefix "mlx4ib_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
+ofed/drivers/infiniband/hw/mlx4/cm.c optional mlx4ib \
+ no-depend obj-prefix "mlx4ib_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/ah.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
@@ -3624,7 +3636,10 @@ ofed/drivers/net/mlx4/sense.c optional mlx4ib | mlxen \
ofed/drivers/net/mlx4/srq.c optional mlx4ib | mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
-ofed/drivers/net/mlx4/xrcd.c optional mlx4ib | mlxen \
+ofed/drivers/net/mlx4/resource_tracker.c optional mlx4ib | mlxen \
+ no-depend obj-prefix "mlx4_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
+ofed/drivers/net/mlx4/sys_tune.c optional mlx4ib | mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
74 sys/contrib/rdma/ib_umem.h
@@ -28,21 +28,17 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $FreeBSD$
*/
#ifndef IB_UMEM_H
#define IB_UMEM_H
-struct ib_ucontext;
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <linux/dma-attrs.h>
-struct ib_umem_chunk {
- TAILQ_ENTRY(ib_umem_chunk) entry;
- int nents;
- int nmap;
- struct rdma_scatterlist page_list[0];
-};
+struct ib_ucontext;
struct ib_umem {
struct ib_ucontext *context;
@@ -50,28 +46,76 @@ struct ib_umem {
int offset;
int page_size;
int writable;
- TAILQ_HEAD(, ib_umem_chunk) chunk_list;
-#ifdef notyet
+ int hugetlb;
+ struct list_head chunk_list;
struct work_struct work;
struct mm_struct *mm;
-#endif
unsigned long diff;
};
+/* contiguous memory structure */
+struct ib_cmem {
+
+ struct ib_ucontext *context;
+ size_t length;
+ /* Link list of contiguous blocks being part of that cmem */
+ struct list_head ib_cmem_block;
+
+ /* Order of cmem block, 2^ block_order will equal number
+ of physical pages per block
+ */
+ unsigned long block_order;
+ /* Refernce counter for that memory area
+ - When value became 0 pages will be returned to the kernel.
+ */
+ struct kref refcount;
+};
+
+
+struct ib_cmem_block {
+
+ struct list_head list;
+ /* page will point to the page struct of the head page
+ in the current compound page.
+ block order is saved once as part of ib_cmem.
+ */
+ struct page *page;
+};
+
+
+
+struct ib_umem_chunk {
+ struct list_head list;
+ int nents;
+ int nmap;
+ struct dma_attrs attrs;
+ struct scatterlist page_list[0];
+};
+
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access);
+ size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
+int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
+ struct vm_area_struct *vma);
+struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
+ unsigned long total_size,
+ unsigned long page_size_order);
+void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem);
+int ib_umem_map_to_vma(struct ib_umem *umem,
+ struct vm_area_struct *vma);
+
#else /* CONFIG_INFINIBAND_USER_MEM */
+#include <linux/err.h>
static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
unsigned long addr, size_t size,
- int access) {
- return ERR_PTR(EINVAL);
+ int access, int dmasync) {
+ return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
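
Note that ib_umem_get() gains a trailing dmasync argument above (and the
stub error path now returns a negative errno). A hypothetical caller,
illustrating only the new signature, would be adjusted roughly like this:

    /* example_map_user_buf() is illustrative, not part of the change. */
    static struct ib_umem *
    example_map_user_buf(struct ib_ucontext *ctx, unsigned long addr,
        size_t size, int access)
    {
            /* the new fifth argument requests DMA synchronization; most
               callers are expected to pass 0 */
            return (ib_umem_get(ctx, addr, size, access, 0));
    }
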
5 sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
@@ -541,7 +541,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_udata *udata,
+ int mr_id)
{
__be64 *pages;
int shift, i, n;
@@ -1136,7 +1137,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
- ret = ib_register_device(&dev->ibdev);
+ ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;
14 sys/modules/Makefile
@@ -128,6 +128,7 @@ SUBDIR= \
hwpmc \
${_hyperv} \
${_i2c} \
+ ${_ibcore} \
${_ibcs2} \
${_ichwd} \
${_ida} \
@@ -149,6 +150,7 @@ SUBDIR= \
${_igb} \
${_iir} \
${_io} \
+ ${_ipoib} \
${_ipdivert} \
${_ipfilter} \
${_ipfw} \
@@ -499,11 +501,17 @@ _fe= fe
_glxiic= glxiic
_glxsb= glxsb
_i2c= i2c
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_ibcore= ibcore
+.endif
_ibcs2= ibcs2
_ie= ie
_if_ndis= if_ndis
_igb= igb
_io= io
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_ipoib= ipoib
+.endif
_lindev= lindev
_linprocfs= linprocfs
_linsysfs= linsysfs
@@ -675,6 +683,9 @@ _hptrr= hptrr
.endif
_hyperv= hyperv
_i2c= i2c
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_ibcore= ibcore
+.endif
_ichwd= ichwd
_ida= ida
_if_ndis= if_ndis
@@ -682,6 +693,9 @@ _igb= igb
_iir= iir
_io= io
_ipmi= ipmi
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_ipoib= ipoib
+.endif
_ips= ips
_ipw= ipw
.if ${MK_SOURCELESS_UCODE} != "no"
23 sys/modules/ibcore/Makefile
@@ -0,0 +1,23 @@
+# $FreeBSD$
+.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/core
+.PATH: ${.CURDIR}/../../ofed/include/linux
+
+.include <bsd.own.mk>
+
+KMOD = ibcore
+SRCS = addr.c cm_msgs.h iwcm.c mad_rmpp.h sa_query.c ucma.c uverbs_cmd.c
+SRCS+= agent.c local_sa.c iwcm.h multicast.c smi.c ud_header.c uverbs_main.c
+SRCS+= agent.h core_priv.h mad.c notice.c smi.h umem.c uverbs_marshall.c
+SRCS+= cache.c device.c mad_priv.h packer.c sysfs.c user_mad.c verbs.c
+SRCS+= cm.c fmr_pool.c mad_rmpp.c sa.h ucm.c uverbs.h cma.c
+SRCS+= linux_compat.c linux_radix.c linux_idr.c
+SRCS+= vnode_if.h device_if.h bus_if.h pci_if.h opt_inet.h
+
+CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/core
+CFLAGS+= -I${.CURDIR}/../mlx4ib
+CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -DINET6 -DINET -DOFED
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
31 sys/modules/ipoib/Makefile
@@ -0,0 +1,31 @@
+# $FreeBSD$
+.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/ulp/ipoib
+.PATH: ${.CURDIR}/../../ofed/include/linux
+
+.include <bsd.own.mk>
+
+KMOD = ipoib
+SRCS = device_if.h bus_if.h opt_ofed.h vnode_if.h opt_inet.h opt_inet6.h
+SRCS += ipoib_cm.c ipoib_ib.c ipoib_main.c ipoib_multicast.c ipoib_verbs.c ipoib.h
+SRCS+= linux_compat.c linux_radix.c linux_idr.c
+
+CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/ulp/ipoib
+CFLAGS+= -I${.CURDIR}/../ibcore
+CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -DINET6 -DINET -DOFED
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
6 sys/modules/mlx4/Makefile
@@ -3,11 +3,11 @@
.include <bsd.own.mk>
+.PATH: ${.CURDIR}/../../ofed/include/linux
KMOD = mlx4
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
-SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c
-SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c xrcd.c
-SRCS+= opt_inet.h opt_inet6.h
+SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c linux_idr.c
+SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/
9 sys/modules/mlx4ib/Makefile
@@ -1,14 +1,21 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
+.PATH: ${.CURDIR}/../../ofed/include/linux
.include <bsd.own.mk>
KMOD = mlx4ib
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
-SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
+SRCS+= linux_compat.c linux_radix.c linux_idr.c
+SRCS+= alias_GUID.c mcg.c sysfs.c ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c
SRCS+= opt_inet.h opt_inet6.h
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+#CFLAGS+= -I${.CURDIR}/../../../../include
+CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
+CFLAGS+= -DINET6 -DINET -DOFED
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
2  sys/ofed/drivers/infiniband/core/addr.c
@@ -356,7 +356,7 @@ static int addr_resolve(struct sockaddr *src_in,
u_char edst[MAX_ADDR_LEN];
int multi;
int bcast;
- int error;
+ int error = 0;
/*
* Determine whether the address is unicast, multicast, or broadcast
2  sys/ofed/drivers/infiniband/core/cma.c
@@ -2957,7 +2957,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
- } else if ((addr->sa_family == AF_INET6)) {
+ } else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
3  sys/ofed/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,8 @@
#include <rdma/ib_verbs.h>
-int ib_device_register_sysfs(struct ib_device *device);
+int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
24 sys/ofed/drivers/infiniband/core/device.c
@@ -273,7 +273,9 @@ static int read_port_table_lengths(struct ib_device *device)
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
-int ib_register_device(struct ib_device *device)
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
int ret;
@@ -294,8 +296,6 @@ int ib_register_device(struct ib_device *device)
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
- device->ib_uverbs_xrcd_table = RB_ROOT;
- mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
@@ -304,7 +304,7 @@ int ib_register_device(struct ib_device *device)
goto out;
}
- ret = ib_device_register_sysfs(device);
+ ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
@@ -752,3 +752,19 @@ static void __exit ib_core_cleanup(void)
module_init(ib_core_init);
module_exit(ib_core_cleanup);
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+static int
+ibcore_evhand(module_t mod, int event, void *arg)
+{
+ return (0);
+}
+
+static moduledata_t ibcore_mod = {
+ .name = "ibcore",
+ .evhand = ibcore_evhand,
+};
+
+MODULE_VERSION(ibcore, 1);
+DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_SMP, SI_ORDER_ANY);
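
The port_callback argument added here lets an HCA driver attach extra
sysfs attributes to each port's kobject at registration time; drivers
without per-port files pass NULL (as the iw_cxgb hunk earlier does). A
hypothetical driver-side sketch, with example_ names standing in for a
real driver:

    /* Invoked once per port with that port's kobject. */
    static int
    example_port_callback(struct ib_device *ibdev, u8 port_num,
        struct kobject *port_kobj)
    {
            /* e.g. sysfs_create_group(port_kobj, &example_attr_group); */
            return (0);
    }

    static int
    example_register(struct example_softc *sc)
    {
            /* drivers without per-port files would pass NULL instead */
            return (ib_register_device(&sc->ibdev, example_port_callback));
    }
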
21 sys/ofed/drivers/infiniband/core/sa_query.c
@@ -1105,6 +1105,27 @@ static void ib_sa_inform_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
}
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ // stub function -
+ // called originally from mad.c under mlx4_ib_init_sriov()
+ // which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c
+ // which goes down to this function
+
+ printk("ERROR: function should be called only in SRIOV flow!!!");
+
+ return 0;
+}
+
/**
* ib_sa_informinfo_query - Start an InformInfo registration.
* @client:SA client
291 sys/ofed/drivers/infiniband/core/sysfs.c
@@ -38,6 +38,7 @@
#include <linux/string.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
struct ib_port {
struct kobject kobj;
@@ -103,7 +104,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
+ attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@@ -292,118 +293,124 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
return sprintf(buf, "0x%04x\n", pkey);
}
-#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
-struct port_table_attribute port_pma_attr_##_name = { \
- .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
-}
-
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
+ char *buf, int c_ext)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
- ssize_t ret;
-
- if (!p->ibdev->process_mad)
- return sprintf(buf, "N/A (no PMA)\n");
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad) {
- ret = -ENOMEM;
- goto out;
- }
-
- in_mad->mad_hdr.base_version = 1;
- in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
- in_mad->mad_hdr.class_version = 1;
- in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ struct ib_mad *in_mad = NULL;
+ struct ib_mad *out_mad = NULL;
+ ssize_t ret;
+
+ if (!p->ibdev->process_mad)
+ return -ENXIO;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ in_mad->mad_hdr.base_version = 1;
+ in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
+ in_mad->mad_hdr.class_version = 1;
+ in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ if (c_ext)
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
+ else
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
+
+ in_mad->data[41] = p->port_num; /* PortSelect field */
+
+ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
+ p->port_num, NULL, NULL, in_mad, out_mad) &
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ switch (width) {
+ case 4:
+ ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ (4 - (offset % 8))) & 0xf);
+ break;
+ case 8:
+ ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ break;
+ case 16:
+ ret = sprintf(buf, "%u\n",
+ be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 32:
+ ret = sprintf(buf, "%u\n",
+ be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n", (unsigned long long)
+ be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ default:
+ ret = 0;
+ }
- in_mad->data[41] = p->port_num; /* PortSelect field */
-
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL, in_mad, out_mad) &
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
- ret = -EINVAL;
- goto out;
- }
+out:
+ kfree(in_mad);
+ kfree(out_mad);
- switch (width) {
- case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
- (4 - (offset % 8))) & 0xf);
- break;
- case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
- break;
- case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
- break;
- case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
- break;
- default:
- ret = 0;
- }
+ return ret;
+}
-out:
- kfree(in_mad);
- kfree(out_mad);
+#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
- return ret;
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ return get_pma_counters(p, attr, buf, 0);
}
-static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
-static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
-static PORT_PMA_ATTR(link_downed , 2, 8, 56);
-static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
+static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
+static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
+static PORT_PMA_ATTR(link_downed , 2, 8, 56);
+static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
-static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
+static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
-static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
+static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
-static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
-static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
-static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
-static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
-static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
-/*
- * There is no bit allocated for port_xmit_wait in the CounterSelect field
- * (IB spec). However, since this bit is ignored when reading
- * (show_pma_counter), the _counter field of port_xmit_wait can be set to zero.
- */
-static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320);
+static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
+static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
+static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
+static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
+static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
- &port_pma_attr_symbol_error.attr.attr,
- &port_pma_attr_link_error_recovery.attr.attr,
- &port_pma_attr_link_downed.attr.attr,
- &port_pma_attr_port_rcv_errors.attr.attr,
- &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
- &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
- &port_pma_attr_port_xmit_discards.attr.attr,
- &port_pma_attr_port_xmit_constraint_errors.attr.attr,
- &port_pma_attr_port_rcv_constraint_errors.attr.attr,
- &port_pma_attr_local_link_integrity_errors.attr.attr,
- &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
- &port_pma_attr_VL15_dropped.attr.attr,
- &port_pma_attr_port_xmit_data.attr.attr,
- &port_pma_attr_port_rcv_data.attr.attr,
- &port_pma_attr_port_xmit_packets.attr.attr,
- &port_pma_attr_port_rcv_packets.attr.attr,
- &port_pma_attr_port_xmit_wait.attr.attr,
- NULL
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_port_xmit_data.attr.attr,
+ &port_pma_attr_port_rcv_data.attr.attr,
+ &port_pma_attr_port_xmit_packets.attr.attr,
+ &port_pma_attr_port_rcv_packets.attr.attr,
+ NULL
};
static struct attribute_group pma_group = {
@@ -411,6 +418,44 @@ static struct attribute_group pma_group = {
.attrs = pma_attrs
};
+#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
+
+static ssize_t show_pma_counter_ext(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return get_pma_counters(p, attr, buf, 1);
+}
+
+static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
+static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
+static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
+static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
+static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
+
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_ext_port_xmit_data_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_data_64.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets_64.attr.attr,
+ &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
+ NULL
+};
+
+static struct attribute_group pma_ext_group = {
+ .name = "counters_ext",
+ .attrs = pma_attrs_ext
+};
+
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -503,7 +548,9 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
return NULL;
}
-static int add_port(struct ib_device *device, int port_num)
+static int add_port(struct ib_device *device, int port_num,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
struct ib_port *p;
struct ib_port_attr attr;
@@ -522,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num)
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- device->ports_parent,
+ kobject_get(device->ports_parent),
"%d", port_num);
if (ret)
goto err_put;
@@ -531,10 +578,14 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_put;
+ ret = sysfs_create_group(&p->kobj, &pma_ext_group);
+ if (ret)
+ goto err_remove_pma;
+
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (!p->gid_group.attrs)
- goto err_remove_pma;
+ goto err_remove_pma_ext;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
@@ -550,6 +601,12 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_free_pkey;
+ if (port_callback) {
+ ret = port_callback(device, port_num, &p->kobj);
+ if (ret)
+ goto err_remove_pkey;
+ }
+
list_add_tail(&p->kobj.entry, &device->port_list);
#ifdef __linux__
@@ -557,6 +614,9 @@ static int add_port(struct ib_device *device, int port_num)
#endif
return 0;
+err_remove_pkey:
+ sysfs_remove_group(&p->kobj, &p->pkey_group);
+
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
@@ -572,6 +632,9 @@ static int add_port(struct ib_device *device, int port_num)
kfree(p->gid_group.attrs);
+err_remove_pma_ext:
+ sysfs_remove_group(&p->kobj, &pma_ext_group);
+
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
@@ -786,16 +849,17 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
-int ib_device_register_sysfs(struct ib_device *device)
+int ib_device_register_sysfs(struct ib_device *device,
+ int (*port_callback)(struct ib_device *, u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
int i;
class_dev->class = &ib_class;
- class_dev->driver_data = device;
class_dev->parent = device->dma_device;
- dev_set_name(class_dev, device->name);
+ dev_set_name(class_dev, device->name);
+ dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
@@ -810,19 +874,19 @@ int ib_device_register_sysfs(struct ib_device *device)
}
device->ports_parent = kobject_create_and_add("ports",
- &class_dev->kobj);
- if (!device->ports_parent) {
+ kobject_get(&class_dev->kobj));
+ if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
if (device->node_type == RDMA_NODE_IB_SWITCH) {
- ret = add_port(device, 0);
+ ret = add_port(device, 0, port_callback);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
- ret = add_port(device, i);
+ ret = add_port(device, i, port_callback);
if (ret)
goto err_put;
}
@@ -864,10 +928,15 @@ void ib_device_unregister_sysfs(struct ib_device *device)
{
struct kobject *p, *t;
struct ib_port *port;
+ int i;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ device_remove_file(&device->dev, ib_class_attributes[i]);
+ }
+
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_del(&p->entry);
port = container_of(p, struct ib_port, kobj);
@@ -891,7 +960,7 @@ void ib_sysfs_cleanup(void)
class_unregister(&ib_class);
}
-int ib_sysfs_create_port_files(struct ib_device *device,
+/*int ib_sysfs_create_port_files(struct ib_device *device,
int (*create)(struct ib_device *dev, u8 port_num,
struct kobject *kobj))
{
@@ -908,4 +977,4 @@ int ib_sysfs_create_port_files(struct ib_device *device,
return ret;
}
-EXPORT_SYMBOL(ib_sysfs_create_port_files);
+EXPORT_SYMBOL(ib_sysfs_create_port_files);*/
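
For orientation: each PORT_PMA_ATTR()/PORT_PMA_ATTR_EXT() entry above
packs the counter number, field width and bit offset into a single index
that get_pma_counters() unpacks. A worked example for port_xmit_data_64
(counter 0, width 64, offset 64):

    index  = 64 | (64 << 16) | (0 << 24);   /* == 0x00400040 */
    offset = index & 0xffff;                /* 64: bit offset into out_mad->data */
    width  = (index >> 16) & 0xff;          /* 64: selects the 64-bit case */
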
30 sys/ofed/drivers/infiniband/core/uverbs_cmd.c
@@ -312,7 +312,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->xrc_domain_list);
+ INIT_LIST_HEAD(&ucontext->xrcd_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -633,7 +633,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ cmd.access_flags, &udata, 0);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -1087,7 +1087,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.srq = srq;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
- attr.xrc_domain = xrcd;
+ attr.xrcd = xrcd;
attr.create_flags = 0;
attr.cap.max_send_wr = cmd.max_send_wr;
@@ -1115,14 +1115,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
- qp->xrcd = attr.xrc_domain;
+ qp->xrcd = attr.xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&attr.send_cq->usecnt);
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
- else if (attr.xrc_domain)
- atomic_inc(&attr.xrc_domain->usecnt);
+ else if (attr.xrcd)
+ atomic_inc(&attr.xrcd->usecnt);
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -2032,8 +2032,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
- srq->xrc_cq = NULL;
- srq->xrcd = NULL;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
@@ -2083,7 +2083,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
- struct ib_uverbs_create_xrc_srq cmd;
+ struct ib_uverbs_create_xsrq cmd;
struct ib_uverbs_create_srq_resp resp;
struct ib_udata udata;
struct ib_uevent_object *obj;
@@ -2119,7 +2119,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
goto err;
}
- xrc_cq = idr_read_cq(cmd.xrc_cq, file->ucontext, 0);
+ xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!xrc_cq) {
ret = -EINVAL;
goto err_put_pd;
@@ -2152,8 +2152,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
- srq->xrc_cq = xrc_cq;
- srq->xrcd = xrcd;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrc_cq->usecnt);
atomic_inc(&xrcd->usecnt);
@@ -2528,7 +2528,7 @@ ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list);
mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->xrc_domain_list);
+ list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
@@ -2598,7 +2598,7 @@ ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
if (!ret) {
list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) {
struct ib_srq *srq = t_uobj->object;
- if (srq->xrcd && srq->xrcd == uobj->object) {
+ if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) {
ret = -EBUSY;
break;
}
@@ -2702,7 +2702,7 @@ ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
init_attr.sq_sig_type =
cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_XRC;
- init_attr.xrc_domain = xrcd;
+ init_attr.xrcd = xrcd;
init_attr.cap.max_send_wr = 1;
init_attr.cap.max_recv_wr = 0;
9 sys/ofed/drivers/infiniband/core/uverbs_main.c
@@ -110,8 +110,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
[IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
- [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = ib_uverbs_open_xrc_domain,
- [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = ib_uverbs_close_xrc_domain,
+ [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain,
+ [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain,
[IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
[IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
[IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
@@ -258,7 +258,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
}
mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
+ list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
struct ib_uxrcd_object *xrcd_uobj =
@@ -629,8 +629,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (hdr.in_words * 4 != count)
return -EINVAL;
- if (hdr.command < 0 ||
- hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command] ||
!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -EINVAL;
28 sys/ofed/drivers/infiniband/core/verbs.c
@@ -250,8 +250,8 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
- srq->xrc_cq = NULL;
- srq->xrcd = NULL;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@@ -278,8 +278,8 @@ struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
- srq->xrc_cq = xrc_cq;
- srq->xrcd = xrcd;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrcd->usecnt);
atomic_inc(&xrc_cq->usecnt);
@@ -319,8 +319,8 @@ int ib_destroy_srq(struct ib_srq *srq)
return -EBUSY;
pd = srq->pd;
- xrc_cq = srq->xrc_cq;
- xrcd = srq->xrcd;
+ xrc_cq = srq->ext.xrc.cq;
+ xrcd = srq->ext.xrc.xrcd;
ret = srq->device->destroy_srq(srq);
if (!ret) {
@@ -355,7 +355,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
qp->xrcd = qp->qp_type == IB_QPT_XRC ?
- qp_init_attr->xrc_domain : NULL;
+ qp_init_attr->xrcd : NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
atomic_inc(&qp_init_attr->recv_cq->usecnt);
@@ -371,8 +371,8 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
- enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETH + 1];
- enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETH + 1];
+ enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
+ enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -382,7 +382,7 @@ static const struct {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
- [IB_QPT_RAW_ETH] = IB_QP_PORT,
+ [IB_QPT_RAW_PACKET] = IB_QP_PORT,
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@@ -1005,7 +1005,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_ETH) {
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@@ -1013,7 +1013,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
- if (qp->qp_type != IB_QPT_RAW_ETH)
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
@@ -1028,7 +1028,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_ETH) {
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@@ -1036,7 +1036,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
- if (qp->qp_type != IB_QPT_RAW_ETH)
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
2  sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,5 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
+ depends on NETDEVICES && ETHERNET && PCI
+ select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
33 sys/ofed/drivers/infiniband/hw/mlx4/Makefile
@@ -1,4 +1,31 @@
-obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+# $FreeBSD$
+#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
+#.PATH: ${.CURDIR}/../../../../include/linux
-mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
-mlx4_ib-y += wc.o
+.include <bsd.own.mk>
+
+KMOD = mlx4ib
+SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
+#SRCS+= linux_compat.c linux_radix.c
+SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
+SRCS+= opt_inet.h opt_inet6.h
+
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -I${.CURDIR}/../../../../include
+CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
52 sys/ofed/drivers/infiniband/hw/mlx4/ah.c
@@ -30,25 +30,25 @@
* SOFTWARE.
*/
-#include "mlx4_ib.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
-#include <rdma/ib_cache.h>
+
+#include "mlx4_ib.h"
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
- struct mlx4_ib_iboe *iboe = &dev->iboe;
struct in6_addr in6;
*is_mcast = 0;
- spin_lock(&iboe->lock);
- if (!iboe->netdevs[port - 1]) {
- spin_unlock(&iboe->lock);
- return -EINVAL;
- }
- spin_unlock(&iboe->lock);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
@@ -92,15 +92,15 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
}
static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+ struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
+ union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
u16 vlan_tag;
- union ib_gid sgid;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
@@ -130,7 +130,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
@@ -147,25 +147,24 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
- goto out;
} else {
- /* TBD: need to handle the case when we get called
- in an atomic context and there we might sleep. We
- don't expect this currently since we're working with
- link local addresses which we can translate without
- going to sleep */
+ /*
+ * TBD: need to handle the case when we get
+ * called in an atomic context and there we
+ * might sleep. We don't expect this
+ * currently since we're working with link
+ * local addresses which we can translate
+ * without going to sleep.
+ */
ret = create_iboe_ah(pd, ah_attr, ah);
- if (IS_ERR(ret))
- goto out;
- else
- return ret;
}
+
+ if (IS_ERR(ret))
+ kfree(ah);
+
+ return ret;
} else
return create_ib_ah(pd, ah_attr, ah); /* never fails */
-
-out:
- kfree(ah);
- return ret;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@@ -202,4 +201,3 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah)
kfree(to_mah(ah));
return 0;
}
-
688 sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+ /***********************************************************/
+/*This file support the handling of the Alias GUID feature. */
+/***********************************************************/
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_pack.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/delay.h>
+#include "mlx4_ib.h"
+
+/*
+The driver keeps the current state of all guids, as they are in the HW.
+Whenever we receive an smp mad GUIDInfo record, the data will be cached.
+*/
+
+struct mlx4_alias_guid_work_context {
+ u8 port;
+ struct mlx4_ib_dev *dev ;
+ struct ib_sa_query *sa_query;
+ struct completion done;
+ int query_id;
+ struct list_head list;
+ int block_num;
+};
+
+struct mlx4_next_alias_guid_work {
+ u8 port;
+ u8 block_num;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+};
+
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
+ u8 port_num, u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ int port_index = port_num - 1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* The location of the specific index starts from bit number 4
+ * until bit num 11 */
+ if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_slaves) {
+ pr_debug("The last slave: %d\n", slave_id);
+ return;
+ }
+
+ /* cache the guid: */
+ memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
+ &p_data[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ } else
+ pr_debug("Guid number: %d in block: %d"
+ " was not updated\n", i, block_num);
+ }
+}
+
+static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
+{
+ if (index >= NUM_ALIAS_GUID_PER_PORT) {
+ pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
+ return (__force __be64) -1;
+ }
+ return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
+}
+
+
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
+{
+ return IB_SA_COMP_MASK(4 + index);
+}
+
+/*
+ * Whenever new GUID is set/unset (guid table change) create event and
+ * notify the relevant slave (master also should be notified).
+ * If the GUID value is not as we have in the cache the slave will not be
+ * updated; in this case it waits for the smp_snoop or the port management
+ * event to call the function and to update the slave.
+ * block_number - the index of the block (16 blocks available)
+ * port_number - 1 or 2
+ */
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ enum slave_port_state new_state;
+ enum slave_port_state prev_state;
+ __be64 tmp_cur_ag, form_cache_ag;
+ enum slave_port_gen_event gen_event;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ /*calculate the slaves and notify them*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* the location of the specific index runs from bits 4..11 */
+ if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
+ continue;
+
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_slaves)
+ return;
+ tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
+ form_cache_ag = get_cached_alias_guid(dev, port_num,
+ (NUM_ALIAS_GUID_IN_REC * block_num) + i);
+ /*
+ * Check if guid is not the same as in the cache,
+ * If it is different, wait for the snoop_smp or the port mgmt
+ * change event to update the slave on its port state change
+ */
+ if (tmp_cur_ag != form_cache_ag)
+ continue;
+ mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
+
+ /*2 cases: Valid GUID, and Invalid Guid*/
+
+ if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
+ prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
+ new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ &gen_event);
+ pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ " new_port_state: %d, gen_event: %d\n",
+ slave_id, port_num, prev_state, new_state, gen_event);
+ if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
+ pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
+ port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
+ }
+ } else { /* request to invalidate GUID */
+ set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+ &gen_event);
+ pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
+ MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ }
+ }
+}
+
+static void aliasguid_query_handler(int status,
+ struct ib_sa_guidinfo_rec *guid_rec,
+ void *context)
+{
+ struct mlx4_ib_dev *dev;
+ struct mlx4_alias_guid_work_context *cb_ctx = context;
+ u8 port_index ;
+ int i;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec;
+ unsigned long flags, flags1;
+
+ if (!context)
+ return;
+
+ dev = cb_ctx->dev;
+ port_index = cb_ctx->port - 1;
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[cb_ctx->block_num];
+
+ if (status) {
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ pr_debug("(port: %d) failed: status = %d\n",
+ cb_ctx->port, status);
+ goto out;
+ }
+
+ if (guid_rec->block_num != cb_ctx->block_num) {
+ pr_err("block num mismatch: %d != %d\n",
+ cb_ctx->block_num, guid_rec->block_num);
+ goto out;
+ }
+
+ pr_debug("lid/port: %d/%d, block_num: %d\n",
+ be16_to_cpu(guid_rec->lid), cb_ctx->port,
+ guid_rec->block_num);
+
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[guid_rec->block_num];
+
+ rec->status = MLX4_GUID_INFO_STATUS_SET;
+ rec->method = MLX4_GUID_INFO_RECORD_SET;
+
+ for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ __be64 tmp_cur_ag;
+ tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ /* check if the SM didn't assign one of the records.
+ * if it didn't, if it was not sysadmin request:
+ * ask the SM to give a new GUID, (instead of the driver request).
+ */
+ if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
+ mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
+ "block_num: %d was declined by SM, "
+ "ownership by %d (0 = driver, 1=sysAdmin,"
+ " 2=None)\n", __func__, i,
+ guid_rec->block_num, rec->ownership);
+ if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
+ /* if it is driver assign, asks for new GUID from SM*/
+ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+ MLX4_NOT_SET_GUID;
+
+ /* Mark the record as not assigned, and let it
+ * be sent again in the next work sched.*/
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ } else {
+ /* properly assigned record. */
+ /* We save the GUID we just got from the SM in the
+ * admin_guid in order to be persistent, and in the
+ * request from the sm the process will ask for the same GUID */
+ if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
+ tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
+ /* the sysadmin assignment failed.*/
+ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+ " admin guid after SysAdmin "
+ "configuration. "
+ "Record num %d in block_num:%d "
+ "was declined by SM, "
+ "new val(0x%llx) was kept\n",
+ __func__, i,
+ guid_rec->block_num,
+ (long long)be64_to_cpu(*(__be64 *) &
+ rec->all_recs[i * GUID_REC_SIZE]));
+ } else {
+ memcpy(&rec->all_recs[i * GUID_REC_SIZE],
+ &guid_rec->guid_info_list[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ }
+ }
+ }
+ /*
+ The func is call here to close the cases when the
+ sm doesn't send smp, so in the sa response the driver
+ notifies the slave.
+ */
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
+ cb_ctx->port,
+ guid_rec->guid_info_list);
+out:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down)
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
+ &dev->sriov.alias_guid.ports_guid[port_index].
+ alias_guid_work, 0);
+ if (cb_ctx->sa_query) {
+ list_del(&cb_ctx->list);
+ kfree(cb_ctx);
+ } else
+ complete(&cb_ctx->done);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
+{
+ int i;
+ u64 cur_admin_val;
+ ib_sa_comp_mask comp_mask = 0;
+
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
+ = MLX4_GUID_INFO_STATUS_IDLE;
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ /* calculate the comp_mask for that record.*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ cur_admin_val =
+ *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
+ /*
+ check the admin value: if it's for delete (~00LL) or
+ it is the first guid of the first record (hw guid) or
+ the records is not in ownership of the sysadmin and the sm doesn't
+ need to assign GUIDs, then don't put it up for assignment.
+ */
+ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
+ (!index && !i) ||
+ MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
+ ports_guid[port - 1].all_rec_per_port[index].ownership)
+ continue;
+ comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].guid_indexes = comp_mask;
+}
+
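+/*
+ * Push one GUID info record to the SM via an SA query.  If the port is
+ * not active or the query cannot be issued, the record is invalidated
+ * and the per-port work is rescheduled.
+ */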
+static int set_guid_rec(struct ib_device *ibdev,
+ u8 port, int index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ int err;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_guidinfo_rec guid_info_rec;
+ ib_sa_comp_mask comp_mask;
+ struct ib_port_attr attr;
+ struct mlx4_alias_guid_work_context *callback_context;
+ unsigned long resched_delay, flags, flags1;
+ struct list_head *head =
+ &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+
+ err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
+ if (err) {
+ pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
+ err, port);
+ return err;
+ }
+ /* Check that the port was configured by the SM; otherwise there is no need to send. */
+ if (attr.state != IB_PORT_ACTIVE) {
+ pr_debug("port %d not active...rescheduling\n", port);
+ resched_delay = 5 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+
+ callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
+ if (!callback_context) {
+ err = -ENOMEM;
+ resched_delay = HZ * 5;
+ goto new_schedule;
+ }
+ callback_context->port = port;
+ callback_context->dev = dev;
+ callback_context->block_num = index;
+
+ memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
+
+ guid_info_rec.lid = cpu_to_be16(attr.lid);
+ guid_info_rec.block_num = index;
+
+ memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
+ GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
+ comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
+ rec_det->guid_indexes;
+
+ init_completion(&callback_context->done);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_add_tail(&callback_context->list, head);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+ callback_context->query_id =
+ ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
+ ibdev, port, &guid_info_rec,
+ comp_mask, rec_det->method, 1000,
+ GFP_KERNEL, aliasguid_query_handler,
+ callback_context,
+ &callback_context->sa_query);
+ if (callback_context->query_id < 0) {
+ pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
+ "%d. will reschedule to the next 1 sec.\n",
+ callback_context->query_id);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_del(&callback_context->list);
+ kfree(callback_context);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ resched_delay = 1 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+ err = 0;
+ goto out;
+
+new_schedule:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ invalidate_guid_record(dev, port, index);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ resched_delay);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+
+out:
+ return err;
+}
+
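+/* Invalidate all GUID records of a port and restart the per-port work. */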
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
+{
+ int i;
+ unsigned long flags, flags1;
+
+ pr_debug("port %d\n", port);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
+ invalidate_guid_record(dev, port, i);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
+ /*
+ * Make sure no work waits in the queue.  If the work is already
+ * queued (not on the timer), the cancel will fail; that is not a
+ * problem because we just want the work started.
+ */
+ cancel_delayed_work(&dev->sriov.alias_guid.
+ ports_guid[port - 1].alias_guid_work);
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *rec)
+{
+ int j;
+ unsigned long flags;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
+ MLX4_GUID_INFO_STATUS_IDLE) {
+ memcpy(&rec->rec_det,
+ &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
+ sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
+ rec->port = port;
+ rec->block_num = j;
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
+ MLX4_GUID_INFO_STATUS_PENDING;
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ }
+ return -ENOENT;
+}
+
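+/* Copy an administratively provided record into the per-port cache. */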
+static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
+ int rec_index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
+ rec_det->guid_indexes;
+ memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
+ rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
+ rec_det->status;
+}
+
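+/*
+ * Zero all records of a port and mark every GUID index, except the HW
+ * GUID of record 0, for assignment.
+ */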
+static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+{
+ int j;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
+ memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
+ IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
+ IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
+ IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
+ IB_SA_GUIDINFO_REC_GID7;
+ rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
+ set_administratively_guid_record(dev, port, j, &rec_det);
+ }
+}
+
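+/*
+ * Delayed-work handler: pick the next IDLE record for the port and push
+ * it to the SM; return silently when nothing is left to update.
+ */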
+static void alias_guid_work(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ int ret = 0;
+ struct mlx4_next_alias_guid_work *rec;
+ struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
+ container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
+ alias_guid_work);
+ struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
+ struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
+ struct mlx4_ib_sriov,
+ alias_guid);
+ struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
+
+ rec = kzalloc(sizeof *rec, GFP_KERNEL);
+ if (!rec) {
+ pr_err("alias_guid_work: No Memory\n");
+ return;
+ }
+
+ pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
+ ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
+ if (ret) {
+ pr_debug("No more records to update.\n");
+ goto out;
+ }
+
+ set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
+ &rec->rec_det);
+
+out:
+ kfree(rec);
+}
+
+
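+/* Kick off the alias GUID work for a port (master only). */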
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
+{
+ unsigned long flags, flags1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
+ &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
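+/*
+ * Tear down the alias GUID service: cancel pending work, cancel and wait
+ * for outstanding SA queries, destroy the per-port workqueues and
+ * unregister the SA client.
+ */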
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ int i;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct mlx4_alias_guid_work_context *cb_ctx;
+ struct mlx4_sriov_alias_guid_port_rec_det *det;
+ struct ib_sa_query *sa_query;
+ unsigned long flags;
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
+ det = &sriov->alias_guid.ports_guid[i];
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ while (!list_empty(&det->cb_list)) {
+ cb_ctx = list_entry(det->cb_list.next,
+ struct mlx4_alias_guid_work_context,
+ list);
+ sa_query = cb_ctx->sa_query;
+ cb_ctx->sa_query = NULL;
+ list_del(&cb_ctx->list);
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ ib_sa_cancel_query(cb_ctx->query_id, sa_query);
+ wait_for_completion(&cb_ctx->done);
+ kfree(cb_ctx);
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ for (i = 0 ; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ }
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+}
+
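+/*
+ * Set up the alias GUID service on the master: register an SA client,
+ * initialize the per-port record cache and ownership, and create one
+ * single-threaded workqueue per port.
+ */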
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ char alias_wq_name[15];
+ int ret = 0;
+ int i, j, k;
+ union ib_gid gid;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+ dev->sriov.alias_guid.sa_client =
+ kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
+ if (!dev->sriov.alias_guid.sa_client)
+ return -ENOMEM;
+
+ ib_sa_register_client(dev->sriov.alias_guid.sa_client);
+
+ spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
+
+ for (i = 1; i <= dev->num_ports; ++i) {
+ if (dev->ib_dev.query_gid(&dev->ib_dev, i, 0, &gid)) {
+ ret = -EFAULT;
+ goto err_unregister;
+ }
+ }
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ memset(&dev->sriov.alias_guid.ports_guid[i], 0,
+ sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
+ /* Check whether the SM needs to assign the GUIDs. */
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ if (mlx4_ib_sm_guid_assign) {
+ dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].
+ ownership = MLX4_GUID_DRIVER_ASSIGN;
+ continue;
+ }
+ dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
+ ownership = MLX4_GUID_NONE_ASSIGN;
+ /* Mark each value as deleted until the sysadmin
+ * provides a valid value. */
+ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+ }
+ }
+ INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
+ /* Prepare the records; mark them to be allocated by the SM. */
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
+ invalidate_guid_record(dev, i + 1, j);
+
+ dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
+ dev->sriov.alias_guid.ports_guid[i].port = i;
+ if (mlx4_ib_sm_guid_assign)
+ set_all_slaves_guids(dev, i);
+
+ snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
+ dev->sriov.alias_guid.ports_guid[i].wq =
+ create_singlethread_workqueue(alias_wq_name);
+ if (!dev->sriov.alias_guid.ports_guid[i].wq) {
+ ret = -ENOMEM;
+ goto err_thread;
+ }
+ INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
+ alias_guid_work);
+ }
+ return 0;
+
+err_thread:
+ for (--i; i >= 0; i--) {
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ dev->sriov.alias_guid.ports_guid[i].wq = NULL;
+ }
+
+err_unregister:
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+ dev->sriov.alias_guid.sa_client = NULL;
+ pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
+ return ret;
+}
440 sys/ofed/drivers/infiniband/hw/mlx4/cm.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/idr.h>
+#include <rdma/ib_cm.h>
+
+#include "mlx4_ib.h"
+
+#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
+
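+/*
+ * Mapping between a slave-local CM ID (sl_cm_id) and the paravirtualized
+ * CM ID (pv_cm_id) seen on the wire.  Entries are kept both in an rbtree
+ * keyed by (slave_id, sl_cm_id) and in an IDR keyed by pv_cm_id.
+ */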
+struct id_map_entry {
+ struct rb_node node;
+
+ u32 sl_cm_id;
+ u32 pv_cm_id;
+ int slave_id;
+ int scheduled_delete;
+ struct mlx4_ib_dev *dev;
+
+ struct list_head list;
+ struct delayed_work timeout;
+};
+
+struct cm_generic_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+};
+
+struct cm_req_msg {
+ unsigned char unused[0x60];
+ union ib_gid primary_path_sgid;
+};
+
+
+static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_local_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->local_comm_id);
+}
+
+static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_remote_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->remote_comm_id);
+}
+
+static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
+{
+ struct cm_req_msg *msg = (struct cm_req_msg *)mad;
+
+ return msg->primary_path_sgid;
+}
+
+/* The lock must be held by the caller. */
+static struct id_map_entry *
+id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node *node = sl_id_map->rb_node;
+
+ while (node) {
+ struct id_map_entry *id_map_entry =
+ rb_entry(node, struct id_map_entry, node);
+
+ if (id_map_entry->sl_cm_id > sl_cm_id)
+ node = node->rb_left;
+ else if (id_map_entry->sl_cm_id < sl_cm_id)
+ node = node->rb_right;
+ else if (id_map_entry->slave_id > slave_id)
+ node = node->rb_left;
+ else if (id_map_entry->slave_id < slave_id)
+ node = node->rb_right;
+ else
+ return id_map_entry;
+ }
+ return NULL;
+}
+
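+/* Delayed-work handler that drops a stale id_map_entry from the rbtree and IDR. */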
+static void id_map_ent_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
+ struct id_map_entry *db_ent, *found_ent;
+ struct mlx4_ib_dev *dev = ent->dev;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ int pv_id = (int) ent->pv_cm_id;
+
+ spin_lock(&sriov->id_map_lock);
+ db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
+ if (!db_ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_id);
+
+out:
+ list_del(&ent->list);
+ spin_unlock(&sriov->id_map_lock);
+ kfree(ent);
+}
+
+static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct id_map_entry *ent, *found_ent;
+
+ spin_lock(&sriov->id_map_lock);
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
+ if (!ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_cm_id);
+out:
+ spin_unlock(&sriov->id_map_lock);
+}
+
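+/* Insert an entry into the (slave_id, sl_cm_id) rbtree, replacing any duplicate. */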
+static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
+ struct id_map_entry *ent;
+ int slave_id = new->slave_id;
+ int sl_cm_id = new->sl_cm_id;
+
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
+ if (ent) {
+ pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
+ sl_cm_id);
+
+ rb_replace_node(&ent->node, &new->node, sl_id_map);
+ return;
+ }
+
+ /* Go to the bottom of the tree */
+ while (*link) {
+ parent = *link;
+ ent = rb_entry(parent, struct id_map_entry, node);
+
+ if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, sl_id_map);
+}
+
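+/*
+ * Allocate an id_map_entry, obtain a new pv_cm_id from the IDR and link
+ * the entry into the rbtree and the per-device cm_list.
+ */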
+static struct id_map_entry *
+id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
+{
+ int ret, id;
+ static int next_id;
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
+ if (!ent) {
+ mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ent->sl_cm_id = sl_cm_id;
+ ent->slave_id = slave_id;
+ ent->scheduled_delete = 0;
+ ent->dev = to_mdev(ibdev);
+ INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
+
+ do {
+ spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
+ ret = idr_get_new_above(&sriov->pv_id_table, ent,
+ next_id, &id);
+ if (!ret) {
+ next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
+ ent->pv_cm_id = (u32)id;
+ sl_id_map_add(ibdev, ent);
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+ } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
+ /* idr_get_new_above() can return -ENOSPC; do not insert in that case. */
+ if (!ret) {
+ spin_lock(&sriov->id_map_lock);
+ list_add_tail(&ent->list, &sriov->cm_list);
+ spin_unlock(&sriov->id_map_lock);
+ return ent;
+ }
+ /* Error flow. */
+ kfree(ent);
+ mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
+ return ERR_PTR(-ENOMEM);
+}
+
+static struct id_map_entry *
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+{
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ spin_lock(&sriov->id_map_lock);
+ if (*pv_cm_id == -1) {
+ ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ if (ent)
+ *pv_cm_id = (int) ent->pv_cm_id;
+ } else
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
+ spin_unlock(&sriov->id_map_lock);
+
+ return ent;
+}
+
+static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ unsigned long flags;
+
+ spin_lock(&sriov->id_map_lock);
+ spin_lock_irqsave(&sriov->going_down_lock, flags);
+ /* Make sure nothing new is scheduled while the device is going down. */
+ if (!sriov->is_going_down) {
+ id->scheduled_delete = 1;
+ schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ }
+ spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+ spin_unlock(&sriov->id_map_lock);
+}
+
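+/*
+ * Multiplex a CM MAD arriving from a slave: allocate a pv_cm_id for
+ * REQ/REP, rewrite the local comm ID in the MAD, and arrange cleanup on
+ * DREQ/DREP.
+ */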
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad)
+{
+ struct id_map_entry *id;
+ u32 sl_cm_id;
+ int pv_cm_id = -1;
+
+ sl_cm_id = get_local_comm_id(mad);
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ id = id_map_alloc(ibdev, slave_id, sl_cm_id);
+ if (IS_ERR(id)) {
+ mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
+ __func__, slave_id, sl_cm_id);
+ return PTR_ERR(id);
+ }
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ return 0;
+ } else {
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ }
+
+ if (!id) {
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
+ slave_id, sl_cm_id);
+ return -EINVAL;
+ }
+
+ set_local_comm_id(mad, id->pv_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
+ id_map_find_del(ibdev, pv_cm_id);
+
+ return 0;
+}
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad, int is_eth)
+{
+ u32 pv_cm_id;