/*****************************************************************************
@(#) File: src/drivers/np_ip.c
-----------------------------------------------------------------------------
Copyright (c) 2008-2015 Monavacon Limited <http://www.monavacon.com/>
Copyright (c) 2001-2008 OpenSS7 Corporation <http://www.openss7.com/>
Copyright (c) 1997-2001 Brian F. G. Bidulock <bidulock@openss7.org>
All Rights Reserved.
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by the Free
Software Foundation, version 3 of the license.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>, or
write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA
02139, USA.
-----------------------------------------------------------------------------
U.S. GOVERNMENT RESTRICTED RIGHTS. If you are licensing this Software on
behalf of the U.S. Government ("Government"), the following provisions apply
to you. If the Software is supplied by the Department of Defense ("DoD"), it
is classified as "Commercial Computer Software" under paragraph 252.227-7014
of the DoD Supplement to the Federal Acquisition Regulations ("DFARS") (or any
successor regulations) and the Government is acquiring only the license rights
granted herein (the license rights customarily provided to non-Government
users). If the Software is supplied to any unit or agency of the Government
other than DoD, it is classified as "Restricted Computer Software" and the
Government's rights in the Software are defined in paragraph 52.227-19 of the
Federal Acquisition Regulations ("FAR") (or any successor regulations) or, in
the cases of NASA, in paragraph 18.52.227-86 of the NASA Supplement to the FAR
(or any successor regulations).
-----------------------------------------------------------------------------
Commercial licensing and support of this software is available from OpenSS7
Corporation at a fee. See http://www.openss7.com/
*****************************************************************************/
static char const ident[] = "src/drivers/np_ip.c (" PACKAGE_ENVR ") " PACKAGE_DATE;
/*
 * This driver provides the functionality of an IP (Internet Protocol) hook similar to raw sockets,
 * with the exception that the hook acts as a port-bound intercept for IP packets for the bound
 * protocol ids.  This driver is used primarily by the OpenSS7 protocol test module (e.g. for SCTP)
 * and for applications where entire ranges of port numbers for an existing protocol id must be
 * intercepted (e.g. for RTP/RTCP).  The driver hooks into the Linux IP protocol tables and
 * passes packets in which it is not interested (not bound) transparently on to the underlying
 * protocol.  The driver uses the NPI (Network Provider Interface) API.
 */
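/*
 * For orientation, a minimal user-space sketch (an illustration, not part of
 * the driver; the device path, includes and error handling are assumptions)
 * of binding a Stream to an NSAP through this driver with an N_BIND_REQ:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <stropts.h>
 *	#include <netinet/in.h>
 *	#include <sys/npi.h>
 *
 *	int bind_example(void)
 *	{
 *		unsigned char ctl[sizeof(N_bind_req_t) + sizeof(struct sockaddr_in) + 1];
 *		N_bind_req_t *req = (N_bind_req_t *) ctl;
 *		struct sockaddr_in sin = { .sin_family = AF_INET };	// wildcard address and port
 *		struct strbuf ctlbuf = { .len = sizeof(ctl), .buf = (char *) ctl };
 *		int fd = open("/dev/streams/clone/np_ip", O_RDWR);
 *
 *		if (fd < 0)
 *			return (-1);
 *		req->PRIM_type = N_BIND_REQ;
 *		req->ADDR_length = sizeof(sin);
 *		req->ADDR_offset = sizeof(*req);
 *		req->CONIND_number = 0;
 *		req->BIND_flags = 0;
 *		req->PROTOID_length = 1;
 *		req->PROTOID_offset = sizeof(*req) + sizeof(sin);
 *		memcpy(ctl + req->ADDR_offset, &sin, sizeof(sin));
 *		ctl[req->PROTOID_offset] = 132;	// e.g. SCTP's IP protocol number
 *		return putmsg(fd, &ctlbuf, NULL, 0);	// expect N_BIND_ACK in reply
 *	}
 */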
#define _DEBUG 1
#undef _DEBUG
#define _SVR4_SOURCE 1
#include <sys/os7/compat.h>
#ifdef LINUX
#undef ASSERT
#include <linux/bitops.h>
#define np_tst_bit(nr,addr) test_bit(nr,addr)
#define np_set_bit(nr,addr) __set_bit(nr,addr)
#define np_clr_bit(nr,addr) __clear_bit(nr,addr)
#include <linux/interrupt.h>
#ifdef HAVE_KINC_LINUX_BRLOCK_H
#include <linux/brlock.h>
#endif
#include <linux/udp.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/inet_ecn.h>
#include <net/snmp.h>
#include <net/udp.h>
#ifdef HAVE_KINC_NET_DST_H
#include <net/dst.h>
#endif
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#endif /* LINUX */
#include "net_hooks.h"
#include <sys/npi.h>
#include <sys/npi_ip.h>
#define NP_DESCRIP "Internet Protocol (IP) Network Provider (NP_IP) STREAMS Driver"
#define NP_EXTRA "Part of the OpenSS7 stack for Linux Fast-STREAMS"
#define NP_COPYRIGHT "Copyright (c) 2008-2015 Monavacon Limited. All Rights Reserved."
#define NP_REVISION "OpenSS7 src/drivers/np_ip.c (" PACKAGE_ENVR ") " PACKAGE_DATE
#define NP_DEVICE "SVR 4.2 MP STREAMS NPI NP_IP Network Provider"
#define NP_CONTACT "Brian Bidulock <bidulock@openss7.org>"
#define NP_LICENSE "GPL"
#define NP_BANNER NP_DESCRIP "\n" \
NP_EXTRA "\n" \
NP_REVISION "\n" \
NP_COPYRIGHT "\n" \
NP_DEVICE "\n" \
NP_CONTACT
#define NP_SPLASH NP_DESCRIP "\n" \
NP_REVISION
#ifdef LINUX
MODULE_AUTHOR(NP_CONTACT);
MODULE_DESCRIPTION(NP_DESCRIP);
MODULE_SUPPORTED_DEVICE(NP_DEVICE);
#ifdef MODULE_LICENSE
MODULE_LICENSE(NP_LICENSE);
#endif /* MODULE_LICENSE */
#ifdef MODULE_ALIAS
MODULE_ALIAS("streams-np_ip");
#endif /* MODULE_ALIAS */
#ifdef MODULE_VERSION
MODULE_VERSION(PACKAGE_ENVR);
#endif
#endif /* LINUX */
#define NP_DRV_ID CONFIG_STREAMS_NP_IP_MODID
#define NP_DRV_NAME CONFIG_STREAMS_NP_IP_NAME
#define NP_CMAJORS CONFIG_STREAMS_NP_IP_NMAJORS
#define NP_CMAJOR_0 CONFIG_STREAMS_NP_IP_MAJOR
#define NP_UNITS CONFIG_STREAMS_NP_IP_NMINORS
#ifdef LINUX
#ifdef MODULE_ALIAS
MODULE_ALIAS("streams-modid-" __stringify(CONFIG_STREAMS_NP_IP_MODID));
MODULE_ALIAS("streams-driver-np_ip");
MODULE_ALIAS("streams-major-" __stringify(CONFIG_STREAMS_NP_IP_MAJOR));
MODULE_ALIAS("/dev/streams/np_ip");
MODULE_ALIAS("/dev/streams/np_ip/*");
MODULE_ALIAS("/dev/streams/clone/np_ip");
MODULE_ALIAS("char-major-" __stringify(CONFIG_STREAMS_CLONE_MAJOR) "-" __stringify(NP_CMAJOR_0));
MODULE_ALIAS("/dev/np_ip");
//MODULE_ALIAS("devname:np_ip");
#endif /* defined MODULE_ALIAS */
#endif /* defined LINUX */
/*
* ==========================================================================
*
* STREAMS Definitions
*
* ==========================================================================
*/
#define DRV_ID NP_DRV_ID
#define DRV_NAME NP_DRV_NAME
#define CMAJORS NP_CMAJORS
#define CMAJOR_0 NP_CMAJOR_0
#define UNITS NP_UNITS
#ifdef MODULE
#define DRV_BANNER NP_BANNER
#else /* MODULE */
#define DRV_BANNER NP_SPLASH
#endif /* MODULE */
STATIC struct module_info np_minfo = {
.mi_idnum = DRV_ID, /* Module ID number */
.mi_idname = DRV_NAME, /* Module name */
.mi_minpsz = 0, /* Min packet size accepted */
.mi_maxpsz = (1 << 16), /* Max packet size accepted */
.mi_hiwat = (1 << 18), /* Hi water mark */
.mi_lowat = (1 << 16), /* Lo water mark */
};
STATIC struct module_stat np_rstat __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
STATIC struct module_stat np_wstat __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
/* Upper multiplex is a N provider following the NPI. */
STATIC streamscall int np_qopen(queue_t *, dev_t *, int, int, cred_t *);
STATIC streamscall int np_qclose(queue_t *, int, cred_t *);
streamscall int np_rput(queue_t *, mblk_t *);
streamscall int np_rsrv(queue_t *);
STATIC struct qinit np_rinit = {
.qi_putp = np_rput, /* Read put procedure (message from below) */
.qi_srvp = np_rsrv, /* Read service procedure */
.qi_qopen = np_qopen, /* Each open */
.qi_qclose = np_qclose, /* Last close */
.qi_minfo = &np_minfo, /* Module information */
.qi_mstat = &np_rstat, /* Module statistics */
};
streamscall int np_wput(queue_t *, mblk_t *);
streamscall int np_wsrv(queue_t *);
STATIC struct qinit np_winit = {
.qi_putp = np_wput, /* Write put procedure (message from above) */
.qi_srvp = np_wsrv, /* Write service procedure */
.qi_minfo = &np_minfo, /* Module information */
.qi_mstat = &np_wstat, /* Module statistics */
};
MODULE_STATIC struct streamtab np_info = {
.st_rdinit = &np_rinit, /* Upper read queue */
.st_wrinit = &np_winit, /* Upper write queue */
};
#if !defined HAVE_KMEMB_STRUCT_SK_BUFF_TRANSPORT_HEADER
#if !defined HAVE_KFUNC_SKB_TRANSPORT_HEADER
static inline unsigned char *
skb_tail_pointer(const struct sk_buff *skb)
{
return skb->tail;
}
static inline unsigned char *
skb_end_pointer(const struct sk_buff *skb)
{
return skb->end;
}
static inline unsigned char *
skb_transport_header(const struct sk_buff *skb)
{
return skb->h.raw;
}
static inline unsigned char *
skb_network_header(const struct sk_buff *skb)
{
return skb->nh.raw;
}
static inline unsigned char *
skb_mac_header(const struct sk_buff *skb)
{
return skb->mac.raw;
}
static inline void
skb_reset_tail_pointer(struct sk_buff *skb)
{
skb->tail = skb->data;
}
static inline void
skb_reset_end_pointer(struct sk_buff *skb)
{
skb->end = skb->data;
}
static inline void
skb_reset_transport_header(struct sk_buff *skb)
{
skb->h.raw = skb->data;
}
static inline void
skb_reset_network_header(struct sk_buff *skb)
{
skb->nh.raw = skb->data;
}
static inline void
skb_reset_mac_header(struct sk_buff *skb)
{
skb->mac.raw = skb->data;
}
static inline void
skb_set_tail_pointer(struct sk_buff *skb, const int offset)
{
skb_reset_tail_pointer(skb);
skb->tail += offset;
}
static inline void
skb_set_transport_header(struct sk_buff *skb, const int offset)
{
skb_reset_transport_header(skb);
skb->h.raw += offset;
}
static inline void
skb_set_network_header(struct sk_buff *skb, const int offset)
{
skb_reset_network_header(skb);
skb->nh.raw += offset;
}
static inline void
skb_set_mac_header(struct sk_buff *skb, const int offset)
{
skb_reset_mac_header(skb);
skb->mac.raw += offset;
}
#endif /* !defined HAVE_KFUNC_SKB_TRANSPORT_HEADER */
#endif /* !defined HAVE_KMEMB_STRUCT_SK_BUFF_TRANSPORT_HEADER */
/*
* Primary data structures.
*/
struct np_bhash_bucket;
struct np_chash_bucket;
struct np_daddr {
uint32_t addr; /* IP address this destination */
uint32_t saddr; /* current source address */
unsigned char ttl; /* time to live, this destination */
unsigned char tos; /* type of service, this destination */
unsigned short mtu; /* maximum transfer unit this destination */
struct dst_entry *dst; /* route for this destination */
int oif; /* current interface */
};
struct np_saddr {
uint32_t addr; /* IP address this source */
};
struct np_baddr {
uint32_t addr; /* IP address this bind */
};
/* Private structure */
typedef struct np {
STR_DECLARATION (struct np); /* Stream declaration */
struct np *bnext; /* linkage for bind/list hash */
struct np **bprev; /* linkage for bind/list hash */
struct np_bhash_bucket *bhash; /* linkage for bind/list hash */
struct np *cnext; /* linkage for conn hash */
struct np **cprev; /* linkage for conn hash */
struct np_chash_bucket *chash; /* linkage for conn hash */
N_info_ack_t info; /* service provider information */
unsigned int BIND_flags; /* bind flags */
unsigned int CONN_flags; /* connect flags */
unsigned int CONIND_number; /* maximum number of outstanding connection indications */
bufq_t conq; /* connection indication queue */
bufq_t datq; /* data indication queue */
bufq_t resq; /* reset indication queue */
unsigned short pnum; /* number of bound protocol ids */
uint8_t protoids[16]; /* bound protocol ids */
unsigned short bnum; /* number of bound addresses */
unsigned short bport; /* bound port number (network order) */
struct np_baddr baddrs[8]; /* bound addresses */
unsigned short snum; /* number of source (connected) addresses */
unsigned short sport; /* source (connected) port number (network order) */
struct np_saddr saddrs[8]; /* source (connected) addresses */
unsigned short dnum; /* number of destination (connected) addresses */
unsigned short dport; /* destination (connected) port number (network order) */
struct np_daddr daddrs[8]; /* destination (connected) addresses */
struct N_qos_sel_info_ip qos; /* network service provider quality of service */
struct N_qos_range_info_ip qor; /* network service provider quality of service range */
} np_t;
#define PRIV(__q) (((__q)->q_ptr))
#define NP_PRIV(__q) ((struct np *)((__q)->q_ptr))
typedef struct df {
rwlock_t lock; /* structure lock */
SLIST_HEAD (np, np); /* master list of np (open) structures */
} df_t;
#if defined __RW_LOCK_UNLOCKED
static struct df master = {.lock = __RW_LOCK_UNLOCKED(master.lock), };
#elif defined RW_LOCK_UNLOCKED
static struct df master = {.lock = RW_LOCK_UNLOCKED, };
#else
#error cannot initialize read-write locks
#endif
/*
* Bind buckets, caches and hashes.
*/
struct np_bind_bucket {
struct np_bind_bucket *next; /* linkage of bind buckets for hash slot */
struct np_bind_bucket **prev; /* linkage of bind buckets for hash slot */
unsigned char proto; /* IP protocol identifier */
unsigned short port; /* port number (host order) */
struct np *owners; /* list of owners of this protocol/port combination */
struct np *dflt; /* default listeners/destinations for this protocol */
};
struct np_conn_bucket {
struct np_conn_bucket *next; /* linkage of conn buckets for hash slot */
struct np_conn_bucket **prev; /* linkage of conn buckets for hash slot */
unsigned char proto; /* IP protocol identifier */
unsigned short sport; /* source port number (network order) */
unsigned short dport; /* destination port number (network order) */
struct np *owners; /* list of owners of this protocol/sport/dport combination */
};
struct np_bhash_bucket {
rwlock_t lock;
struct np *list;
};
struct np_chash_bucket {
rwlock_t lock;
struct np *list;
};
STATIC struct np_bhash_bucket *np_bhash;
STATIC size_t np_bhash_size = 0;
STATIC size_t np_bhash_order = 0;
STATIC struct np_chash_bucket *np_chash;
STATIC size_t np_chash_size = 0;
STATIC size_t np_chash_order = 0;
STATIC INLINE fastcall __hot_in int
np_bhashfn(unsigned char proto, unsigned short bport)
{
return ((np_bhash_size - 1) & (proto + bport));
}
STATIC INLINE fastcall __unlikely int
np_chashfn(unsigned char proto, unsigned short sport, unsigned short dport)
{
return ((np_chash_size - 1) & (proto + sport + dport));
}
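/*
 * For illustration: both hash functions simply mask a cheap sum into a
 * power-of-two table.  With np_bhash_size = 256, proto = 17 and
 * bport = 0x1f90, the bind bucket is ((17 + 0x1f90) & 255) = 0xa1 = 161.
 * Ports are used in network byte order, so the index is endian-dependent,
 * but insertion and lookup use the same function and stay consistent.
 */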
#if defined HAVE_KTYPE_STRUCT_NET_PROTOCOL
#define mynet_protocol net_protocol
#endif /* defined HAVE_KTYPE_STRUCT_NET_PROTOCOL */
#if defined HAVE_KTYPE_STRUCT_INET_PROTOCOL
#define mynet_protocol inet_protocol
#endif /* defined HAVE_KTYPE_STRUCT_INET_PROTOCOL */
struct ipnet_protocol {
struct mynet_protocol proto;
struct mynet_protocol *next;
struct module *kmod;
};
struct np_prot_bucket {
unsigned char proto; /* protocol number */
int refs; /* reference count */
int corefs; /* N_CONS references */
int clrefs; /* N_CLNS references */
struct ipnet_protocol prot; /* Linux registration structure */
};
#if defined DEFINE_RWLOCK
STATIC DEFINE_RWLOCK(np_prot_lock);
#elif defined __RW_LOCK_UNLOCKED
STATIC rwlock_t np_prot_lock = __RW_LOCK_UNLOCKED(np_prot_lock);
#elif defined RW_LOCK_UNLOCKED
STATIC rwlock_t np_prot_lock = RW_LOCK_UNLOCKED;
#else
#error cannot initialize read-write locks
#endif
STATIC struct np_prot_bucket *np_prots[256];
STATIC kmem_cachep_t np_prot_cachep;
STATIC kmem_cachep_t np_priv_cachep;
static INLINE struct np *
np_get(struct np *np)
{
dassert(np != NULL);
atomic_inc(&np->refcnt);
return (np);
}
static INLINE __hot void
np_put(struct np *np)
{
dassert(np != NULL);
if (atomic_dec_and_test(&np->refcnt)) {
kmem_cache_free(np_priv_cachep, np);
}
}
static INLINE fastcall __hot void
np_release(struct np **npp)
{
struct np *np;
dassert(npp != NULL);
if (likely((np = XCHG(npp, NULL)) != NULL))
np_put(np);
}
static INLINE struct np *
np_alloc(void)
{
struct np *np;
if ((np = kmem_cache_alloc(np_priv_cachep, GFP_ATOMIC))) {
bzero(np, sizeof(*np));
atomic_set(&np->refcnt, 1);
spin_lock_init(&np->lock); /* "np-lock" */
np->priv_put = &np_put;
np->priv_get = &np_get;
// np->type = 0;
// np->id = 0;
// np->state = 0;
// np->flags = 0;
}
return (np);
}
/*
* Locking
*/
/* Must always be bottom-half versions to avoid lock badness. But give these
* different names to avoid conflict with generic definitions. */
//#if defined CONFIG_STREAMS_NOIRQ || defined _TEST
#if 1
#define spin_lock_str2(__lkp, __flags) \
do { (void)__flags; spin_lock_bh(__lkp); } while (0)
#define spin_unlock_str2(__lkp, __flags) \
do { (void)__flags; spin_unlock_bh(__lkp); } while (0)
#define write_lock_str2(__lkp, __flags) \
do { (void)__flags; write_lock_bh(__lkp); } while (0)
#define write_unlock_str2(__lkp, __flags) \
do { (void)__flags; write_unlock_bh(__lkp); } while (0)
#define read_lock_str2(__lkp, __flags) \
do { (void)__flags; read_lock_bh(__lkp); } while (0)
#define read_unlock_str2(__lkp, __flags) \
do { (void)__flags; read_unlock_bh(__lkp); } while (0)
#define local_save_str2(__flags) \
do { (void)__flags; local_bh_disable(); } while (0)
#define local_restore_str2(__flags) \
do { (void)__flags; local_bh_enable(); } while (0)
#else
#define spin_lock_str2(__lkp, __flags) \
spin_lock_irqsave(__lkp, __flags)
#define spin_unlock_str2(__lkp, __flags) \
spin_unlock_irqrestore(__lkp, __flags)
#define write_lock_str2(__lkp, __flags) \
write_lock_irqsave(__lkp, __flags)
#define write_unlock_str2(__lkp, __flags) \
write_unlock_irqrestore(__lkp, __flags)
#define read_lock_str2(__lkp, __flags) \
read_lock_irqsave(__lkp, __flags)
#define read_unlock_str2(__lkp, __flags) \
read_unlock_irqrestore(__lkp, __flags)
#define local_save_str2(__flags) \
local_irq_save(__flags)
#define local_restore_str2(__flags) \
local_irq_restore(__flags)
#endif
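/*
 * A typical use of these wrappers (sketch): take a hash bucket lock with
 * bottom-halves disabled while walking or modifying its chain.
 *
 *	unsigned long flags;
 *
 *	write_lock_str2(&hp->lock, flags);
 *	// ... walk or modify hp->list ...
 *	write_unlock_str2(&hp->lock, flags);
 *
 * In the bottom-half variants above, __flags is evaluated but unused.
 */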
/*
* Buffer allocation
*/
STATIC streamscall __unlikely void
np_bufsrv(long data)
{
str_t *s;
queue_t *q;
q = (queue_t *) data;
ensure(q, return);
s = STR_PRIV(q);
ensure(s, return);
if (q == s->iq) {
if (xchg(&s->ibid, 0) != 0)
atomic_dec(&s->refcnt);
qenable(q);
return;
}
if (q == s->oq) {
if (xchg(&s->obid, 0) != 0)
atomic_dec(&s->refcnt);
qenable(q);
return;
}
return;
}
noinline fastcall __unlikely void
np_unbufcall(str_t * s)
{
bufcall_id_t bid;
if ((bid = xchg(&s->ibid, 0))) {
unbufcall(bid);
atomic_dec(&s->refcnt);
}
if ((bid = xchg(&s->obid, 0))) {
unbufcall(bid);
atomic_dec(&s->refcnt);
}
}
noinline fastcall __unlikely void
np_bufcall(queue_t *q, size_t size, int prior)
{
if (q) {
str_t *s = STR_PRIV(q);
bufcall_id_t bid, *bidp = NULL;
if (q == s->iq)
bidp = &s->ibid;
if (q == s->oq)
bidp = &s->obid;
if (bidp) {
atomic_inc(&s->refcnt);
if ((bid = xchg(bidp, bufcall(size, prior, &np_bufsrv, (long) q)))) {
unbufcall(bid); /* Unsafe on LiS without atomic exchange above. */
atomic_dec(&s->refcnt);
}
return;
}
}
swerr();
return;
}
STATIC INLINE fastcall __unlikely mblk_t *
np_allocb(queue_t *q, size_t size, int prior)
{
mblk_t *mp;
if (likely((mp = allocb(size, prior)) != NULL))
return (mp);
rare();
np_bufcall(q, size, prior);
return (mp);
}
STATIC INLINE fastcall __unlikely mblk_t *
np_dupmsg(queue_t *q, mblk_t *bp)
{
mblk_t *mp;
if (likely((mp = dupmsg(bp)) != NULL))
return (mp);
rare();
np_bufcall(q, msgsize(bp), BPRI_MED);
return (mp);
}
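/*
 * Callers use these helpers from put and service procedures (a sketch of the
 * intended pattern): on allocation failure np_bufcall() has already scheduled
 * a bufcall, and np_bufsrv() will qenable the queue when buffers are again
 * available, so the caller simply requeues the message and returns.
 *
 *	if ((bp = np_allocb(q, size, BPRI_MED)) == NULL) {
 *		putbq(q, mp);	// retry when np_bufsrv() re-enables q
 *		return (-ENOBUFS);
 *	}
 */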
/*
* =========================================================================
*
* State Changes
*
* =========================================================================
*/
/* State flags */
#ifndef NSF_UNBND
#define NSF_UNBND (1 << NS_UNBND )
#define NSF_WACK_BREQ (1 << NS_WACK_BREQ )
#define NSF_WACK_UREQ (1 << NS_WACK_UREQ )
#define NSF_IDLE (1 << NS_IDLE )
#ifdef NS_WACK_OPTREQ
#define NSF_WACK_OPTREQ (1 << NS_WACK_OPTREQ )
#endif
#define NSF_WACK_RRES (1 << NS_WACK_RRES )
#define NSF_WCON_CREQ (1 << NS_WCON_CREQ )
#define NSF_WRES_CIND (1 << NS_WRES_CIND )
#define NSF_WACK_CRES (1 << NS_WACK_CRES )
#define NSF_DATA_XFER (1 << NS_DATA_XFER )
#define NSF_WCON_RREQ (1 << NS_WCON_RREQ )
#define NSF_WRES_RIND (1 << NS_WRES_RIND )
#define NSF_WACK_DREQ6 (1 << NS_WACK_DREQ6 )
#define NSF_WACK_DREQ7 (1 << NS_WACK_DREQ7 )
#define NSF_WACK_DREQ9 (1 << NS_WACK_DREQ9 )
#define NSF_WACK_DREQ10 (1 << NS_WACK_DREQ10 )
#define NSF_WACK_DREQ11 (1 << NS_WACK_DREQ11 )
#define NSF_NOSTATES (1 << NS_NOSTATES )
#endif
/* State masks */
#define NSM_ALLSTATES (NSF_NOSTATES - 1)
#define NSM_WACK_DREQ (NSF_WACK_DREQ6 \
|NSF_WACK_DREQ7 \
|NSF_WACK_DREQ9 \
|NSF_WACK_DREQ10 \
|NSF_WACK_DREQ11)
#define NSM_LISTEN (NSF_IDLE \
|NSF_WRES_CIND)
#define NSM_CONNECTED (NSF_WCON_CREQ\
|NSF_WRES_CIND\
|NSF_DATA_XFER\
|NSF_WCON_RREQ\
|NSF_WRES_RIND)
#define NSM_DISCONN (NSF_IDLE\
|NSF_UNBND)
#define NSM_INDATA (NSF_DATA_XFER\
|NSF_WCON_RREQ)
#define NSM_OUTDATA (NSF_DATA_XFER\
|NSF_WRES_RIND)
#ifndef N_PROVIDER
#define N_PROVIDER 0
#define N_USER 1
#endif
#ifdef _DEBUG
STATIC const char *
np_state_name(np_ulong state)
{
switch (state) {
case NS_UNBND:
return ("NS_UNBND");
case NS_WACK_BREQ:
return ("NS_WACK_BREQ");
case NS_WACK_UREQ:
return ("NS_WACK_UREQ");
case NS_IDLE:
return ("NS_IDLE");
case NS_WACK_OPTREQ:
return ("NS_WACK_OPTREQ");
case NS_WACK_RRES:
return ("NS_WACK_RRES");
case NS_WCON_CREQ:
return ("NS_WCON_CREQ");
case NS_WRES_CIND:
return ("NS_WRES_CIND");
case NS_WACK_CRES:
return ("NS_WACK_CRES");
case NS_DATA_XFER:
return ("NS_DATA_XFER");
case NS_WCON_RREQ:
return ("NS_WCON_RREQ");
case NS_WRES_RIND:
return ("NS_WRES_RIND");
case NS_WACK_DREQ6:
return ("NS_WACK_DREQ6");
case NS_WACK_DREQ7:
return ("NS_WACK_DREQ7");
case NS_WACK_DREQ9:
return ("NS_WACK_DREQ9");
case NS_WACK_DREQ10:
return ("NS_WACK_DREQ10");
case NS_WACK_DREQ11:
return ("NS_WACK_DREQ11");
case NS_NOSTATES:
return ("NS_NOSTATES");
default:
return ("(unknown)");
}
}
#endif /* _DEBUG */
/* State functions */
STATIC INLINE fastcall __unlikely void
np_set_state(struct np *np, const np_ulong state)
{
_printd(("%s: %p: %s <- %s\n", DRV_NAME, np, np_state_name(state),
np_state_name(np->info.CURRENT_state)));
np->info.CURRENT_state = state;
}
STATIC INLINE fastcall __unlikely np_ulong
np_get_state(const struct np *np)
{
return (np->info.CURRENT_state);
}
STATIC INLINE fastcall __unlikely np_ulong
np_chk_state(const struct np *np, const np_ulong mask)
{
return (((1 << np->info.CURRENT_state) & (mask)) != 0);
}
STATIC INLINE fastcall __unlikely np_ulong
np_not_state(const struct np *np, const np_ulong mask)
{
return (((1 << np->info.CURRENT_state) & (mask)) == 0);
}
STATIC INLINE fastcall __unlikely long
np_get_statef(const struct np *np)
{
return (1 << np_get_state(np));
}
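/*
 * Example (sketch): the state masks above let a fast path test a whole set
 * of states in one operation, e.g.
 *
 *	if (np_not_state(np, NSM_OUTDATA))
 *		return (NOUTSTATE);	// data valid only in NS_DATA_XFER or NS_WRES_RIND
 */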
/*
* =========================================================================
*
* IP Local Management
*
* =========================================================================
*/
STATIC int np_v4_rcv(struct sk_buff *skb);
STATIC void np_v4_err(struct sk_buff *skb, u32 info);
/*
* IP subsystem management
*/
#ifdef LINUX
/**
* np_v4_rcv_next - pass a socket buffer to the next handler
* @skb: socket buffer to pass
*
* In the Linux packet handler, if the packet is not for us, pass it to the next handler. If there
* is no next handler, free the packet and return. Note that we do not have to lock the hash
* because we own it and are also holding a reference to any module owning the next handler. This
* function returns zero (0) if the packet has not or will not be seen by another packet handler,
* and one (1) if the packet has or will be seen by another packet handler. This return value is
* used to determine whether to generate ICMP errors or not.
*/
STATIC INLINE fastcall __hot_in int
np_v4_rcv_next(struct sk_buff *skb)
{
struct np_prot_bucket *pb;
struct mynet_protocol *pp;
struct iphdr *iph;
unsigned char proto;
iph = (typeof(iph)) skb_network_header(skb);
proto = iph->protocol;
if ((pb = np_prots[proto]) && (pp = pb->prot.next)) {
pp->handler(skb);
return (1);
}
kfree_skb(skb);
return (0);
}
/**
* np_v4_err_next - pass a socket buffer to the next error handler
* @skb: socket buffer to pass
*
* In the Linux packet error handler, if the packet is not for us, pass it to the next error
* handler. If there is no next error handler, simply return.
*/
STATIC INLINE fastcall __hot_in void
np_v4_err_next(struct sk_buff *skb, __u32 info)
{
struct np_prot_bucket *pb;
struct mynet_protocol *pp;
unsigned char proto;
proto = ((struct iphdr *) skb->data)->protocol;
if ((pb = np_prots[proto]) && (pp = pb->prot.next))
pp->err_handler(skb, info);
return;
}
/**
* np_init_nproto - initialize network protocol override
* @proto: the protocol to register or override
*
* This is the network protocol override function.
*
* This is complicated because we hack the inet protocol tables. If no other protocol was
* previously registered, this reduces to inet_add_protocol(). If there is a protocol previously
* registered, we take a reference on the kernel module owning the entry, if possible, and replace
* the entry with our own, saving a pointer to the previous entry for passing sk_bufs along that we
* are not interested in. Taking a module reference is particularly for things like SCTP, where
* unloading the module after protocol override would break things horribly. Taking the reference
* keeps the module from unloading (this works for OpenSS7 SCTP as well as lksctp).
*/
STATIC INLINE fastcall __unlikely struct np_prot_bucket *
np_init_nproto(unsigned char proto, unsigned int type)
{
struct np_prot_bucket *pb;
struct ipnet_protocol *pp;
struct mynet_protocol **ppp;
int hash = proto & (MAX_INET_PROTOS - 1);
write_lock_bh(&np_prot_lock);
if ((pb = np_prots[proto]) != NULL) {
pb->refs++;
switch (type) {
case N_CONS:
++pb->corefs;
break;
case N_CLNS:
++pb->clrefs;
break;
default:
swerr();
break;
}
} else if ((pb = kmem_cache_alloc(np_prot_cachep, GFP_ATOMIC))) {
bzero(pb, sizeof(*pb));
pb->refs = 1;
switch (type) {
case N_CONS:
pb->corefs = 1;
break;
case N_CLNS:
pb->clrefs = 1;
break;
default:
swerr();
break;
}
pp = &pb->prot;
#ifdef HAVE_KMEMB_STRUCT_INET_PROTOCOL_PROTOCOL
pp->proto.protocol = proto;
pp->proto.name = "streams-np_ip";
#endif
#if defined HAVE_KTYPE_STRUCT_NET_PROTOCOL_PROTO
pp->proto.proto = proto;
#endif /* defined HAVE_KTYPE_STRUCT_NET_PROTOCOL_PROTO */
#if defined HAVE_KMEMB_STRUCT_NET_PROTOCOL_NO_POLICY || defined HAVE_KMEMB_STRUCT_INET_PROTOCOL_NO_POLICY
pp->proto.no_policy = 1;
#endif
pp->proto.handler = &np_v4_rcv;
pp->proto.err_handler = &np_v4_err;
ppp = &inet_protosp[hash];
{
net_protocol_lock();
#ifdef HAVE_OLD_STYLE_INET_PROTOCOL
while (*ppp && (*ppp)->protocol != proto)
ppp = &(*ppp)->next;
#endif /* HAVE_OLD_STYLE_INET_PROTOCOL */
if (*ppp != NULL) {
#ifdef HAVE_KMEMB_STRUCT_INET_PROTOCOL_COPY
/* can only override last entry */
if ((*ppp)->copy != 0) {
__ptrace(("Cannot override copy entry\n"));
net_protocol_unlock();
write_unlock_bh(&np_prot_lock);
kmem_cache_free(np_prot_cachep, pb);
return (NULL);
}
#endif /* HAVE_KMEMB_STRUCT_INET_PROTOCOL_COPY */
if ((pp->kmod = streams_module_address((ulong) *ppp))
&& pp->kmod != THIS_MODULE) {
if (!try_module_get(pp->kmod)) {
__ptrace(("Cannot acquire module\n"));
net_protocol_unlock();
write_unlock_bh(&np_prot_lock);
kmem_cache_free(np_prot_cachep, pb);
return (NULL);
}
}
#if defined HAVE_KMEMB_STRUCT_NET_PROTOCOL_NEXT || defined HAVE_KMEMB_STRUCT_INET_PROTOCOL_NEXT
pp->proto.next = (*ppp)->next;
#endif
}
pp->next = xchg(ppp, &pp->proto);
net_protocol_unlock();
}
/* link into hash slot */
np_prots[proto] = pb;
}
write_unlock_bh(&np_prot_lock);
return (pb);
}
/**
* np_term_nproto - terminate network protocol override
* @proto: network protocol to terminate
*
* This is the network protocol restoration function.
*
* This is complicated and brittle. The module stuff here is just for ourselves (other kernel
* modules pulling the same trick) as Linux IP protocols are normally kernel resident. If a
* protocol was previously registered, restore the protocol's entry and drop the reference to its
* owning kernel module. If there was no protocol previously registered, this reduces to
* inet_del_protocol().
*/
STATIC INLINE fastcall __unlikely void
np_term_nproto(unsigned char proto, unsigned int type)
{
struct np_prot_bucket *pb;
write_lock_bh(&np_prot_lock);
if ((pb = np_prots[proto]) != NULL) {
switch (type) {
case N_CONS:
assure(pb->corefs > 0);
--pb->corefs;
break;
case N_CLNS:
assure(pb->clrefs > 0);
--pb->clrefs;
break;
default:
swerr();
break;
}
if (--pb->refs == 0) {
struct ipnet_protocol *pp = &pb->prot;
struct mynet_protocol **ppp;
int hash = proto & (MAX_INET_PROTOS - 1);
ppp = &inet_protosp[hash];
{
net_protocol_lock();
#ifdef HAVE_OLD_STYLE_INET_PROTOCOL
while (*ppp && *ppp != &pp->proto)
ppp = &(*ppp)->next;
if (pp->next)
pp->next->next = pp->proto.next;
#endif /* HAVE_OLD_STYLE_INET_PROTOCOL */
__assert(*ppp == &pp->proto);
*ppp = pp->next;
net_protocol_unlock();
}
if (pp->next != NULL && pp->kmod != NULL && pp->kmod != THIS_MODULE)
module_put(pp->kmod);
/* unlink from hash slot */
np_prots[proto] = NULL;
kmem_cache_free(np_prot_cachep, pb);
}
}
write_unlock_bh(&np_prot_lock);
}
#endif /* LINUX */
/**
* np_bind_prot - bind a protocol
* @proto: protocol number to bind
*
* NOTICES: Notes about registration. Older 2.4 kernels will allow you to register whatever inet
* protocols you want on top of any existing protocol. This is good. 2.6 kernels, on the other
* hand, do not allow registration of inet protocols over existing inet protocols. We rip symbols
* on 2.6 and put special code in the handler to give us effectively the old 2.4 approach.
* This is also detectable by the fact that inet_add_protocol() returns void on 2.4 and int on 2.6.
*
 * One issue with the 2.4 approach to registration is that the ip_input function passes a cloned skb
* to each protocol registered. We don't want to do that. If the message is for us, we want to
* process it without passing it to others.
*
 * One issue with the 2.6 approach to registration is that the ip_input function passes the skb to
* only one function. We don't want that either. If the message is not for us, we want to pass it
* to the next protocol module.
*/
STATIC INLINE fastcall __unlikely int
np_bind_prot(unsigned char proto, unsigned int type)
{
struct np_prot_bucket *pb;
if ((pb = np_init_nproto(proto, type)))
return (0);
return (-ENOMEM);
}
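/*
 * For contrast, the stock registration that the override above works around
 * (a sketch of the conventional 2.6 mechanism, not used by this driver):
 * inet_add_protocol() refuses to register over an occupied slot, which is
 * exactly why np_init_nproto() manipulates the protocol table directly.
 *
 *	static const struct net_protocol my_proto = {
 *		.handler = &np_v4_rcv,
 *		.err_handler = &np_v4_err,
 *		.no_policy = 1,
 *	};
 *	if (inet_add_protocol(&my_proto, proto) < 0)
 *		return (-EADDRINUSE);	// slot occupied; no override via the API
 */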
/**
* np_unbind_prot - unbind a protocol
* @proto: protocol number to unbind
*/
STATIC INLINE fastcall __unlikely void
np_unbind_prot(unsigned char proto, unsigned int type)
{
np_term_nproto(proto, type);
}
/**
* np_bind - bind a Stream to an NSAP
* @np: private structure
* @PROTOID_buffer: protocol ids to bind
 * @PROTOID_length: length of protocol ids
* @ADDR_buffer: addresses to bind
* @ADDR_length: length of addresses
* @CONIND_number: maximum number of connection indications
* @BIND_flags: bind flags
*
* Bind to protocol ids and port and addresses. We currently only bind the first protocol id
* regardless of the number specified. Binding supports wildcard addresses, both in port number and
* IP address. The bind hash contains bind buckets that list Streams that are bound to the same
* protocol id and port number.
*/
STATIC INLINE fastcall __unlikely int
np_bind(struct np *np, unsigned char *PROTOID_buffer, size_t PROTOID_length, struct sockaddr_in *ADDR_buffer,
const socklen_t ADDR_length, const np_ulong CONIND_number, const np_ulong BIND_flags)
{
struct np_bhash_bucket *hp;
unsigned short bport = ADDR_buffer[0].sin_port;
unsigned char proto = PROTOID_buffer[0];
size_t anum = ADDR_length / sizeof(*ADDR_buffer);
struct np *np2;
int i, j, err;
unsigned long flags;
PROTOID_length = 1;
hp = &np_bhash[np_bhashfn(proto, bport)];
write_lock_str2(&hp->lock, flags);
for (np2 = hp->list; np2; np2 = np2->bnext) {
if (proto != np2->protoids[0])
continue;
if (bport != np2->bport)
continue;
/* Allowed to bind to each NSAP once as DEFAULT_DEST, once as DEFAULT_LISTENER and once as
neither. */
if ((BIND_flags & (DEFAULT_DEST | DEFAULT_LISTENER)) !=
(np2->BIND_flags & (DEFAULT_DEST | DEFAULT_LISTENER)))
continue;
for (i = 0; i < np2->bnum; i++) {
for (j = 0; j < anum; j++)
if (np2->baddrs[i].addr == ADDR_buffer[j].sin_addr.s_addr)
break;
if (j < anum)
break;
}
if (i < np2->bnum)
break;
}
if (np2 != NULL) {
{
write_unlock_str2(&hp->lock, flags);
/* There is a question as to which error should be returned when a protocol address
is already bound.
NPI 2.0.0 says that "[i]f the NS provider cannot bind the specified address, it
may assign another network address to the user. It is the network user's
responsibility to check the network address returned in the N_BIND_ACK primitive
to see if it is the same as the one requested."
NPI 2.0.0 says "[o]nly one default listener Stream is allowed per occurrence of
NPI. An attempt to bind a default listener Stream when one is already bound
should result in an error (of type NBOUND)" and "[o]nly one default destination
			   stream per NSAP is allowed per occurrence of NPI. An attempt to bind a default
			   destination stream to an NSAP when one is already bound should result in an error
			   of type NBOUND." But also, "NBOUND: The NS user attempted to bind a second Stream
to a network address with the CONIND_number set to a non-zero value, or attempted
to bind a second Stream with the DEFAULT_LISTENER flag value set to non-zero."
However, we return NBOUND for a connectionless bind when an attempt is made to
bind a second address to the same NSAP where either both have the DEFAULT_DEST
flag set or both have the DEFAULT_DEST flag clear. */
return (NBOUND);
}
}
if ((err = np_bind_prot(proto, np->info.SERV_type))) {
write_unlock_str2(&hp->lock, flags);
return (err);
}
if ((np_long) np->qos.protocol == QOS_UNKNOWN)
np->qos.protocol = proto;
if ((np->bnext = hp->list))
np->bnext->bprev = &np->bnext;
np->bprev = &hp->list;
hp->list = np_get(np);
np->bhash = hp;
/* copy into private structure */
np->CONIND_number = CONIND_number;
np->BIND_flags = BIND_flags;
np->pnum = PROTOID_length;
for (i = 0; i < PROTOID_length; i++)
np->protoids[i] = PROTOID_buffer[i];
np->bnum = anum;
np->bport = bport;
ptrace(("%s: %s: bound to proto = %d, bport = %d\n", DRV_NAME, __FUNCTION__, (int) proto,
(int) ntohs(bport)));
for (i = 0; i < anum; i++)
np->baddrs[i].addr = ADDR_buffer[i].sin_addr.s_addr;
write_unlock_str2(&hp->lock, flags);
#if defined HAVE_KFUNC_SYNCHRONIZE_NET
#if defined HAVE_KFUNC_IN_ATOMIC || defined in_atomic
if (!in_interrupt() && !in_atomic())
#else
if (!in_interrupt())
#endif
synchronize_net(); /* might sleep */
#endif /* defined HAVE_KFUNC_SYNCHRONIZE_NET */
return (0);
}
#if defined HAVE_KFUNC_DST_OUTPUT
STATIC INLINE __hot_out int
#if defined HAVE_KFUNC_NF_HOOK_OKFN_2_ARG
np_ip_queue_xmit(struct sock *sk, struct sk_buff *skb)
#elif defined HAVE_KFUNC_NF_HOOK_OKFN_3_ARG
np_ip_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
#else
np_ip_queue_xmit(struct sk_buff *skb)
#endif
{
struct dst_entry *dst = skb_dst(skb);
struct iphdr *iph = (typeof(iph)) skb_network_header(skb);
#if defined NETIF_F_TSO
#if defined HAVE_KFUNC___IP_SELECT_IDENT_2_ARGS_SEGS || \
defined HAVE_KFUNC___IP_SELECT_IDENT_3_ARGS_SEGS
__ip_select_ident(iph, dst, 0);
#elif defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF
ip_select_ident_more(skb, dst, NULL, 0);
#else /* !defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF */
ip_select_ident_more(iph, dst, NULL, 0);
#endif /* defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF */
#else /* !defined NETIF_F_TSO */
ip_select_ident(iph, dst, NULL);
#endif /* defined NETIF_F_TSO */
ip_send_check(iph);
#ifndef NF_IP_LOCAL_OUT
#define NF_IP_LOCAL_OUT NF_INET_LOCAL_OUT
#endif
#if defined HAVE_KFUNC_IP_DST_OUTPUT
return NF_HOOK_(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dst->dev, ip_dst_output);
#else
return NF_HOOK_(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dst->dev, dst_output_);
#endif
}
#else /* !defined HAVE_KFUNC_DST_OUTPUT */
STATIC INLINE __hot_out int
np_ip_queue_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct iphdr *iph = skb->nh.iph;
if (skb->len > dst_pmtu(dst)) {
rare();
return ip_fragment(skb, dst->output);
} else {
iph->frag_off |= __constant_htons(IP_DF);
ip_send_check(iph);
return dst->output(skb);
}
}
#endif /* defined HAVE_KFUNC_DST_OUTPUT */
/**
* np_alloc_skb_slow - allocate a socket buffer from a message block
* @np: private pointer
* @mp: the message block
* @headroom: header room for resulting sk_buff
 * @gfp: GFP allocation flags (e.g. GFP_ATOMIC)
*
 * This is the old slow way of allocating a socket buffer.  We simply allocate a socket buffer with
* sufficient head room and copy the data from the message block(s) to the socket buffer. This is
* slow. This is the only way that LiS can do things (because it has unworkable message block
* allocation).
*/
noinline fastcall __unlikely struct sk_buff *
np_alloc_skb_slow(struct np *np, mblk_t *mp, unsigned int headroom, int gfp)
{
struct sk_buff *skb;
unsigned int dlen = msgsize(mp);
if (likely((skb = alloc_skb(headroom + dlen, GFP_ATOMIC)) != NULL)) {
skb_reserve(skb, headroom);
{
unsigned char *data;
mblk_t *b;
int blen;
data = skb_put(skb, dlen);
for (b = mp; b; b = b->b_cont) {
if ((blen = b->b_wptr - b->b_rptr) > 0) {
bcopy(b->b_rptr, data, blen);
data += blen;
__assert(data <= skb_tail_pointer(skb));
} else
rare();
}
}
freemsg(mp); /* must absorb */
}
return (skb);
}
#ifdef HAVE_SKBUFF_HEAD_CACHE_USABLE
extern kmem_cachep_t skbuff_head_cache;
/**
* np_alloc_skb_old - allocate a socket buffer from a message block
* @np: private pointer
* @mp: the message block
* @headroom: header room for resulting sk_buff
 * @gfp: GFP allocation flags (e.g. GFP_ATOMIC)
*
* Description: this function is used for zero-copy allocation of a socket buffer from a message
* block. The socket buffer contains all of the data in the message block including any head or
* tail room (db_base to db_lim). The data portion of the socket buffer contains the data
* referenced by the message block (b_rptr to b_wptr). Because there is no socket buffer destructor
* capable of freeing the message block, we steal the kmem_alloc'ed buffer from the message and
 * attach it to the socket buffer header.  The reference to the message block is consumed unless the
* function returns NULL.
*
 * A problem exists in converting mblks to sk_buffs (although vice versa is easy): sk_buffs put a
* hidden shared buffer structure at the end of the buffer (where it is easily overwritten on buffer
* overflows). There is not necessarily enough room at the end of the mblk to add this structure.
* There are several things that I added to the Stream head to help with this:
*
* 1. A SO_WRPAD option to M_SETOPTS that will specify how much room to leave after the last SMP
* cache line in the buffer.
*
* 2. Three flags, SO_NOCSUM, SO_CSUM, SO_CRC32C were added to the Stream head so that the stream
* can support partial checksum while copying from the user.
*
* 3. db_lim is now always set to the end of the actual allocation rather than the end of the
* requested allocation. Linux kmalloc() allocates from 2^n size memory caches that are
* always SMP cache line aligned.
*
* With these options in play, the size of the buffer should have sufficient room for the shared
 * buffer structure.  If, however, the data block was not delivered by the Stream head (but by an
 * intermediate module) or has been modified (by an intermediate module), the tail room might not be
* available. Instead of copying the entire buffer which would be quite memory intensive, in this
* case we allocate a new buffer and copy only the portion of the original buffer necessary to make
* room for the shared buffer structure.
*
* The same is true for the IP header portion. Using SO_WROFF it is possible to reserve sufficient
* room for the hardware header, IP header and UDP header. Message blocks should normally already
* contain this headroom. However, again, it might be possible that the message block originated at
* an intermediate module or was modified by an intermediate module unaware of this policy. If
* there is insufficient headroom, again we allocate a new message block large enough to contain the
* header and make two sk_buffs, one for the header and one for the payload.
*
* As a result, we might wind up with three socket buffers: one containing the headroom for the hard
* header, IP header and UDP header; one containing most of the data payload; and one containing the
* last fragment of the payload smaller than or equal to sizeof(struct skb_shared_info). All but
* the initial socket buffer are placed in the frag_list of the first socket buffer. Note that only
* the header need be completed. If checksum has not yet been performed, it is necessary to walk
* through the data to generate the checksum.
*/
noinline fastcall __unlikely struct sk_buff *
np_alloc_skb_old(struct np *np, mblk_t *mp, unsigned int headroom, int gfp)
{
struct sk_buff *skb;
unsigned char *beg, *end;
/* must not be a fastbuf */
if (unlikely(mp->b_datap->db_size <= FASTBUF))
goto go_slow;
/* must not be esballoc'ed */
if (unlikely(mp->b_datap->db_frtnp != NULL))
goto go_slow;
/* must be only reference (for now) */
if (unlikely(mp->b_datap->db_ref > 1))
goto go_slow;
beg = mp->b_rptr - headroom;
/* First, check if there is enough head room in the data block. */
if (unlikely(beg < mp->b_datap->db_base)) {
goto go_frag;
}
/* Next, check if there is enough tail room in the data block. */
end =
(unsigned char *) (((unsigned long) mp->b_wptr + (SMP_CACHE_BYTES - 1)) & ~(SMP_CACHE_BYTES - 1));
if (unlikely(end + sizeof(struct skb_shared_info) > mp->b_datap->db_lim)) {
goto go_frag;
}
/* Last, allocate a socket buffer header and point it to the payload data. */
skb = kmem_cache_alloc(skbuff_head_cache, gfp);
if (unlikely(skb == NULL))
goto no_skb;
memset(skb, 0, offsetof(struct sk_buff, truesize));
skb->truesize = end - beg + sizeof(struct sk_buff);
atomic_set(&skb->users, 1);
skb->head = mp->b_datap->db_base;
skb->data = mp->b_rptr;
skb_set_tail_pointer(skb, mp->b_wptr - mp->b_rptr);
#if defined NET_SKBUFF_DATA_USES_OFFSET
skb->end = end - skb->head;
#else /* defined NET_SKBUFF_DATA_USES_OFFSET */
skb->end = end;
#endif /* defined NET_SKBUFF_DATA_USES_OFFSET */
skb->len = mp->b_wptr - mp->b_rptr;
skb->cloned = 0;
skb->data_len = 0;
/* initialize shared data structure */
memset(skb_shinfo(skb), 0, sizeof(struct skb_shared_info));
atomic_set(&(skb_shinfo(skb)->dataref), 1);
/* need to release message block and data block without releasing buffer */
/* point into internal buffer */
mp->b_datap->db_frtnp = (struct free_rtn *)
((struct mdbblock *) ((struct mbinfo *) mp->b_datap - 1))->databuf;
/* override with dummy free routine */
mp->b_datap->db_frtnp->free_func = NULL; /* tells freeb not to call */
mp->b_datap->db_frtnp->free_arg = NULL;
freemsg(mp);
      no_skb:
	return (skb);
go_frag: /* for now */
go_slow:
return np_alloc_skb_slow(np, mp, headroom, gfp);
}
#endif
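/*
 * For orientation, the zero-copy layout that np_alloc_skb_old() builds (a
 * sketch; boundaries are as computed above):
 *
 *	db_base              b_rptr       b_wptr      end (cache aligned)
 *	|                    |            |           |
 *	v                    v            v           v
 *	+--------------------+------------+-----------+------------------+
 *	|      headroom      |    data    |  tailroom | skb_shared_info  |
 *	+--------------------+------------+-----------+------------------+
 *	skb->head            skb->data    skb->tail   skb->end
 *
 * The kmem buffer passes to the sk_buff intact; only the mblk and dblk
 * headers are released, via the dummy free routine installed above.
 */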
STATIC INLINE fastcall __hot_out struct sk_buff *
np_alloc_skb(struct np *np, mblk_t *mp, unsigned int headroom, int gfp)
{
struct sk_buff *skb;
if (unlikely((mp->b_datap->db_flag & (DB_SKBUFF)) == 0))
goto old_way;
if (unlikely((mp->b_rptr < mp->b_datap->db_base + headroom)))
goto go_slow;
if (unlikely((skb = (typeof(skb)) mp->b_datap->db_frtnp->free_arg) == NULL))
goto go_slow;
skb_get(skb);
skb_reserve(skb, mp->b_rptr - skb->data);
skb_put(skb, mp->b_wptr - mp->b_rptr);
freemsg(mp);
return (skb);
old_way:
#ifdef HAVE_SKBUFF_HEAD_CACHE_USABLE
return np_alloc_skb_old(np, mp, headroom, gfp);
#endif
go_slow:
return np_alloc_skb_slow(np, mp, headroom, gfp);
}
noinline fastcall int
np_route_output_slow(struct np *np, const uint32_t daddr, struct rtable **rtp)
{
int err;
if (XCHG(rtp, NULL) != NULL)
dst_release(XCHG(&np->daddrs[0].dst, NULL));
#if defined HAVE_KMEMB_STRUCT_RTABLE_RT_SRC
if (likely((err = ip_route_output(rtp, daddr, np->qos.saddr, 0, 0)) == 0)) {
dst_hold(rt_dst(*rtp));
np->daddrs[0].dst = rt_dst(*rtp);
np->daddrs[0].addr = daddr;
np->daddrs[0].saddr = (*rtp)->rt_src;
#if defined HAVE_KMEMB_STRUCT_RTABLE_RT_OIF
np->daddrs[0].oif = (*rtp)->rt_oif;
#endif /* defined HAVE_KMEMB_STRUCT_RTABLE_RT_OIF */
}
#else /* defined HAVE_KMEMB_STRUCT_RTABLE_RT_SRC */
{
struct flowi4 fl4;
struct rtable *rt;
#ifdef HAVE_KFUNC_FLOWI4_INIT_OUTPUT_12_ARGS
flowi4_init_output(&fl4, 0, 0, 0, RT_SCOPE_UNIVERSE, 0, 0, daddr, np->qos.saddr, 0, 0, (kuid_t){ 0 });
#else
flowi4_init_output(&fl4, 0, 0, 0, RT_SCOPE_UNIVERSE, 0, 0, daddr, np->qos.saddr, 0, 0);
#endif
rt = __ip_route_output_key(&init_net, &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
np->daddrs[0].dst = rt_dst(rt);
np->daddrs[0].addr = fl4.daddr;
np->daddrs[0].saddr = fl4.saddr;
np->daddrs[0].oif = fl4.flowi4_oif;
np->daddrs[0].tos = fl4.flowi4_tos;
if (rtp)
*rtp = rt;
err = 0;
}
#endif /* defined HAVE_KMEMB_STRUCT_RTABLE_RT_SRC */
return (err);
}
STATIC INLINE fastcall __hot_out int
np_route_output(struct np *np, const uint32_t daddr, struct rtable **rtp)
{
register struct rtable *rt;
if (likely((rt = *rtp) != NULL)) {
dst_hold(rt_dst(rt));
return (0);
}
return np_route_output_slow(np, daddr, rtp);
}
/**
* np_senddata - process a unit data request
* @np: Stream private structure
* @protocol: IP protocol number for packet
* @daddr: destination address
* @mp: message payload
*/
STATIC INLINE fastcall __hot_out int
np_senddata(struct np *np, uint8_t protocol, uint32_t daddr, mblk_t *mp)
{
struct rtable *rt;
int err;
rt = (struct rtable *) np->daddrs[0].dst;
prefetch(rt);
if (likely((err = np_route_output(np, daddr, &rt)) == 0)) {
struct sk_buff *skb;
struct net_device *dev = rt_dst(rt)->dev;
size_t hlen = ((dev->hard_header_len + 15) & ~15)
+ sizeof(struct iphdr) + 0;
size_t dlen = msgsize(mp);
size_t plen = dlen + 0;
size_t tlen = plen + sizeof(struct iphdr);
_ptrace(("%s: %s: sending data message block %p\n", DRV_NAME, __FUNCTION__, mp));
usual(hlen > sizeof(struct iphdr) + 0);
usual(dlen);
if (likely((skb = np_alloc_skb(np, mp, hlen, GFP_ATOMIC)) != NULL)) {
struct iphdr *iph;
uint32_t saddr;
#ifdef HAVE_KMEMB_STRUCT_RTABLE_RT_SRC
saddr = np->qos.saddr ? : rt->rt_src;
#else
saddr = np->qos.saddr ? : np->daddrs[0].saddr;
#endif
/* find headers */
__skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
skb_dst_set(skb, rt_dst(rt));
skb->priority = 0; // np->qos.priority;
iph = (typeof(iph)) skb_network_header(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = np->qos.tos ? : np->daddrs[0].tos;
iph->frag_off = htons(IP_DF); /* never frag */
// iph->frag_off = 0; /* need qos bit */
iph->ttl = np->qos.ttl ? : np->daddrs[0].ttl;
iph->daddr = daddr;
iph->saddr = saddr;
iph->protocol = protocol;
iph->tot_len = htons(tlen);
#ifndef HAVE_KFUNC_DST_OUTPUT
#ifdef HAVE_KFUNC___IP_SELECT_IDENT_2_ARGS
__ip_select_ident(iph, rt_dst(rt));
#elif defined HAVE_KFUNC___IP_SELECT_IDENT_3_ARGS
__ip_select_ident(iph, rt_dst(rt), 0);
#else
#error HAVE_KFUNC___IP_SELECT_IDENT_2_ARGS or HAVE_KFUNC___IP_SELECT_IDENT_3_ARGS must be defined.
#endif
#endif
_printd(("sending message %p\n", skb));
#ifdef HAVE_KFUNC_DST_OUTPUT
NF_HOOK_(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dev, np_ip_queue_xmit);
#else
np_ip_queue_xmit(skb);
#endif
return (QR_ABSORBED);
}
_rare();
return (-ENOBUFS);
}
_rare();
return (err);
}
STATIC INLINE fastcall int
np_datack(queue_t *q)
{
/* not supported */
return (-EOPNOTSUPP);
}
/**
* np_conn_check - check and enter into connection hashes
* @np: private structure
* @proto: protocol to which to connect
*/
STATIC fastcall int
np_conn_check(struct np *np, unsigned char proto)
{
unsigned short sport = np->sport;
unsigned short dport = np->dport;
struct np *conflict = NULL;
struct np_chash_bucket *hp, *hp1, *hp2;
unsigned long flags;
hp1 = &np_chash[np_chashfn(proto, dport, sport)];
hp2 = &np_chash[np_chashfn(proto, 0, 0)];
write_lock_str2(&hp1->lock, flags);
if (hp1 != hp2)
write_lock(&hp2->lock);
hp = hp1;
do {
register struct np *np2;
for (np2 = hp->list; np2; np2 = np2->cnext) {
int i, j;
			if (np_not_state(np2, (NSF_DATA_XFER | NSF_WRES_RIND)))
continue;
if (np2->sport != sport)
continue;
if (np2->dport != dport)
continue;
for (i = 0; conflict == NULL && i < np2->snum; i++)
for (j = 0; conflict == NULL && j < np->snum; j++)
if (np2->saddrs[i].addr == np->saddrs[j].addr)
conflict = np2;
if (conflict == NULL)
continue;
conflict = NULL;
for (i = 0; conflict == NULL && i < np2->dnum; i++)
for (j = 0; conflict == NULL && j < np->dnum; j++)
if (np2->daddrs[i].addr == np->daddrs[j].addr)
conflict = np2;
if (conflict == NULL)
continue;
break;
}
} while (conflict == NULL && hp != hp2 && (hp = hp2));
if (conflict != NULL) {
if (hp1 != hp2)
write_unlock(&hp2->lock);
write_unlock_str2(&hp1->lock, flags);
/* how do we say already connected? (-EISCONN) */
return (NBADADDR);
}
/* link into connection hash */
if ((np->cnext = hp1->list))
np->cnext->cprev = &np->cnext;
np->cprev = &hp1->list;
hp1->list = np_get(np);
np->chash = hp1;
if (hp1 != hp2)
write_unlock(&hp2->lock);
write_unlock_str2(&hp1->lock, flags);
return (0);
}
/**
* np_connect - form a connection
* @np: private structure
* @DEST_buffer: pointer to destination addresses
* @DEST_length: length of destination addresses
* @QOS_buffer: pointer to connection quality-of-service parameters
* @CONN_flags: connection flags
*
* Destination addresses and port number as well as connection request quality of service parameters
* should already be stored into the private structure. Yes, this information will remain if there
* is an error in the connection request. When any primitive containing options fails and returns
* and error, it is the caller's responsibility to set again the values of the options.
*/
STATIC fastcall int
np_connect(struct np *np, const struct sockaddr_in *DEST_buffer, socklen_t DEST_length,
struct N_qos_sel_conn_ip *QOS_buffer, const np_ulong CONN_flags)
{
size_t dnum = DEST_length / sizeof(*DEST_buffer);
int err;
int i;
err = NBADQOSPARAM;
/* first validate parameters */
if (QOS_buffer->priority != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->priority < np->qor.priority.priority_min_value)
goto error;
if ((np_long) QOS_buffer->priority > np->qor.priority.priority_max_value)
goto error;
} else {
QOS_buffer->priority = np->qos.priority;
}
if (QOS_buffer->protocol != QOS_UNKNOWN) {
for (i = 0; i < np->pnum; i++)
if (np->protoids[i] == QOS_buffer->protocol)
break;
if (i >= np->pnum)
goto error;
} else {
QOS_buffer->protocol = np->qos.protocol;
}
if (QOS_buffer->ttl != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->ttl < np->qor.ttl.ttl_min_value)
goto error;
if ((np_long) QOS_buffer->ttl > np->qor.ttl.ttl_max_value)
goto error;
} else {
QOS_buffer->ttl = np->qos.ttl;
}
if (QOS_buffer->tos != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->tos < np->qor.tos.tos_min_value)
goto error;
if ((np_long) QOS_buffer->tos > np->qor.tos.tos_max_value)
goto error;
} else {
QOS_buffer->tos = np->qos.tos;
}
if (QOS_buffer->mtu != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->mtu < np->qor.mtu.mtu_min_value)
goto error;
if ((np_long) QOS_buffer->mtu > np->qor.mtu.mtu_max_value)
goto error;
} else {
QOS_buffer->mtu = np->qos.mtu;
}
	/* Need to determine source addresses from bound addresses before we can test the source address. If
we are bound to specific addresses, then the source address list is simply the destination address
list. If bound to a wildcard address, then the source address list could be determined from the
scope of the destination addresses and the available interfaces and their addresses. However, for
the moment it is probably easier to simply allow wildcard source addresses and let the user
specify any address when there is a wildcard source address. */
np->sport = np->bport;
np->snum = np->bnum;
for (i = 0; i < np->bnum; i++)
np->saddrs[i].addr = np->baddrs[i].addr;
if (QOS_buffer->saddr != QOS_UNKNOWN) {
if (QOS_buffer->saddr != 0) {
for (i = 0; i < np->snum; i++) {
if (np->saddrs[i].addr == INADDR_ANY)
break;
if (np->saddrs[i].addr == QOS_buffer->saddr)
break;
}
if (i >= np->snum)
goto recover;
}
} else {
QOS_buffer->saddr = np->qos.saddr;
}
if (QOS_buffer->daddr != QOS_UNKNOWN) {
/* Specified default destination address must be in the destination address list. */
for (i = 0; i < dnum; i++)
if (DEST_buffer[i].sin_addr.s_addr == QOS_buffer->daddr)
break;
if (i >= dnum)
goto recover;
} else {
/* The default destination address is the first address in the list. */
QOS_buffer->daddr = DEST_buffer[0].sin_addr.s_addr;
}
/* Destination addresses have been checked as follows: they have been aligned. There is at least 1
address and no more than 8 addresses. The first address has an address family type of AF_INET or
zero (0). No IP address in the list is INADDR_ANY. Things that have not been checked are: there
might be duplicates in the list. The user might not have the necessary privilege to use some of
the addresses. Some addresses might be zeronet, broadcast or multicast addresses. The addresses
might be of disjoint scope. There might not exist a route to some addresses. The destination
port number might be zero. */
np->dport = DEST_buffer[0].sin_port;
err = NBADADDR;
if (np->dport == 0 && (np->bport != 0 || np->sport != 0))
goto recover;
if (np->dport != 0 && np->sport == 0)
/* TODO: really need to autobind the stream to a dynamically allocated source port number. */
goto recover;
for (i = 0; i < dnum; i++) {
struct rtable *rt = NULL;
if ((err = ip_route_output(&rt, DEST_buffer[i].sin_addr.s_addr, 0, 0, 0)))
goto recover;
np->daddrs[i].dst = rt_dst(rt);
/* Note that we do not have to use the destination reference cached above. It is enough that
we hold a reference to it so that it remains in the routing caches so lookups to this
destination are fast. They will be released upon disconnection. */
np->daddrs[i].addr = DEST_buffer[i].sin_addr.s_addr;
np->daddrs[i].ttl = QOS_buffer->ttl;
np->daddrs[i].tos = QOS_buffer->tos;
np->daddrs[i].mtu = dst_pmtu(np->daddrs[i].dst);
if (np->daddrs[i].mtu < QOS_buffer->mtu)
QOS_buffer->mtu = np->daddrs[i].mtu;
}
np->dnum = dnum;
/* store negotiated values */
np->qos.protocol = QOS_buffer->protocol;
np->qos.priority = QOS_buffer->priority;
np->qos.ttl = QOS_buffer->ttl;
np->qos.tos = QOS_buffer->tos;
np->qos.mtu = QOS_buffer->mtu;
np->qos.saddr = QOS_buffer->saddr;
np->qos.daddr = QOS_buffer->daddr;
/* note that on failure we are allowed to have partially negotiated some values */
/* note that all these state changes are not seen by the read side until we are placed into the
hashes under hash lock. */
/* try to place in connection hashes with conflict checks */
if ((err = np_conn_check(np, QOS_buffer->protocol)) != 0)
goto recover;
return (0);
recover:
/* clear out source addresses */
np->sport = 0;
for (i = 0; i < np->snum; i++) {
np->saddrs[i].addr = INADDR_ANY;
}
np->snum = 0;
/* clear out destination addresses */
np->dport = 0;
for (i = 0; i < np->dnum; i++) {
if (np->daddrs[i].dst)
dst_release(XCHG(&np->daddrs[i].dst, NULL));
np->daddrs[i].addr = INADDR_ANY;
np->daddrs[i].ttl = 0;
np->daddrs[i].tos = 0;
np->daddrs[i].mtu = 0;
}
np->dnum = 0;
error:
return (err);
}
/**
* np_reset_loc - perform a local reset
* @np: Stream private structure
* @RESET_orig: origin of reset
* @RESET_reason: reason for reset
* @dp: ICMP message payload
*
* When completing a local reset, it is necessary to send an ICMP message to the peer. The attached
* M_DATA message blocks contain the ICMP message payload. The @RESET_reason parameter contains the
* reset reason that translates to an ICMP error code. The destination for the reset on a
* multi-homed connection is the current default destination.
*/
STATIC int
np_reset_loc(struct np *np, np_ulong RESET_orig, np_ulong RESET_reason, mblk_t *dp)
{
fixme(("Write this function.\n"));
	/* should send ICMP, but don't discard it because the send function will not absorb it. */
return (QR_DONE);
}
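/*
 * A sketch of what the missing body might do (an assumption, not the
 * author's implementation): translate RESET_reason into an ICMP type and
 * code and emit it against the offending packet with icmp_send(), e.g.
 *
 *	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 *
 * where skb would be rebuilt from the attached M_DATA blocks in @dp.
 */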
/**
* np_reset_rem - perform a remote reset
* @np: Stream private structure
* @RESET_orig: origin of reset
* @RESET_reason: reason for reset
*
* When completing a remote reset, it is necessary to dequeue and free the earliest outstanding
* reset indication.
*/
STATIC int
np_reset_rem(struct np *np, np_ulong RESET_orig, np_ulong RESET_reason)
{
mblk_t *rp;
/* free last one on list */
if ((rp = bufq_tail(&np->resq)) != NULL) {
bufq_unlink(&np->resq, rp);
freemsg(rp);
}
return (0);
}
STATIC int
np_optmgmt(struct np *np, union N_qos_ip_types *QOS_buffer, np_ulong OPTMGMT_flags)
{
int i;
switch (QOS_buffer->n_qos_type) {
case N_QOS_SEL_INFO_IP:
/* protocol must be one of the bound protocol ids */
if ((np_long) QOS_buffer->n_qos_sel_info.protocol != QOS_UNKNOWN && np->pnum > 0) {
if ((np_long) QOS_buffer->n_qos_sel_info.protocol < 0) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_info.protocol > 255) {
return (NBADQOSPARAM);
}
for (i = 0; i < np->pnum; i++)
if (np->protoids[i] == QOS_buffer->n_qos_sel_info.protocol)
break;
if (i >= np->pnum) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_info.priority != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_info.priority <
np->qor.priority.priority_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_info.priority >
np->qor.priority.priority_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_info.ttl != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_info.ttl < np->qor.ttl.ttl_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_info.ttl > np->qor.ttl.ttl_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_info.tos != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_info.tos < np->qor.tos.tos_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_info.tos > np->qor.tos.tos_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_info.mtu != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_info.mtu < np->qor.mtu.mtu_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_info.mtu > np->qor.mtu.mtu_max_value) {
return (NBADQOSPARAM);
}
}
/* source address should be one of the specified source addresses */
if ((np_long) QOS_buffer->n_qos_sel_info.saddr != QOS_UNKNOWN && np->snum > 0) {
if (QOS_buffer->n_qos_sel_info.saddr != 0) {
for (i = 0; i < np->snum; i++) {
if (np->saddrs[i].addr == INADDR_ANY)
break;
if (np->saddrs[i].addr == QOS_buffer->n_qos_sel_info.saddr)
break;
}
if (i >= np->snum) {
return (NBADQOSPARAM);
}
}
}
/* destination address must be one of the specified destination addresses */
if ((np_long) QOS_buffer->n_qos_sel_info.daddr != QOS_UNKNOWN && np->dnum > 0) {
for (i = 0; i < np->dnum; i++)
if (np->daddrs[i].addr == QOS_buffer->n_qos_sel_info.daddr)
break;
if (i >= np->dnum) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_info.protocol != QOS_UNKNOWN)
np->qos.protocol = QOS_buffer->n_qos_sel_info.protocol;
if ((np_long) QOS_buffer->n_qos_sel_info.priority != QOS_UNKNOWN)
np->qos.priority = QOS_buffer->n_qos_sel_info.priority;
if ((np_long) QOS_buffer->n_qos_sel_info.ttl != QOS_UNKNOWN)
np->qos.ttl = QOS_buffer->n_qos_sel_info.ttl;
if ((np_long) QOS_buffer->n_qos_sel_info.tos != QOS_UNKNOWN)
np->qos.tos = QOS_buffer->n_qos_sel_info.tos;
if ((np_long) QOS_buffer->n_qos_sel_info.mtu != QOS_UNKNOWN)
np->qos.mtu = QOS_buffer->n_qos_sel_info.mtu;
if ((np_long) QOS_buffer->n_qos_sel_info.saddr != QOS_UNKNOWN)
np->qos.saddr = QOS_buffer->n_qos_sel_info.saddr;
if ((np_long) QOS_buffer->n_qos_sel_info.daddr != QOS_UNKNOWN)
np->qos.daddr = QOS_buffer->n_qos_sel_info.daddr;
break;
case N_QOS_RANGE_INFO_IP:
return (NBADQOSTYPE);
case N_QOS_SEL_CONN_IP:
if (!(np->info.SERV_type & N_CONS))
return (NBADQOSTYPE);
/* protocol must be one of the bound protocol ids */
if ((np_long) QOS_buffer->n_qos_sel_conn.protocol != QOS_UNKNOWN && np->pnum > 0) {
if ((np_long) QOS_buffer->n_qos_sel_conn.protocol < 0) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_conn.protocol > 255) {
return (NBADQOSPARAM);
}
for (i = 0; i < np->pnum; i++)
if (np->protoids[i] == QOS_buffer->n_qos_sel_conn.protocol)
break;
if (i >= np->pnum) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_conn.priority != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_conn.priority <
np->qor.priority.priority_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_conn.priority >
np->qor.priority.priority_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_conn.ttl != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_conn.ttl < np->qor.ttl.ttl_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_conn.ttl > np->qor.ttl.ttl_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_conn.tos != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_conn.tos < np->qor.tos.tos_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_conn.tos > np->qor.tos.tos_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_conn.mtu != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_conn.mtu < np->qor.mtu.mtu_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_conn.mtu > np->qor.mtu.mtu_max_value) {
return (NBADQOSPARAM);
}
}
/* source address should be one of the specified source addresses */
if ((np_long) QOS_buffer->n_qos_sel_conn.saddr != QOS_UNKNOWN && np->snum > 0) {
if (QOS_buffer->n_qos_sel_conn.saddr != 0) {
for (i = 0; i < np->snum; i++) {
if (np->saddrs[i].addr == INADDR_ANY)
break;
if (np->saddrs[i].addr == QOS_buffer->n_qos_sel_conn.saddr)
break;
}
if (i >= np->snum) {
return (NBADQOSPARAM);
}
}
}
/* destination address must be one of the specified destination addresses */
if ((np_long) QOS_buffer->n_qos_sel_conn.daddr != QOS_UNKNOWN) {
for (i = 0; i < np->dnum; i++)
if (np->daddrs[i].addr == QOS_buffer->n_qos_sel_conn.daddr)
break;
if (i >= np->dnum) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_conn.protocol != QOS_UNKNOWN)
np->qos.protocol = QOS_buffer->n_qos_sel_conn.protocol;
if ((np_long) QOS_buffer->n_qos_sel_conn.priority != QOS_UNKNOWN)
np->qos.priority = QOS_buffer->n_qos_sel_conn.priority;
if ((np_long) QOS_buffer->n_qos_sel_conn.ttl != QOS_UNKNOWN)
np->qos.ttl = QOS_buffer->n_qos_sel_conn.ttl;
if ((np_long) QOS_buffer->n_qos_sel_conn.tos != QOS_UNKNOWN)
np->qos.tos = QOS_buffer->n_qos_sel_conn.tos;
if ((np_long) QOS_buffer->n_qos_sel_conn.mtu != QOS_UNKNOWN)
np->qos.mtu = QOS_buffer->n_qos_sel_conn.mtu;
if ((np_long) QOS_buffer->n_qos_sel_conn.saddr != QOS_UNKNOWN)
np->qos.saddr = QOS_buffer->n_qos_sel_conn.saddr;
if ((np_long) QOS_buffer->n_qos_sel_conn.daddr != QOS_UNKNOWN)
np->qos.daddr = QOS_buffer->n_qos_sel_conn.daddr;
np->info.SERV_type = N_CONS;
break;
case N_QOS_SEL_UD_IP:
if (!(np->info.SERV_type & N_CLNS))
return (NBADQOSTYPE);
/* protocol must be one of the bound protocol ids */
if ((np_long) QOS_buffer->n_qos_sel_ud.protocol != QOS_UNKNOWN && np->pnum > 0) {
if ((np_long) QOS_buffer->n_qos_sel_ud.protocol < 0) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_ud.protocol > 255) {
return (NBADQOSPARAM);
}
for (i = 0; i < np->pnum; i++)
if (np->protoids[i] == QOS_buffer->n_qos_sel_ud.protocol)
break;
if (i >= np->pnum) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_ud.priority != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_ud.priority < np->qor.priority.priority_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_ud.priority > np->qor.priority.priority_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_ud.ttl != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_ud.ttl < np->qor.ttl.ttl_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_ud.ttl > np->qor.ttl.ttl_max_value) {
return (NBADQOSPARAM);
}
}
if ((np_long) QOS_buffer->n_qos_sel_ud.tos != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->n_qos_sel_ud.tos < np->qor.tos.tos_min_value) {
return (NBADQOSPARAM);
}
if ((np_long) QOS_buffer->n_qos_sel_ud.tos > np->qor.tos.tos_max_value) {
return (NBADQOSPARAM);
}
}
/* source address should be one of the specified source addresses */
if ((np_long) QOS_buffer->n_qos_sel_ud.saddr != QOS_UNKNOWN && np->snum > 0) {
if (QOS_buffer->n_qos_sel_ud.saddr != 0) {
for (i = 0; i < np->snum; i++) {
if (np->saddrs[i].addr == INADDR_ANY)
break;
if (np->saddrs[i].addr == QOS_buffer->n_qos_sel_ud.saddr)
break;
}
if (i >= np->snum) {
return (NBADQOSPARAM);
}
}
}
if ((np_long) QOS_buffer->n_qos_sel_ud.protocol != QOS_UNKNOWN)
np->qos.protocol = QOS_buffer->n_qos_sel_ud.protocol;
if ((np_long) QOS_buffer->n_qos_sel_ud.priority != QOS_UNKNOWN)
np->qos.priority = QOS_buffer->n_qos_sel_ud.priority;
if ((np_long) QOS_buffer->n_qos_sel_ud.ttl != QOS_UNKNOWN)
np->qos.ttl = QOS_buffer->n_qos_sel_ud.ttl;
if ((np_long) QOS_buffer->n_qos_sel_ud.tos != QOS_UNKNOWN)
np->qos.tos = QOS_buffer->n_qos_sel_ud.tos;
if ((np_long) QOS_buffer->n_qos_sel_ud.saddr != QOS_UNKNOWN)
np->qos.saddr = QOS_buffer->n_qos_sel_ud.saddr;
np->info.SERV_type = N_CLNS;
break;
default:
return (NBADQOSTYPE);
}
return (0);
}
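/*
 * The option management logic above treats any QOS field coded QOS_UNKNOWN (-1) as "leave the
 * current value unchanged" and range-checks every other field against np->qor. A minimal
 * user-space sketch of driving it follows (a hedged illustration, not part of the driver: it
 * assumes the usual NPI control-part layout of an N_optmgmt_req_t header followed by the QOS
 * buffer, and a Stream already open on the driver as fd). Here only the TTL is renegotiated
 * and OPTMGMT_flags is left zero:
 *
 *	struct {
 *		N_optmgmt_req_t hdr;
 *		struct N_qos_sel_info_ip qos;
 *	} req;
 *	struct strbuf ctrl;
 *
 *	req.hdr.PRIM_type = N_OPTMGMT_REQ;
 *	req.hdr.OPTMGMT_flags = 0;
 *	req.hdr.QOS_length = sizeof(req.qos);
 *	req.hdr.QOS_offset = sizeof(req.hdr);
 *	req.qos.n_qos_type = N_QOS_SEL_INFO_IP;
 *	req.qos.protocol = QOS_UNKNOWN;
 *	req.qos.priority = QOS_UNKNOWN;
 *	req.qos.ttl = 64;
 *	req.qos.tos = QOS_UNKNOWN;
 *	req.qos.mtu = QOS_UNKNOWN;
 *	req.qos.saddr = QOS_UNKNOWN;
 *	req.qos.daddr = QOS_UNKNOWN;
 *	ctrl.len = sizeof(req);
 *	ctrl.buf = (char *) &req;
 *	putmsg(fd, &ctrl, NULL, 0);
 *
 * The provider acknowledges with N_OK_ACK, or with N_ERROR_ACK carrying NBADQOSPARAM when one
 * of the range checks above fails.
 */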
/**
* np_unbind - unbind a Stream from an NSAP
* @np: private structure
*
* Simply remove the Stream from the bind hashes and release a reference to the Stream. This
* function can be called whether the stream is bound or not (and it is always called before the
* private structure is freed).
*/
STATIC int
np_unbind(struct np *np)
{
struct np_bhash_bucket *hp;
unsigned long flags;
if ((hp = np->bhash)) {
write_lock_str2(&hp->lock, flags);
if ((*np->bprev = np->bnext))
np->bnext->bprev = np->bprev;
np->bnext = NULL;
np->bprev = &np->bnext;
np->bhash = NULL;
np_unbind_prot(np->protoids[0], np->info.SERV_type);
np->bport = np->sport = 0;
np->bnum = np->snum = np->pnum = 0;
np_set_state(np, NS_UNBND);
np_put(np);
write_unlock_str2(&hp->lock, flags);
#if defined HAVE_KFUNC_SYNCHRONIZE_NET
#if defined HAVE_KFUNC_IN_ATOMIC || defined in_atomic
if (!in_interrupt() && !in_atomic())
#else
if (!in_interrupt())
#endif
synchronize_net(); /* might sleep */
#endif /* defined HAVE_KFUNC_SYNCHRONIZE_NET */
return (0);
}
return (-EALREADY);
}
/**
* np_passive - perform a passive connection
* @np: private structure
* @RES_buffer: responding addresses
* @RES_length: length of responding addresses
* @QOS_buffer: quality of service parameters
* @SEQ_number: connection indication being accepted
* @TOKEN_value: accepting Stream private structure
* @CONN_flags: connection flags
* @dp: user connect data
*/
noinline fastcall int
np_passive(struct np *np, struct sockaddr_in *RES_buffer, const socklen_t RES_length,
struct N_qos_sel_conn_ip *QOS_buffer, mblk_t *SEQ_number, struct np *TOKEN_value,
const np_ulong CONN_flags, mblk_t *dp)
{
size_t rnum = RES_length / sizeof(*RES_buffer);
int err;
struct iphdr *iph;
struct udphdr *uh;
int i, j;
/* Get at the connection indication. The packet is contained in the SEQ_number message block
starting with the IP header. */
iph = (typeof(iph)) SEQ_number->b_rptr;
uh = (typeof(uh)) (SEQ_number->b_rptr + (iph->ihl << 2));
if (TOKEN_value != np) {
err = NBADTOKEN;
/* Accepting Stream must be bound to the same protocol as connection indication. */
for (j = 0; j < TOKEN_value->pnum; j++)
if (TOKEN_value->protoids[j] == iph->protocol)
break;
if (j >= TOKEN_value->pnum)
/* Must be bound to the same protocol. */
goto error;
/* Accepting Stream must be bound to the same address (or wildcard) including destination
address in connection indication. */
for (i = 0; i < TOKEN_value->bnum; i++)
if (TOKEN_value->baddrs[i].addr == INADDR_ANY
|| TOKEN_value->baddrs[i].addr == iph->daddr)
break;
if (i >= TOKEN_value->bnum)
goto error;
}
/* validate parameters */
err = NBADQOSPARAM;
/* Parameters cannot be validated any earlier. One of the problems is that some of the information
against which we should be checking is contained in the connection indication packet, and other
information is associated with the destination addresses themselves, which are contained in the
responding address(es) for NPI-IP. Therefore, QOS parameter checks are performed here in
np_passive() rather than when the connection response is first received. */
if (QOS_buffer->protocol != QOS_UNKNOWN) {
/* Specified protocol probably needs to be the same as the indication, but since we only bind
to one protocol id at the moment that is not a problem. The connection indication
protocol was checked against the accepting Stream above. */
for (i = 0; i < TOKEN_value->pnum; i++)
if (TOKEN_value->protoids[i] == QOS_buffer->protocol)
break;
if (i >= TOKEN_value->pnum)
goto error;
} else {
QOS_buffer->protocol = TOKEN_value->qos.protocol;
}
if (QOS_buffer->priority != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->priority < TOKEN_value->qor.priority.priority_min_value)
goto error;
if ((np_long) QOS_buffer->priority > TOKEN_value->qor.priority.priority_max_value)
goto error;
} else {
QOS_buffer->priority = TOKEN_value->qos.priority;
}
if (QOS_buffer->ttl != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->ttl < TOKEN_value->qor.ttl.ttl_min_value)
goto error;
if ((np_long) QOS_buffer->ttl > TOKEN_value->qor.ttl.ttl_max_value)
goto error;
} else {
QOS_buffer->ttl = TOKEN_value->qos.ttl;
}
if (QOS_buffer->tos != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->tos < TOKEN_value->qor.tos.tos_min_value)
goto error;
if ((np_long) QOS_buffer->tos > TOKEN_value->qor.tos.tos_max_value)
goto error;
} else {
/* FIXME: TOS should be negotiated. The TOS should be upgraded to whatever TOS the caller
wishes, but not downgraded. */
QOS_buffer->tos = TOKEN_value->qos.tos;
}
if (QOS_buffer->mtu != QOS_UNKNOWN) {
if ((np_long) QOS_buffer->mtu < TOKEN_value->qor.mtu.mtu_min_value)
goto error;
if ((np_long) QOS_buffer->mtu > TOKEN_value->qor.mtu.mtu_max_value)
goto error;
/* FIXME: MTU should be negotiated. The MTU should be downgraded to the lesser value of what
the connection requires or what was specified, but not upgraded. */
} else {
QOS_buffer->mtu = TOKEN_value->qos.mtu;
}
/* Need to determine source addresses from the bound addresses before we can test the source
address. If we are bound to specific addresses, then the source address list is simply the bound
address list. If bound to a wildcard address, then the source address list could be determined from
the scope of the destination addresses and the available interfaces and their addresses. However, for
the moment it is probably easier to simply allow wildcard source addresses and let the user
specify any address when there is a wildcard source address. The port number is a different
situation: either the Stream is bound to the port number in the received connection indication, or
it was bound to a wildcard port number. In either case, the local port number for the connection
is the port number to which the connection indication was sent. */
TOKEN_value->sport = uh->dest;
TOKEN_value->snum = TOKEN_value->bnum;
for (i = 0; i < TOKEN_value->bnum; i++)
TOKEN_value->saddrs[i].addr = TOKEN_value->baddrs[i].addr;
if (QOS_buffer->saddr != QOS_UNKNOWN) {
if (QOS_buffer->saddr != 0) {
for (i = 0; i < TOKEN_value->snum; i++) {
if (TOKEN_value->saddrs[i].addr == INADDR_ANY)
break;
if (TOKEN_value->saddrs[i].addr == QOS_buffer->saddr)
break;
}
if (i >= TOKEN_value->snum)
goto recover;
}
} else {
QOS_buffer->saddr = TOKEN_value->qos.saddr;
}
/* Here's a problem: we don't really have any destination addresses yet, so we can't check at this
point. */
if (QOS_buffer->daddr != QOS_UNKNOWN) {
if (rnum > 0) {
/* Specified destination addresses must be in the responding address list. */
for (i = 0; i < rnum; i++)
if (RES_buffer[i].sin_addr.s_addr == QOS_buffer->daddr)
break;
if (i >= rnum)
goto recover;
} else {
/* If no responding address list is provided (rnum == 0), the destination address
must be the source address of the connection indication. */
if (QOS_buffer->daddr != iph->saddr)
goto recover;
}
} else {
QOS_buffer->daddr = rnum ? RES_buffer[0].sin_addr.s_addr : iph->saddr;
}
TOKEN_value->dport = rnum ? RES_buffer[0].sin_port : uh->source;
err = NBADADDR;
if (TOKEN_value->dport == 0 && (TOKEN_value->bport != 0 || TOKEN_value->sport != 0))
goto recover;
if (TOKEN_value->dport != 0 && TOKEN_value->sport == 0)
/* TODO: really need to autobind the stream to a dynamically allocated source port number. */
goto recover;
if (rnum > 0) {
for (i = 0; i < rnum; i++) {
struct rtable *rt = NULL;
if ((err = ip_route_output(&rt, RES_buffer[i].sin_addr.s_addr, 0, 0, 0)))
goto recover;
TOKEN_value->daddrs[i].dst = rt_dst(rt);
/* Note that we do not have to use the destination reference cached above. It is
enough that we hold a reference to it so that it remains in the routing caches so
lookups to this destination are fast. They will be released upon disconnection. */
TOKEN_value->daddrs[i].addr = RES_buffer[i].sin_addr.s_addr;
TOKEN_value->daddrs[i].ttl = QOS_buffer->ttl;
TOKEN_value->daddrs[i].tos = QOS_buffer->tos;
TOKEN_value->daddrs[i].mtu = dst_pmtu(TOKEN_value->daddrs[i].dst);
if (TOKEN_value->daddrs[i].mtu < QOS_buffer->mtu)
QOS_buffer->mtu = TOKEN_value->daddrs[i].mtu;
}
TOKEN_value->dnum = rnum;
} else {
struct rtable *rt = NULL;
if ((err = ip_route_output(&rt, iph->saddr, 0, 0, 0)))
goto recover;
TOKEN_value->daddrs[0].dst = rt_dst(rt);
/* Note that we do not have to use the destination reference cached above. It is enough that
we hold a reference to it so that it remains in the routing caches so lookups to this
destination are fast. They will be released upon disconnection. */
TOKEN_value->daddrs[0].addr = iph->saddr;
TOKEN_value->daddrs[0].ttl = QOS_buffer->ttl;
TOKEN_value->daddrs[0].tos = QOS_buffer->tos;
TOKEN_value->daddrs[0].mtu = dst_pmtu(TOKEN_value->daddrs[0].dst);
if (TOKEN_value->daddrs[0].mtu < QOS_buffer->mtu)
QOS_buffer->mtu = TOKEN_value->daddrs[0].mtu;
TOKEN_value->dnum = 1;
}
/* store negotiated qos values */
TOKEN_value->qos.protocol = QOS_buffer->protocol;
TOKEN_value->qos.priority = QOS_buffer->priority;
TOKEN_value->qos.ttl = QOS_buffer->ttl;
TOKEN_value->qos.tos = QOS_buffer->tos;
TOKEN_value->qos.mtu = QOS_buffer->mtu;
TOKEN_value->qos.saddr = QOS_buffer->saddr;
TOKEN_value->qos.daddr = QOS_buffer->daddr;
/* note: on failure we are permitted to leave options partially negotiated */
/* try to place in connection hashes with conflict checks */
if ((err = np_conn_check(TOKEN_value, QOS_buffer->protocol)) != 0)
goto recover;
if (dp != NULL)
if (unlikely((err = np_senddata(np, np->qos.protocol, np->qos.daddr, dp)) != QR_ABSORBED))
goto recover;
if (SEQ_number != NULL) {
bufq_unlink(&np->conq, SEQ_number);
freeb(XCHG(&SEQ_number, SEQ_number->b_cont));
/* queue any pending data */
while (SEQ_number)
put(TOKEN_value->oq, XCHG(&SEQ_number, SEQ_number->b_cont));
}
return (QR_ABSORBED);
recover:
/* clear out source addresses */
TOKEN_value->sport = 0;
for (i = 0; i < TOKEN_value->snum; i++) {
TOKEN_value->saddrs[i].addr = INADDR_ANY;
}
TOKEN_value->snum = 0;
/* clear out destination addresses */
TOKEN_value->dport = 0;
for (i = 0; i < TOKEN_value->dnum; i++) {
if (TOKEN_value->daddrs[i].dst)
dst_release(XCHG(&TOKEN_value->daddrs[i].dst, NULL));
TOKEN_value->daddrs[i].addr = INADDR_ANY;
TOKEN_value->daddrs[i].ttl = 0;
TOKEN_value->daddrs[i].tos = 0;
TOKEN_value->daddrs[i].mtu = 0;
}
TOKEN_value->dnum = 0;
error:
return (err);
}
/**
* np_disconnect - disconnect a network connection
* @np: private structure
* @RES_buffer: responding address (unused)
* @SEQ_number: connection indication being refused
* @DISCON_reason: disconnect reason (unused)
* @dp: user disconnect data
*/
STATIC int
np_disconnect(struct np *np, struct sockaddr_in *RES_buffer, mblk_t *SEQ_number, const np_ulong DISCON_reason,
mblk_t *dp)
{
struct np_chash_bucket *hp;
int err;
unsigned long flags;
if (dp != NULL) {
err = np_senddata(np, np->qos.protocol, np->qos.daddr, dp);
if (unlikely(err != QR_ABSORBED))
goto error;
}
if (SEQ_number != NULL) {
bufq_unlink(&np->conq, SEQ_number);
freemsg(SEQ_number);
}
if ((hp = np->chash) != NULL) {
write_lock_str2(&hp->lock, flags);
if ((*np->cprev = np->cnext))
np->cnext->cprev = np->cprev;
np->cnext = NULL;
np->cprev = &np->cnext;
np->chash = NULL;
np->dport = np->sport = 0;
np->dnum = np->snum = 0;
np_set_state(np, NS_IDLE);
np_put(np);
write_unlock_str2(&hp->lock, flags);
}
return (QR_ABSORBED);
error:
return (err);
}
/*
* Addressing:
*
* NSAPs (Protocol IDs) are IP protocol numbers. NSAP addresses consist of a port number and a
* list of IP addresses. If the port number is zero, any port number is used. Initially only
* IPv4 is supported.
*
* There are two types of providers: connectionless and connection oriented.
*
* - Connectionless providers will start delivering packets after the bind.
*
* - When the NS provider is bound to multiple protocol ids, or bound or connected to multiple
* addresses, data will be delivered as N_DATA_IND primitives that contain the protocol id index,
* destination address index, and source addresses index in the DATA_xfer_flags as the highest
* order byte, next highest order byte and so on. An index of 0 indicates the first bound
* protocol id, source address or destination address. In this way, the high order 3 bytes of
* the DATA_xfer_flags are coded all zeros in the non-multiple case.
*
* The NS user is also permitted to send N_DATA_REQ primitives that contain the protocol id,
* destination address, and source addresses, similarly encoded in the DATA_xfer_flags (see the
* decoding sketch following this comment). Invalid indexes will error the stream.
*
* - Connection oriented provider bound as listening will start delivering packets as connection
* indications after the bind. The user can either reject these with a disconnect request or can
* accept them with a connection response, with a slight difference from normal NPI: the
* responding address in the connection response is the list of peer addresses to which to
* establish a connection, rather than the local responding address.
*
* If the connection is accepted on the listening stream, no further connection indications will
* be delivered. If accepted on another stream, further connection indications (belonging to
* another association) will be delivered.
*
* - Connection oriented providers bound as non-listening will deliver packets only after a
* successful connection establishment operation. Connection establishment does not do anything
* except provide a port and list of addresses from which the provider is to deliver packets.
* This can be useful for RTP and for establishing endpoint communication with SCTP.
*
* - Before bind, the provider will report both N_CLNS and N_CONS. When bound with bind flags
* equal to N_CLNS in the second least significant BIND_flags byte, the provider will be
* connectionless. When bound with bind flags equal to N_CONS in the second least significant
* BIND_flags byte, the provider will be connection-oriented.
*/
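/*
 * A sketch of the DATA_xfer_flags index encoding described above (these helper macros are
 * illustrative only and not part of the driver). The protocol id index rides in the most
 * significant byte, the destination address index in the next byte, and the source address
 * index in the byte below that, leaving the low order byte for the ordinary transfer flags:
 *
 *	#define NP_XFER_PROTOID_IDX(flags)	(((flags) >> 24) & 0xff)
 *	#define NP_XFER_DADDR_IDX(flags)	(((flags) >> 16) & 0xff)
 *	#define NP_XFER_SADDR_IDX(flags)	(((flags) >>  8) & 0xff)
 *
 * With a single bound protocol id and single addresses all three indexes are zero, so the
 * DATA_xfer_flags look exactly as they would from a non-multiple provider.
 */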
/*
* TS Provider -> TS User (Indication, Confirmation and Ack) Primitives
* ====================================================================
*/
/**
* m_flush: deliver an M_FLUSH message upstream
* @q: a queue in the queue pair
* @how: FLUSHBAND or FLUSHALL
* @band: band to flush if how is FLUSHBAND
*/
noinline fastcall int
m_flush(queue_t *q, const int how, const int band)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
if (unlikely((mp = np_allocb(q, 2, BPRI_HI)) == NULL))
goto enobufs;
mp->b_datap->db_type = M_FLUSH;
*mp->b_wptr++ = how;
*mp->b_wptr++ = band;
putnext(np->oq, mp);
return (QR_DONE);
enobufs:
return (-ENOBUFS);
}
/**
* m_error: deliver an M_ERROR message upstream
* @q: a queue in the queue pair (write queue)
* @error: the error to deliver
*/
noinline fastcall __unlikely int
m_error(queue_t *q, const int error)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
if (likely((mp = np_allocb(q, 2, BPRI_HI)) != NULL)) {
mp->b_datap->db_type = M_ERROR;
mp->b_wptr[0] = mp->b_wptr[1] = error;
mp->b_wptr += 2;
/* make sure the stream is disconnected */
np_disconnect(np, NULL, NULL, N_REASON_UNDEFINED, NULL);
/* make sure the stream is unbound */
np_unbind(np);
_printd(("%s: %p: <- M_ERROR %d\n", DRV_NAME, np, error));
qreply(q, mp);
return (QR_DONE);
}
return (-ENOBUFS);
}
/**
* m_hangup: deliver an M_HANGUP message upstream
* @q: a queue in the queue pair (write queue)
*/
noinline fastcall __unlikely int
m_hangup(queue_t *q)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
if (likely((mp = np_allocb(q, 0, BPRI_HI)) != NULL)) {
mp->b_datap->db_type = M_HANGUP;
/* make sure the stream is disconnected */
np_disconnect(np, NULL, NULL, N_REASON_UNDEFINED, NULL);
/* make sure the stream is unbound */
np_unbind(np);
_printd(("%s: %p: <- M_HANGUP\n", DRV_NAME, np));
qreply(q, mp);
return (QR_DONE);
}
return (-ENOBUFS);
}
/**
* ne_error_reply - reply to a message with an M_ERROR message
* @q: active queue in queue pair (write queue)
* @error: error number
*/
noinline fastcall __unlikely int
ne_error_reply(queue_t *q, const long error)
{
switch (error) {
case 0:
case 1:
case 2:
__seldom();
return (error);
case -EBUSY:
case -EAGAIN:
case -ENOMEM:
case -ENOBUFS:
return (error);
case -EPIPE:
case -ENETDOWN:
case -EHOSTUNREACH:
return m_hangup(q);
default:
return m_error(q, EPROTO);
}
}
/**
* ne_info_ack - generate an N_INFO_ACK and pass it upstream
* @q: active queue in queue pair (write queue)
*/
noinline fastcall int
ne_info_ack(queue_t *q)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
N_info_ack_t *p;
struct sockaddr_in *ADDR_buffer;
struct N_qos_sel_info_ip *QOS_buffer = &np->qos;
struct N_qos_range_info_ip *QOS_range_buffer = &np->qor;
unsigned char *PROTOID_buffer = np->protoids;
size_t ADDR_length = np->snum * sizeof(*ADDR_buffer);
size_t QOS_length = sizeof(*QOS_buffer);
size_t QOS_range_length = sizeof(*QOS_range_buffer);
size_t PROTOID_length = np->pnum;
size_t size = sizeof(*p) + ADDR_length + QOS_length + QOS_range_length + PROTOID_length;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
mp->b_datap->db_type = M_PCPROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_INFO_ACK;
p->NSDU_size = np->info.NSDU_size;
p->ENSDU_size = np->info.ENSDU_size;
p->CDATA_size = np->info.CDATA_size;
p->DDATA_size = np->info.DDATA_size;
p->ADDR_size = np->info.ADDR_size;
p->ADDR_length = ADDR_length;
p->ADDR_offset = ADDR_length ? sizeof(*p) : 0;
p->QOS_length = QOS_length;
p->QOS_offset = QOS_length ? sizeof(*p) + ADDR_length : 0;
p->QOS_range_length = QOS_range_length;
p->QOS_range_offset = QOS_range_length ? sizeof(*p) + ADDR_length + QOS_length : 0;
p->OPTIONS_flags = np->info.OPTIONS_flags;
p->NIDU_size = np->info.NIDU_size;
p->SERV_type = np->info.SERV_type ? np->info.SERV_type : (N_CONS | N_CLNS);
p->CURRENT_state = np_get_state(np);
p->PROVIDER_type = np->info.PROVIDER_type;
p->NODU_size = 536;
p->PROTOID_length = PROTOID_length;
p->PROTOID_offset = PROTOID_length ? sizeof(*p) + ADDR_length + QOS_length + QOS_range_length : 0;
p->NPI_version = np->info.NPI_version;
mp->b_wptr += sizeof(*p);
if (ADDR_length) {
int i;
for (i = 0; i < np->snum; i++) {
ADDR_buffer = (struct sockaddr_in *) mp->b_wptr;
ADDR_buffer->sin_family = AF_INET;
ADDR_buffer->sin_port = np->sport;
ADDR_buffer->sin_addr.s_addr = np->saddrs[i].addr;
mp->b_wptr += sizeof(struct sockaddr_in);
}
}
if (QOS_length) {
bcopy(QOS_buffer, mp->b_wptr, QOS_length);
mp->b_wptr += QOS_length;
}
if (QOS_range_length) {
bcopy(QOS_range_buffer, mp->b_wptr, QOS_range_length);
mp->b_wptr += QOS_range_length;
}
if (PROTOID_length) {
bcopy(PROTOID_buffer, mp->b_wptr, PROTOID_length);
mp->b_wptr += PROTOID_length;
}
_printd(("%s: %p: <- N_INFO_ACK\n", DRV_NAME, np));
qreply(q, mp);
return (QR_DONE);
enobufs:
return (-ENOBUFS);
}
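/*
 * The N_INFO_ACK built above packs its variable portions using the usual NPI length/offset
 * convention: each offset is measured in bytes from the start of the control part, and a zero
 * length is always paired with a zero offset. A hedged user-space sketch of walking such an
 * acknowledgement after getmsg() (buffer and error handling elided, names local to the
 * example):
 *
 *	N_info_ack_t *p = (N_info_ack_t *) ctrl.buf;
 *	struct sockaddr_in *addrs;
 *	unsigned char *protoids;
 *	int i, naddr;
 *
 *	if (p->ADDR_length) {
 *		addrs = (struct sockaddr_in *) (ctrl.buf + p->ADDR_offset);
 *		naddr = p->ADDR_length / sizeof(struct sockaddr_in);
 *		for (i = 0; i < naddr; i++)
 *			printf("bound to %s:%u\n", inet_ntoa(addrs[i].sin_addr),
 *			       ntohs(addrs[i].sin_port));
 *	}
 *	if (p->PROTOID_length) {
 *		protoids = (unsigned char *) (ctrl.buf + p->PROTOID_offset);
 *		for (i = 0; i < p->PROTOID_length; i++)
 *			printf("protocol id %u\n", protoids[i]);
 *	}
 */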
/**
* ne_bind_ack - NE_BIND_ACK event
* @q: active queue in queue pair (write queue)
* @PROTOID_buffer: protocol ids to bind
* @PROTOID_length: length of protocol ids
* @ADDR_buffer: addresses to bind
* @ADDR_length: length of addresses
* @CONIND_number: maximum number of connection indications
* @BIND_flags: bind flags
*
* Generate an N_BIND_ACK and pass it upstream.
*/
noinline fastcall int
ne_bind_ack(queue_t *q, unsigned char *PROTOID_buffer, size_t PROTOID_length, struct sockaddr_in *ADDR_buffer,
socklen_t ADDR_length, np_ulong CONIND_number, np_ulong BIND_flags)
{
struct np *np = NP_PRIV(q);
mblk_t *mp = NULL;
N_bind_ack_t *p;
size_t size = sizeof(*p) + ADDR_length + PROTOID_length;
int err;
err = -EFAULT;
if (unlikely(np_get_state(np) != NS_WACK_BREQ))
goto error;
err = -ENOBUFS;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto error;
err =
np_bind(np, PROTOID_buffer, PROTOID_length, ADDR_buffer, ADDR_length, CONIND_number, BIND_flags);
if (unlikely(err != 0)) {
freeb(mp);
goto error;
}
mp->b_datap->db_type = M_PCPROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_BIND_ACK;
p->ADDR_length = ADDR_length;
p->ADDR_offset = ADDR_length ? sizeof(*p) : 0;
p->CONIND_number = CONIND_number;
p->TOKEN_value = (BIND_flags & TOKEN_REQUEST) ? (long) RD(q) : 0;
p->PROTOID_length = PROTOID_length;
p->PROTOID_offset = PROTOID_length ? sizeof(*p) + ADDR_length : 0;
mp->b_wptr += sizeof(*p);
if (ADDR_length) {
bcopy(ADDR_buffer, mp->b_wptr, ADDR_length);
mp->b_wptr += ADDR_length;
}
if (PROTOID_length) {
bcopy(PROTOID_buffer, mp->b_wptr, PROTOID_length);
mp->b_wptr += PROTOID_length;
}
/* all ready, complete the bind */
np_set_state(np, NS_IDLE);
_printd(("%s: %p: <- N_BIND_ACK\n", DRV_NAME, np));
qreply(q, mp);
return (QR_DONE);
error:
return (err);
}
/**
* ne_error_ack - generate an N_ERROR_ACK and pass it upstream
* @q: active queue in queue pair (write queue)
* @ERROR_prim: primitive in error
* @err: error (positive NPI_error, negative UNIX_error)
*
* Some errors are passed through because they are intercepted and used by the put or service
* procedure on the write side queue. These are 0 (no error), -EBUSY (canput failed), -EAGAIN
* (lock failed), -ENOMEM (kmem_alloc failed), -ENOBUFS (allocb failed). All of these pass-through
* errors will cause the state to be restored to the state before the current write queue message
* was processed and the current write queue message will be placed back on the write queue.
*
* Some BSD-specific UNIX errors are translated to equivalent NPI errors.
*
* This function will also place the NPI provider interface state in the correct state following
* issuing the N_ERROR_ACK according to the Sequence of Primitives of the Network Provider Interface
* specification, Revision 2.0.0.
*/
noinline fastcall __unlikely int
ne_error_ack(queue_t *q, np_ulong ERROR_prim, np_long error)
{
struct np *np = NP_PRIV(q);
N_error_ack_t *p;
mblk_t *mp;
int err;
/* rollback state */
np_set_state(np, np->i_oldstate);
np->i_oldstate = np_get_state(np);
err = error;
switch (error) {
case -EBUSY:
case -EAGAIN:
case -ENOMEM:
case -ENOBUFS:
goto error;
case 0:
goto error;
case -EADDRINUSE:
error = NBOUND;
break;
case -EADDRNOTAVAIL:
case -EDESTADDRREQ:
error = NNOADDR;
break;
case -EMSGSIZE:
error = NBADDATA;
break;
case -EACCES:
error = NACCESS;
break;
case -EOPNOTSUPP:
error = NNOTSUPPORT;
break;
}
err = -ENOBUFS;
if ((mp = np_allocb(q, sizeof(*p), BPRI_MED)) == NULL)
goto error;
mp->b_datap->db_type = M_PCPROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_ERROR_ACK;
p->ERROR_prim = ERROR_prim;
p->NPI_error = (error < 0) ? NSYSERR : error;
p->UNIX_error = (error < 0) ? -error : 0;
mp->b_wptr += sizeof(*p);
_printd(("%s: %p: <- N_ERROR_ACK\n", DRV_NAME, np));
qreply(q, mp);
return (0);
error:
return (err);
}
/**
* ne_ok_ack - generate an N_OK_ACK and pass it upstream
* @q: active queue in queue pair (write queue)
* @CORRECT_prim: correct primitive
* @ADDR_buffer: destination or responding address
* @ADDR_length: length of destination or responding addresses
* @QOS_buffer: quality of service parameters
* @SEQ_number: sequence number (i.e. connection/reset indication sequence number)
* @TOKEN_value: token (i.e. connection response token)
* @flags: management flags, connection flags, disconnect reason, etc.
* @dp: user data
*/
STATIC int
ne_ok_ack(queue_t *q, np_ulong CORRECT_prim, struct sockaddr_in *ADDR_buffer, socklen_t ADDR_length,
void *QOS_buffer, mblk_t *SEQ_number, struct np *TOKEN_value, np_ulong flags, mblk_t *dp)
{
struct np *np = NP_PRIV(q);
N_ok_ack_t *p;
mblk_t *mp;
const size_t size = sizeof(*p);
int err = QR_DONE;
pl_t pl;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
mp->b_datap->db_type = M_PCPROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_OK_ACK;
p->CORRECT_prim = CORRECT_prim;
mp->b_wptr += sizeof(*p);
switch (np_get_state(np)) {
case NS_WACK_OPTREQ:
err = np_optmgmt(np, QOS_buffer, flags);
if (unlikely(err != 0))
goto free_error;
pl = bufq_lock(&np->conq);
np_set_state(np, bufq_length(&np->conq) > 0 ? NS_WRES_CIND : NS_IDLE);
bufq_unlock(&np->conq, pl);
break;
case NS_WACK_UREQ:
err = np_unbind(np);
if (unlikely(err != 0))
goto free_error;
/* The NPI spec says that the provider must flush both queues before responding with an
N_OK_ACK primitive to an N_UNBIND_REQ. This is to flush queued data for connectionless
providers. */
err = m_flush(q, FLUSHRW, 0);
if (unlikely(err != 0))
goto free_error;
np_set_state(np, NS_UNBND);
break;
case NS_WACK_CRES:
/* FIXME: needs to hold reference to and lock the accepting stream */
if (np != TOKEN_value)
TOKEN_value->i_oldstate = np_get_state(TOKEN_value);
np_set_state(TOKEN_value, NS_DATA_XFER);
err =
np_passive(np, ADDR_buffer, ADDR_length, QOS_buffer, SEQ_number, TOKEN_value, flags, dp);
if (unlikely(err != QR_ABSORBED)) {
np_set_state(TOKEN_value, TOKEN_value->i_oldstate);
goto free_error;
}
if (np != TOKEN_value) {
pl = bufq_lock(&np->conq);
np_set_state(np, bufq_length(&np->conq) > 0 ? NS_WRES_CIND : NS_IDLE);
bufq_unlock(&np->conq, pl);
}
break;
case NS_WACK_RRES:
err = np_reset_rem(np, N_USER, N_REASON_UNDEFINED);
if (unlikely(err != 0))
goto free_error;
pl = bufq_lock(&np->resq);
np_set_state(np, bufq_length(&np->resq) > 0 ? NS_WRES_RIND : NS_DATA_XFER);
bufq_unlock(&np->resq, pl);
break;
case NS_WACK_DREQ6:
case NS_WACK_DREQ7:
case NS_WACK_DREQ9:
case NS_WACK_DREQ10:
case NS_WACK_DREQ11:
err = np_disconnect(np, ADDR_buffer, SEQ_number, flags, dp);
if (unlikely(err != QR_ABSORBED))
goto free_error;
pl = bufq_lock(&np->conq);
np_set_state(np, bufq_length(&np->conq) > 0 ? NS_WRES_CIND : NS_IDLE);
bufq_unlock(&np->conq, pl);
break;
default:
/* Note: if we are not in a WACK state we simply do not change state. This occurs normally
when we are responding to a N_OPTMGMT_REQ in other than the NS_IDLE state. */
if (CORRECT_prim == N_OPTMGMT_REQ) {
err = np_optmgmt(np, QOS_buffer, flags);
if (unlikely(err != 0))
goto free_error;
break;
}
break;
}
_printd(("%s: %p: <- N_OK_ACK\n", DRV_NAME, np));
qreply(q, mp);
return (err);
free_error:
freemsg(mp);
goto error;
enobufs:
err = -ENOBUFS;
goto error;
error:
return (err);
}
/**
* ne_conn_con: perform N_CONN_CON actions
* @q: active queue in queue pair (write queue)
* @RES_buffer: responding addresses
* @RES_length: length of responding addresses
* @QOS_buffer: connected quality of service
* @CONN_flags: connected connection flags
*
* The NPI-IP driver only supports a pseudo-connection-oriented mode. The destination address and
* quality-of-service parameters returned in the N_CONN_CON do not represent a connection
* establishment or negotiation with the remote NS provider, but are simply formed locally and
* negotiated locally. The N_CONN_CON is synthesized from the N_CONN_REQ and is returned
* automatically and immediately in response to the N_CONN_REQ.
*
* Note that, unlike TPI, NPI does not return a N_OK_ACK in response to N_CONN_REQ and does not have
* a TS_WACK_CREQ state, only a NS_WCON_CREQ state. This makes NPI more amenable to this kind of
* pseudo-connection-oriented mode.
*
* Originally, this primitive returned one N_QOS_SEL_CONN_IP structure for each destination. This
* is no longer the case. Only one N_QOS_SEL_CONN_IP structure is returned representing the values
* for the entire association. ttl is the maximum ttl of the destinations. mtu is the smallest
* value for the destinations. These values are set in the private structure by the np_connect()
* function.
*/
STATIC INLINE fastcall int
ne_conn_con(queue_t *q, struct sockaddr_in *RES_buffer, socklen_t RES_length,
struct N_qos_sel_conn_ip *QOS_buffer, np_ulong CONN_flags)
{
struct np *np = NP_PRIV(q);
mblk_t *mp = NULL;
N_conn_con_t *p;
int err;
size_t QOS_length = sizeof(*QOS_buffer);
size_t size = sizeof(*p) + RES_length + QOS_length;
np_set_state(np, NS_WCON_CREQ);
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
err = np_connect(np, RES_buffer, RES_length, QOS_buffer, CONN_flags);
if (unlikely(err != 0))
goto free_error;
np_set_state(np, NS_DATA_XFER);
mp->b_datap->db_type = M_PCPROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_CONN_CON;
p->RES_length = RES_length;
p->RES_offset = RES_length ? sizeof(*p) : 0;
p->CONN_flags = CONN_flags;
p->QOS_length = QOS_length;
p->QOS_offset = QOS_length ? sizeof(*p) + RES_length : 0;
mp->b_wptr += sizeof(*p);
if (RES_length) {
bcopy(RES_buffer, mp->b_wptr, RES_length);
mp->b_wptr += RES_length;
}
if (QOS_length) {
bcopy(QOS_buffer, mp->b_wptr, QOS_length);
mp->b_wptr += QOS_length;
}
_printd(("%s: %p: <- N_CONN_CON\n", DRV_NAME, np));
qreply(q, mp);
return (QR_DONE);
free_error:
freeb(mp);
goto error;
enobufs:
err = -ENOBUFS;
goto error;
error:
return (err);
}
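/*
 * Because the connection is formed and negotiated entirely locally, the exchange seen by the
 * NS user is simply an N_CONN_REQ down and the synthesized N_CONN_CON straight back up. A
 * hedged user-space sketch of the request side (it assumes the standard NPI N_conn_req_t
 * layout; destination and QOS contents, plus all error handling, are elided):
 *
 *	struct {
 *		N_conn_req_t hdr;
 *		struct sockaddr_in dest;
 *		struct N_qos_sel_conn_ip qos;
 *	} req;
 *	struct strbuf ctrl;
 *
 *	req.hdr.PRIM_type = N_CONN_REQ;
 *	req.hdr.DEST_length = sizeof(req.dest);
 *	req.hdr.DEST_offset = sizeof(req.hdr);
 *	req.hdr.CONN_flags = 0;
 *	req.hdr.QOS_length = sizeof(req.qos);
 *	req.hdr.QOS_offset = sizeof(req.hdr) + sizeof(req.dest);
 *	ctrl.len = sizeof(req);
 *	ctrl.buf = (char *) &req;
 *	putmsg(fd, &ctrl, NULL, 0);
 *
 * Coding unneeded QOS fields QOS_UNKNOWN lets np_connect() fill in the current defaults; the
 * following getmsg() then collects the N_CONN_CON with the values actually negotiated.
 */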
/**
* ne_reset_con - generate a N_RESET_CON message
* @q: active queue in queue pair (write queue)
* @RESET_orig: origin of the reset
* @RESET_reason: reason for the reset
* @dp: message containing IP packet
*
* An N_RESET_CON message is sent only when the reset completes successfully.
*/
STATIC fastcall int
ne_reset_con(queue_t *q, np_ulong RESET_orig, np_ulong RESET_reason, mblk_t *dp)
{
struct np *np = NP_PRIV(q);
mblk_t *mp = NULL;
N_reset_con_t *p;
size_t size = sizeof(*p);
int err;
pl_t pl;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
if (unlikely((err = np_reset_loc(np, RESET_orig, RESET_reason, dp)) != 0))
goto free_error;
mp->b_datap->db_type = M_PROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_RESET_CON;
mp->b_wptr += sizeof(*p);
pl = bufq_lock(&np->resq);
np_set_state(np, bufq_length(&np->resq) > 0 ? NS_WRES_RIND : NS_DATA_XFER);
bufq_unlock(&np->resq, pl);
_printd(("%s: <- N_RESET_CON\n", DRV_NAME));
qreply(q, mp);
return (QR_DONE);
free_error:
freeb(mp);
goto error;
enobufs:
err = -ENOBUFS;
goto error;
error:
return (err);
}
/**
* ne_conn_ind - generate a N_CONN_IND message
* @q: active queue in queue pair (read queue)
* @dp: message containing IP packet
*
* We generate connection indications to Streams that are bound as listening to an address including
* the destination address of the IP packet, where no connection exists for the source address of
* the IP packet.
*/
STATIC INLINE fastcall __hot_get int
ne_conn_ind(queue_t *q, mblk_t *SEQ_number)
{
struct np *np = NP_PRIV(q);
mblk_t *mp, *cp;
N_conn_ind_t *p;
struct sockaddr_in *DEST_buffer, *SRC_buffer;
struct N_qos_sel_conn_ip *QOS_buffer;
np_ulong DEST_length, SRC_length, QOS_length;
size_t size;
struct iphdr *iph = (struct iphdr *) SEQ_number->b_rptr;
struct udphdr *uh = (struct udphdr *) (SEQ_number->b_rptr + (iph->ihl << 2));
unsigned long flags;
assure(SEQ_number->b_wptr >= SEQ_number->b_rptr + sizeof(*iph));
assure(SEQ_number->b_wptr >= SEQ_number->b_rptr + (iph->ihl << 2));
if (unlikely(np_not_state(np, (NSF_IDLE | NSF_WRES_CIND | NSF_WACK_CRES))))
goto discard;
/* Make sure we don't already have a connection indication */
spin_lock_str2(&np->conq.q_lock, flags);
for (cp = bufq_head(&np->conq); cp; cp = cp->b_next) {
struct iphdr *iph2 = (struct iphdr *) cp->b_rptr;
if (iph->protocol == iph2->protocol && iph->saddr == iph2->saddr && iph->daddr == iph2->daddr) {
/* already have a connection indication, link the data */
linkb(cp, SEQ_number);
spin_unlock_str2(&np->conq.q_lock, flags);
goto absorbed;
}
}
spin_unlock_str2(&np->conq.q_lock, flags);
if (unlikely(bufq_length(&np->conq) >= np->CONIND_number))
/* If there are already too many connection indications outstanding, discard further
connection indications until some are accepted -- we might get fancy later and queue it
anyway. Note that data for existing outstanding connection indications is preserved
above. */
goto eagain;
if (unlikely(np_not_state(np, (NSF_IDLE | NSF_WRES_CIND))))
/* If there is already a connection accepted on the listening stream, discard further
connection indications until the current connection disconnects */
goto eagain;
np_set_state(np, NS_WRES_CIND);
if (unlikely((cp = np_dupmsg(q, SEQ_number)) == NULL))
goto enobufs;
DEST_length = sizeof(*DEST_buffer);
SRC_length = sizeof(*SRC_buffer);
QOS_length = sizeof(*QOS_buffer);
size = sizeof(*p) + DEST_length + SRC_length + QOS_length;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto free_enobufs;
if (unlikely(!canputnext(q)))
goto ebusy;
mp->b_datap->db_type = M_PROTO;
mp->b_band = 0;
p = (typeof(p)) mp->b_wptr;
mp->b_wptr += sizeof(*p);
p->PRIM_type = N_CONN_IND;
p->DEST_length = DEST_length;
p->DEST_offset = DEST_length ? sizeof(*p) : 0;
p->SRC_length = SRC_length;
p->SRC_offset = SRC_length ? sizeof(*p) + DEST_length : 0;
p->SEQ_number = (np_ulong) (long) SEQ_number;
p->CONN_flags = 0;
p->QOS_length = QOS_length;
p->QOS_offset = QOS_length ? sizeof(*p) + DEST_length + SRC_length : 0;
if (DEST_length) {
DEST_buffer = (struct sockaddr_in *) mp->b_wptr;
DEST_buffer->sin_family = AF_INET;
DEST_buffer->sin_port = (np->bport != 0) ? uh->dest : 0;
DEST_buffer->sin_addr.s_addr = iph->daddr;
mp->b_wptr += sizeof(struct sockaddr_in);
}
if (SRC_length) {
SRC_buffer = (struct sockaddr_in *) mp->b_wptr;
SRC_buffer->sin_family = AF_INET;
SRC_buffer->sin_port = (np->bport != 0) ? uh->source : 0;
SRC_buffer->sin_addr.s_addr = iph->saddr;
mp->b_wptr += sizeof(struct sockaddr_in);
}
if (QOS_length) {
QOS_buffer = (struct N_qos_sel_conn_ip *) mp->b_wptr;
QOS_buffer->n_qos_type = N_QOS_SEL_CONN_IP;
/* FIXME: might be a problem here on 2.4 where we steal the packet by overwriting the
protocol id. */
QOS_buffer->protocol = iph->protocol;
QOS_buffer->priority = cp->b_band;
QOS_buffer->ttl = iph->ttl;
QOS_buffer->tos = iph->tos;
QOS_buffer->mtu = QOS_UNKNOWN; /* FIXME: determine route and get mtu from it */
QOS_buffer->daddr = iph->daddr;
QOS_buffer->saddr = iph->saddr;
mp->b_wptr += QOS_length;
}
/* Pull the IP header from the duplicate: all of the information it carries is conveyed in the
QOS structure and the address buffers. */
mp->b_cont = cp;
cp->b_rptr += (iph->ihl << 2);
/* save original in connection indication list */
bufq_queue(&np->conq, SEQ_number);
_printd(("%s: %p: <- N_CONN_IND\n", DRV_NAME, np));
putnext(q, mp);
absorbed:
return (QR_ABSORBED);
ebusy:
freeb(cp);
freeb(mp);
return (-EBUSY);
free_enobufs:
freemsg(cp);
enobufs:
return (-ENOBUFS);
eagain:
return (-EAGAIN);
discard:
return (QR_DONE);
}
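/*
 * To accept the indication built above on a Stream other than the listener, the NS user first
 * learns that Stream's token by binding it with the TOKEN_REQUEST flag (the token is returned
 * in the N_BIND_ACK TOKEN_value, see ne_bind_ack() above), then quotes both that token and the
 * SEQ_number from the N_CONN_IND in the response. A hedged sketch, assuming the standard NPI
 * N_conn_res_t layout:
 *
 *	N_conn_res_t res;
 *
 *	res.PRIM_type = N_CONN_RES;
 *	res.TOKEN_value = token;
 *	res.RES_length = 0;
 *	res.RES_offset = 0;
 *	res.SEQ_number = seq;
 *	res.CONN_flags = 0;
 *	res.QOS_length = 0;
 *	res.QOS_offset = 0;
 *
 * With RES_length zero the destination defaults to the source of the connection indication;
 * the provider answers through ne_ok_ack() and np_passive() above, or with an N_ERROR_ACK.
 */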
/**
* ne_discon_ind - NE_DISCON_IND NC disconnected event
* @q: active queue (read queue)
* @dp: message containing ICMP packet
*
* The N_DISCON_IND is sent when we encounter an error on a connection oriented Stream, i.e. as a
* result of receiving an ICMP error. For multihomed hosts, we only do this if all destination
* addresses have errors, otherwise, we just perform a reset for the affected destination.
*/
STATIC INLINE fastcall int
ne_discon_ind(queue_t *q, struct sockaddr_in *RES_buffer, socklen_t RES_length, np_ulong RESERVED_field,
np_ulong DISCON_orig, np_ulong DISCON_reason, mblk_t *SEQ_number, mblk_t *dp)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
N_discon_ind_t *p;
size_t size = sizeof(*p) + RES_length;
if (unlikely(np_not_state(np, (NSF_WRES_CIND | NSF_DATA_XFER | NSF_WRES_RIND | NSF_WCON_RREQ))))
goto discard;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
mp->b_datap->db_type = M_PROTO;
mp->b_band = 2; /* expedite */
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_DISCON_IND;
p->DISCON_orig = DISCON_orig;
p->DISCON_reason = DISCON_reason;
p->RES_length = RES_length;
p->RES_offset = RES_length ? sizeof(*p) : 0;
p->SEQ_number = (np_ulong) (long) SEQ_number;
mp->b_wptr += sizeof(*p);
if (RES_length) {
bcopy(RES_buffer, mp->b_wptr, RES_length);
mp->b_wptr += RES_length;
}
mp->b_cont = dp;
_printd(("%s: %p: <- N_DISCON_IND\n", DRV_NAME, np));
putnext(q, mp);
return (QR_ABSORBED);
enobufs:
return (-ENOBUFS);
discard:
return (QR_DONE);
}
/**
* ne_discon_ind_icmp - NE_DISCON_IND event resulting from ICMP message
* @q: active queue in queue pair
* @mp: the ICMP message
*/
STATIC INLINE fastcall int
ne_discon_ind_icmp(queue_t *q, mblk_t *mp)
{
struct np *np = NP_PRIV(q);
struct iphdr *iph;
struct icmphdr *icmp;
struct udphdr *uh;
struct sockaddr_in res_buf, *RES_buffer = &res_buf;
np_ulong DISCON_reason;
np_ulong RESERVED_field, DISCON_orig;
mblk_t *rp;
mblk_t *cp, *SEQ_number;
ptrdiff_t hidden;
int err;
iph = (struct iphdr *) mp->b_rptr; /* this is the ICMP message IP header */
icmp = (struct icmphdr *) (mp->b_rptr + (iph->ihl << 2));
iph = (struct iphdr *) (icmp + 1); /* this is the encapsulated IP header */
uh = (struct udphdr *) ((unsigned char *) iph + (iph->ihl << 2));
if (mp->b_wptr < (unsigned char *) (uh + 1))
uh = NULL; /* don't have a full transport header */
RES_buffer->sin_family = AF_INET;
RES_buffer->sin_port = uh ? uh->source : 0;
RES_buffer->sin_addr.s_addr = iph->saddr;
switch (icmp->type) {
case ICMP_DEST_UNREACH:
switch (icmp->code) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
case ICMP_PROT_UNREACH:
case ICMP_PORT_UNREACH:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_NSAP_UNREACH_P; // N_UD_ROUTE_UNAVAIL;
RESERVED_field = 0;
break;
case ICMP_FRAG_NEEDED:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_QOS_UNAVAIL_P; // N_UD_SEG_REQUIRED;
RESERVED_field = icmp->un.frag.mtu;
break;
case ICMP_NET_UNKNOWN:
case ICMP_HOST_UNKNOWN:
case ICMP_HOST_ISOLATED:
case ICMP_NET_ANO:
case ICMP_HOST_ANO:
case ICMP_PKT_FILTERED:
case ICMP_PREC_VIOLATION:
case ICMP_PREC_CUTOFF:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_NSAP_UNKNOWN; // N_UD_ROUTE_UNAVAIL;
RESERVED_field = 0;
break;
case ICMP_SR_FAILED:
case ICMP_NET_UNR_TOS:
case ICMP_HOST_UNR_TOS:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_QOS_UNAVAIL_P; // N_UD_QOS_UNAVAIL;
RESERVED_field = 0;
break;
default:
DISCON_orig = N_UNDEFINED;
DISCON_reason = N_REASON_UNDEFINED; // N_UD_UNDEFINED;
RESERVED_field = 0;
break;
}
break;
case ICMP_SOURCE_QUENCH:
/* Should not cause disconnect. */
DISCON_orig = N_PROVIDER;
DISCON_reason = N_CONGESTION; // N_UD_CONGESTION;
RESERVED_field = 0;
break;
case ICMP_TIME_EXCEEDED:
switch (icmp->code) {
case ICMP_EXC_TTL:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_QOS_UNAVAIL_P; // N_UD_LIFE_EXCEEDED;
RESERVED_field = 0;
break;
case ICMP_EXC_FRAGTIME:
DISCON_orig = N_PROVIDER;
DISCON_reason = N_REJ_QOS_UNAVAIL_P; // N_UD_TD_EXCEEDED;
RESERVED_field = 0;
break;
default:
DISCON_orig = N_UNDEFINED;
DISCON_reason = N_REASON_UNDEFINED; // N_UD_UNDEFINED;
RESERVED_field = 0;
break;
}
break;
case ICMP_PARAMETERPROB:
DISCON_orig = N_UNDEFINED;
DISCON_reason = N_REASON_UNDEFINED; // N_UD_UNDEFINED;
RESERVED_field = 0;
break;
default:
DISCON_orig = N_UNDEFINED;
DISCON_reason = N_REASON_UNDEFINED; // N_UD_UNDEFINED;
RESERVED_field = 0;
break;
}
/* check for outstanding reset indications for responding address */
for (rp = bufq_head(&np->resq); rp; rp = rp->b_next) {
struct iphdr *iph2 = (struct iphdr *) rp->b_rptr;
if (iph->protocol == iph2->protocol && iph->saddr == iph2->saddr && iph->daddr == iph2->daddr)
break;
}
/* check for outstanding connection indications for responding address */
for (cp = bufq_head(&np->conq); cp; cp = cp->b_next) {
struct iphdr *iph2 = (struct iphdr *) cp->b_rptr;
if (iph->protocol == iph2->protocol && iph->saddr == iph2->saddr && iph->daddr == iph2->daddr)
break;
}
SEQ_number = cp;
/* hide ICMP header */
hidden = (unsigned char *) iph - mp->b_rptr;
mp->b_rptr = (unsigned char *) iph;
if ((err =
ne_discon_ind(q, RES_buffer, sizeof(*RES_buffer), RESERVED_field, DISCON_orig, DISCON_reason,
SEQ_number, mp)) < 0)
mp->b_rptr -= hidden;
else {
if (cp != NULL) {
bufq_unlink(&np->conq, cp);
freemsg(cp);
}
if (rp != NULL) {
bufq_unlink(&np->resq, rp);
freemsg(rp);
}
}
return (err);
}
/**
* ne_data_ind - generate a N_DATA_IND message
* @q: active queue in queue pair (read queue)
* @dp: message containing IP packet
*
* Very fast. In fact, we could just pass the raw M_DATA blocks upstream. We leave the IP header
* in the block.
*/
STATIC INLINE fastcall __hot_get int
ne_data_ind(queue_t *q, mblk_t *dp)
{
mblk_t *mp;
N_data_ind_t *p;
const size_t size = sizeof(*p);
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
if (unlikely(!canputnext(q)))
goto ebusy;
mp->b_datap->db_type = M_PROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_DATA_IND;
/* TODO: here we can set some info like ECN... */
p->DATA_xfer_flags = 0;
mp->b_wptr += sizeof(*p);
mp->b_cont = dp;
dp->b_datap->db_type = M_DATA; /* just in case */
_printd(("%s: %p: <- N_DATA_IND\n", DRV_NAME, NP_PRIV(q)));
putnext(q, mp);
return (QR_ABSORBED);
ebusy:
freeb(mp);
return (-EBUSY);
enobufs:
return (-ENOBUFS);
}
/**
* ne_exdata_ind - generate a N_EXDATA_IND message
* @q: active queue in queue pair (read queue)
* @dp: message containing IP packet
*/
STATIC INLINE fastcall __hot_get int
ne_exdata_ind(queue_t *q, mblk_t *dp)
{
mblk_t *mp;
N_exdata_ind_t *p;
if (unlikely((mp = np_allocb(q, sizeof(*p), BPRI_MED)) == NULL))
goto enobufs;
if (unlikely(!bcanputnext(q, 1)))
goto ebusy;
mp->b_datap->db_type = M_PROTO;
mp->b_band = 1;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_EXDATA_IND;
mp->b_wptr += sizeof(*p);
mp->b_cont = dp;
dp->b_datap->db_type = M_DATA; /* just in case */
_printd(("%s: %p: <- N_EXDATA_IND\n", DRV_NAME, NP_PRIV(q)));
putnext(q, mp);
return (QR_ABSORBED);
ebusy:
freeb(mp);
return (-EBUSY);
enobufs:
return (-ENOBUFS);
}
/**
* ne_unitdata_ind - generate a N_UNITDATA_IND message
* @q: active queue in queue pair (read queue)
* @dp: message containing IP packet
*
* Note that on unit data indications the IP header is pulled before the packet is delivered in
* the data part of the message: the information from the IP header is conveyed in the source
* and destination address buffers in the control part.
*/
STATIC INLINE fastcall __hot_get int
ne_unitdata_ind(queue_t *q, mblk_t *dp)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
N_unitdata_ind_t *p;
struct sockaddr_in *SRC_buffer, *DEST_buffer;
const np_ulong SRC_length = sizeof(*SRC_buffer);
const np_ulong DEST_length = sizeof(*DEST_buffer);
size_t size = sizeof(*p) + SRC_length + DEST_length;
struct iphdr *iph;
struct udphdr *uh;
if (unlikely(np_get_state(np) != NS_IDLE))
goto discard;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
if (unlikely(!canputnext(q)))
goto ebusy;
mp->b_datap->db_type = M_PROTO;
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_UNITDATA_IND;
p->SRC_length = SRC_length;
p->SRC_offset = SRC_length ? sizeof(*p) : 0;
p->DEST_length = DEST_length;
p->DEST_offset = DEST_length ? sizeof(*p) + SRC_length : 0;
p->ERROR_type = 0;
mp->b_wptr += sizeof(*p);
iph = (struct iphdr *) dp->b_rptr;
uh = (struct udphdr *) (dp->b_rptr + (iph->ihl << 2));
if (SRC_length) {
SRC_buffer = (struct sockaddr_in *) mp->b_wptr;
SRC_buffer->sin_family = AF_INET;
SRC_buffer->sin_port = uh->source;
SRC_buffer->sin_addr.s_addr = iph->saddr;
mp->b_wptr += SRC_length;
}
if (DEST_length) {
DEST_buffer = (struct sockaddr_in *) mp->b_wptr;
DEST_buffer->sin_family = AF_INET;
DEST_buffer->sin_port = uh->dest;
DEST_buffer->sin_addr.s_addr = iph->daddr;
mp->b_wptr += DEST_length;
}
/* pull IP header */
dp->b_rptr = (unsigned char *) uh;
mp->b_cont = dp;
dp->b_datap->db_type = M_DATA; /* just in case */
_printd(("%s: %p: <- N_UNITDATA_IND\n", DRV_NAME, np));
putnext(q, mp);
return (QR_ABSORBED);
ebusy:
freeb(mp);
return (-EBUSY);
enobufs:
return (-ENOBUFS);
discard:
return (QR_DONE);
}
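/*
 * Receiving the above from user space is a single getmsg(): the control part carries the
 * N_unitdata_ind_t with the source and destination sockaddr_in buffers, and the data part
 * begins at the transport header (the IP header having been pulled as shown above). A hedged
 * sketch, with error handling elided:
 *
 *	char cbuf[256], dbuf[65536];
 *	struct strbuf ctrl = { sizeof(cbuf), 0, cbuf };
 *	struct strbuf data = { sizeof(dbuf), 0, dbuf };
 *	N_unitdata_ind_t *p;
 *	struct sockaddr_in *src;
 *	int flags = 0;
 *
 *	getmsg(fd, &ctrl, &data, &flags);
 *	p = (N_unitdata_ind_t *) cbuf;
 *	src = (struct sockaddr_in *) (cbuf + p->SRC_offset);
 *
 * After the call, data.len bytes of payload (starting at the transport header) are in dbuf.
 */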
/**
* ne_uderror_ind - generate a N_UDERROR_IND message
* @q: active queue in queue pair (read or write queue)
* @DEST_buffer: pointer to destination address
* @RESERVED_field: MTU if error is N_UD_SEG_REQUIRED
* @ERROR_type: error number
* @dp: message containing (part of) errored packet
*
* Note that in the special case of N_UD_SEG_REQUIRED, we use the RESERVED_field to indicate that
* the value of the MTU is for the destination, gleaned from the ICMP message. This is a sneaky
* trick, because the field must be coded zero according to spec, so the presence of a non-zero
* value indicates that the MTU value is present for supporting providers. Users that wish to
* ignore this field must use N_INFO_REQ and examine the NSDU_size parameter to determine the
* current value of the MTU.
*/
noinline __unlikely int
ne_uderror_ind(queue_t *q, struct sockaddr_in *DEST_buffer, np_ulong RESERVED_field, np_ulong ERROR_type,
mblk_t *dp)
{
struct np *np = NP_PRIV(q);
mblk_t *mp;
N_uderror_ind_t *p;
np_ulong DEST_length = sizeof(*DEST_buffer);
size_t size = sizeof(*p) + (DEST_buffer ? DEST_length : 0);
if (unlikely(np_get_state(np) != NS_IDLE))
goto discard;
if (unlikely((mp = np_allocb(q, size, BPRI_MED)) == NULL))
goto enobufs;
if (unlikely(!canputnext(np->oq)))
goto ebusy;
mp->b_datap->db_type = M_PROTO;
mp->b_band = 2; /* XXX move ahead of data indications */
p = (typeof(p)) mp->b_wptr;
p->PRIM_type = N_UDERROR_IND;
p->DEST_length = DEST_buffer ? DEST_length : 0;
p->DEST_offset = DEST_buffer ? sizeof(*p) : 0;
p->RESERVED_field = RESERVED_field;
p->ERROR_type = ERROR_type;
mp->b_wptr += sizeof(*p);
if (DEST_buffer) {
bcopy(DEST_buffer, mp->b_wptr, DEST_length);
mp->b_wptr += DEST_length;
}
if (dp != NULL) {
mp->b_cont = dp;
dp->b_datap->db_type = M_DATA; /* was M_ERROR in some cases */
}
_printd(("%s: %p: <- N_UDERROR_IND\n", DRV_NAME, np));
putnext(np->oq, mp);
return (QR_ABSORBED);
ebusy:
freeb(mp);
ptrace(("%s: ERROR: Flow controlled\n", DRV_NAME));
return (-EBUSY);
enobufs:
ptrace(("%s: ERROR: No buffers\n", DRV_NAME));
return (-ENOBUFS);
discard:
return (QR_DONE);
}
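/*
 * Given the RESERVED_field trick described above, a user that wants path MTU hints can watch
 * for it while consuming N_UDERROR_IND messages. A hedged sketch of the check (ctrl.buf is the
 * control part returned by getmsg(); names are local to the example):
 *
 *	N_uderror_ind_t *p = (N_uderror_ind_t *) ctrl.buf;
 *	np_ulong path_mtu;
 *
 *	if (p->PRIM_type == N_UDERROR_IND
 *	    && p->ERROR_type == N_UD_SEG_REQUIRED && p->RESERVED_field != 0)
 *		path_mtu = p->RESERVED_field;
 *
 * Users that ignore the field can instead issue N_INFO_REQ and examine the NSDU_size
 * parameter, as noted above.
 */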
/**
* ne_uderror_ind_icmp - generate an N_UDERROR_IND message from an ICMP packet
* @q: active queue in pair (read queue)
* @mp: message containing ICMP packet
*
* There is another reason for issuing an N_UDERROR_IND and that is Explicit Congestion
* Notification, but there is no ICMP message associated with that and it has not yet been coded:
* probably need an ne_uderror_ind_ecn() function.
*
* Note that in the special case of N_UD_SEG_REQUIRED, we use the RESERVED_field to indicate that
* the value of the MTU is for the destination, gleaned from the ICMP message. This is a sneaky trick,
* because the field must be coded zero according to NPI spec, so the presence of a non-zero value
* indicates the MTU value from a supporting NPI provider.
*/
noinline fastcall __unlikely int
ne_uderror_ind_icmp(queue_t *q, mblk_t *mp)
{
struct iphdr *iph;
struct icmphdr *icmp;
struct udphdr *uh;
struct sockaddr_in dst_buf, *DEST_buffer = &dst_buf;
np_ulong ERROR_type;
np_ulong RESERVED_field;
unsigned char *hidden;
int err;
iph = (struct iphdr *) mp->b_rptr; /* this is the ICMP message IP header */
icmp = (struct icmphdr *) (mp->b_rptr + (iph->ihl << 2));
iph = (struct iphdr *) (icmp + 1); /* this is the encapsulated IP header */
uh = (struct udphdr *) ((unsigned char *) iph + (iph->ihl << 2));
if (mp->b_wptr < (unsigned char *) (uh + 1))
uh = NULL; /* don't have a full transport header */
DEST_buffer->sin_family = AF_INET;
DEST_buffer->sin_port = uh ? uh->source : 0;
DEST_buffer->sin_addr.s_addr = iph->saddr;
switch (icmp->type) {
case ICMP_DEST_UNREACH:
switch (icmp->code) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
case ICMP_PROT_UNREACH:
case ICMP_PORT_UNREACH:
ERROR_type = N_UD_ROUTE_UNAVAIL;
RESERVED_field = 0;
break;
case ICMP_FRAG_NEEDED:
ERROR_type = N_UD_SEG_REQUIRED;
RESERVED_field = icmp->un.frag.mtu;
break;
case ICMP_NET_UNKNOWN:
case ICMP_HOST_UNKNOWN:
case ICMP_HOST_ISOLATED:
case ICMP_NET_ANO:
case ICMP_HOST_ANO:
case ICMP_PKT_FILTERED:
case ICMP_PREC_VIOLATION:
case ICMP_PREC_CUTOFF:
ERROR_type = N_UD_ROUTE_UNAVAIL;
RESERVED_field = 0;
break;
case ICMP_SR_FAILED:
case ICMP_NET_UNR_TOS:
case ICMP_HOST_UNR_TOS:
ERROR_type = N_UD_QOS_UNAVAIL;
RESERVED_field = 0;
break;
default:
ERROR_type = N_UD_UNDEFINED;
RESERVED_field =