Permalink
Fetching contributors…
Cannot retrieve contributors at this time
9245 lines (8690 sloc) 266 KB
/*****************************************************************************
@(#) File: src/drivers/rawip.c
-----------------------------------------------------------------------------
Copyright (c) 2008-2015 Monavacon Limited <http://www.monavacon.com/>
Copyright (c) 2001-2008 OpenSS7 Corporation <http://www.openss7.com/>
Copyright (c) 1997-2001 Brian F. G. Bidulock <bidulock@openss7.org>
All Rights Reserved.
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by the Free
Software Foundation, version 3 of the license.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>, or
write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA
02139, USA.
-----------------------------------------------------------------------------
U.S. GOVERNMENT RESTRICTED RIGHTS. If you are licensing this Software on
behalf of the U.S. Government ("Government"), the following provisions apply
to you. If the Software is supplied by the Department of Defense ("DoD"), it
is classified as "Commercial Computer Software" under paragraph 252.227-7014
of the DoD Supplement to the Federal Acquisition Regulations ("DFARS") (or any
successor regulations) and the Government is acquiring only the license rights
granted herein (the license rights customarily provided to non-Government
users). If the Software is supplied to any unit or agency of the Government
other than DoD, it is classified as "Restricted Computer Software" and the
Government's rights in the Software are defined in paragraph 52.227-19 of the
Federal Acquisition Regulations ("FAR") (or any successor regulations) or, in
the cases of NASA, in paragraph 18.52.227-86 of the NASA Supplement to the FAR
(or any successor regulations).
-----------------------------------------------------------------------------
Commercial licensing and support of this software is available from OpenSS7
Corporation at a fee. See http://www.openss7.com/
*****************************************************************************/
/* Identification string: the source path plus the build-supplied PACKAGE_ENVR and PACKAGE_DATE strings. */
static char const ident[] = "src/drivers/rawip.c (" PACKAGE_ENVR ") " PACKAGE_DATE;
/*
* This driver provides a somewhat different approach to RAW IP than the inet
* driver. It hooks directly to the inet_protos list in the Linux kernel and
* intercepts RAW IP packets ahead of Linux native RAW IP. RAW IP is then processed
* per TPI specifications. If no Stream is bound, the received sk_buff is
* passed on to Linux native RAW IP.
*
* One of the reasons for taking this approach instead of the Streams over
* Sockets approach of the inet.c driver is that RAW IP is easy enough to
* implement, and this driver will provide a reasonable comparison between
* the efficiency and performance of the Streams and the Socket frameworks.
*
* There are three ways that this module can operate: 1) The module can be
* opened as a pseudo-device driver (DRVOPEN|CLONEOPEN) in its own right. 2)
* The module can be pushed over another Stream. That Stream needs to be a
* freshly opened (unbound) NPI IP driver Stream. 3) An NPI IP Stream can be
* linked underneath the driver as a multiplexing driver. The reason for all
* these approaches is to determine the relative merits (performance and
* otherwise) of each approach.
*/
#define _SVR4_SOURCE
#include <sys/os7/compat.h>
#ifdef LINUX
#undef ASSERT
#include <linux/bitops.h>
#define tp_tst_bit(nr,addr) test_bit(nr,addr)
#define tp_set_bit(nr,addr) __set_bit(nr,addr)
#define tp_clr_bit(nr,addr) __clear_bit(nr,addr)
#include <linux/interrupt.h>
#ifdef HAVE_KINC_LINUX_BRLOCK_H
#include <linux/brlock.h>
#endif
#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/inet_ecn.h>
#include <net/snmp.h>
#include <net/udp.h>
#ifdef HAVE_KINC_NET_DST_H
#include <net/dst.h>
#endif
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#endif /* LINUX */
#include "net_hooks.h"
#include <sys/npi.h>
#include <sys/npi_ip.h>
#if defined HAVE_TIHDR_H
# include <tihdr.h>
#else
# include <sys/tihdr.h>
#endif
#include <sys/xti.h>
#include <sys/xti_inet.h>
#define T_ALLLEVELS -1
#define TP_DESCRIP "Raw Internet Protocol (RAWIP) STREAMS Driver"
#define TP_EXTRA "Part of the OpenSS7 IP Stack for Linux Fast-STREAMS"
#define TP_COPYRIGHT "Copyright (c) 2008-2015 Monavacon Limited. All Rights Reserved."
#define TP_REVISION "OpenSS7 src/drivers/rawip.c (" PACKAGE_ENVR ") " PACKAGE_DATE
#define TP_DEVICE "SVR 4.2 MP STREAMS RAW IP Driver"
#define TP_CONTACT "Brian Bidulock <bidulock@openss7.org>"
#define TP_LICENSE "GPL"
#define TP_BANNER TP_DESCRIP "\n" \
TP_EXTRA "\n" \
TP_REVISION "\n" \
TP_COPYRIGHT "\n" \
TP_DEVICE "\n" \
TP_CONTACT
#define TP_SPLASH TP_DESCRIP " - " \
TP_REVISION
/* Linux module metadata; the optional macros are only used when the kernel headers define them. */
#ifdef LINUX
MODULE_AUTHOR(TP_CONTACT);
MODULE_DESCRIPTION(TP_DESCRIP);
MODULE_SUPPORTED_DEVICE(TP_DEVICE);
#ifdef MODULE_LICENSE
MODULE_LICENSE(TP_LICENSE);
#endif /* MODULE_LICENSE */
#ifdef MODULE_ALIAS
MODULE_ALIAS("streams-rawip");
#endif /* MODULE_ALIAS */
#ifdef MODULE_VERSION
MODULE_VERSION(PACKAGE_ENVR);
#endif
#endif /* LINUX */
/* Driver identity taken from the build configuration (CONFIG_STREAMS_RAW_*). */
#define TP_DRV_ID CONFIG_STREAMS_RAW_MODID
#define TP_DRV_NAME CONFIG_STREAMS_RAW_NAME
#define TP_CMAJORS CONFIG_STREAMS_RAW_NMAJORS
#define TP_CMAJOR_0 CONFIG_STREAMS_RAW_MAJOR
#define TP_UNITS CONFIG_STREAMS_RAW_NMINORS
/* Module aliases built from the module id, the major numbers and the device node paths. */
#ifdef LINUX
#ifdef MODULE_ALIAS
MODULE_ALIAS("streams-modid-" __stringify(CONFIG_STREAMS_RAW_MODID));
MODULE_ALIAS("streams-driver-rawip");
MODULE_ALIAS("streams-major-" __stringify(CONFIG_STREAMS_RAW_MAJOR));
MODULE_ALIAS("/dev/streams/rawip");
MODULE_ALIAS("/dev/streams/rawip/*");
MODULE_ALIAS("/dev/streams/clone/rawip");
MODULE_ALIAS("char-major-" __stringify(CONFIG_STREAMS_CLONE_MAJOR) "-" __stringify(TP_CMAJOR_0));
MODULE_ALIAS("/dev/rawip2");
//MODULE_ALIAS("devname:rawip2");
#endif /* MODULE_ALIAS */
#endif /* LINUX */
/*
* ==========================================================================
*
* STREAMS Definitions
*
* ==========================================================================
*/
/* Generic DRV_* names mapped onto this driver's TP_* configuration values. */
#define DRV_ID TP_DRV_ID
#define DRV_NAME TP_DRV_NAME
#define CMAJORS TP_CMAJORS
#define CMAJOR_0 TP_CMAJOR_0
#define UNITS TP_UNITS
/* A MODULE build uses the full banner; otherwise the one-line splash is used. */
#ifdef MODULE
#define DRV_BANNER TP_BANNER
#else /* MODULE */
#define DRV_BANNER TP_SPLASH
#endif /* MODULE */
/* Read-side module information.  The high water mark is SHEADHIWAT << 5 (32 times SHEADHIWAT). */
STATIC struct module_info tp_rinfo = {
.mi_idnum = DRV_ID, /* Module ID number */
.mi_idname = DRV_NAME, /* Module name */
.mi_minpsz = 0, /* Min packet size accepted */
.mi_maxpsz = (1 << 16), /* Max packet size accepted */
.mi_hiwat = SHEADHIWAT << 5, /* Hi water mark */
.mi_lowat = 0, /* Lo water mark */
};
/* Per-side module statistics, each aligned to SMP_CACHE_BYTES. */
STATIC struct module_stat tp_rstat __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
STATIC struct module_stat tp_wstat __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
/* Upper multiplex is a T provider following the TPI. */
/* Forward declarations of the procedures referenced by the read-side qinit below. */
STATIC streamscall int tp_qopen(queue_t *, dev_t *, int, int, cred_t *);
STATIC streamscall int tp_qclose(queue_t *, int, cred_t *);
streamscall int tp_rput(queue_t *, mblk_t *);
streamscall int tp_rsrv(queue_t *);
/* Read-side queue initialization: put/service procedures plus the open and close entry points. */
STATIC struct qinit tp_rinit = {
.qi_putp = tp_rput, /* Read put procedure (message from below) */
.qi_srvp = tp_rsrv, /* Read service procedure */
.qi_qopen = tp_qopen, /* Each open */
.qi_qclose = tp_qclose, /* Last close */
.qi_minfo = &tp_rinfo, /* Module information */
.qi_mstat = &tp_rstat, /* Module statistics */
};
/* Write-side module information.  Same as the read side except the high water mark is plain SHEADHIWAT. */
STATIC struct module_info tp_winfo = {
.mi_idnum = DRV_ID, /* Module ID number */
.mi_idname = DRV_NAME, /* Module name */
.mi_minpsz = 0, /* Min packet size accepted */
.mi_maxpsz = (1 << 16), /* Max packet size accepted */
.mi_hiwat = SHEADHIWAT, /* Hi water mark */
.mi_lowat = 0, /* Lo water mark */
};
/* Forward declarations of the write-side procedures. */
streamscall int tp_wput(queue_t *, mblk_t *);
streamscall int tp_wsrv(queue_t *);
/* Write-side queue initialization; no open/close entries are set on this side. */
STATIC struct qinit tp_winit = {
.qi_putp = tp_wput, /* Write put procedure (message from above) */
.qi_srvp = tp_wsrv, /* Write service procedure */
.qi_minfo = &tp_winfo, /* Module information */
.qi_mstat = &tp_wstat, /* Module statistics */
};
/* STREAMS table tying the read and write qinit structures together for this driver. */
MODULE_STATIC struct streamtab tp_rawinfo = {
.st_rdinit = &tp_rinit, /* Upper read queue */
.st_wrinit = &tp_winit, /* Upper write queue */
};
/*
 * sk_buff accessor shims.  They are compiled only when neither
 * HAVE_KMEMB_STRUCT_SK_BUFF_TRANSPORT_HEADER nor HAVE_KFUNC_SKB_TRANSPORT_HEADER
 * is defined (presumably build-time probes for newer kernels -- confirm), and
 * map the accessor names onto the tail/end/h.raw/nh.raw/mac.raw fields.
 */
#if !defined HAVE_KMEMB_STRUCT_SK_BUFF_TRANSPORT_HEADER
#if !defined HAVE_KFUNC_SKB_TRANSPORT_HEADER
/* Read accessors: return the stored pointer unchanged. */
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
return skb->tail;
}
static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
{
return skb->end;
}
static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
{
return skb->h.raw;
}
static inline unsigned char *skb_network_header(const struct sk_buff *skb)
{
return skb->nh.raw;
}
static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
{
return skb->mac.raw;
}
/* Reset helpers: point the field at the current skb->data. */
static inline void skb_reset_tail_pointer(struct sk_buff *skb)
{
skb->tail = skb->data;
}
/* NOTE(review): this moves the end-of-buffer pointer to skb->data; verify any caller really wants that. */
static inline void skb_reset_end_pointer(struct sk_buff *skb)
{
skb->end = skb->data;
}
static inline void skb_reset_transport_header(struct sk_buff *skb)
{
skb->h.raw = skb->data;
}
static inline void skb_reset_network_header(struct sk_buff *skb)
{
skb->nh.raw = skb->data;
}
static inline void skb_reset_mac_header(struct sk_buff *skb)
{
skb->mac.raw = skb->data;
}
/* Set helpers: reset the field to skb->data, then advance it by @offset bytes. */
static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
{
skb_reset_tail_pointer(skb);
skb->tail += offset;
}
static inline void skb_set_transport_header(struct sk_buff *skb, const int offset)
{
skb_reset_transport_header(skb);
skb->h.raw += offset;
}
static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
{
skb_reset_network_header(skb);
skb->nh.raw += offset;
}
static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
{
skb_reset_mac_header(skb);
skb->mac.raw += offset;
}
#endif /* !defined HAVE_KFUNC_SKB_TRANSPORT_HEADER */
#endif /* !defined HAVE_KMEMB_STRUCT_SK_BUFF_TRANSPORT_HEADER */
/*
 * Primary data structures.
 */
/* Forward declarations: struct tp below holds pointers to its hash buckets. */
struct tp_bhash_bucket;
struct tp_chash_bucket;
/* Per-destination state: address, per-destination IP parameters and the cached route. */
struct tp_daddr {
uint32_t addr; /* IP address this destination */
uint32_t saddr; /* current source address */
unsigned char ttl; /* time to live, this destination */
unsigned char tos; /* type of service, this destination */
unsigned short mtu; /* maximum transfer unit this destination */
struct dst_entry *dst; /* route for this destination */
int oif; /* current interface */
};
/* A single source address. */
struct tp_saddr {
uint32_t addr; /* IP address this source */
};
/* A single bound address. */
struct tp_baddr {
uint32_t addr; /* IP address this bind */
};
/*
 * Option values kept per Stream, grouped by level (xti, ip).  The flags
 * array is a bitmap manipulated with t_set_bit()/t_tst_bit() using the
 * _T_BIT_* indices.
 */
typedef struct tp_options {
#if 0
unsigned char flags[12]; /* twelve bytes of flags for 96 flags */
#else
unsigned long flags[3]; /* at least 96 flags */
#endif
struct {
t_uscalar_t debug[4]; /* XTI_DEBUG */
struct t_linger linger; /* XTI_LINGER */
t_uscalar_t rcvbuf; /* XTI_RCVBUF */
t_uscalar_t rcvlowat; /* XTI_RCVLOWAT */
t_uscalar_t sndbuf; /* XTI_SNDBUF */
t_uscalar_t sndlowat; /* XTI_SNDLOWAT */
t_uscalar_t priority; /* XTI_PRIORITY */
} xti;
struct {
unsigned char protocol; /* T_IP_PROTOCOL */
unsigned char options[40]; /* T_IP_OPTIONS */
unsigned char ttl; /* T_IP_TTL */
unsigned char tos; /* T_IP_TOS */
unsigned int reuseaddr; /* T_IP_REUSEADDR */
unsigned int dontroute; /* T_IP_DONTROUTE */
unsigned int broadcast; /* T_IP_BROADCAST */
uint32_t addr; /* T_IP_ADDR */
uint32_t saddr; /* T_IP_SADDR */
uint32_t daddr; /* T_IP_DADDR */
uint32_t mtu; /* T_IP_MTU */
} ip;
} tp_options_t;
/*
 * Private structure, one per open Stream.  It carries the bind and
 * connection hash linkage, the TPI info block (which also holds the current
 * state), the bound/source/destination address lists and the option values.
 */
typedef struct tp {
STR_DECLARATION (struct tp); /* Stream declaration */
struct tp *bnext; /* linkage for bind/list hash */
struct tp **bprev; /* linkage for bind/list hash */
struct tp_bhash_bucket *bhash; /* linkage for bind/list hash */
struct tp *cnext; /* linkage for conn hash */
struct tp **cprev; /* linkage for conn hash */
struct tp_chash_bucket *chash; /* linkage for conn hash */
struct T_info_ack info; /* service provider information */
unsigned int sndblk; /* sending blocked */
unsigned int sndmem; /* send buffer memory allocated */
unsigned int rcvmem; /* recv buffer memory allocated */
unsigned int BIND_flags; /* bind flags */
unsigned int CONN_flags; /* connect flags */
unsigned int CONIND_number; /* maximum number of outstanding connection indications */
bufq_t conq; /* connection indication queue */
unsigned short pnum; /* number of bound protocol ids */
uint8_t protoids[16]; /* bound protocol ids */
unsigned short bnum; /* number of bound addresses */
unsigned short bport; /* bound port number (network order) */
struct tp_baddr baddrs[8]; /* bound addresses */
unsigned short snum; /* number of source (connected) addresses */
unsigned short sport; /* source (connected) port number (network order) */
struct tp_saddr saddrs[8]; /* source (connected) addresses */
unsigned short dnum; /* number of destination (connected) addresses */
unsigned short dport; /* destination (connected) port number (network order) */
struct tp_daddr daddrs[8]; /* destination (connected) addresses */
struct tp_options options; /* protocol options */
unsigned char _pad[40]; /* pad for ip options */
} tp_t;
/* Queue private pointer: raw (PRIV) and cast to struct tp (TP_PRIV). */
#define PRIV(__q) (((__q)->q_ptr))
#define TP_PRIV(__q) ((struct tp *)((__q)->q_ptr))
/* Driver-wide default structure: a read-write lock and the master list of open tp structures. */
typedef struct df {
rwlock_t lock; /* structure lock */
SLIST_HEAD (tp, tp); /* master list of tp (open) structures */
} df_t;
/* Static instance; the lock initializer used depends on which macro the kernel headers provide. */
#if defined __RW_LOCK_UNLOCKED
static struct df master = {.lock = __RW_LOCK_UNLOCKED(master.lock), };
#elif defined RW_LOCK_UNLOCKED
static struct df master = {.lock = RW_LOCK_UNLOCKED, };
#else
#error cannot initialize read-write locks
#endif
/* Default option values; those with a matching member are used to initialize tp_defaults. */
#define xti_default_debug { 0, }
#define xti_default_linger (struct t_linger){T_YES, 120}
#define xti_default_rcvbuf (SK_RMEM_MAX << 1)
#define xti_default_rcvlowat 1
#define xti_default_sndbuf (SK_WMEM_MAX << 1)
#define xti_default_sndlowat SK_WMEM_MAX
#define xti_default_priority 0
/* NOTE(review): 17 is the IP protocol number of UDP -- confirm that is the intended default for a raw IP driver. */
#define ip_default_protocol 17
#define ip_default_options { 0, }
#define ip_default_tos 0
#define ip_default_ttl 64
#define ip_default_reuseaddr T_YES
#define ip_default_dontroute T_NO
#define ip_default_broadcast T_NO
#define ip_default_addr INADDR_ANY
#define ip_default_saddr INADDR_ANY
#define ip_default_daddr INADDR_ANY
#define ip_default_mtu 536
/* NOTE(review): struct tp_options has no udp member in this view; this default may be unused -- confirm. */
#define udp_default_checksum T_YES
/* Bit indices into tp_options.flags, one per option, used with t_set_bit()/t_tst_bit()/t_clr_bit(). */
enum {
_T_BIT_XTI_DEBUG = 0,
_T_BIT_XTI_LINGER,
_T_BIT_XTI_RCVBUF,
_T_BIT_XTI_RCVLOWAT,
_T_BIT_XTI_SNDBUF,
_T_BIT_XTI_SNDLOWAT,
_T_BIT_XTI_PRIORITY,
_T_BIT_IP_PROTOCOL,
_T_BIT_IP_OPTIONS,
_T_BIT_IP_TOS,
_T_BIT_IP_TTL,
_T_BIT_IP_REUSEADDR,
_T_BIT_IP_DONTROUTE,
_T_BIT_IP_BROADCAST,
_T_BIT_IP_ADDR,
_T_BIT_IP_RETOPTS,
_T_BIT_IP_SADDR,
_T_BIT_IP_DADDR,
_T_BIT_IP_MTU,
};
/* Option-flag bit operations; thin aliases for the tp_*_bit macros. */
#define t_tst_bit(nr,addr) tp_tst_bit(nr,addr)
#define t_set_bit(nr,addr) tp_set_bit(nr,addr)
#define t_clr_bit(nr,addr) tp_clr_bit(nr,addr)
/*
* Bind buckets, caches and hashes.
*/
/* Bind bucket: groups the Streams bound to one protocol/port combination within a hash slot. */
struct tp_bind_bucket {
struct tp_bind_bucket *next; /* linkage of bind buckets for hash slot */
struct tp_bind_bucket **prev; /* linkage of bind buckets for hash slot */
unsigned char proto; /* IP protocol identifier */
unsigned short port; /* port number (host order) */
struct tp *owners; /* list of owners of this protocol/port combination */
struct tp *dflt; /* default listeners/destinations for this protocol */
};
/* Connection bucket: groups the Streams sharing one protocol/source port/destination port combination. */
struct tp_conn_bucket {
struct tp_conn_bucket *next; /* linkage of conn buckets for hash slot */
struct tp_conn_bucket **prev; /* linkage of conn buckets for hash slot */
unsigned char proto; /* IP protocol identifier */
unsigned short sport; /* source port number (network order) */
unsigned short dport; /* destination port number (network order) */
struct tp *owners; /* list of owners of this protocol/sport/dport combination */
};
/* One slot of the bind hash: a read-write lock and the head of the list of tp structures in the slot. */
struct tp_bhash_bucket {
rwlock_t lock;
struct tp *list;
};
/* One slot of the connection hash; same layout as the bind hash slot. */
struct tp_chash_bucket {
rwlock_t lock;
struct tp *list;
};
/* Bind and connection hash tables with their sizes and orders; the sizes feed tp_bhashfn()/tp_chashfn(). */
STATIC struct tp_bhash_bucket *tp_bhash;
STATIC size_t tp_bhash_size = 0;
STATIC size_t tp_bhash_order = 0;
STATIC struct tp_chash_bucket *tp_chash;
STATIC size_t tp_chash_size = 0;
STATIC size_t tp_chash_order = 0;
/**
 * tp_bhashfn - compute the bind hash slot for a protocol/port pair
 * @proto: IP protocol number
 * @bport: bound port number
 *
 * Adds the two keys together and masks the sum with (tp_bhash_size - 1).
 * NOTE(review): masking only yields a valid index when tp_bhash_size is a
 * power of two -- confirm where the table is sized.
 */
STATIC INLINE fastcall __hot_in int
tp_bhashfn(unsigned char proto, unsigned short bport)
{
	const size_t mask = tp_bhash_size - 1;

	return (mask & (proto + bport));
}
/**
 * tp_chashfn - compute the connection hash slot
 * @proto: IP protocol number
 * @sport: source port number
 * @dport: destination port number
 *
 * Adds the three keys together and masks the sum with (tp_chash_size - 1).
 * NOTE(review): masking only yields a valid index when tp_chash_size is a
 * power of two -- confirm where the table is sized.
 */
STATIC INLINE fastcall __unlikely int
tp_chashfn(unsigned char proto, unsigned short sport, unsigned short dport)
{
	const size_t mask = tp_chash_size - 1;

	return (mask & (proto + sport + dport));
}
/* mynet_protocol names whichever protocol registration structure the kernel provides. */
#if defined HAVE_KTYPE_STRUCT_NET_PROTOCOL
#define mynet_protocol net_protocol
#endif /* defined HAVE_KTYPE_STRUCT_NET_PROTOCOL */
#if defined HAVE_KTYPE_STRUCT_INET_PROTOCOL
#define mynet_protocol inet_protocol
#endif /* defined HAVE_KTYPE_STRUCT_INET_PROTOCOL */
/*
 * Registration wrapper: this driver's protocol structure together with a
 * pointer to another protocol structure and a module pointer.
 * NOTE(review): next/kmod presumably refer to the displaced native handler
 * and its owning module -- confirm at the registration code.
 */
struct ipnet_protocol {
struct mynet_protocol proto;
struct mynet_protocol *next;
struct module *kmod;
};
/* Per-protocol bookkeeping: reference counts and the registration wrapper above. */
struct tp_prot_bucket {
unsigned char proto; /* protocol number */
int refs; /* reference count */
int corefs; /* T_COTS(_ORD) references */
int clrefs; /* T_CLTS references */
struct ipnet_protocol prot; /* Linux registration structure */
};
/* Read-write lock tp_prot_lock, declared with whichever initializer the kernel headers provide. */
#if defined DEFINE_RWLOCK
STATIC DEFINE_RWLOCK(tp_prot_lock);
#elif defined __RW_LOCK_UNLOCKED
STATIC rwlock_t tp_prot_lock = __RW_LOCK_UNLOCKED(tp_prot_lock);
#elif defined RW_LOCK_UNLOCKED
STATIC rwlock_t tp_prot_lock = RW_LOCK_UNLOCKED;
#else
#error cannot initialize read-write locks
#endif
/* Protocol bucket table with 256 entries (presumably indexed by IP protocol number -- confirm). */
STATIC struct tp_prot_bucket *tp_prots[256];
/* Memory caches; tp_raw_priv_cachep backs struct tp (see tp_alloc()/tp_put()). */
STATIC kmem_cachep_t tp_raw_prot_cachep;
STATIC kmem_cachep_t tp_raw_priv_cachep;
/**
 * tp_get - take a reference on a private structure
 * @tp: private structure (must not be NULL)
 *
 * Increments the reference count and hands the same pointer back so the
 * call can be used inline in an assignment.
 */
static INLINE struct tp *
tp_get(struct tp *tp)
{
	dassert(tp != NULL);

	atomic_inc(&tp->refcnt);

	return tp;
}
/**
 * tp_put - drop a reference on a private structure
 * @tp: private structure (must not be NULL)
 *
 * Decrements the reference count; when it reaches zero the structure is
 * returned to tp_raw_priv_cachep.
 */
static INLINE __hot void
tp_put(struct tp *tp)
{
	dassert(tp != NULL);

	if (!atomic_dec_and_test(&tp->refcnt))
		return;		/* other references remain */

	kmem_cache_free(tp_raw_priv_cachep, tp);
}
/**
 * tp_release - clear a stored pointer and drop the reference it held
 * @tpp: location holding the pointer (must not be NULL)
 *
 * Exchanges NULL into *@tpp and, when the previous value was not NULL,
 * passes it to tp_put().
 */
static INLINE fastcall __hot void
tp_release(struct tp **tpp)
{
	struct tp *held;

	dassert(tpp != NULL);

	held = XCHG(tpp, NULL);
	if (likely(held != NULL))
		tp_put(held);
}
static INLINE struct tp *
tp_alloc(void)
{
struct tp *tp;
if ((tp = kmem_cache_alloc(tp_raw_priv_cachep, GFP_ATOMIC))) {
bzero(tp, sizeof(*tp));
atomic_set(&tp->refcnt, 1);
spin_lock_init(&tp->lock); /* "tp-lock" */
tp->priv_put = &tp_put;
tp->priv_get = &tp_get;
// tp->type = 0;
// tp->id = 0;
// tp->state = 0;
tp->flags = 0;
}
return (tp);
}
/*
* Locking
*/
/* Must always be bottom-half versions to avoid lock badness. But give these
* different names to avoid conflict with generic definitions. */
/*
 * The "#if 1" below selects the bottom-half (_bh) variants unconditionally;
 * the __flags argument is evaluated only to keep it "used".  The #else
 * branch (irqsave/irqrestore variants) is currently dead code.
 */
//#if defined CONFIG_STREAMS_NOIRQ || defined _TEST
#if 1
#define spin_lock_str2(__lkp, __flags) \
do { (void)__flags; spin_lock_bh(__lkp); } while (0)
#define spin_unlock_str2(__lkp, __flags) \
do { (void)__flags; spin_unlock_bh(__lkp); } while (0)
#define write_lock_str2(__lkp, __flags) \
do { (void)__flags; write_lock_bh(__lkp); } while (0)
#define write_unlock_str2(__lkp, __flags) \
do { (void)__flags; write_unlock_bh(__lkp); } while (0)
#define read_lock_str2(__lkp, __flags) \
do { (void)__flags; read_lock_bh(__lkp); } while (0)
#define read_unlock_str2(__lkp, __flags) \
do { (void)__flags; read_unlock_bh(__lkp); } while (0)
#define local_save_str2(__flags) \
do { (void)__flags; local_bh_disable(); } while (0)
#define local_restore_str2(__flags) \
do { (void)__flags; local_bh_enable(); } while (0)
#else
#define spin_lock_str2(__lkp, __flags) \
spin_lock_irqsave(__lkp, __flags)
#define spin_unlock_str2(__lkp, __flags) \
spin_unlock_irqrestore(__lkp, __flags)
#define write_lock_str2(__lkp, __flags) \
write_lock_irqsave(__lkp, __flags)
#define write_unlock_str2(__lkp, __flags) \
write_unlock_irqrestore(__lkp, __flags)
#define read_lock_str2(__lkp, __flags) \
read_lock_irqsave(__lkp, __flags)
#define read_unlock_str2(__lkp, __flags) \
read_unlock_irqrestore(__lkp, __flags)
#define local_save_str2(__flags) \
local_irq_save(__flags)
#define local_restore_str2(__flags) \
local_irq_restore(__flags)
#endif
/*
* Buffer allocation
*/
/**
 * tp_bufsrv - buffer callback: re-enable the queue that was waiting for a buffer
 * @data: the queue pointer that was passed to bufcall(), cast to long
 *
 * Clears the bufcall id recorded for the queue (input or output side),
 * drops the reference that was held for it when an id was recorded, and
 * enables the queue.  Does nothing when the queue matches neither side.
 */
STATIC streamscall __unlikely void
tp_bufsrv(long data)
{
	queue_t *q = (queue_t *) data;
	bufcall_id_t *bidp;
	str_t *s;

	ensure(q, return);
	s = STR_PRIV(q);
	ensure(s, return);

	/* pick the id slot belonging to this queue */
	if (q == s->iq)
		bidp = &s->ibid;
	else if (q == s->oq)
		bidp = &s->obid;
	else
		return;

	if (xchg(bidp, 0) != 0)
		atomic_dec(&s->refcnt);
	qenable(q);
}
/**
 * tp_unbufcall - cancel any outstanding buffer callbacks for a Stream
 * @s: stream private structure
 *
 * For the input side and then the output side: atomically clear the
 * recorded bufcall id and, when one was recorded, cancel it and drop the
 * reference that was held for it.
 */
noinline fastcall __unlikely void
tp_unbufcall(str_t * s)
{
	bufcall_id_t *slots[2] = { &s->ibid, &s->obid };
	int i;

	for (i = 0; i < 2; i++) {
		bufcall_id_t pending = xchg(slots[i], 0);

		if (!pending)
			continue;
		unbufcall(pending);
		atomic_dec(&s->refcnt);
	}
}
/**
 * tp_bufcall - schedule a buffer callback for a queue
 * @q: queue to enable once a buffer of @size may be available
 * @size: size of the buffer that could not be allocated
 * @prior: allocation priority
 *
 * Records the bufcall id in the slot for the queue's side (input or
 * output) and holds a reference on the private structure while the
 * callback is pending.  A previously pending callback for the same side is
 * cancelled and its reference dropped.
 *
 * If bufcall() itself fails (returns 0), the reference taken for it is
 * dropped again and any previously pending callback is left in place.
 * Otherwise the reference would never be released, and the queue would lose
 * the one callback that could still re-enable it.
 *
 * Calls swerr() when @q is NULL or matches neither side of the Stream.
 */
noinline fastcall __unlikely void
tp_bufcall(queue_t *q, size_t size, int prior)
{
	if (q) {
		str_t *s = STR_PRIV(q);
		bufcall_id_t bid, *bidp = NULL;

		if (q == s->iq)
			bidp = &s->ibid;
		if (q == s->oq)
			bidp = &s->obid;
		if (bidp) {
			/* take the reference first so the callback never runs unreferenced */
			atomic_inc(&s->refcnt);
			bid = bufcall(size, prior, &tp_bufsrv, (long) q);
			if (unlikely(bid == 0)) {
				/* nothing was scheduled: give the reference back */
				atomic_dec(&s->refcnt);
				return;
			}
			if ((bid = xchg(bidp, bid))) {
				unbufcall(bid);	/* Unsafe on LiS without atomic exchange above. */
				atomic_dec(&s->refcnt);
			}
			return;
		}
	}
	swerr();
	return;
}
/**
 * tp_allocb - allocate a message block, scheduling a bufcall on failure
 * @q: queue to re-enable when a buffer may be available
 * @size: requested block size
 * @prior: allocation priority
 *
 * Returns the allocated block, or NULL after arranging a buffer callback
 * for @q via tp_bufcall().
 */
STATIC INLINE fastcall __unlikely mblk_t *
tp_allocb(queue_t *q, size_t size, int prior)
{
	mblk_t *mp = allocb(size, prior);

	if (unlikely(mp == NULL)) {
		rare();
		tp_bufcall(q, size, prior);
	}
	return (mp);
}
/**
 * tp_dupmsg - duplicate a message, scheduling a bufcall on failure
 * @q: queue to re-enable when a buffer may be available
 * @bp: message to duplicate
 *
 * Returns the duplicate, or NULL after arranging a buffer callback for @q
 * sized by msgsize(@bp) at BPRI_MED priority.
 */
STATIC INLINE fastcall __unlikely mblk_t *
tp_dupmsg(queue_t *q, mblk_t *bp)
{
	mblk_t *dup = dupmsg(bp);

	if (unlikely(dup == NULL)) {
		rare();
		tp_bufcall(q, msgsize(bp), BPRI_MED);
	}
	return (dup);
}
/*
* =========================================================================
*
* State Changes
*
* =========================================================================
*/
/* State flags */
/* Each TSF_* flag is the single bit (1 << TS_*) for the corresponding TPI state value. */
#define TSF_UNBND ( 1 << TS_UNBND )
#define TSF_WACK_BREQ ( 1 << TS_WACK_BREQ )
#define TSF_WACK_UREQ ( 1 << TS_WACK_UREQ )
#define TSF_IDLE ( 1 << TS_IDLE )
#ifdef TS_WACK_OPTREQ
#define TSF_WACK_OPTREQ ( 1 << TS_WACK_OPTREQ )
#endif
#define TSF_WACK_CREQ ( 1 << TS_WACK_CREQ )
#define TSF_WCON_CREQ ( 1 << TS_WCON_CREQ )
#define TSF_WRES_CIND ( 1 << TS_WRES_CIND )
#define TSF_WACK_CRES ( 1 << TS_WACK_CRES )
#define TSF_DATA_XFER ( 1 << TS_DATA_XFER )
#define TSF_WIND_ORDREL ( 1 << TS_WIND_ORDREL )
#define TSF_WREQ_ORDREL ( 1 << TS_WREQ_ORDREL )
#define TSF_WACK_DREQ6 ( 1 << TS_WACK_DREQ6 )
#define TSF_WACK_DREQ7 ( 1 << TS_WACK_DREQ7 )
#define TSF_WACK_DREQ9 ( 1 << TS_WACK_DREQ9 )
#define TSF_WACK_DREQ10 ( 1 << TS_WACK_DREQ10 )
#define TSF_WACK_DREQ11 ( 1 << TS_WACK_DREQ11 )
#define TSF_NOSTATES ( 1 << TS_NOSTATES )
/* State masks */
/* Masks are ORed state flags, for use with tp_chk_state()/tp_not_state(). */
/* TSM_ALLSTATES sets every bit below the TS_NOSTATES bit. */
#define TSM_ALLSTATES (TSF_NOSTATES - 1)
#define TSM_WACK_DREQ (TSF_WACK_DREQ6 \
|TSF_WACK_DREQ7 \
|TSF_WACK_DREQ9 \
|TSF_WACK_DREQ10 \
|TSF_WACK_DREQ11)
#define TSM_LISTEN (TSF_IDLE \
|TSF_WRES_CIND)
#define TSM_CONNECTED (TSF_WCON_CREQ\
|TSF_WRES_CIND\
|TSF_DATA_XFER\
|TSF_WIND_ORDREL\
|TSF_WREQ_ORDREL)
#define TSM_DISCONN (TSF_IDLE\
|TSF_UNBND)
#define TSM_INDATA (TSF_DATA_XFER\
|TSF_WIND_ORDREL)
#define TSM_OUTDATA (TSF_DATA_XFER\
|TSF_WREQ_ORDREL)
/* Fallback definitions for headers that do not provide T_PROVIDER/T_USER. */
#ifndef T_PROVIDER
#define T_PROVIDER 0
#define T_USER 1
#endif
#ifdef _DEBUG
/**
 * tp_state_name - return the printable name of a TPI state (debug builds only)
 * @state: TPI state value (TS_*)
 *
 * Returns a constant string naming @state, or "(unknown)" for a value that
 * matches no known state.  TS_WACK_OPTREQ is handled only when the TPI
 * header defines it, matching the conditional TSF_WACK_OPTREQ definition.
 */
STATIC const char *
tp_state_name(t_scalar_t state)
{
	switch (state) {
	case TS_UNBND:
		return ("TS_UNBND");
	case TS_WACK_BREQ:
		return ("TS_WACK_BREQ");
	case TS_WACK_UREQ:
		return ("TS_WACK_UREQ");
	case TS_IDLE:
		return ("TS_IDLE");
#ifdef TS_WACK_OPTREQ
	case TS_WACK_OPTREQ:
		return ("TS_WACK_OPTREQ");
#endif
	case TS_WACK_CREQ:
		return ("TS_WACK_CREQ");
	case TS_WCON_CREQ:
		return ("TS_WCON_CREQ");
	case TS_WRES_CIND:
		return ("TS_WRES_CIND");
	case TS_WACK_CRES:
		return ("TS_WACK_CRES");
	case TS_DATA_XFER:
		return ("TS_DATA_XFER");
	case TS_WIND_ORDREL:
		return ("TS_WIND_ORDREL");
	case TS_WREQ_ORDREL:
		return ("TS_WREQ_ORDREL");
	case TS_WACK_DREQ6:
		return ("TS_WACK_DREQ6");
	case TS_WACK_DREQ7:
		return ("TS_WACK_DREQ7");
	case TS_WACK_DREQ9:
		return ("TS_WACK_DREQ9");
	case TS_WACK_DREQ10:
		return ("TS_WACK_DREQ10");
	case TS_WACK_DREQ11:
		return ("TS_WACK_DREQ11");
	case TS_NOSTATES:
		return ("TS_NOSTATES");
	default:
		return ("(unknown)");
	}
}
#endif /* _DEBUG */
/* State functions */
/**
 * tp_set_state - set the current TPI state of a Stream
 * @tp: private structure
 * @state: new state
 *
 * Emits the transition (new state <- old state) through _printd() and then
 * stores @state in tp->info.CURRENT_state.
 */
STATIC INLINE fastcall __unlikely void
tp_set_state(struct tp *tp, const t_uscalar_t state)
{
_printd(("%s: %p: %s <- %s\n", DRV_NAME, tp, tp_state_name(state),
tp_state_name(tp->info.CURRENT_state)));
tp->info.CURRENT_state = state;
}
/**
 * tp_get_state - read the current TPI state of a Stream
 * @tp: private structure
 *
 * Returns tp->info.CURRENT_state.
 */
STATIC INLINE fastcall __unlikely t_uscalar_t
tp_get_state(const struct tp *tp)
{
	const t_uscalar_t current_state = tp->info.CURRENT_state;

	return current_state;
}
/**
 * tp_chk_state - test whether the current state is within a mask
 * @tp: private structure
 * @mask: OR of TSF_* state flags
 *
 * Returns 1 when the flag bit of the current state is set in @mask and 0
 * otherwise.
 */
STATIC INLINE fastcall __unlikely t_uscalar_t
tp_chk_state(const struct tp *tp, const t_uscalar_t mask)
{
	if ((mask) & (1 << tp->info.CURRENT_state))
		return (1);
	return (0);
}
/**
 * tp_not_state - test whether the current state is outside a mask
 * @tp: private structure
 * @mask: OR of TSF_* state flags
 *
 * Returns 1 when the flag bit of the current state is NOT set in @mask and
 * 0 otherwise.
 */
STATIC INLINE fastcall __unlikely t_uscalar_t
tp_not_state(const struct tp *tp, const t_uscalar_t mask)
{
	if ((mask) & (1 << tp->info.CURRENT_state))
		return (0);
	return (1);
}
/**
 * tp_get_statef - return the flag bit for the current state
 * @tp: private structure
 *
 * Returns 1 shifted left by the current state value, i.e. the TSF_* flag
 * corresponding to the state reported by tp_get_state().
 */
STATIC INLINE fastcall __unlikely long
tp_get_statef(const struct tp *tp)
{
	const t_uscalar_t current_state = tp_get_state(tp);

	return (1 << current_state);
}
/*
* Option Handling
*/
/*
 * Option sizing helpers.
 *   T_SPACE(len)        - option header plus T_ALIGN(len) bytes of value
 *   T_LENGTH(len)       - option header plus exactly len bytes of value
 *   _T_SPACE_SIZEOF(s)  - T_SPACE() for an object or type s
 *   _T_LENGTH_SIZEOF(s) - T_LENGTH() for an object or type s
 * The argument of T_LENGTH() is parenthesized so that an expression
 * argument (for example a conditional) is added as a whole.
 */
#define T_SPACE(len) \
	(sizeof(struct t_opthdr) + T_ALIGN(len))
#define T_LENGTH(len) \
	(sizeof(struct t_opthdr) + (len))
#define _T_SPACE_SIZEOF(s) \
	T_SPACE(sizeof(s))
#define _T_LENGTH_SIZEOF(s) \
	T_LENGTH(sizeof(s))
/* Default option values, built from the *_default_* macros; no option flag bits are set initially. */
STATIC struct tp_options tp_defaults = {
.flags = {0,},
.xti = {
.debug = xti_default_debug,
.linger = xti_default_linger,
.rcvbuf = xti_default_rcvbuf,
.rcvlowat = xti_default_rcvlowat,
.sndbuf = xti_default_sndbuf,
.sndlowat = xti_default_sndlowat,
.priority = xti_default_priority,
},
.ip = {
.protocol = ip_default_protocol,
.options = ip_default_options,
.tos = ip_default_tos,
.ttl = ip_default_ttl,
.reuseaddr = ip_default_reuseaddr,
.dontroute = ip_default_dontroute,
.broadcast = ip_default_broadcast,
.addr = ip_default_addr,
.saddr = ip_default_saddr,
.daddr = ip_default_daddr,
.mtu = ip_default_mtu,
},
};
/* Alias used by the option sizing routines. */
#define t_defaults tp_defaults
/**
 * t_opts_size_ud - size the options delivered with received unitdata
 * @t: private structure
 * @mp: message pointer for message (not examined)
 *
 * Returns 0 when the Stream is bound to exactly one address that is not
 * INADDR_ANY.  Otherwise (i.e. the Stream is multihomed or wildcard bound)
 * returns the space needed for a single T_IP_ADDR option carrying the
 * destination address.
 */
STATIC INLINE fastcall __hot_in int
t_opts_size_ud(const struct tp *t, const mblk_t *mp)
{
	/* a single specific bound address needs no destination address option */
	if (likely(t->bnum == 1) && likely(t->baddrs[0].addr != INADDR_ANY))
		return (0);

	return (_T_SPACE_SIZEOF(t_defaults.ip.addr));	/* T_IP_ADDR */
}
/**
 * t_opts_build_ud - build the options delivered with received unitdata
 * @t: private structure (not examined)
 * @mp: message pointer for message; the data block base is read as the IP header
 * @op: output pointer
 * @olen: output length
 *
 * Writes a single T_IP_ADDR option holding the destination address taken
 * from the IP header.  Returns 0 when there is no output area, @olen on
 * success, and -EFAULT (after swerr()) when no option header fits in the
 * output area.
 */
static INLINE fastcall __hot_in int
t_opts_build_ud(const struct tp *t, mblk_t *mp, unsigned char *op, const size_t olen)
{
	struct t_opthdr *oh;
	struct iphdr *iph;

	if (op == NULL || olen == 0)
		return (0);

	oh = _T_OPT_FIRSTHDR_OFS(op, olen, 0);
	iph = (struct iphdr *) mp->b_datap->db_base;

	/* T_IP_ADDR: destination address of the received packet */
	if (!oh)
		goto efault;
	oh->len = _T_LENGTH_SIZEOF(uint32_t);
	oh->level = T_INET_IP;
	oh->name = T_IP_ADDR;
	oh->status = T_SUCCESS;
	*((uint32_t *) T_OPT_DATA(oh)) = iph->daddr;
	oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);

	/* the output area is expected to be used up exactly */
	assure(oh == NULL);
	return (olen);

      efault:
	swerr();
	return (-EFAULT);
}
/**
 * t_opts_size - size the options delivered with a received message
 * @t: private structure (not examined)
 * @mp: message pointer for message; the data block base is read as the IP header
 *
 * Returns the space needed for T_IP_TTL, T_IP_TOS and T_IP_ADDR, plus
 * T_IP_OPTIONS when the IP header is longer than the basic header.
 */
STATIC INLINE fastcall __hot_in int
t_opts_size(const struct tp *t, const mblk_t *mp)
{
	struct iphdr *iph = (struct iphdr *) mp->b_datap->db_base;
	int optlen = (iph->ihl << 2) - sizeof(struct iphdr);	/* bytes of IP options */
	int size;

	/* options that are always reported */
	size = _T_SPACE_SIZEOF(t_defaults.ip.ttl)	/* T_IP_TTL */
	    + _T_SPACE_SIZEOF(t_defaults.ip.tos)	/* T_IP_TOS */
	    + _T_SPACE_SIZEOF(t_defaults.ip.addr);	/* T_IP_ADDR */

	if (optlen > 0)
		size += T_SPACE(optlen);	/* T_IP_OPTIONS */

	return (size);
}
/**
 * t_opts_build - build the options delivered with a received message
 * @t: private structure (not examined)
 * @mp: message pointer for message; the data block base is read as the IP header
 * @op: output pointer
 * @olen: output length
 *
 * Writes, in order: T_IP_OPTIONS (only when the IP header carries
 * options), T_IP_TOS, T_IP_TTL and T_IP_ADDR (destination address).
 * Returns 0 when there is no output area, @olen on success, and -EFAULT
 * (after swerr()) when the output area runs out of room for an option
 * header.
 */
static INLINE fastcall __hot_in int
t_opts_build(const struct tp *t, mblk_t *mp, unsigned char *op, const size_t olen)
{
	struct t_opthdr *oh;
	struct iphdr *iph;
	int optlen;

	if (op == NULL || olen == 0)
		return (0);

	oh = _T_OPT_FIRSTHDR_OFS(op, olen, 0);
	iph = (struct iphdr *) mp->b_datap->db_base;
	optlen = (iph->ihl << 2) - sizeof(*iph);

	/* T_IP_OPTIONS: raw option bytes following the basic IP header */
	if (unlikely(optlen > 0)) {
		if (!oh)
			goto efault;
		oh->len = T_LENGTH(optlen);
		oh->level = T_INET_IP;
		oh->name = T_IP_OPTIONS;
		oh->status = T_SUCCESS;
		bcopy((iph + 1), T_OPT_DATA(oh), optlen);
		oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);
	}

	/* T_IP_TOS */
	if (!oh)
		goto efault;
	oh->len = _T_LENGTH_SIZEOF(unsigned char);
	oh->level = T_INET_IP;
	oh->name = T_IP_TOS;
	oh->status = T_SUCCESS;
	*((unsigned char *) T_OPT_DATA(oh)) = iph->tos;
	oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);

	/* T_IP_TTL */
	if (!oh)
		goto efault;
	oh->len = _T_LENGTH_SIZEOF(unsigned char);
	oh->level = T_INET_IP;
	oh->name = T_IP_TTL;
	oh->status = T_SUCCESS;
	*((unsigned char *) T_OPT_DATA(oh)) = iph->ttl;
	oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);

	/* T_IP_ADDR: destination address of the received packet */
	if (!oh)
		goto efault;
	oh->len = _T_LENGTH_SIZEOF(uint32_t);
	oh->level = T_INET_IP;
	oh->name = T_IP_ADDR;
	oh->status = T_SUCCESS;
	*((uint32_t *) T_OPT_DATA(oh)) = iph->daddr;
	oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);

	/* the output area is expected to be used up exactly */
	assure(oh == NULL);
	return (olen);

      efault:
	swerr();
	return (-EFAULT);
}
/**
 * t_errs_size - size the options delivered for a received ICMP message
 * @t: private structure (not examined)
 * @mp: message pointer for ICMP message, may be NULL
 *
 * Note that mp->b_rptr points to the encapsulated IP header, while
 * mp->b_datap->db_base points to the IP header of the ICMP message itself.
 *
 * Returns 0 when @mp is NULL.  Otherwise returns the space needed for
 * T_IP_TTL, T_IP_TOS and T_IP_ADDR, plus T_IP_OPTIONS when the encapsulated
 * IP header is longer than the basic header.
 */
noinline fastcall __unlikely int
t_errs_size(const struct tp *t, const mblk_t *mp)
{
	struct iphdr *iph;
	int optlen;
	int size;

	if (mp == NULL)
		return (0);

	iph = (struct iphdr *) mp->b_rptr;
	optlen = (iph->ihl << 2) - sizeof(struct iphdr);	/* bytes of IP options */

	/* options that are always reported */
	size = _T_SPACE_SIZEOF(t_defaults.ip.ttl)	/* T_IP_TTL */
	    + _T_SPACE_SIZEOF(t_defaults.ip.tos)	/* T_IP_TOS */
	    + _T_SPACE_SIZEOF(t_defaults.ip.addr);	/* T_IP_ADDR */

	if (optlen > 0)
		size += T_SPACE(optlen);	/* T_IP_OPTIONS */

	return (size);
}
/**
 * t_errs_build - build the options delivered for a received ICMP message
 * @t: private structure (not examined)
 * @mp: message pointer for ICMP message, may be NULL
 * @op: output pointer
 * @olen: output length
 *
 * Note that mp->b_rptr points to the encapsulated IP header, while
 * mp->b_datap->db_base points to the IP header of the ICMP message.
 *
 * Writes, in order: T_IP_OPTIONS (only when the encapsulated IP header
 * carries options), T_IP_TOS, T_IP_TTL and T_IP_ADDR (destination
 * address), all taken from the encapsulated IP header.
 *
 * Returns 0 when there is nothing to build (no message or no output area),
 * @olen on success, and -EFAULT (after swerr()) when the output area runs
 * out of room for an option header.  A NULL @mp is tolerated in the same
 * way t_errs_size() tolerates it, instead of being dereferenced.
 */
noinline fastcall __unlikely int
t_errs_build(const struct tp *t, mblk_t *mp, unsigned char *op, const size_t olen)
{
	struct iphdr *iph;
	struct t_opthdr *oh;
	int optlen;

	if (mp == NULL || op == NULL || olen == 0)
		return (0);
	oh = _T_OPT_FIRSTHDR_OFS(op, olen, 0);
	iph = (struct iphdr *) mp->b_rptr;
	optlen = (iph->ihl << 2) - sizeof(*iph);
	/* T_IP_OPTIONS: raw option bytes following the basic IP header */
	if (optlen > 0) {
		if (oh == NULL)
			goto efault;
		oh->len = T_LENGTH(optlen);
		oh->level = T_INET_IP;
		oh->name = T_IP_OPTIONS;
		oh->status = T_SUCCESS;
		bcopy((iph + 1), T_OPT_DATA(oh), optlen);
		oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);
	}
	/* T_IP_TOS */
	{
		if (oh == NULL)
			goto efault;
		oh->len = _T_LENGTH_SIZEOF(unsigned char);
		oh->level = T_INET_IP;
		oh->name = T_IP_TOS;
		oh->status = T_SUCCESS;
		*((unsigned char *) T_OPT_DATA(oh)) = iph->tos;
		oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);
	}
	/* T_IP_TTL */
	{
		if (oh == NULL)
			goto efault;
		oh->len = _T_LENGTH_SIZEOF(unsigned char);
		oh->level = T_INET_IP;
		oh->name = T_IP_TTL;
		oh->status = T_SUCCESS;
		*((unsigned char *) T_OPT_DATA(oh)) = iph->ttl;
		oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);
	}
	/* T_IP_ADDR: destination address from the encapsulated header */
	{
		if (oh == NULL)
			goto efault;
		oh->len = _T_LENGTH_SIZEOF(uint32_t);
		oh->level = T_INET_IP;
		oh->name = T_IP_ADDR;
		oh->status = T_SUCCESS;
		*((uint32_t *) T_OPT_DATA(oh)) = iph->daddr;
		oh = _T_OPT_NEXTHDR_OFS(op, olen, oh, 0);
	}
	/* the output area is expected to be used up exactly */
	assure(oh == NULL);
	return (olen);
      efault:
	swerr();
	return (-EFAULT);
}
/**
* t_opts_parse_ud - parse options from received primitive for T_UNITDATA_REQ
* @ip: pointer to the options to parse
* @ilen: length of the options
* @op: pointer to the options structure to fill in
*
* T_UNITDATA_REQ ignores unrecognized options or option levels.
*/
noinline fastcall int
t_opts_parse_ud(const unsigned char *ip, const size_t ilen, struct tp_options *op)
{
const struct t_opthdr *ih;
int optlen;
int err;
err = TBADOPT;
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); unlikely(ih != NULL);
ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
if (unlikely(ih->len < sizeof(*ih)))
goto error;
if (unlikely((unsigned char *) ih + ih->len > ip + ilen))
goto error;
optlen = ih->len - sizeof(*ih);
switch (ih->level) {
default:
continue;
#if 0
case XTI_GENERIC:
switch (ih->name) {
default:
continue;
case XTI_DEBUG:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen > sizeof(op->xti.debug)))
goto error;
bcopy(valp, op->xti.debug, optlen);
t_set_bit(_T_BIT_XTI_DEBUG, op->flags);
continue;
}
case XTI_LINGER:
{
const struct t_linger *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->xti.linger = *valp;
t_set_bit(_T_BIT_XTI_LINGER, op->flags);
continue;
}
case XTI_RCVBUF:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->xti.rcvbuf = *valp;
t_set_bit(_T_BIT_XTI_RCVBUF, op->flags);
continue;
}
case XTI_RCVLOWAT:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->xti.rcvlowat = *valp;
t_set_bit(_T_BIT_XTI_RCVLOWAT, op->flags);
continue;
}
case XTI_SNDBUF:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->xti.sndbuf = *valp;
t_set_bit(_T_BIT_XTI_SNDBUF, op->flags);
continue;
}
case XTI_SNDLOWAT:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->xti.sndlowat = *valp;
t_set_bit(_T_BIT_XTI_SNDLOWAT, op->flags);
continue;
}
}
#endif
case T_INET_IP:
switch (ih->name) {
default:
continue;
case T_IP_OPTIONS:
{
if (unlikely(optlen > 40))
goto error;
/* FIXME: handle options */
t_set_bit(_T_BIT_IP_OPTIONS, op->flags);
continue;
}
case T_IP_TOS:
{
const unsigned char *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->ip.tos = *valp;
t_set_bit(_T_BIT_IP_TOS, op->flags);
continue;
}
case T_IP_TTL:
{
const unsigned char *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
/* FIXME: validate value */
op->ip.tos = *valp;
t_set_bit(_T_BIT_IP_TTL, op->flags);
continue;
}
case T_IP_DONTROUTE:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
if (unlikely(*valp != T_NO && *valp != T_YES))
goto error;
op->ip.dontroute = *valp;
t_set_bit(_T_BIT_IP_DONTROUTE, op->flags);
continue;
}
case T_IP_BROADCAST:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
if (unlikely(*valp != T_NO && *valp != T_YES))
goto error;
op->ip.broadcast = *valp;
t_set_bit(_T_BIT_IP_BROADCAST, op->flags);
continue;
}
case T_IP_ADDR:
{
const uint32_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
op->ip.addr = *valp;
t_set_bit(_T_BIT_IP_ADDR, op->flags);
continue;
}
case T_IP_REUSEADDR:
{
const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);
if (unlikely(optlen != sizeof(*valp)))
goto error;
if (unlikely(*valp != T_NO && *valp != T_YES))
goto error;
op->ip.reuseaddr = *valp;
t_set_bit(_T_BIT_IP_REUSEADDR, op->flags);
continue;
}
}
}
}
return (0);
error:
return (err);
}
/**
 * t_opts_parse - parse options from received primitive
 * @ip: pointer to options
 * @ilen: length of options
 * @op: pointer to options structure to fill in
 *
 * Unlike t_opts_parse_ud(), an unrecognized option level or name is an error here.
 *
 * For each option the value is stored in @op and the corresponding bit is set in op->flags.
 * XTI_RCVBUF and XTI_SNDBUF are stored as twice the requested value; all other values are
 * stored as supplied.
 *
 * Returns zero on success, or TBADOPT when an option header is malformed, the level or name is
 * not recognized, or the option value has the wrong length (or, for the yes/no options, is
 * neither T_YES nor T_NO).
 */
STATIC int
t_opts_parse(const unsigned char *ip, const size_t ilen, struct tp_options *op)
{
	const struct t_opthdr *ih;
	int optlen;
	int err;

	err = TBADOPT;
	for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); unlikely(ih != NULL);
	     ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
		/* the header must at least describe itself... */
		if (unlikely(ih->len < sizeof(*ih)))
			goto error;
		/* ...and the option must lie entirely within the input buffer */
		if (unlikely((unsigned char *) ih + ih->len > ip + ilen))
			goto error;
		optlen = ih->len - sizeof(*ih);
		switch (ih->level) {
		default:
			goto error;
		case XTI_GENERIC:
			switch (ih->name) {
			default:
				goto error;
			case XTI_DEBUG:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen > sizeof(op->xti.debug)))
					goto error;
				bcopy(valp, op->xti.debug, optlen);
				t_set_bit(_T_BIT_XTI_DEBUG, op->flags);
				continue;
			}
			case XTI_LINGER:
			{
				const struct t_linger *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->xti.linger = *valp;
				t_set_bit(_T_BIT_XTI_LINGER, op->flags);
				continue;
			}
			case XTI_RCVBUF:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->xti.rcvbuf = *valp << 1;
				t_set_bit(_T_BIT_XTI_RCVBUF, op->flags);
				continue;
			}
			case XTI_RCVLOWAT:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->xti.rcvlowat = *valp;
				t_set_bit(_T_BIT_XTI_RCVLOWAT, op->flags);
				continue;
			}
			case XTI_SNDBUF:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->xti.sndbuf = *valp << 1;
				t_set_bit(_T_BIT_XTI_SNDBUF, op->flags);
				continue;
			}
			case XTI_SNDLOWAT:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->xti.sndlowat = *valp;
				t_set_bit(_T_BIT_XTI_SNDLOWAT, op->flags);
				continue;
			}
			}
			/* not reached: every case above leaves via continue or goto error */
		case T_INET_IP:
			switch (ih->name) {
			default:
				goto error;
			case T_IP_OPTIONS:
				if (unlikely(optlen > 40))
					goto error;
				/* FIXME: handle options */
				t_set_bit(_T_BIT_IP_OPTIONS, op->flags);
				continue;
			case T_IP_TOS:
			{
				const unsigned char *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				op->ip.tos = *valp;
				t_set_bit(_T_BIT_IP_TOS, op->flags);
				continue;
			}
			case T_IP_TTL:
			{
				const unsigned char *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				/* FIXME: validate value */
				/* store into ttl: this used to overwrite ip.tos instead */
				op->ip.ttl = *valp;
				t_set_bit(_T_BIT_IP_TTL, op->flags);
				continue;
			}
			case T_IP_DONTROUTE:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				if (unlikely(*valp != T_NO && *valp != T_YES))
					goto error;
				op->ip.dontroute = *valp;
				t_set_bit(_T_BIT_IP_DONTROUTE, op->flags);
				continue;
			}
			case T_IP_BROADCAST:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				if (unlikely(*valp != T_NO && *valp != T_YES))
					goto error;
				op->ip.broadcast = *valp;
				t_set_bit(_T_BIT_IP_BROADCAST, op->flags);
				continue;
			}
			case T_IP_ADDR:
			{
				const uint32_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				op->ip.addr = *valp;
				t_set_bit(_T_BIT_IP_ADDR, op->flags);
				continue;
			}
			case T_IP_REUSEADDR:
			{
				const t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(ih);

				if (unlikely(optlen != sizeof(*valp)))
					goto error;
				if (unlikely(*valp != T_NO && *valp != T_YES))
					goto error;
				op->ip.reuseaddr = *valp;
				t_set_bit(_T_BIT_IP_REUSEADDR, op->flags);
				continue;
			}
			}
		}
	}
	return (0);
      error:
	return (err);
}
/**
 * t_size_default_options - size default options
 * @t: private structure (used here only in the trace messages)
 * @ip: input pointer
 * @ilen: input length
 *
 * Walk the option headers in the input, check each header for validity, and accumulate the
 * size required for the acknowledgement options field when the default value of each
 * requested option is returned.  A NULL or zero-length input is treated as a request for all
 * options at all levels.
 *
 * Returns the accumulated output size, or -EINVAL when a header is malformed, the level is not
 * recognized, or T_ALLLEVELS is used with a name other than T_ALLOPT.
 */
noinline fastcall int
t_size_default_options(const struct tp *t, const unsigned char *ip, size_t ilen)
{
int olen = 0, optlen;
const struct t_opthdr *ih;
struct t_opthdr all;
if (ip == NULL || ilen == 0) {
/* For zero-length options fake an option header for all names with all levels */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (const unsigned char *) &all;
ilen = sizeof(all);
}
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); ih; ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
/* the header must at least describe itself and must lie within the input buffer */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* the value length is computed but not otherwise used when sizing defaults */
optlen = ih->len - sizeof(*ih);
(void) optlen;
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
if (ih->name != T_ALLOPT)
goto einval;
/* T_ALLLEVELS with T_ALLOPT: fall through and size every level in turn */
case XTI_GENERIC:
/* Cascade: a specific name adds its own size and continues with the next input
   header; T_ALLOPT enters at the top and falls through every case, adding the
   size of each option in the level. */
switch (ih->name) {
default:
/* unrecognized name: reserve an entry with a zero-length value */
olen += T_SPACE(0);
continue;
case T_ALLOPT:
case XTI_DEBUG:
olen += _T_SPACE_SIZEOF(t_defaults.xti.debug);
if (ih->name != T_ALLOPT)
continue;
case XTI_LINGER:
olen += _T_SPACE_SIZEOF(t_defaults.xti.linger);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVBUF:
olen += _T_SPACE_SIZEOF(t_defaults.xti.rcvbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVLOWAT:
olen += _T_SPACE_SIZEOF(t_defaults.xti.rcvlowat);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDBUF:
olen += _T_SPACE_SIZEOF(t_defaults.xti.sndbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDLOWAT:
olen += _T_SPACE_SIZEOF(t_defaults.xti.sndlowat);
if (ih->name != T_ALLOPT)
continue;
}
/* only T_ALLLEVELS goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
case T_INET_IP:
/* same cascade as above, for the T_INET_IP level */
switch (ih->name) {
default:
olen += T_SPACE(0);
continue;
case T_ALLOPT:
case T_IP_OPTIONS:
/* not supported yet */
olen += T_SPACE(0);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TOS:
olen += _T_SPACE_SIZEOF(t_defaults.ip.tos);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TTL:
olen += _T_SPACE_SIZEOF(t_defaults.ip.ttl);
if (ih->name != T_ALLOPT)
continue;
case T_IP_REUSEADDR:
olen += _T_SPACE_SIZEOF(t_defaults.ip.reuseaddr);
if (ih->name != T_ALLOPT)
continue;
case T_IP_DONTROUTE:
olen += _T_SPACE_SIZEOF(t_defaults.ip.dontroute);
if (ih->name != T_ALLOPT)
continue;
case T_IP_BROADCAST:
olen += _T_SPACE_SIZEOF(t_defaults.ip.broadcast);
if (ih->name != T_ALLOPT)
continue;
case T_IP_ADDR:
olen += _T_SPACE_SIZEOF(t_defaults.ip.addr);
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
_ptrace(("%p: Calculated option output size = %u\n", t, olen));
return (olen);
einval:
ptrace(("%p: ERROR: Invalid input options\n", t));
return (-EINVAL);
}
/**
 * t_size_current_options - calculate size of current options
 * @t: private structure
 * @ip: input pointer
 * @ilen: input length
 *
 * Walk the option headers in the input, check each header for validity, and accumulate the
 * size required for the output options field when the current value (from t->options) of each
 * requested option is returned.  A NULL or zero-length input is treated as a request for all
 * options at all levels.
 *
 * Returns the accumulated output size, or -EINVAL when a header is malformed, the level is not
 * recognized, or T_ALLLEVELS is used with a name other than T_ALLOPT.
 */
noinline fastcall int
t_size_current_options(const struct tp *t, const unsigned char *ip, size_t ilen)
{
int olen = 0, optlen;
const struct t_opthdr *ih;
struct t_opthdr all;
if (ip == NULL || ilen == 0) {
/* For zero-length options fake an option header for all names with all levels */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); ih; ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
/* the header must at least describe itself and must lie within the input buffer */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* the value length is computed but not otherwise used when sizing current values */
optlen = ih->len - sizeof(*ih);
(void) optlen;
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
if (ih->name != T_ALLOPT)
goto einval;
/* T_ALLLEVELS with T_ALLOPT: fall through and size every level in turn */
case XTI_GENERIC:
/* Cascade: a specific name adds its own size and continues with the next input
   header; T_ALLOPT enters at the top and falls through every case, adding the
   size of each option in the level. */
switch (ih->name) {
default:
/* unrecognized name: reserve an entry with a zero-length value */
olen += T_SPACE(0);
continue;
case T_ALLOPT:
case XTI_DEBUG:
olen += _T_SPACE_SIZEOF(t->options.xti.debug);
if (ih->name != T_ALLOPT)
continue;
case XTI_LINGER:
olen += _T_SPACE_SIZEOF(t->options.xti.linger);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVBUF:
olen += _T_SPACE_SIZEOF(t->options.xti.rcvbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVLOWAT:
olen += _T_SPACE_SIZEOF(t->options.xti.rcvlowat);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDBUF:
olen += _T_SPACE_SIZEOF(t->options.xti.sndbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDLOWAT:
olen += _T_SPACE_SIZEOF(t->options.xti.sndlowat);
if (ih->name != T_ALLOPT)
continue;
}
/* only T_ALLLEVELS goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
case T_INET_IP:
/* same cascade as above, for the T_INET_IP level */
switch (ih->name) {
default:
olen += T_SPACE(0);
continue;
case T_ALLOPT:
case T_IP_OPTIONS:
/* not supported yet */
olen += T_SPACE(0);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TOS:
olen += _T_SPACE_SIZEOF(t->options.ip.tos);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TTL:
olen += _T_SPACE_SIZEOF(t->options.ip.ttl);
if (ih->name != T_ALLOPT)
continue;
case T_IP_REUSEADDR:
olen += _T_SPACE_SIZEOF(t->options.ip.reuseaddr);
if (ih->name != T_ALLOPT)
continue;
case T_IP_DONTROUTE:
olen += _T_SPACE_SIZEOF(t->options.ip.dontroute);
if (ih->name != T_ALLOPT)
continue;
case T_IP_BROADCAST:
olen += _T_SPACE_SIZEOF(t->options.ip.broadcast);
if (ih->name != T_ALLOPT)
continue;
case T_IP_ADDR:
olen += _T_SPACE_SIZEOF(t->options.ip.addr);
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
_ptrace(("%p: Calculated option output size = %u\n", t, olen));
return (olen);
einval:
ptrace(("%p: ERROR: Invalid input options\n", t));
return (-EINVAL);
}
/**
 * t_size_check_options - determine size of options output for T_CHECK
 * @t: private structure
 * @ip: input pointer
 * @ilen: input length
 *
 * Walk the option headers in the input and accumulate the size required for the output options
 * field.  For T_CHECK each output entry is sized from the length of the value supplied on
 * input (T_SPACE(optlen)).  A supplied value, when present, must have the size of the
 * corresponding field in t->options; a zero-length value is always accepted.  A NULL or
 * zero-length input is treated as a request for all options at all levels.
 *
 * Returns the accumulated output size, or -EINVAL when a header is malformed, the level is not
 * recognized, T_ALLLEVELS is used with a name other than T_ALLOPT, or a supplied value has the
 * wrong size.
 */
noinline fastcall int
t_size_check_options(const struct tp *t, const unsigned char *ip, size_t ilen)
{
int olen = 0, optlen;
const struct t_opthdr *ih;
struct t_opthdr all;
if (ip == NULL || ilen == 0) {
/* For zero-length options fake an option header for all names with all levels */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); ih; ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
/* the header must at least describe itself and must lie within the input buffer */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* number of value bytes that follow the header */
optlen = ih->len - sizeof(*ih);
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
if (ih->name != T_ALLOPT)
goto einval;
/* T_ALLLEVELS with T_ALLOPT: fall through and size every level in turn */
case XTI_GENERIC:
/* Cascade: a specific name is checked and sized, then the loop continues with the
   next input header; T_ALLOPT enters at the top and falls through every case. */
switch (ih->name) {
default:
/* unrecognized name: the entry is sized from the supplied value length */
olen += T_SPACE(optlen);
continue;
case T_ALLOPT:
case XTI_DEBUG:
/* can be any non-zero array of t_uscalar_t */
if (optlen
&& ((optlen % sizeof(t_uscalar_t)) != 0
|| optlen > 4 * sizeof(t_uscalar_t)))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case XTI_LINGER:
if (optlen && optlen != sizeof(t->options.xti.linger))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVBUF:
if (optlen && optlen != sizeof(t->options.xti.rcvbuf))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVLOWAT:
if (optlen && optlen != sizeof(t->options.xti.rcvlowat))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDBUF:
if (optlen && optlen != sizeof(t->options.xti.sndbuf))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDLOWAT:
if (optlen && optlen != sizeof(t->options.xti.sndlowat))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
}
/* only T_ALLLEVELS goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
case T_INET_IP:
/* same cascade as above, for the T_INET_IP level */
switch (ih->name) {
default:
olen += T_SPACE(optlen);
continue;
case T_ALLOPT:
case T_IP_OPTIONS:
if (optlen && optlen != sizeof(t->options.ip.options))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TOS:
if (optlen && optlen != sizeof(t->options.ip.tos))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TTL:
if (optlen && optlen != sizeof(t->options.ip.ttl))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_REUSEADDR:
if (optlen && optlen != sizeof(t->options.ip.reuseaddr))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_DONTROUTE:
if (optlen && optlen != sizeof(t->options.ip.dontroute))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_BROADCAST:
if (optlen && optlen != sizeof(t->options.ip.broadcast))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_ADDR:
if (optlen && optlen != sizeof(t->options.ip.addr))
goto einval;
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
_ptrace(("%p: Calculated option output size = %u\n", t, olen));
return (olen);
einval:
ptrace(("%p: ERROR: Invalid input options\n", t));
return (-EINVAL);
}
/**
 * t_size_negotiate_options - determine size of options output for T_NEGOTIATE
 * @t: private structure
 * @ip: input pointer
 * @ilen: input length
 *
 * Walk the option headers in the input and accumulate the size required for the output options
 * field.  For a recognized option the output entry is sized from the corresponding field in
 * t->options (T_IP_OPTIONS and unrecognized names are sized from the supplied value length
 * instead).  When an option is named explicitly (not via T_ALLOPT) the supplied value must have
 * the expected size.  A NULL or zero-length input is treated as a request for all options at
 * all levels.
 *
 * Returns the accumulated output size, or -EINVAL when a header is malformed, the level is not
 * recognized, T_ALLLEVELS is used with a name other than T_ALLOPT, or an explicitly named
 * option carries a value of the wrong size.
 */
noinline fastcall int
t_size_negotiate_options(const struct tp *t, const unsigned char *ip, size_t ilen)
{
int olen = 0, optlen;
const struct t_opthdr *ih;
struct t_opthdr all;
if (ip == NULL || ilen == 0) {
/* For zero-length options fake an option header for all names with all levels */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0); ih; ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0)) {
/* the header must at least describe itself and must lie within the input buffer */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* number of value bytes that follow the header */
optlen = ih->len - sizeof(*ih);
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
if (ih->name != T_ALLOPT)
goto einval;
/* T_ALLLEVELS with T_ALLOPT: fall through and size every level in turn */
case XTI_GENERIC:
/* Cascade: a specific name is checked and sized, then the loop continues with the
   next input header; T_ALLOPT enters at the top and falls through every case
   (the value-size checks are skipped for T_ALLOPT). */
switch (ih->name) {
default:
/* unrecognized name: the entry is sized from the supplied value length */
olen += T_SPACE(optlen);
continue;
case T_ALLOPT:
case XTI_DEBUG:
if (ih->name != T_ALLOPT
&& ((optlen % sizeof(t_uscalar_t)) != 0
|| optlen > 4 * sizeof(t_uscalar_t)))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.debug);
if (ih->name != T_ALLOPT)
continue;
case XTI_LINGER:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.xti.linger))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.linger);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVBUF:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.xti.rcvbuf))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.rcvbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_RCVLOWAT:
if (ih->name != T_ALLOPT
&& optlen != sizeof(t->options.xti.rcvlowat))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.rcvlowat);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDBUF:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.xti.sndbuf))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.sndbuf);
if (ih->name != T_ALLOPT)
continue;
case XTI_SNDLOWAT:
if (ih->name != T_ALLOPT
&& optlen != sizeof(t->options.xti.sndlowat))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.xti.sndlowat);
if (ih->name != T_ALLOPT)
continue;
}
/* only T_ALLLEVELS goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
case T_INET_IP:
/* same cascade as above, for the T_INET_IP level */
switch (ih->name) {
default:
olen += T_SPACE(optlen);
continue;
case T_ALLOPT:
case T_IP_OPTIONS:
/* If the status is T_SUCCESS, T_FAILURE, T_NOTSUPPORT or
T_READONLY, the returned option value is the same as the one
requested on input. */
olen += T_SPACE(optlen);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TOS:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.ip.tos))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.tos);
if (ih->name != T_ALLOPT)
continue;
case T_IP_TTL:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.ip.ttl))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.ttl);
if (ih->name != T_ALLOPT)
continue;
case T_IP_REUSEADDR:
if (ih->name != T_ALLOPT
&& optlen != sizeof(t->options.ip.reuseaddr))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.reuseaddr);
if (ih->name != T_ALLOPT)
continue;
case T_IP_DONTROUTE:
if (ih->name != T_ALLOPT
&& optlen != sizeof(t->options.ip.dontroute))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.dontroute);
if (ih->name != T_ALLOPT)
continue;
case T_IP_BROADCAST:
if (ih->name != T_ALLOPT
&& optlen != sizeof(t->options.ip.broadcast))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.broadcast);
if (ih->name != T_ALLOPT)
continue;
case T_IP_ADDR:
if (ih->name != T_ALLOPT && optlen != sizeof(t->options.ip.addr))
goto einval;
olen += _T_SPACE_SIZEOF(t->options.ip.addr);
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
_ptrace(("%p: Calculated option output size = %u\n", t, olen));
return (olen);
einval:
ptrace(("%p: ERROR: Invalid input options\n", t));
return (-EINVAL);
}
/**
 * t_overall_result - determine overall options results
 * @overall: current overall result
 * @result: result to include in the overall result
 *
 * Folds the status of one option into the running T_OPTMGMT_ACK flag result.  Each recognized
 * status replaces *@overall only when *@overall has none of the bits of that status or of the
 * statuses ranked before it, in the order T_NOTSUPPORT, T_READONLY, T_FAILURE, T_PARTSUCCESS,
 * T_SUCCESS.  Any other @result value leaves *@overall untouched.
 *
 * Returns @result unchanged, so that the call can be used directly in an assignment.
 */
STATIC uint
t_overall_result(t_scalar_t * overall, const uint result)
{
	t_scalar_t blocking;	/* bits in *overall that prevent it being replaced */

	switch (result) {
	case T_NOTSUPPORT:
		blocking = T_NOTSUPPORT;
		break;
	case T_READONLY:
		blocking = T_NOTSUPPORT | T_READONLY;
		break;
	case T_FAILURE:
		blocking = T_NOTSUPPORT | T_READONLY | T_FAILURE;
		break;
	case T_PARTSUCCESS:
		blocking = T_NOTSUPPORT | T_READONLY | T_FAILURE | T_PARTSUCCESS;
		break;
	case T_SUCCESS:
		blocking = T_NOTSUPPORT | T_READONLY | T_FAILURE | T_PARTSUCCESS | T_SUCCESS;
		break;
	default:
		/* not a status this function ranks */
		return (result);
	}
	if ((*overall & blocking) == 0)
		*overall = (t_scalar_t) result;
	return (result);
}
/**
 * t_build_default_options - build options output for T_DEFAULT
 * @t: private structure (not referenced by this function)
 * @ip: input pointer
 * @ilen: input length
 * @op: output pointer
 * @olen: output length
 *
 * Perform the actions required of T_DEFAULT placing the output in the provided buffer.
 *
 * The input and output option headers are walked in lockstep.  For each requested option the
 * value from t_defaults is written to the output with status T_SUCCESS; unrecognized names and
 * T_IP_OPTIONS are written as bare headers with status T_NOTSUPPORT.  T_ALLOPT expands to every
 * option in a level and T_ALLLEVELS expands to every level.  A zero @ilen is treated as a
 * request for all options at all levels.
 *
 * On return *@olen is trimmed to the number of bytes used when output space remains.
 *
 * Returns the overall status (as accumulated by t_overall_result()), -EINVAL when an input
 * header is malformed or names an unknown level, or -EFAULT when the output buffer is exhausted
 * before the input has been processed.
 */
noinline fastcall t_scalar_t
t_build_default_options(const struct tp *t, const unsigned char *ip, size_t ilen,
			unsigned char *op, size_t *olen)
{
	t_scalar_t overall = T_SUCCESS;
	const struct t_opthdr *ih;
	struct t_opthdr *oh, all;
	int optlen;

	if (ilen == 0) {
		/* For zero-length options fake an option for all names within all levels. */
		all.level = T_ALLLEVELS;
		all.name = T_ALLOPT;
		all.len = sizeof(all);
		all.status = 0;
		ip = (unsigned char *) &all;
		ilen = sizeof(all);
	}
	for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0), oh = _T_OPT_FIRSTHDR_OFS(op, *olen, 0);
	     ih && oh;
	     ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0), oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)) {
		/* don't need to do this, it was done when we sized options */
		if (ih->len < sizeof(*ih))
			goto einval;
		if ((unsigned char *) ih + ih->len > ip + ilen)
			goto einval;
		optlen = ih->len - sizeof(*ih);
		(void) optlen;
		switch (ih->level) {
		default:
			goto einval;
		case T_ALLLEVELS:
			if (ih->name != T_ALLOPT)
				goto einval;
			/* T_ALLLEVELS with T_ALLOPT: fall through and emit every level in turn */
		case XTI_GENERIC:
			/* Cascade: a specific name emits one option and continues (the loop then
			   advances both ih and oh); T_ALLOPT enters at the top and falls through
			   every case, stepping oh by hand between options. */
			switch (ih->name) {
			default:
				oh->len = sizeof(*oh);
				oh->level = ih->level;
				oh->name = ih->name;
				oh->status = t_overall_result(&overall, T_NOTSUPPORT);
				continue;
			case T_ALLOPT:
			case XTI_DEBUG:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.debug);
				oh->level = XTI_GENERIC;
				oh->name = XTI_DEBUG;
				oh->status = T_SUCCESS;
				bcopy(t_defaults.xti.debug, T_OPT_DATA(oh),
				      sizeof(t_defaults.xti.debug));
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case XTI_LINGER:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.linger);
				oh->level = XTI_GENERIC;
				oh->name = XTI_LINGER;
				oh->status = T_SUCCESS;
				*((struct t_linger *) T_OPT_DATA(oh)) = t_defaults.xti.linger;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case XTI_RCVBUF:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.rcvbuf);
				oh->level = XTI_GENERIC;
				oh->name = XTI_RCVBUF;
				oh->status = T_SUCCESS;
				*((t_uscalar_t *) T_OPT_DATA(oh)) = t_defaults.xti.rcvbuf;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case XTI_RCVLOWAT:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.rcvlowat);
				oh->level = XTI_GENERIC;
				oh->name = XTI_RCVLOWAT;
				oh->status = T_SUCCESS;
				*((t_uscalar_t *) T_OPT_DATA(oh)) = t_defaults.xti.rcvlowat;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case XTI_SNDBUF:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.sndbuf);
				oh->level = XTI_GENERIC;
				oh->name = XTI_SNDBUF;
				oh->status = T_SUCCESS;
				*((t_uscalar_t *) T_OPT_DATA(oh)) = t_defaults.xti.sndbuf;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case XTI_SNDLOWAT:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.xti.sndlowat);
				oh->level = XTI_GENERIC;
				oh->name = XTI_SNDLOWAT;
				oh->status = T_SUCCESS;
				/* Reported as stored, like the other defaults.  This used to be
				   halved (>> 1), although the low-water mark is stored unshifted
				   by t_opts_parse() and none of the other defaults are halved. */
				*((t_uscalar_t *) T_OPT_DATA(oh)) = t_defaults.xti.sndlowat;
				if (ih->name != T_ALLOPT)
					continue;
			}
			/* only T_ALLLEVELS goes on to the next level */
			if (ih->level != T_ALLLEVELS)
				continue;
			if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
				goto efault;
		case T_INET_IP:
			/* same cascade as above, for the T_INET_IP level */
			switch (ih->name) {
			default:
				oh->len = sizeof(*oh);
				oh->level = ih->level;
				oh->name = ih->name;
				oh->status = t_overall_result(&overall, T_NOTSUPPORT);
				continue;
			case T_ALLOPT:
			case T_IP_OPTIONS:
			{
				oh->len = sizeof(*oh);
				oh->level = T_INET_IP;
				oh->name = T_IP_OPTIONS;
				oh->status = t_overall_result(&overall, T_NOTSUPPORT);
				/* not supported yet */
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			}
			case T_IP_TOS:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.tos);
				oh->level = T_INET_IP;
				oh->name = T_IP_TOS;
				oh->status = T_SUCCESS;
				*((unsigned char *) T_OPT_DATA(oh)) = t_defaults.ip.tos;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case T_IP_TTL:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.ttl);
				oh->level = T_INET_IP;
				oh->name = T_IP_TTL;
				oh->status = T_SUCCESS;
				*((unsigned char *) T_OPT_DATA(oh)) = t_defaults.ip.ttl;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case T_IP_REUSEADDR:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.reuseaddr);
				oh->level = T_INET_IP;
				oh->name = T_IP_REUSEADDR;
				oh->status = T_SUCCESS;
				*((unsigned int *) T_OPT_DATA(oh)) = t_defaults.ip.reuseaddr;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case T_IP_DONTROUTE:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.dontroute);
				oh->level = T_INET_IP;
				oh->name = T_IP_DONTROUTE;
				oh->status = T_SUCCESS;
				*((unsigned int *) T_OPT_DATA(oh)) = t_defaults.ip.dontroute;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case T_IP_BROADCAST:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.broadcast);
				oh->level = T_INET_IP;
				oh->name = T_IP_BROADCAST;
				oh->status = T_SUCCESS;
				*((unsigned int *) T_OPT_DATA(oh)) = t_defaults.ip.broadcast;
				if (ih->name != T_ALLOPT)
					continue;
				if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
					goto efault;
			case T_IP_ADDR:
				oh->len = _T_LENGTH_SIZEOF(t_defaults.ip.addr);
				oh->level = T_INET_IP;
				oh->name = T_IP_ADDR;
				oh->status = T_SUCCESS;
				*((uint32_t *) T_OPT_DATA(oh)) = t_defaults.ip.addr;
				if (ih->name != T_ALLOPT)
					continue;
			}
			if (ih->level != T_ALLLEVELS)
				continue;
		}
	}
	/* input left over with no output header available: the buffer was too small */
	if (ih && !oh)
		goto efault;
	/* oh now marks the first unused output position: report the bytes actually used */
	if (oh)
		*olen = (unsigned char *) oh - op;
	return (overall);
      einval:
	return (-EINVAL);
      efault:
	swerr();
	return (-EFAULT);
}
/**
 * t_build_current_options - build options output for T_CURRENT
 * @t: private structure
 * @ip: input pointer
 * @ilen: input length
 * @op: output pointer
 * @olen: output length
 *
 * Perform the actions required of T_CURRENT placing the output in the provided buffer.
 *
 * The input and output option headers are walked in lockstep.  For each requested option the
 * value from t->options is written to the output with status T_SUCCESS; unrecognized names and
 * T_IP_OPTIONS are written as bare headers with status T_NOTSUPPORT.  T_ALLOPT expands to every
 * option in a level and T_ALLLEVELS expands to every level.  A zero @ilen is treated as a
 * request for all options at all levels.
 *
 * On return *@olen is trimmed to the number of bytes used when output space remains.
 *
 * Returns the overall status (as accumulated by t_overall_result()), -EINVAL when an input
 * header is malformed or names an unknown level, or -EFAULT when the output buffer is exhausted
 * before the input has been processed.
 */
noinline fastcall t_scalar_t
t_build_current_options(const struct tp *t, const unsigned char *ip, size_t ilen,
unsigned char *op, size_t *olen)
{
t_scalar_t overall = T_SUCCESS;
const struct t_opthdr *ih;
struct t_opthdr *oh, all;
int optlen;
/* NOTE(review): only ilen is tested here, whereas the t_size_*_options() functions also treat
   a NULL ip as "all options" -- confirm callers never pass NULL with a non-zero length. */
if (ilen == 0) {
/* For zero-length options fake an option for all names within all levels. */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0), oh = _T_OPT_FIRSTHDR_OFS(op, *olen, 0);
ih && oh;
ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0), oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)) {
/* don't need to do this, it was done when we sized options */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
optlen = ih->len - sizeof(*ih);
(void) optlen;
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
if (ih->name != T_ALLOPT)
goto einval;
/* T_ALLLEVELS with T_ALLOPT: fall through and emit every level in turn */
case XTI_GENERIC:
/* Cascade: a specific name emits one option and continues (the loop then advances
   both ih and oh); T_ALLOPT enters at the top and falls through every case,
   stepping oh by hand between options. */
switch (ih->name) {
default:
oh->len = sizeof(*oh);
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
continue;
case T_ALLOPT:
case XTI_DEBUG:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.debug);
oh->level = XTI_GENERIC;
oh->name = XTI_DEBUG;
oh->status = T_SUCCESS;
bcopy(t->options.xti.debug, T_OPT_DATA(oh),
4 * sizeof(t_uscalar_t));
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_LINGER:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.linger);
oh->level = XTI_GENERIC;
oh->name = XTI_LINGER;
oh->status = T_SUCCESS;
/* refresh current value */
*((struct t_linger *) T_OPT_DATA(oh)) = t->options.xti.linger;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_RCVBUF:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.rcvbuf);
oh->level = XTI_GENERIC;
oh->name = XTI_RCVBUF;
oh->status = T_SUCCESS;
/* refresh current value */
/* reported as half the stored value; t_opts_parse() stores the requested value
   shifted left by one */
*((t_uscalar_t *) T_OPT_DATA(oh)) = t->options.xti.rcvbuf >> 1;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_RCVLOWAT:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.rcvlowat);
oh->level = XTI_GENERIC;
oh->name = XTI_RCVLOWAT;
oh->status = T_SUCCESS;
/* refresh current value */
*((t_uscalar_t *) T_OPT_DATA(oh)) = t->options.xti.rcvlowat;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_SNDBUF:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.sndbuf);
oh->level = XTI_GENERIC;
oh->name = XTI_SNDBUF;
oh->status = T_SUCCESS;
/* refresh current value */
/* reported as half the stored value, as for XTI_RCVBUF above */
*((t_uscalar_t *) T_OPT_DATA(oh)) = t->options.xti.sndbuf >> 1;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_SNDLOWAT:
oh->len = _T_LENGTH_SIZEOF(t->options.xti.sndlowat);
oh->level = XTI_GENERIC;
oh->name = XTI_SNDLOWAT;
oh->status = T_SUCCESS;
/* refresh current value */
*((t_uscalar_t *) T_OPT_DATA(oh)) = t->options.xti.sndlowat;
if (ih->name != T_ALLOPT)
continue;
}
/* only T_ALLLEVELS goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_INET_IP:
/* same cascade as above, for the T_INET_IP level */
switch (ih->name) {
default:
oh->len = sizeof(*oh);
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
continue;
case T_ALLOPT:
case T_IP_OPTIONS:
{
oh->len = sizeof(*oh);
oh->level = T_INET_IP;
oh->name = T_IP_OPTIONS;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
/* not supported yet */
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_TOS:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.tos);
oh->level = T_INET_IP;
oh->name = T_IP_TOS;
oh->status = T_SUCCESS;
/* refresh current value */
*((unsigned char *) T_OPT_DATA(oh)) = t->options.ip.tos;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_TTL:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.ttl);
oh->level = T_INET_IP;
oh->name = T_IP_TTL;
oh->status = T_SUCCESS;
/* refresh current value */
*((unsigned char *) T_OPT_DATA(oh)) = t->options.ip.ttl;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_REUSEADDR:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.reuseaddr);
oh->level = T_INET_IP;
oh->name = T_IP_REUSEADDR;
oh->status = T_SUCCESS;
/* refresh current value */
*((unsigned int *) T_OPT_DATA(oh)) = t->options.ip.reuseaddr;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_DONTROUTE:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.dontroute);
oh->level = T_INET_IP;
oh->name = T_IP_DONTROUTE;
oh->status = T_SUCCESS;
/* refresh current value */
*((unsigned int *) T_OPT_DATA(oh)) = t->options.ip.dontroute;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_BROADCAST:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.broadcast);
oh->level = T_INET_IP;
oh->name = T_IP_BROADCAST;
oh->status = T_SUCCESS;
/* refresh current value */
*((unsigned int *) T_OPT_DATA(oh)) = t->options.ip.broadcast;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_ADDR:
oh->len = _T_LENGTH_SIZEOF(t->options.ip.addr);
oh->level = T_INET_IP;
oh->name = T_IP_ADDR;
oh->status = T_SUCCESS;
/* refresh current value */
*((uint32_t *) T_OPT_DATA(oh)) = t->options.ip.addr;
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
/* input left over with no output header available: the buffer was too small */
if (ih && !oh)
goto efault;
/* oh now marks the first unused output position: report the bytes actually used */
if (oh)
*olen = (unsigned char *) oh - op;
return (overall);
einval:
return (-EINVAL);
efault:
swerr();
return (-EFAULT);
}
/*
 * On LP64 builds replace the kernel's MAX_SCHEDULE_TIMEOUT with INT_MAX for the rest of this
 * file.  The option builders below clamp XTI_LINGER's l_linger to MAX_SCHEDULE_TIMEOUT / HZ.
 * NOTE(review): presumably the override keeps that clamp representable in the 32-bit linger
 * field -- confirm against the definition of struct t_linger.
 */
#ifdef __LP64__
#undef MAX_SCHEDULE_TIMEOUT
#define MAX_SCHEDULE_TIMEOUT INT_MAX
#endif
/**
 * t_build_check_options - build output options for T_CHECK
 * @t: private structure (not referenced by this function)
 * @ip: input pointer
 * @ilen: input length
 * @op: output pointer
 * @olen: output length (in: size of the output buffer; out: number of bytes used)
 *
 * Perform the actions required of T_CHECK placing the output in the provided buffer.
 *
 * Each input option header is mirrored into the output buffer.  When the option carries a value
 * (optlen != 0) the value is copied to the output and validated there: some out-of-range values
 * are clamped in the output copy and flagged T_PARTSUCCESS, unsupported options are flagged
 * T_NOTSUPPORT.  Nothing is stored in the private structure.  A zero-length input, or an input
 * option named T_ALLOPT, expands to every option of the level (or of all levels for
 * T_ALLLEVELS); the expansion is implemented by falling through consecutive case labels and
 * advancing @oh between them.
 *
 * Returns the overall status accumulated through t_overall_result() (initially T_SUCCESS),
 * -EINVAL for a malformed or illegal option, or -EFAULT (after swerr()) when the output buffer
 * has no room for the next option header.
 */
noinline fastcall t_scalar_t
t_build_check_options(const struct tp *t, const unsigned char *ip, size_t ilen, unsigned char *op,
size_t *olen)
{
t_scalar_t overall = T_SUCCESS;
const struct t_opthdr *ih;
struct t_opthdr *oh, all;
int optlen;
if (ilen == 0) {
/* For zero-length options fake an option for all names within all levels. */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
/* Walk the input and output option headers in lock-step. */
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0), oh = _T_OPT_FIRSTHDR_OFS(op, *olen, 0);
ih && oh;
ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0), oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)) {
/* don't need to do this, it was done when we sized options */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* number of value bytes following the input header */
optlen = ih->len - sizeof(*ih);
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
/* T_ALLLEVELS is only legal together with T_ALLOPT */
if (ih->name != T_ALLOPT)
goto einval;
/* fall through: start with the XTI_GENERIC level */
case XTI_GENERIC:
switch (ih->name) {
default:
/* unknown name: echo the option back as T_NOTSUPPORT */
oh->len = ih->len;
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
if (optlen)
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
continue;
case T_ALLOPT:
/* fall through: T_ALLOPT visits every option of the level in turn */
case XTI_DEBUG:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_DEBUG;
oh->status = T_SUCCESS;
if (optlen) {
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (!capable(CAP_NET_ADMIN)) {
oh->status =
t_overall_result(&overall, T_NOTSUPPORT);
} else if (optlen) {
/* NOTE(review): valp aliases T_OPT_DATA(oh), so this copy has identical
   source and destination and changes nothing */
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(oh), valp, optlen);
}
}
if (ih->name != T_ALLOPT)
continue;
/* T_ALLOPT: advance the output header and fall through to the next option */
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_LINGER:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_LINGER;
oh->status = T_SUCCESS;
if (optlen) {
struct t_linger *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* l_onoff must be T_YES or T_NO; T_UNSPEC linger is only valid with T_NO */
if ((valp->l_onoff != T_NO && valp->l_onoff != T_YES)
|| (valp->l_linger == T_UNSPEC
&& valp->l_onoff != T_NO))
goto einval;
if (valp->l_linger == T_UNSPEC) {
valp->l_linger = t_defaults.xti.linger.l_linger;
}
if (valp->l_linger == T_INFINITE) {
valp->l_linger = MAX_SCHEDULE_TIMEOUT / HZ;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
if (valp->l_linger < 0)
goto einval;
/* clamp to the largest supported linger interval */
if (valp->l_linger > MAX_SCHEDULE_TIMEOUT / HZ) {
valp->l_linger = MAX_SCHEDULE_TIMEOUT / HZ;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_RCVBUF:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_RCVBUF;
oh->status = T_SUCCESS;
if (optlen) {
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* clamp into [SOCK_MIN_RCVBUF / 2, sysctl_rmem_max] */
if (*valp > sysctl_rmem_max) {
*valp = sysctl_rmem_max;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp < SOCK_MIN_RCVBUF / 2) {
*valp = SOCK_MIN_RCVBUF / 2;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_RCVLOWAT:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_RCVLOWAT;
oh->status = T_SUCCESS;
if (optlen) {
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
if (*valp > INT_MAX) {
*valp = INT_MAX;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp <= 0) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_SNDBUF:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_SNDBUF;
oh->status = T_SUCCESS;
if (optlen) {
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* clamp into [SOCK_MIN_SNDBUF / 2, sysctl_wmem_max] */
if (*valp > sysctl_wmem_max) {
*valp = sysctl_wmem_max;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp < SOCK_MIN_SNDBUF / 2) {
*valp = SOCK_MIN_SNDBUF / 2;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
/* NOTE(review): this break leaves the inner switch rather than using the
   "continue unless T_ALLOPT" test used by the other options; for a
   T_ALLOPT request that carries a value it would skip XTI_SNDLOWAT --
   confirm this is intended */
break;
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case XTI_SNDLOWAT:
oh->len = ih->len;
oh->level = XTI_GENERIC;
oh->name = XTI_SNDLOWAT;
oh->status = T_SUCCESS;
if (optlen) {
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* any value other than 1 is replaced by 1 and reported as T_PARTSUCCESS */
if (*valp > 1) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp <= 0) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
break;
}
if (ih->name != T_ALLOPT)
continue;
}
/* only a T_ALLLEVELS request goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
/* fall through: T_ALLLEVELS continues with the T_INET_IP level */
case T_INET_IP:
switch (ih->name) {
default:
/* unknown name: echo the option back as T_NOTSUPPORT */
oh->len = ih->len;
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
if (optlen)
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
continue;
case T_ALLOPT:
/* fall through: T_ALLOPT visits every option of the level in turn */
case T_IP_OPTIONS:
/* not supported yet */
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_OPTIONS;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
if (optlen)
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_TOS:
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_TOS;
oh->status = T_SUCCESS;
if (optlen) {
unsigned char *valp = (typeof(valp)) T_OPT_DATA(oh);
unsigned char prec, type;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* split into precedence (top three bits) and type-of-service flags, and
   reject any value that does not round-trip through SET_TOS() */
prec = (*valp >> 5) & 0x7;
type = *valp & (T_LDELAY | T_HITHRPT | T_HIREL | T_LOCOST);
if (*valp != SET_TOS(prec, type))
goto einval;
/* precedence T_CRITIC_ECP and above requires CAP_NET_ADMIN */
if (prec >= T_CRITIC_ECP && !capable(CAP_NET_ADMIN))
oh->status =
t_overall_result(&overall, T_NOTSUPPORT);
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_TTL:
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_TTL;
oh->status = T_SUCCESS;
if (optlen) {
unsigned char *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
/* zero is replaced by the system default TTL */
if (*valp == 0) {
*valp = sysctl_ip_default_ttl;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
if (*valp < 1) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
#if 0
if (*valp > 255) {
*valp = 255;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
#endif
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_REUSEADDR:
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_REUSEADDR;
oh->status = T_SUCCESS;
if (optlen) {
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_DONTROUTE:
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_DONTROUTE;
oh->status = T_SUCCESS;
if (optlen) {
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_BROADCAST:
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_BROADCAST;
oh->status = T_SUCCESS;
if (optlen) {
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (optlen != sizeof(*valp))
goto einval;
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
case T_IP_ADDR:
/* the address value is copied through without length or range checks */
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_ADDR;
oh->status = T_SUCCESS;
if (optlen)
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name != T_ALLOPT)
continue;
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
/* input options remain but the output buffer is exhausted */
if (ih && !oh)
goto efault;
/* report how much of the output buffer was consumed */
if (oh)
*olen = (unsigned char *) oh - op;
return (overall);
einval:
return (-EINVAL);
efault:
swerr();
return (-EFAULT);
}
/**
 * t_build_negotiate_options - build output options for T_NEGOTIATE
 * @t: private structure (updated with the negotiated values)
 * @ip: input pointer
 * @ilen: input length
 * @op: output pointer
 * @olen: output length (in: size of the output buffer; out: number of bytes used)
 *
 * Perform the actions required of T_NEGOTIATE placing the output in the provided buffer.
 *
 * For each supported option the negotiated value is written to the output buffer and stored in
 * @t->options.  An option named T_ALLOPT (or a zero-length input, which is treated as
 * T_ALLLEVELS/T_ALLOPT) resets every option of the level to its default value; the expansion
 * is implemented by falling through consecutive case labels and advancing @oh between them.
 * Explicitly named options take their value from the input; some out-of-range values are
 * clamped and reported as T_PARTSUCCESS.
 *
 * NOTE(review): unlike t_build_check_options(), the per-option value length is not compared
 * with the expected size before the input value is dereferenced here.  The in-line comment
 * says lengths were validated when the options were sized -- confirm against the sizing
 * function.
 *
 * Returns the overall status accumulated through t_overall_result() (initially T_SUCCESS),
 * -EINVAL for a malformed or illegal option, or -EFAULT (after swerr()) when the output buffer
 * has no room for the next option header.
 */
noinline fastcall t_scalar_t
t_build_negotiate_options(struct tp *t, const unsigned char *ip, size_t ilen, unsigned char *op,
size_t *olen)
{
t_scalar_t overall = T_SUCCESS;
const struct t_opthdr *ih;
struct t_opthdr *oh, all;
int optlen;
if (ilen == 0) {
/* For zero-length options fake an option for all names within all levels. */
all.level = T_ALLLEVELS;
all.name = T_ALLOPT;
all.len = sizeof(all);
all.status = 0;
ip = (unsigned char *) &all;
ilen = sizeof(all);
}
/* Walk the input and output option headers in lock-step. */
for (ih = _T_OPT_FIRSTHDR_OFS(ip, ilen, 0), oh = _T_OPT_FIRSTHDR_OFS(op, *olen, 0);
ih && oh;
ih = _T_OPT_NEXTHDR_OFS(ip, ilen, ih, 0), oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)) {
/* don't need to do this, it was done when we sized options */
if (ih->len < sizeof(*ih))
goto einval;
if ((unsigned char *) ih + ih->len > ip + ilen)
goto einval;
/* number of value bytes following the input header */
optlen = ih->len - sizeof(*ih);
switch (ih->level) {
default:
goto einval;
case T_ALLLEVELS:
/* T_ALLLEVELS is only legal together with T_ALLOPT */
if (ih->name != T_ALLOPT)
goto einval;
/* fall through: start with the XTI_GENERIC level */
case XTI_GENERIC:
switch (ih->name) {
default:
/* unknown name: echo the option back as T_NOTSUPPORT */
oh->len = ih->len;
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
continue;
case T_ALLOPT:
/* fall through: T_ALLOPT visits every option of the level in turn */
case XTI_DEBUG:
{
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_DEBUG;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.xti.debug[0];
} else {
bcopy(T_OPT_DATA(ih), valp, optlen);
}
if (!capable(CAP_NET_ADMIN)) { /* XXX */
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
} else {
/* privileged caller: replace the stored debug words with the output value */
bzero(t->options.xti.debug, sizeof(t->options.xti.debug));
if (oh->len > sizeof(*oh))
bcopy(valp, t->options.xti.debug,
oh->len - sizeof(*oh));
}
if (ih->name != T_ALLOPT)
continue;
/* T_ALLOPT: advance the output header and fall through to the next option */
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case XTI_LINGER:
{
struct t_linger *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_LINGER;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.xti.linger;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
/* l_onoff must be T_YES or T_NO; T_UNSPEC linger is only valid with T_NO */
if ((valp->l_onoff != T_NO && valp->l_onoff != T_YES)
|| (valp->l_linger == T_UNSPEC
&& valp->l_onoff != T_NO))
goto einval;
if (valp->l_linger == T_UNSPEC) {
valp->l_linger = t_defaults.xti.linger.l_linger;
}
if (valp->l_linger == T_INFINITE) {
valp->l_linger = MAX_SCHEDULE_TIMEOUT / HZ;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
if (valp->l_linger < 0)
goto einval;
/* clamp to the largest supported linger interval */
if (valp->l_linger > MAX_SCHEDULE_TIMEOUT / HZ) {
valp->l_linger = MAX_SCHEDULE_TIMEOUT / HZ;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
t->options.xti.linger = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case XTI_RCVBUF:
{
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_RCVBUF;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = sysctl_rmem_default;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
/* clamp into [SOCK_MIN_RCVBUF / 2, sysctl_rmem_max] */
if (*valp > sysctl_rmem_max) {
*valp = sysctl_rmem_max;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp < SOCK_MIN_RCVBUF / 2) {
*valp = SOCK_MIN_RCVBUF / 2;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
/* the private structure stores twice the reported value */
t->options.xti.rcvbuf = *valp << 1;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case XTI_RCVLOWAT:
{
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_RCVLOWAT;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.xti.rcvlowat;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
if (*valp > INT_MAX) {
*valp = INT_MAX;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp <= 0) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
t->options.xti.rcvlowat = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case XTI_SNDBUF:
{
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_SNDBUF;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = sysctl_wmem_default;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
/* clamp into [SOCK_MIN_SNDBUF / 2, sysctl_wmem_max] */
if (*valp > sysctl_wmem_max) {
*valp = sysctl_wmem_max;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp < SOCK_MIN_SNDBUF / 2) {
*valp = SOCK_MIN_SNDBUF / 2;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
/* the private structure stores twice the reported value */
t->options.xti.sndbuf = *valp << 1;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case XTI_SNDLOWAT:
{
t_uscalar_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = XTI_GENERIC;
oh->name = XTI_SNDLOWAT;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = sysctl_wmem_default >> 1;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
/* any value other than 1 is replaced by 1 and reported as T_PARTSUCCESS */
if (*valp > 1) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
} else if (*valp <= 0) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
t->options.xti.sndlowat = *valp;
if (ih->name != T_ALLOPT)
continue;
}
}
/* only a T_ALLLEVELS request goes on to the next level */
if (ih->level != T_ALLLEVELS)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
/* fall through: T_ALLLEVELS continues with the T_INET_IP level */
case T_INET_IP:
switch (ih->name) {
default:
/* unknown name: echo the option back as T_NOTSUPPORT */
oh->len = ih->len;
oh->level = ih->level;
oh->name = ih->name;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
continue;
case T_ALLOPT:
/* fall through: T_ALLOPT visits every option of the level in turn */
case T_IP_OPTIONS:
{
/* not supported yet */
oh->len = ih->len;
oh->level = T_INET_IP;
oh->name = T_IP_OPTIONS;
oh->status = t_overall_result(&overall, T_NOTSUPPORT);
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_TOS:
{
unsigned char *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_TOS;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.tos;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
{
/* split into precedence (top three bits) and type-of-service flags, and
   reject any value that does not round-trip through SET_TOS() */
unsigned char prec = (*valp >> 5) & 0x7;
unsigned char type =
*valp & (T_LDELAY | T_HITHRPT |
T_HIREL | T_LOCOST);
if (*valp != SET_TOS(prec, type))
goto einval;
if (prec >= T_CRITIC_ECP && !capable(CAP_NET_ADMIN)) {
oh->status =
t_overall_result
(&overall, T_NOTSUPPORT);
/* leaves the inner switch without storing the value; this branch is
   only taken for an explicitly named option */
break;
}
}
}
t->options.ip.tos = *valp;
// sk->sk_priority = rt_tos2priority(*valp);
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_TTL:
{
unsigned char *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_TTL;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.ttl;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
/* zero is replaced by the system default TTL */
if (*valp == 0) {
*valp = sysctl_ip_default_ttl;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
if (*valp < 1) {
*valp = 1;
oh->status =
t_overall_result(&overall, T_PARTSUCCESS);
}
}
t->options.ip.ttl = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_REUSEADDR:
{
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_REUSEADDR;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.reuseaddr;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
t->options.ip.reuseaddr = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_DONTROUTE:
{
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_DONTROUTE;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.dontroute;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
t->options.ip.dontroute = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_BROADCAST:
{
unsigned int *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_BROADCAST;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.broadcast;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
if (*valp != T_YES && *valp != T_NO)
goto einval;
}
t->options.ip.broadcast = *valp;
if (ih->name != T_ALLOPT)
continue;
if (!(oh = _T_OPT_NEXTHDR_OFS(op, *olen, oh, 0)))
goto efault;
}
case T_IP_ADDR:
{
/* the address value is accepted without range checks */
uint32_t *valp = (typeof(valp)) T_OPT_DATA(oh);
oh->len = _T_LENGTH_SIZEOF(*valp);
oh->level = T_INET_IP;
oh->name = T_IP_ADDR;
oh->status = T_SUCCESS;
bcopy(T_OPT_DATA(ih), T_OPT_DATA(oh), optlen);
if (ih->name == T_ALLOPT) {
*valp = t_defaults.ip.addr;
} else {
*valp = *((typeof(valp)) T_OPT_DATA(ih));
}
t->options.ip.addr = *valp;
if (ih->name != T_ALLOPT)
continue;
}
}
if (ih->level != T_ALLLEVELS)
continue;
}
}
/* input options remain but the output buffer is exhausted */
if (ih && !oh)
goto efault;
/* report how much of the output buffer was consumed */
if (oh)
*olen = (unsigned char *) oh - op;
return (overall);
einval:
return (-EINVAL);
efault:
swerr();
return (-EFAULT);
}
/**
 * t_build_options - build the output options
 * @t: private structure
 * @ip: input pointer
 * @ilen: input length
 * @op: output pointer
 * @olen: output length
 * @flag: management flag
 *
 * Dispatch on the management flag to the builder for T_DEFAULT, T_CURRENT, T_CHECK or
 * T_NEGOTIATE, which places its output in the provided buffer.  The builder's result is
 * returned unchanged; any other flag value yields -EINVAL.
 */
noinline fastcall t_scalar_t
t_build_options(struct tp *t, const unsigned char *ip, const size_t ilen, unsigned char *op,
		size_t *olen, const t_scalar_t flag)
{
	t_scalar_t result;

	switch (flag) {
	case T_NEGOTIATE:
		result = t_build_negotiate_options(t, ip, ilen, op, olen);
		break;
	case T_CHECK:
		result = t_build_check_options(t, ip, ilen, op, olen);
		break;
	case T_CURRENT:
		result = t_build_current_options(t, ip, ilen, op, olen);
		break;
	case T_DEFAULT:
		result = t_build_default_options(t, ip, ilen, op, olen);
		break;
	default:
		/* unrecognized management flag */
		result = -EINVAL;
		break;
	}
	return (result);
}
/*
* =========================================================================
*
* IP Local Management
*
* =========================================================================
*/
/* Forward declarations: tp_init_nproto() below installs these two functions as the handler and
 * err_handler of the overriding protocol entry, so they must be declared before it. */
STATIC int tp_v4_rcv(struct sk_buff *skb);
STATIC void tp_v4_err(struct sk_buff *skb, u32 info);
/*
* IP subsystem management
*/
#ifdef LINUX
/**
 * tp_v4_rcv_next - hand a socket buffer to the next (overridden) protocol handler
 * @skb: socket buffer to pass
 *
 * Used by the Linux packet handler for packets that are not ours.  The IP protocol number is
 * read from the network header and used to find our protocol bucket; when the bucket recorded a
 * previous protocol entry, the buffer is passed to that entry's handler.  Otherwise the buffer
 * is freed.  The hash is not locked here because we own it and hold a reference on any module
 * owning the next handler.
 *
 * Returns one (1) when the packet was passed on to another handler and zero (0) when it was
 * freed.  Callers use the result to decide whether to generate ICMP errors.
 */
STATIC INLINE fastcall __hot_in int
tp_v4_rcv_next(struct sk_buff *skb)
{
	struct iphdr *hdr = (typeof(hdr)) skb_network_header(skb);
	struct tp_prot_bucket *bucket = tp_prots[hdr->protocol];
	struct mynet_protocol *next = NULL;

	if (bucket != NULL)
		next = bucket->prot.next;
	if (next == NULL) {
		/* nobody else wants it */
		kfree_skb(skb);
		return (0);
	}
	next->handler(skb);
	return (1);
}
/**
 * tp_v4_err_next - hand a socket buffer to the next (overridden) error handler
 * @skb: socket buffer to pass
 * @info: error information forwarded unchanged to the next handler
 *
 * Used by the Linux packet error handler for errors that are not ours.  The protocol number is
 * taken from the IP header at skb->data; when our bucket for that protocol recorded a previous
 * protocol entry, its err_handler is invoked.  When there is none, nothing is done.
 */
STATIC INLINE fastcall __hot_in void
tp_v4_err_next(struct sk_buff *skb, __u32 info)
{
	const unsigned char protocol = ((struct iphdr *) skb->data)->protocol;
	struct tp_prot_bucket *bucket = tp_prots[protocol];
	struct mynet_protocol *next;

	if (bucket == NULL)
		return;
	next = bucket->prot.next;
	if (next == NULL)
		return;
	next->err_handler(skb, info);
}
/**
 * tp_init_nproto - initialize network protocol override
 * @proto: the protocol to register or override
 * @type: service type taking the reference (T_COTS, T_COTS_ORD or T_CLTS)
 *
 * This is the network protocol override function.
 *
 * This is complicated because we hack the inet protocol tables.  If no other protocol was
 * previously registered, this reduces to inet_add_protocol().  If there is a protocol previously
 * registered, we take a reference on the kernel module owning the entry, if possible, and replace
 * the entry with our own, saving a pointer to the previous entry for passing sk_bufs along that we
 * are not interested in.  Taking a module reference is particularly for things like SCTP, where
 * unloading the module after protocol override would break things horribly.  Taking the reference
 * keeps the module from unloading (this works for OpenSS7 SCTP as well as lksctp).
 *
 * When a bucket already exists for @proto only its reference counts are incremented.  Otherwise
 * a bucket is allocated with GFP_ATOMIC, filled in, and swapped into the inet protocol table.
 * All of this happens with tp_prot_lock write-held.
 *
 * Returns the protocol bucket on success, or NULL when the bucket could not be allocated, when
 * the existing entry is a "copy" entry that cannot be overridden, or when a reference on the
 * owning module could not be taken.
 */
STATIC INLINE fastcall __unlikely struct tp_prot_bucket *
tp_init_nproto(unsigned char proto, unsigned int type)
{
struct tp_prot_bucket *pb;
struct ipnet_protocol *pp;
struct mynet_protocol **ppp;
int hash = proto & (MAX_INET_PROTOS - 1);
write_lock_bh(&tp_prot_lock);
if ((pb = tp_prots[proto]) != NULL) {
/* already overriding this protocol: just take another reference */
pb->refs++;
switch (type) {
case T_COTS:
case T_COTS_ORD:
++pb->corefs;
break;
case T_CLTS:
++pb->clrefs;
break;
default:
swerr();
break;
}
} else if ((pb = kmem_cache_alloc(tp_raw_prot_cachep, GFP_ATOMIC))) {
/* first user of this protocol: build a fresh bucket holding one reference */
bzero(pb, sizeof(*pb));
pb->refs = 1;
switch (type) {
case T_COTS:
case T_COTS_ORD:
pb->corefs = 1;
break;
case T_CLTS:
pb->clrefs = 1;
break;
default:
swerr();
break;
}
pp = &pb->prot;
/* fill in whichever identification fields this kernel's protocol structure has */
#ifdef HAVE_KMEMB_STRUCT_INET_PROTOCOL_PROTOCOL
pp->proto.protocol = proto;
pp->proto.name = "streams-rawip";
#endif
#if defined HAVE_KTYPE_STRUCT_NET_PROTOCOL_PROTO
pp->proto.proto = proto;
#endif /* defined HAVE_KTYPE_STRUCT_NET_PROTOCOL_PROTO */
#if defined HAVE_KMEMB_STRUCT_NET_PROTOCOL_NO_POLICY || defined HAVE_KMEMB_STRUCT_INET_PROTOCOL_NO_POLICY
pp->proto.no_policy = 1;
#endif
pp->proto.handler = &tp_v4_rcv;
pp->proto.err_handler = &tp_v4_err;
ppp = &inet_protosp[hash];
{
net_protocol_lock();
#ifdef HAVE_OLD_STYLE_INET_PROTOCOL
/* old-style tables chain entries per hash slot: find the one for this protocol */
while (*ppp && (*ppp)->protocol != proto)
ppp = &(*ppp)->next;
#endif /* HAVE_OLD_STYLE_INET_PROTOCOL */
if (*ppp != NULL) {
#ifdef HAVE_KMEMB_STRUCT_INET_PROTOCOL_COPY
/* can only override last entry */
if ((*ppp)->copy != 0) {
__ptrace(("Cannot override copy entry\n"));
net_protocol_unlock();
write_unlock_bh(&tp_prot_lock);
kmem_cache_free(tp_raw_prot_cachep, pb);
return (NULL);
}
#endif /* HAVE_KMEMB_STRUCT_INET_PROTOCOL_COPY */
/* pin the module that owns the entry being overridden (unless it is this module) */
if ((pp->kmod = streams_module_address((ulong) *ppp))
&& pp->kmod != THIS_MODULE) {
if (!try_module_get(pp->kmod)) {
__ptrace(("Cannot acquire module\n"));
net_protocol_unlock();
write_unlock_bh(&tp_prot_lock);
kmem_cache_free(tp_raw_prot_cachep, pb);
return (NULL);
}
}
#if defined HAVE_KMEMB_STRUCT_NET_PROTOCOL_NEXT || defined HAVE_KMEMB_STRUCT_INET_PROTOCOL_NEXT
pp->proto.next = (*ppp)->next;
#endif
}
/* install our entry and remember the previous one (NULL if there was none) */
pp->next = xchg(ppp, &pp->proto);
net_protocol_unlock();
}
/* link into hash slot */
tp_prots[proto] = pb;
}
write_unlock_bh(&tp_prot_lock);
return (pb);
}
/**
 * tp_term_nproto - terminate network protocol override
 * @proto: network protocol to terminate
 * @type: service type dropping its reference (T_COTS, T_COTS_ORD or T_CLTS)
 *
 * This is the network protocol restoration function.
 *
 * This is complicated and brittle.  The module handling is just for ourselves (other kernel
 * modules pulling the same trick) as Linux IP protocols are normally kernel resident.  One
 * reference on the bucket for @proto is dropped.  When it was the last one, the previous
 * protocol entry (possibly none) is put back into the inet protocol table, the reference on its
 * owning kernel module is released, and the bucket is unhashed and freed.  With no previously
 * registered protocol this reduces to inet_del_protocol().
 */
STATIC INLINE fastcall __unlikely void
tp_term_nproto(unsigned char proto, unsigned int type)
{
	struct tp_prot_bucket *bucket;
	struct ipnet_protocol *entry;
	struct mynet_protocol **slot;

	write_lock_bh(&tp_prot_lock);
	bucket = tp_prots[proto];
	if (bucket == NULL)
		goto done;
	/* drop the per-service-type count */
	if (type == T_COTS || type == T_COTS_ORD) {
		assure(bucket->corefs > 0);
		bucket->corefs -= 1;
	} else if (type == T_CLTS) {
		assure(bucket->clrefs > 0);
		bucket->clrefs -= 1;
	} else {
		swerr();
	}
	bucket->refs -= 1;
	if (bucket->refs != 0)
		goto done;
	/* last reference gone: restore the table entry we replaced */
	entry = &bucket->prot;
	slot = &inet_protosp[proto & (MAX_INET_PROTOS - 1)];
	{
		net_protocol_lock();
#ifdef HAVE_OLD_STYLE_INET_PROTOCOL
		for (; *slot != NULL && *slot != &entry->proto; slot = &(*slot)->next) ;
		if (entry->next != NULL)
			entry->next->next = entry->proto.next;
#endif				/* HAVE_OLD_STYLE_INET_PROTOCOL */
		__assert(*slot == &entry->proto);
		*slot = entry->next;
		net_protocol_unlock();
	}
	/* release the module pinned when the override was installed */
	if (entry->next != NULL && entry->kmod != NULL && entry->kmod != THIS_MODULE)
		module_put(entry->kmod);
	/* unlink from hash slot */
	tp_prots[proto] = NULL;
	kmem_cache_free(tp_raw_prot_cachep, bucket);
      done:
	write_unlock_bh(&tp_prot_lock);
}
#endif /* LINUX */
/**
 * tp_bind_prot - bind a protocol
 * @proto: protocol number to bind
 * @type: service type taking the reference, passed through to tp_init_nproto()
 *
 * NOTICES: Notes about registration.  Older 2.4 kernels will allow you to register whatever inet
 * protocols you want on top of any existing protocol.  This is good.  2.6 kernels, on the other
 * hand, do not allow registration of inet protocols over existing inet protocols.  We rip symbols
 * on 2.6 and put special code in the handler to give us effectively the old 2.4 approach.
 * This is also detectable by the fact that inet_add_protocol() returns void on 2.4 and int on 2.6.
 *
 * The issue with the 2.4 approach to registration is that the ip_input function passes a cloned
 * skb to each protocol registered.  We don't want to do that.  If the message is for us, we want
 * to process it without passing it to others.
 *
 * The issue with the 2.6 approach to registration is that the ip_input function passes the skb to
 * only one function.  We don't want that either.  If the message is not for us, we want to pass
 * it to the next protocol module.
 *
 * Returns zero (0) when tp_init_nproto() yields a protocol bucket and -ENOMEM when it does not.
 */
STATIC INLINE fastcall __unlikely int
tp_bind_prot(unsigned char proto, unsigned int type)
{
	if (tp_init_nproto(proto, type) == NULL)
		return (-ENOMEM);
	return (0);
}
/**
 * tp_unbind_prot - unbind a protocol
 * @proto: protocol number to unbind
 * @type: service type dropping its reference, passed through to tp_term_nproto()
 *
 * Counterpart of tp_bind_prot(): releases one reference on the protocol override.
 */
STATIC INLINE fastcall __unlikely void
tp_unbind_prot(unsigned char proto, unsigned int type)
{
	tp_term_nproto(proto, type);
	return;
}
/**
 * tp_bind - bind a Stream to a TSAP
 * @tp: private structure
 * @ADDR_buffer: addresses to bind
 * @ADDR_length: length of addresses
 * @CONIND_number: maximum number of connection indications
 *
 * Bind to protocol ids and port and addresses.  Only the first protocol id is bound regardless
 * of the number specified: it is taken from the port field of the first address (converted to
 * host order and truncated to one byte), and the bound port itself is always zero.  The bind
 * hash contains bind buckets that list Streams that are bound to the same protocol id and port
 * number.
 *
 * A Stream already in the bucket with the same protocol id and port that shares at least one
 * address with the request is a conflict; the bind then only proceeds when both Streams have
 * the IP reuseaddr option set to T_YES.
 *
 * Returns zero (0) on success, TADDRBUSY on an address conflict, or the error returned by
 * tp_bind_prot().
 */
STATIC INLINE fastcall __unlikely int
tp_bind(struct tp *tp, struct sockaddr_in *ADDR_buffer, const t_uscalar_t ADDR_length,
	const t_uscalar_t CONIND_number)
{
	unsigned char proto = ntohs(ADDR_buffer[0].sin_port);
	unsigned short bport = 0;
	size_t anum = ADDR_length / sizeof(*ADDR_buffer);
	struct tp_bhash_bucket *bucket = &tp_bhash[tp_bhashfn(proto, bport)];
	struct tp *other;
	unsigned long flags;
	int i, j, err;

	write_lock_str2(&bucket->lock, flags);
	/* look for a bound Stream sharing protocol id, port and at least one address */
	for (other = bucket->list; other != NULL; other = other->bnext) {
		if (other->protoids[0] != proto || other->bport != bport)
			continue;
		for (i = 0; i < other->bnum; i++)
			for (j = 0; j < anum; j++)
				if (other->baddrs[i].addr == ADDR_buffer[j].sin_addr.s_addr)
					goto scanned;
	}
      scanned:
	if (other != NULL
	    && (other->options.ip.reuseaddr != T_YES || tp->options.ip.reuseaddr != T_YES)) {
		err = TADDRBUSY;
		goto unlock;
	}
	/* either no conflict, or both sides allow address reuse */
	err = tp_bind_prot(proto, tp->info.SERV_type);
	if (err != 0)
		goto unlock;
	tp->options.ip.protocol = proto;
	/* push onto the front of the bucket's list; the list holds a reference */
	tp->bnext = bucket->list;
	if (tp->bnext != NULL)
		tp->bnext->bprev = &tp->bnext;
	tp->bprev = &bucket->list;
	bucket->list = tp_get(tp);
	tp->bhash = bucket;
	/* copy into private structure */
	tp->CONIND_number = CONIND_number;
	tp->pnum = 1;
	tp->protoids[0] = proto;
	tp->bnum = anum;
	tp->bport = bport;
	_ptrace(("%s: %s: bound proto = %d, bport = %d\n", DRV_NAME, __FUNCTION__,
		 (int) proto, (int) ntohs(bport)));
	for (i = 0; i < anum; i++)
		tp->baddrs[i].addr = ADDR_buffer[i].sin_addr.s_addr;
	write_unlock_str2(&bucket->lock, flags);
#if defined HAVE_KFUNC_SYNCHRONIZE_NET
#if defined HAVE_KFUNC_IN_ATOMIC || defined in_atomic
	if (!in_interrupt() && !in_atomic())
#else
	if (!in_interrupt())
#endif
		synchronize_net();	/* might sleep */
#endif				/* defined HAVE_KFUNC_SYNCHRONIZE_NET */
	return (0);
      unlock:
	write_unlock_str2(&bucket->lock, flags);
	return (err);
}
/*
 * tp_ip_queue_xmit - final transmit step for a fully built IP packet
 *
 * Two implementations are selected at compile time.  When the kernel provides dst_output()
 * the IP identification is selected, the header checksum is computed and the packet is run
 * through the netfilter local-output hook.  The signature follows the number of arguments this
 * kernel's netfilter "okfn" callback takes.  Otherwise (older kernels) the packet is either
 * fragmented or sent directly through the route's output function.
 *
 * Returns the result of the hook or output function invoked.
 */
#if defined HAVE_KFUNC_DST_OUTPUT
STATIC INLINE __hot_out int
#if defined HAVE_KFUNC_NF_HOOK_OKFN_2_ARG
tp_ip_queue_xmit(struct sock *sk, struct sk_buff *skb)
#elif defined HAVE_KFUNC_NF_HOOK_OKFN_3_ARG
tp_ip_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
#else
tp_ip_queue_xmit(struct sk_buff *skb)
#endif
{
struct dst_entry *dst = skb_dst(skb);
struct iphdr *iph = (typeof(iph)) skb_network_header(skb);
/* choose the IP identification using whichever helper this kernel provides */
#if defined NETIF_F_TSO
#if defined HAVE_KFUNC___IP_SELECT_IDENT_2_ARGS_SEGS || \
defined HAVE_KFUNC___IP_SELECT_IDENT_3_ARGS_SEGS
__ip_select_ident(iph, dst, 0);
#elif defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF
ip_select_ident_more(skb, dst, NULL, 0);
#else /* !defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF */
ip_select_ident_more(iph, dst, NULL, 0);
#endif /* defined HAVE_KFUNC_IP_SELECT_IDENT_MORE_SK_BUFF */
#else /* !defined NETIF_F_TSO */
ip_select_ident(iph, dst, NULL);
#endif /* defined NETIF_F_TSO */
/* compute the IP header checksum */
ip_send_check(iph);
/* kernels without NF_IP_LOCAL_OUT use the NF_INET_LOCAL_OUT name for the hook */
#ifndef NF_IP_LOCAL_OUT
#define NF_IP_LOCAL_OUT NF_INET_LOCAL_OUT
#endif
#if defined HAVE_KFUNC_IP_DST_OUTPUT
return NF_HOOK_(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dst->dev, ip_dst_output);
#else /* !defined HAVE_KFUNC_IP_DST_OUTPUT */
return NF_HOOK_(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, dst->dev, dst_output_);
#endif /* defined HAVE_KFUNC_IP_DST_OUTPUT */
}
#else /* !defined HAVE_KFUNC_DST_OUTPUT */
STATIC INLINE __hot_out int
tp_ip_queue_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct iphdr *iph = skb->nh.iph;
/* packets larger than the path MTU are fragmented; others are sent with DF set */
if (skb->len > dst_pmtu(dst)) {
rare();
return ip_fragment(skb, dst->output);
} else {
iph->frag_off |= __constant_htons(IP_DF);
ip_send_check(iph);
return dst->output(skb);
}
}
#endif /* defined HAVE_KFUNC_DST_OUTPUT */
#if 1
/*
 * tp_skb_destructor_slow - account for a released socket buffer and restart the send queue
 * @tp: private structure that was charged for the buffer
 * @skb: socket buffer being destroyed
 *
 * Under tp->qlock the buffer's truesize is subtracted from tp->sndmem.  When the remaining send
 * memory is zero or below the send low water mark, the blocked flag is cleared and, after the
 * lock is dropped, the queue tp->iq is enabled if it has messages waiting.  Finally the
 * reference on @tp is released.  The destructor pointer itself is not cleared here (the
 * original code notes that skb_orphan does that).
 */
noinline fastcall __hot void
tp_skb_destructor_slow(struct tp *tp, struct sk_buff *skb)
{
	unsigned long flags;
	int drained;

	spin_lock_irqsave(&tp->qlock, flags);
	tp->sndmem -= skb->truesize;
	drained = (tp->sndmem < tp->options.xti.sndlowat || tp->sndmem == 0);
	if (unlikely(drained))
		tp->sndblk = 0;	/* no longer blocked */
	spin_unlock_irqrestore(&tp->qlock, flags);
	if (unlikely(drained)) {
		/* restart the queue only when something is waiting on it */
		if (tp->iq != NULL && tp->iq->q_first != NULL)
			qenable(tp->iq);
	}
	tp_put(tp);
}
/**
* tp_skb_destructor - socket buffer destructor
* @skb: socket buffer to destroy
*
* This provides the impedance matching between socket buffer flow control and STREAMS flow control.
* When tp->sndmem is greater than tp->options.xti.sndbuf we place STREAMS buffers back on the send
* queue and stall the queue. When the send memory falls below tp->options.xti.sndlowat (or to
* zero) and there are message on the send queue, we enable the queue.
*
* NOTE: There was not enough hysteresis in this function! It was qenabling too fast. We need a
* flag in the private structure that indicates that the queue is stalled awaiting subsiding below
* the send low water mark (or to zero) that is set when we stall the queue and reset when we fall
* beneath the low water mark.
*/
STATIC __hot void
tp_skb_destructor(struct sk_buff *skb)
{
struct tp *tp;
unsigned long flags;
#