Skip to content

Commit

Permalink
Merge pull request #1274 from microsoft/enhancement-tcp-defaults
Browse files Browse the repository at this point in the history
[libos] Merge Config Files
  • Loading branch information
ppenna committed May 21, 2024
2 parents a15818c + e35d60b commit 8447e59
Show file tree
Hide file tree
Showing 35 changed files with 694 additions and 489 deletions.
8 changes: 2 additions & 6 deletions doc/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ specification, check out the `README.md` file.
- [5. Setup Configuration File (Only Once)](#5-setup-configuration-file-only-once)
- [6. Enable Huge Pages (Only for Catnip on Every System Reboot)](#6-enable-huge-pages-only-for-catnip-on-every-system-reboot)


> **Follow these instructions to build Demikernel on a fresh Ubuntu 22.04 system.**
## 1. Clone This Repository
Expand Down Expand Up @@ -57,13 +56,10 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
- Copy the template from `scripts/config/default.yaml` to
`$HOME/config.yaml`. If running on Azure, use `scripts/config/azure.yaml`.
- Open the file in `$HOME/config.yaml` for editing and do the following:
- Change `XX.XX.XX.XX` to match the IPv4 address of your server host.
- Change `YY.YY.YY.YY` to match the IPv4 address of your client host.
- Change `PPPP` to the port number that you will expose in the server host.
- Change `ZZ.ZZ.ZZ.ZZ` to match the IPv4 address of the local host.
- Change `XX.XX.XX.XX` to match the IPv4 address of the local host.
- Change `ff:ff:ff:ff:ff:ff` to match the MAC address in the local host.
- Change `abcde` to match the name of the interface in the local host.
- Change the `arp_table` according to your setup.
- Change the `arp_table` according to your setup. Each entry should map the MAC address of a host to the IP address of that same host.
- If using DPDK, change `WW:WW.W` to match the PCIe address of your NIC.
- Save the file.

Expand Down
2 changes: 1 addition & 1 deletion examples/tcp-close/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ impl TcpClient {
let qd: QDesc = self.libos.socket(AF_INET, SOCK_STREAM, 0)?;
// Set default linger to a short period, otherwise, this test will take a long time to complete.
self.libos
.set_socket_option(qd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
.set_socket_option(qd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;
self.qds.insert(qd);
Ok(qd)
}
Expand Down
2 changes: 1 addition & 1 deletion examples/tcp-close/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl TcpServer {
// Create TCP socket.
let sockqd: QDesc = libos.socket(AF_INET, SOCK_STREAM, 0)?;
// Set default linger to a short period, otherwise, this test will take a long time to complete.
libos.set_socket_option(sockqd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
libos.set_socket_option(sockqd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;

// Bind to local address.
libos.bind(sockqd, local)?;
Expand Down
4 changes: 2 additions & 2 deletions examples/tcp-echo/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl TcpEchoClient {
let sockqd: QDesc = self.libos.socket(AF_INET, SOCK_STREAM, 0)?;
// Set default linger to a short period, otherwise, this test will take a long time to complete.
self.libos
.set_socket_option(sockqd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
.set_socket_option(sockqd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;

self.clients.insert(sockqd, (vec![0; self.bufsize], 0));
let qt: QToken = self.libos.connect(sockqd, self.remote)?;
Expand Down Expand Up @@ -201,7 +201,7 @@ impl TcpEchoClient {
let qd: QDesc = self.libos.socket(AF_INET, SOCK_STREAM, 0)?;
// Set default linger to a short period, otherwise, this test will take a long time to complete.
self.libos
.set_socket_option(qd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
.set_socket_option(qd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;

let qt: QToken = self.libos.connect(qd, self.remote)?;
self.register_operation(qd, qt);
Expand Down
2 changes: 1 addition & 1 deletion examples/tcp-echo/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ impl TcpEchoServer {
pub fn new(mut libos: LibOS, local: SocketAddr) -> Result<Self> {
// Create a TCP socket.
let sockqd: QDesc = libos.socket(AF_INET, SOCK_STREAM, 0)?;
libos.set_socket_option(sockqd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
libos.set_socket_option(sockqd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;

// Bind the socket to a local address.
if let Err(e) = libos.bind(sockqd, local) {
Expand Down
2 changes: 1 addition & 1 deletion examples/tcp-wait/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ impl TcpClient {
self.sockqd = Some(self.libos.socket(AF_INET, SOCK_STREAM, 0)?);
// Set default linger to a short period, otherwise, this test will take a long time to complete.
self.libos
.set_socket_option(self.sockqd.unwrap(), SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
.set_socket_option(self.sockqd.unwrap(), SocketOption::Linger(Some(DEFAULT_LINGER)))?;

let qt: QToken = self
.libos
Expand Down
2 changes: 1 addition & 1 deletion examples/tcp-wait/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl TcpServer {
// Create TCP socket.
let sockqd: QDesc = libos.socket(AF_INET, SOCK_STREAM, 0)?;
// Set default linger to a short period, otherwise, this test will take a long time to complete.
libos.set_socket_option(sockqd, SocketOption::SO_LINGER(Some(DEFAULT_LINGER)))?;
libos.set_socket_option(sockqd, SocketOption::Linger(Some(DEFAULT_LINGER)))?;

// Bind to local address.
libos.bind(sockqd, local)?;
Expand Down
2 changes: 2 additions & 0 deletions man/demi_getsockopt.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Currently the following values for `level` are supported:
Currently the following values for `option` are supported:
- `SO_LINGER` - Linger on/off and linger time in seconds, for queued, unsent data on `demi_close()`.
- `SO_KEEPALIVE` - Whether connections should be kept alive. On Linux, this is a boolean flag. On Windows, this includes a boolean flag, a keep alive time and a keep alive interval.
- `SO_NODELAY` - Nagle's algorithm on/off.
## Return Value
Expand Down
41 changes: 21 additions & 20 deletions scripts/config/azure.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

client:
connect_to:
host: XX.XX.XX.XX
port: PPPP
client:
host: YY.YY.YY.YY
port: PPPP
server:
bind:
host: XX.XX.XX.XX
port: PPPP
client:
host: YY.YY.YY.YY
port: PPPP
catnip:
my_ipv4_addr: ZZ.ZZ.ZZ.ZZ
my_link_addr: "ff:ff:ff:ff:ff:ff"
my_interface_name: "abcde"
arp_disable: true
demikernel:
local_ipv4_addr: XX.XX.XX.XX
local_link_addr: "ff:ff:ff:ff:ff:ff"
raw_socket:
interface_name: "abcde"
dpdk:
eal_init: ["-c", "0xff", "-n", "4", "-a", "WW:WW.W","--proc-type=auto", "--vdev=net_vdev_netvsc0,iface=eth1"]
eal_init: ["-c", "0xff", "-n", "4", "-a", "YY:YY:.Y","--proc-type=auto", "--vdev=net_vdev_netvsc0,iface=abcde"]
tcp_socket_options:
keepalive:
enabled: false
time_millis: 0
interval: 0
linger:
enabled: true
time_seconds: 0
nodelay: true
inetstack_config:
mtu: 1500
mss: 1500
enable_jumbo_frames: false
udp_checksum_offload: false
tcp_checksum_offload: false

# vim: set tabstop=2 shiftwidth=2
41 changes: 17 additions & 24 deletions scripts/config/default.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,29 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

client:
connect_to:
host: XX.XX.XX.XX
port: PPPP
client:
host: YY.YY.YY.YY
port: PPPP
server:
bind:
host: XX.XX.XX.XX
port: PPPP
client:
host: YY.YY.YY.YY
port: PPPP
catnip:
my_ipv4_addr: ZZ.ZZ.ZZ.ZZ
my_link_addr: "ff:ff:ff:ff:ff:ff"
my_interface_name: "abcde"
arp_table:
"ff:ff:ff:ff:ff:ff": "XX.XX.XX.XX"
"ff:ff:ff:ff:ff:ff": "YY.YY.YY.YY"
demikernel:
local_ipv4_addr: XX.XX.XX.XX
local_link_addr: "ff:ff:ff:ff:ff:ff"
raw_socket:
interface_name: "abcde"
dpdk:
eal_init: ["", "-c", "0xff", "-n", "4", "-a", "WW:WW.W","--proc-type=auto"]
catnap:
tcp_keepalive:
tcp_socket_options:
keepalive:
enabled: false
time_millis: 0
interval: 0
linger:
enabled: true
time_seconds: 0

nodelay: true
inetstack_config:
mtu: 1500
mss: 1500
enable_jumbo_frames: false
udp_checksum_offload: false
tcp_checksum_offload: false
arp_table:
"ff:ff:ff:ff:ff:ff": "XX.XX.XX.XX"
"ff:ff:ff:ff:ff:ff": "YY.YY.YY.YY"
# vim: set tabstop=2 shiftwidth=2
10 changes: 1 addition & 9 deletions shim/src/ctrl/getsockopt.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,10 @@ int __getsockopt(int sockfd, int level, int optname, void *optval, socklen_t *op
TRACE("sockfd=%d, level=%d, optname=%d, optval=%p, optlen=%p", sockfd, level, optname, optval, (void *)optlen);

// Issue warnings for common options that are not supported.
if (level == SOL_SOCKET && optname == SO_KEEPALIVE)
{
WARN("%s is not supported", "SO_KEEPALIVE");
}
else if (level == SOL_SOCKET && optname == SO_REUSEADDR)
if (level == SOL_SOCKET && optname == SO_REUSEADDR)
{
WARN("%s is not supported", "SO_REUSEADDR");
}
else if (level == IPPROTO_TCP && optname == TCP_NODELAY)
{
WARN("%s is not supported", "TCP_NODELAY");
}
else if (level == IPPROTO_TCP && optname == TCP_KEEPIDLE)
{
WARN("%s is not supported", "TCP_KEEPIDLE");
Expand Down
14 changes: 5 additions & 9 deletions shim/src/ctrl/setsockopt.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,24 @@ int __setsockopt(int sockfd, int level, int optname, const void *optval, socklen
TRACE("sockfd=%d, level=%d, optname=%d, optval=%p, optlen=%d", sockfd, level, optname, optval, optlen);

// Issue warnings for common options that are not supported.
if (level == SOL_SOCKET && optname == SO_KEEPALIVE)
{
WARN("%s is not supported", "SO_KEEPALIVE");
}
else if (level == SOL_SOCKET && optname == SO_REUSEADDR)
if (level == SOL_SOCKET && optname == SO_REUSEADDR)
{
WARN("%s is not supported", "SO_REUSEADDR");
}
else if (level == IPPROTO_TCP && optname == TCP_NODELAY)
{
WARN("%s is not supported", "TCP_NODELAY");
}
else if (level == IPPROTO_TCP && optname == TCP_KEEPIDLE)
{
WARN("%s is not supported", "TCP_KEEPIDLE");
}
else if (level == IPPROTO_TCP && optname == TCP_KEEPINTVL)
{
// TODO: Unify this support with Windows SO_KEEPALIVE once we support TCP-level options.
// FIXME: https://github.com/microsoft/demikernel/issues/1282
WARN("%s is not supported", "TCP_KEEPINTLVL");
}
else if (level == IPPROTO_TCP && optname == TCP_KEEPCNT)
{
// TODO: Unify this support with Windows SO_KEEPALIVE once we support TCP-level options.
// FIXME: https://github.com/microsoft/demikernel/issues/1282
WARN("%s is not supported", "TCP_KEEPCNT");
}
else if (level == IPPROTO_TCP && optname == TCP_ULP)
Expand Down
32 changes: 21 additions & 11 deletions src/rust/catloop/socket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

use crate::{
catmem::SharedCatmemLibOS,
demikernel::config::Config,
expect_ok,
expect_some,
runtime::{
Expand Down Expand Up @@ -90,8 +91,8 @@ pub struct RequestId(u64);

impl SharedMemorySocket {
/// Creates a new socket that is not bound to an address.
pub fn new() -> Self {
Self(SharedObject::new(MemorySocket {
pub fn new(config: &Config) -> Result<Self, Fail> {
Ok(Self(SharedObject::new(MemorySocket {
catmem_qd: None,
local: None,
remote: None,
Expand All @@ -101,13 +102,18 @@ impl SharedMemorySocket {
rng: SmallRng::seed_from_u64(REQUEST_ID_SEED),
#[cfg(not(debug_assertions))]
rng: SmallRng::from_entropy(),
options: TcpSocketOptions::default(),
}))
options: TcpSocketOptions::new(config)?,
})))
}

/// Allocates a new socket that is bound to [local].
fn alloc(catmem_qd: QDesc, local: Option<SocketAddrV4>, remote: Option<SocketAddrV4>) -> Self {
Self(SharedObject::new(MemorySocket {
fn alloc(
catmem_qd: QDesc,
local: Option<SocketAddrV4>,
remote: Option<SocketAddrV4>,
options: &TcpSocketOptions,
) -> Result<Self, Fail> {
Ok(Self(SharedObject::new(MemorySocket {
catmem_qd: Some(catmem_qd),
local,
remote,
Expand All @@ -117,14 +123,16 @@ impl SharedMemorySocket {
rng: SmallRng::seed_from_u64(REQUEST_ID_SEED),
#[cfg(not(debug_assertions))]
rng: SmallRng::from_entropy(),
options: TcpSocketOptions::default(),
}))
options: options.clone(),
})))
}

/// Set an SO_* option on the socket.
pub fn set_socket_option(&mut self, option: SocketOption) -> Result<(), Fail> {
match option {
SocketOption::SO_LINGER(linger) => self.options.set_linger(linger),
SocketOption::Linger(linger) => self.options.set_linger(linger),
SocketOption::KeepAlive(keepalive) => self.options.set_keepalive(keepalive),
SocketOption::NoDelay(nodelay) => self.options.set_nodelay(nodelay),
}
Ok(())
}
Expand All @@ -133,7 +141,9 @@ impl SharedMemorySocket {
/// [option].
pub fn get_socket_option(&mut self, option: SocketOption) -> Result<SocketOption, Fail> {
match option {
SocketOption::SO_LINGER(_) => Ok(SocketOption::SO_LINGER(self.options.get_linger())),
SocketOption::Linger(_) => Ok(SocketOption::Linger(self.options.get_linger())),
SocketOption::KeepAlive(_) => Ok(SocketOption::KeepAlive(self.options.get_keepalive())),
SocketOption::NoDelay(_) => Ok(SocketOption::NoDelay(self.options.get_nodelay())),
}
}

Expand Down Expand Up @@ -205,7 +215,7 @@ impl SharedMemorySocket {
};

let new_addr: SocketAddrV4 = SocketAddrV4::new(ipv4, new_port);
let new_socket: Self = Self::alloc(new_qd, Some(new_addr), None);
let new_socket: Self = Self::alloc(new_qd, Some(new_addr), None, &self.options)?;

// Check that the remote has retrieved the port number and responded with a valid request id.
match pop_request_id(catmem.clone(), new_qd).await {
Expand Down
15 changes: 10 additions & 5 deletions src/rust/catloop/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use ::socket2::{
};
use ::std::{
net::{
Ipv4Addr,
SocketAddr,
SocketAddrV4,
},
Expand All @@ -51,6 +52,8 @@ pub struct CatloopTransport {
runtime: SharedDemiRuntime,
/// Configuration.
config: Config,
/// Local IP.
local_ipv4_addr: Ipv4Addr,
}

#[derive(Clone)]
Expand All @@ -61,12 +64,14 @@ pub struct SharedCatloopTransport(SharedObject<CatloopTransport>);
//======================================================================================================================

impl SharedCatloopTransport {
pub fn new(config: &Config, runtime: SharedDemiRuntime) -> Self {
Self(SharedObject::new(CatloopTransport {
pub fn new(config: &Config, runtime: SharedDemiRuntime) -> Result<Self, Fail> {
Ok(Self(SharedObject::new(CatloopTransport {
catmem: SharedCatmemLibOS::new(config, runtime.clone()),
runtime,
config: config.clone(),
}))
// Save this here so we can be sure to throw an error before we try to bind.
local_ipv4_addr: config.local_ipv4_addr()?,
})))
}
}

Expand All @@ -78,7 +83,7 @@ impl NetworkTransport for SharedCatloopTransport {
/// that wraps the underlying Catmem queue.
fn socket(&mut self, _: Domain, _: Type) -> Result<Self::SocketDescriptor, Fail> {
// Create fake socket.
Ok(SharedMemorySocket::new())
SharedMemorySocket::new(&self.config)
}

/// Set an SO_* option on the socket.
Expand Down Expand Up @@ -109,7 +114,7 @@ impl NetworkTransport for SharedCatloopTransport {
fn bind(&mut self, sd: &mut Self::SocketDescriptor, local: SocketAddr) -> Result<(), Fail> {
// Check if we are binding to a non-local address.
let local: SocketAddrV4 = unwrap_socketaddr(local)?;
if &self.config.local_ipv4_addr() != local.ip() {
if self.local_ipv4_addr != *local.ip() {
let cause: String = format!("cannot bind to non-local address (sd={:?})", sd);
error!("bind(): {}", cause);
return Err(Fail::new(libc::EADDRNOTAVAIL, &cause));
Expand Down
Loading

0 comments on commit 8447e59

Please sign in to comment.