diff --git a/docs/container/readme.md b/docs/container/readme.md index 633a6c742..1aae5cebb 100644 --- a/docs/container/readme.md +++ b/docs/container/readme.md @@ -9,6 +9,7 @@ Storage module is available on zbus over the following channel | container|[container](#interface)| 0.0.1| ## Home Directory + contd keeps some data in the following locations | directory | path| |----|---| @@ -44,52 +45,52 @@ type ContainerID string // NetworkInfo defines a network configuration for a container type NetworkInfo struct { - // Currently a container can only join one (and only one) - // network namespace that has to be pre defined on the node - // for the container tenant - - // Containers don't need to know about anything about bridges, - // IPs, wireguards since this is all is only known by the network - // resource which is out of the scope of this module - Namespace string + // Currently a container can only join one (and only one) + // network namespace that has to be pre defined on the node + // for the container tenant + + // Containers don't need to know about anything about bridges, + // IPs, wireguards since this is all is only known by the network + // resource which is out of the scope of this module + Namespace string } // MountInfo defines a mount point type MountInfo struct { - Source string // source of the mount point on the host - Target string // target of mount inside the container - Type string // mount type - Options []string // mount options + Source string // source of the mount point on the host + Target string // target of mount inside the container + Type string // mount type + Options []string // mount options } //Container creation info type Container struct { - // Name of container - Name string - // path to the rootfs of the container - RootFS string - // Env env variables to container in format {'KEY=VALUE', 'KEY2=VALUE2'} - Env []string - // Network network info for container - Network NetworkInfo - // Mounts extra mounts for container - Mounts []MountInfo - // Entrypoint the process to start inside the container - Entrypoint string - // Interactivity enable Core X as PID 1 on the container - Interactive bool + // Name of container + Name string + // path to the rootfs of the container + RootFS string + // Env env variables to container in format {'KEY=VALUE', 'KEY2=VALUE2'} + Env []string + // Network network info for container + Network NetworkInfo + // Mounts extra mounts for container + Mounts []MountInfo + // Entrypoint the process to start inside the container + Entrypoint string + // Interactivity enable Core X as PID 1 on the container + Interactive bool } // ContainerModule defines rpc interface to containerd type ContainerModule interface { - // Run creates and starts a container on the node. It also auto - // starts command defined by `entrypoint` inside the container - // ns: tenant namespace - // data: Container info - Run(ns string, data Container) (ContainerID, error) - - // Inspect, return information about the container, given its container id - Inspect(ns string, id ContainerID) (Container, error) - Delete(ns string, id ContainerID) error + // Run creates and starts a container on the node. 
It also auto + // starts command defined by `entrypoint` inside the container + // ns: tenant namespace + // data: Container info + Run(ns string, data Container) (ContainerID, error) + + // Inspect, return information about the container, given its container id + Inspect(ns string, id ContainerID) (Container, error) + Delete(ns string, id ContainerID) error } -``` \ No newline at end of file +``` diff --git a/docs/network/HIDDEN-PUBLIC.dia b/docs/network/HIDDEN-PUBLIC.dia new file mode 100644 index 000000000..139cffae3 Binary files /dev/null and b/docs/network/HIDDEN-PUBLIC.dia differ diff --git a/docs/network/HIDDEN-PUBLIC.png b/docs/network/HIDDEN-PUBLIC.png new file mode 100644 index 000000000..72fbe3554 Binary files /dev/null and b/docs/network/HIDDEN-PUBLIC.png differ diff --git a/docs/network/NR_layout.dia b/docs/network/NR_layout.dia new file mode 100644 index 000000000..a9f59e20a Binary files /dev/null and b/docs/network/NR_layout.dia differ diff --git a/docs/network/NR_layout.png b/docs/network/NR_layout.png new file mode 100644 index 000000000..233664279 Binary files /dev/null and b/docs/network/NR_layout.png differ diff --git a/docs/network/exitpoints.md b/docs/network/attic/exitpoints.md similarity index 100% rename from docs/network/exitpoints.md rename to docs/network/attic/exitpoints.md diff --git a/docs/network/tools.md b/docs/network/attic/tools.md similarity index 100% rename from docs/network/tools.md rename to docs/network/attic/tools.md diff --git a/docs/network/attic/zostst.dhcp b/docs/network/attic/zostst.dhcp new file mode 100644 index 000000000..0ac53be0d --- /dev/null +++ b/docs/network/attic/zostst.dhcp @@ -0,0 +1,54 @@ +#!/usr/bin/bash + +mgmtnic=( +0c:c4:7a:51:e3:6a +0c:c4:7a:51:e9:e6 +0c:c4:7a:51:ea:18 +0c:c4:7a:51:e3:78 +0c:c4:7a:51:e7:f8 +0c:c4:7a:51:e8:ba +0c:c4:7a:51:e8:0c +0c:c4:7a:51:e7:fa +) + +ipminic=( +0c:c4:7a:4c:f3:b6 +0c:c4:7a:4d:02:8c +0c:c4:7a:4d:02:91 +0c:c4:7a:4d:02:62 +0c:c4:7a:4c:f3:7e +0c:c4:7a:4d:02:98 +0c:c4:7a:4d:02:19 +0c:c4:7a:4c:f2:e0 +) +cnt=1 +for i in ${mgmtnic[*]} ; do +cat << EOF +config host + option name 'zosv2tst-${cnt}' + option dns '1' + option mac '${i}' + option ip '10.5.0.$((${cnt} + 10))' + +EOF +let cnt++ +done + + + +cnt=1 +for i in ${ipminic[*]} ; do +cat << EOF +config host + option name 'ipmiv2tst-${cnt}' + option dns '1' + option mac '${i}' + option ip '10.5.0.$((${cnt} + 100))' + +EOF +let cnt++ +done + +for i in ${mgmtnic[*]} ; do + echo ln -s zoststconf 01-$(echo $i | sed s/:/-/g) +done diff --git a/docs/network/definitions.md b/docs/network/definitions.md new file mode 100644 index 000000000..8b7bb00c1 --- /dev/null +++ b/docs/network/definitions.md @@ -0,0 +1,22 @@ +# Definition of words used throughout the documentation + +## Node + + TL;DR: Computer. + A Node is a computer with CPU, Memory, Disks (or SSD's, NVMe) connected to _A_ network that has Internet access. (i.e. it can reach www.google.com, just like you on your phone, at home) + That Node will, once it has received an IP address (IPv4 or IPv6), register itself when it's new, or confirm it's identity and it's online-ness (for lack of a better word). + +## TNo : Tenant Network object. [The gory details here](https://github.com/threefoldtech/zos/blob/master/modules/network.go) + + TL;DR: The Network Description. + We named it so, because it is a data structure that describes the __whole__ network a user can request (or setup). + That network is a virtualized overlay network. 
+  Basically that means that transfer of data in that network *always* is encrypted, protected from prying eyes, and __resources in that network can only communicate with each other__ **unless** there is a special rule that allows access, be it by allowing access through firewall rules, *and/or* through a proxy (a service that forwards requests on behalf of, and ships replies back to the client).
+
+## NR: Network Resource
+
+  TL;DR: the Node-local part of a TNo.
+  The main building block of a TNo; i.e. each service of a user in a Node lives in an NR.
+  Each Node hosts User services, whatever type of service that is. Every service in that specific node will always be solely part of the Tenant's Network. (read that twice).
+  So: A Network Resource is the thing that interconnects all other network resources of the TN (Tenant Network), and provides routing/firewalling for these interconnects, including the default route to the BBI (Big Bad Internet), aka ExitPoint.
+  All User services that run in a Node are in some way or another connected to the Network Resource (NR), which will provide ip packet forwarding and firewalling to all other network resources (including the ExitPoint) of the TN (Tenant Network) of the user. (read that three times, and the last time, read it slowly and out loud)
\ No newline at end of file
diff --git a/docs/network/introduction.md b/docs/network/introduction.md
new file mode 100644
index 000000000..40ba10b02
--- /dev/null
+++ b/docs/network/introduction.md
@@ -0,0 +1,72 @@
+# Introduction to networkd, the network manager of 0-OS
+
+## Boot and initial setup
+
+At boot, be it from a USB stick or PXE, ZOS starts up the kernel with a few necessary parameters like farm ID and/or possible network parameters. Once the kernel has started, [zinit](https://github.com/threefoldtech/zinit) starts, among other things, the network initializer.
+
+In short, that process loops over the available network interfaces and tries to obtain an IP address that also provides for a default gateway. That means: it tries to get Internet connectivity. Without it, ZOS stops there: unable to register itself or start other processes, there would be no use in it being started anyway.
+
+Once it has obtained Internet connectivity, ZOS can then proceed to make itself known to the Grid and announce its existence. It will then regularly poll the Grid for tasks.
+
+Once initialized, with the network daemon running (a process that will handle all things related to networking), ZOS will set up some basic services so that workloads can themselves use that network.
+
+## Networkd functionality
+
+The network daemon is in itself responsible for a few tasks, and working together with the [provision daemon](../provision) it mainly sets up the local infrastructure for the user's network resources, together with the wireguard configurations for the user's mesh network.
+
+The Wireguard mesh is an overlay network. That means that traffic of that network is encrypted and encapsulated in a new traffic frame that then gets transferred over the underlay network, in essence the network that has been set up during boot of the node.
+
+For users or workloads that run on top of the mesh, the mesh network looks and behaves like any other directly connected network, and as such a workload can reach other workloads or services in that mesh, with the added advantage that the traffic is encrypted, protecting services and communications over that mesh from too curious eyes.
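+
+To make the overlay/underlay distinction a bit more tangible, the sketch below prints a WireGuard peer entry, `wg-quick` style, the way a network resource could describe one of its remote peers: the peer's overlay subnet(s) go into `AllowedIPs`, while `Endpoint` is the peer node's underlay address. This is an illustrative sketch only; the type and function names (`nrPeer`, `renderPeer`) and the example values are made up and are not zos code:
+
+```go
+package main
+
+import "fmt"
+
+// nrPeer is a hypothetical view of one remote network resource:
+// the overlay prefixes it announces and the underlay address
+// (public IP and listen port) used to reach it.
+type nrPeer struct {
+	PublicKey   string   // wireguard public key of the remote NR
+	OverlayNets []string // the remote NR's user subnet(s), e.g. 10.1.2.0/24
+	Underlay    string   // real (underlay) ip:port; empty for hidden nodes
+}
+
+// renderPeer prints a [Peer] section: traffic for AllowedIPs is
+// encrypted and encapsulated, then sent to Endpoint over the underlay.
+func renderPeer(p nrPeer) {
+	fmt.Println("[Peer]")
+	fmt.Println("PublicKey =", p.PublicKey)
+	for _, n := range p.OverlayNets {
+		fmt.Println("AllowedIPs =", n)
+	}
+	if p.Underlay != "" {
+		fmt.Println("Endpoint =", p.Underlay)
+	}
+	// a keepalive keeps NAT/firewall state alive, so tunnels towards
+	// hidden nodes stay usable once they have been established
+	fmt.Println("PersistentKeepalive = 25")
+}
+
+func main() {
+	renderPeer(nrPeer{
+		PublicKey:   "<peer-public-key>",
+		OverlayNets: []string{"10.1.2.0/24"},
+		Underlay:    "185.69.167.130:51820",
+	})
+}
+```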
+
+That also means that workloads running on nodes in a farmer's local network are even protected from the farmer himself, in essence protecting the user from the farmer in case that farmer were to become too curious.
+
+As the nodes do not provide any way to be accessed, be it over the underlying network or even the local console of the node, a user can be sure that his workload cannot be snooped upon.
+
+## Techie talk
+
+- **boot and initial setup**
+For ZOS to work at all (the network is the computer), it needs an internet connection. That is: it needs to be able to communicate with the BCDB over the internet.
+So ZOS starts with that: with the `internet` process, which tries to get the node an IP address. That process will have set up a bridge (`zos`), connected to an interface that is on an Internet-capable network. That bridge will have an IP address that has Internet access.
+Also, that bridge is there for future public interfaces into workloads.
+Once ZOS can reach the Internet, the rest of the system can be started, where ultimately the `networkd` daemon is started.
+
+- **networkd initial setup**
+`networkd` starts by taking inventory of the available network interfaces and registers them to the BCDB (grid database), so that farmers can specify non-standard configs, for instance for multi-NIC machines. Once that is done, `networkd` registers itself to the zbus, so it can receive tasks to execute from the provisioning daemon (`provisiond`).
+These tasks are mostly setting up network resources for users, where a network resource is a subnet in the user's wireguard mesh.
+
+- **multi-nic setups**
+
+When a farmer runs nodes in a datacentre and those nodes have multiple NICs, it is advisable (though not necessary) to separate OOB traffic (like the initial boot setup) from user traffic (both the overlay network and the outgoing IPv4 NAT of the nodes) onto different NICs. With such a setup, the farmer will have to make sure the switches are properly configured; more on that in later docs.
+
+- **registering and configurations**
+
+Once a node has booted and properly initialized, registering and configuring the node to be able to accept workloads and their associated network configs is a two-step process.
+First, the node registers its live network setup to the BCDB. That is: all NICs with their associated IP addresses and routes are registered, so a farm admin can in a second phase configure any separate NICs to handle different kinds of workloads.
+In that second phase, a farm admin can then set up the NICs and their associated IPs manually, so that workloads can start using them.
+
+## Wireguard explanations
+
+- **wireguard as pointopoint links and what that means**
+Wireguard is a special type of VPN, where every instance is both a server for multiple peers and a client towards multiple peers. That way you can create fanning-out connections as well as receive connections from multiple peers, effectively creating a mesh of connections like this: ![like so](HIDDEN-PUBLIC.png)
+
+- **wireguard port management**
+Every wireguard endpoint (a network resource point) needs a destination/port combo when it's publicly reachable. The destination is a public IP, but the port is the differentiator. So we need to make sure that every network's wireguard listening port is unique on the node where it runs, and can be reapplied in case of a node reboot.
+ZOS registers the ports **already in use** to the BCDB, so a user can then pick a port that is not yet used.
+
+- **wireguard and hidden nodes**
+Hidden nodes are nodes that are in essence hidden behind a firewall on an internal network and unreachable from the Internet, be it as an IPv4 NATed host or an IPv6 host that is firewalled in any way, so that it is impossible to initiate connections from the Internet towards the node.
+As such, these nodes can only partake in a network as clients towards publicly reachable peers, and can only initiate the connections themselves (see the previous drawing).
+To make sure connectivity stays up, all clients have a keepalive towards all their peers, so that communication towards network resources in hidden nodes can be established.
+
+## Caveats
+
+- **hidden nodes**
+Hidden nodes live (mostly) behind firewalls that keep state about connections, and these states have a lifetime. We try our best to keep these communications going, but depending on the firewall your mileage may vary (YMMV ;-))
+
+- **local underlay network reachability**
+When multiple nodes live in the same hidden network, at the moment we don't try to have the nodes establish connectivity between themselves, so all nodes in that hidden network can only reach each other through the intermediary of a node that is publicly reachable. So to get some performance, a farmer will have to have properly routable nodes available in the vicinity.
+So for now, a farmer is better off having his nodes really reachable over a public network.
+
+- **IPv6 and IPv4 considerations**
+While the mesh can work over IPv4 __and__ IPv6 at the same time, a given peer can only be reached over one protocol at a time. That is, a peer is IPv4 __or__ IPv6, not both. Hence if a peer is reachable over IPv4, the client towards that peer needs to reach it over IPv4 too, and thus needs an IPv4 address.
+We strongly advise having all nodes properly set up on a routable, unfirewalled IPv6 network, so that these problems have no reason to exist.
diff --git a/docs/network/mesh.md b/docs/network/mesh.md
new file mode 100644
index 000000000..6854f01ce
--- /dev/null
+++ b/docs/network/mesh.md
@@ -0,0 +1,123 @@
+# Zero-Mesh
+
+## What is it
+
+When a user wants to deploy a workload, whatever that may be, that workload needs connectivity.
+If there is just one service to be run, things can be simple, but in general there is more than one service that needs to interact to provide a full stack. Sometimes these services can live on one node, but mostly these services will be deployed over multiple nodes, in different containers.
+The Mesh is created for that: containers can communicate over an encrypted path, and that network can be specified in terms of IP addresses by the user.
+
+## Overlay Network
+
+Zero-Mesh is an overlay network. That requires that nodes have a properly working network with existing access to the Internet in the first place, be it full-blown public access, or behind a firewall/home router that provides private IP NAT to the Internet.
+
+Right now Zero-Mesh has support for both, where nodes behind a firewall are HIDDEN nodes, and nodes that are directly connected, be it over IPv6 or IPv4, are 'normal' nodes.
+Hidden nodes can thus only participate as client nodes for a specific user Mesh, and all publicly reachable nodes can act as aggregators for hidden clients in that user Mesh.
+
+Also, a Mesh is static: once it is configured, and thus during the lifetime of the network, there is one node containing the aggregator for Mesh clients that live on hidden nodes.
+So if an aggregator node dies or is no longer reachable, the mesh needs to be reapplied, with __some__ publicly reachable node as the aggregator node.
+
+So it goes a bit like ![this](HIDDEN-PUBLIC.png)
+The NR labeled Exit in that graph is the point that Network Resources in Hidden Nodes connect to. These Exit NRs are then the transfer points between Hidden NRs.
+
+## ZOS networkd
+
+The networkd daemon receives tasks from the provisioning daemon, so that it can create the necessary resources for a Mesh participant in the User Network (a Network Resource, NR).
+
+A network is defined as a whole by the User, using the tools in the 3bot to generate a proper configuration that can be used by the network daemon.
+
+What networkd takes care of is the establishment of the mesh itself, in accordance with the configuration a farmer has given to his nodes. What is configured on top of the Mesh is user-defined, and applied as such by networkd.
+
+## Internet reachability per Network Resource
+
+Every node that participates in a User mesh will also provide Internet access for every network resource.
+That means that every NR has the same Internet access as the node itself. In terms of security it also means that a firewall in the node takes care of blocking all types of entry to the NR, effectively acting as an Internet access diode: outgoing and related traffic only.
+In a later phase a user will be able to define one network resource as the __sole__ outgoing Internet access point, but for now that is not yet defined.
+
+## Interworkings
+
+So how is that set up?
+
+Every node participating in a User Network sets up a Network Resource.
+Basically, it's a Linux network namespace (sort of a network virtual machine) that contains a wireguard interface with a list of the other Network Resources it needs to route encrypted packets toward.
+
+A User Network has a range, typically a `/16` (like `10.1.0.0/16`), that is user defined. The User then picks a subnet from that range (e.g. `10.1.1.0/24`) for every new NR he wants to participate in that Network.
+
+Workloads that are then provisioned are started in a newly created container, and that container gets a user-assigned IP __in__ that subnet of the Network Resource.
+
+The Network Resource itself then handles the routing and firewalling for the containers that are connected to it. Also, the Network Resource takes care of Internet connectivity, so that the container can reach out to other services on the Internet.
+
+![like this](NR_layout.png)
+
+Also, in a later phase, a User will be able to add IPv6 prefixes to his Network Resources, so that containers are reachable over IPv6.
+
+Fully-routed IPv6 will then be available, where an Exit NR will be the entrypoint towards that network.
+
+## Network Resource Internals
+
+Each NR is basically a router for the User Network, but to allow NRs to access the Internet through the Node's local connection, there are some other internal routers to be added.
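+
+As a concrete (and purely illustrative) companion to the Interworkings section above, the little Go sketch below checks that an NR's `/24` subnet indeed falls inside the user network's `/16` range and derives addresses inside it; the helper names (`contains`, `nthIP`) are made up for this example and are not part of the zos code base. The internal wiring of the NR itself is shown right after.
+
+```go
+package main
+
+import (
+	"fmt"
+	"net"
+)
+
+// contains reports whether the child IPv4 subnet lies fully inside parent.
+func contains(parent, child *net.IPNet) bool {
+	// last address of child: network address with all host bits set
+	last := make(net.IP, len(child.IP.To4()))
+	copy(last, child.IP.To4())
+	for i := range last {
+		last[i] |= ^child.Mask[i]
+	}
+	return parent.Contains(child.IP) && parent.Contains(last)
+}
+
+// nthIP returns the n-th address in an IPv4 subnet
+// (simplified: no overflow or broadcast handling).
+func nthIP(subnet *net.IPNet, n byte) net.IP {
+	ip := make(net.IP, len(subnet.IP.To4()))
+	copy(ip, subnet.IP.To4())
+	ip[3] += n
+	return ip
+}
+
+func main() {
+	_, userRange, _ := net.ParseCIDR("10.1.0.0/16") // the whole user network range
+	_, nrSubnet, _ := net.ParseCIDR("10.1.1.0/24")  // the subnet picked for one NR
+
+	if !contains(userRange, nrSubnet) {
+		fmt.Println("NR subnet is not part of the user network range")
+		return
+	}
+	fmt.Println("NR gateway address:", nthIP(nrSubnet, 1)) // 10.1.1.1
+	fmt.Println("first container IP:", nthIP(nrSubnet, 2)) // 10.1.1.2
+}
+```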
+ +Internally it looks like this : + +```text ++------------------------------------------------------------------------------+ +| |wg mesh | +| +-------------+ +-----+-------+ | +| | | | NR cust1 | 100.64.0.123/16 | +| | container +----------+ 10.3.1.0/24 +----------------------+ | +| | cust1 | veth| | public | | +| +-------------+ +-------------+ | | +| | | +| +-------------+ +-------------+ | | +| | | | NR cust200 | 100.64.4.200/16 | | +| | container +----------+ 10.3.1.0/24 +----------------------+ | +| | cust200 | veth| | public | | +| +-------------+ +------+------+ | | +| |wg mesh | | +| 10.101.123.34/16 | | +| +------------+ |tonrs | +| | | +------------------+ | +| | zos +------+ | 100.64.0.1/16 | | +| | | | 10.101.12.231/16| ndmz | | +| +---+--------+ NIC +-----------------------------+ | | +| | | public +------------------+ | +| +--------+------+ | +| | | +| | | ++------------------------------------------------------------------------------+ + | + | + | + | 10.101.0.0/16 10.101.0.1 + +------------------+------------------------------------------------------------ + + NAT + -------- + rules NR custA + nft add rule inet nat postrouting oifname public masquerade + nft add rule inet filter input iifname public ct state { established, related } accept + nft add rule inet filter input iifname public drop + + rules NR custB + nft add rule inet nat postrouting oifname public masquerade + nft add rule inet filter input iifname public ct state { established, related } accept + nft add rule inet filter input iifname public drop + + rules ndmz + nft add rule inet nat postrouting oifname public masquerade + nft add rule inet filter input iifname public ct state { established, related } accept + nft add rule inet filter input iifname public drop + + + Routing + + if NR only needs to get out: + ip route add default via 100.64.0.1 dev public + + if an NR wants to use another NR as exitpoint + ip route add default via destnr + with for AllowedIPs 0.0.0.0/0 on that wg peer + +``` + +During startup of the Node, the ndmz is put in place, following the configuration if it has a single internet connection , or that with a dual-nic setup, a separate nic is used for internet access. + +The ndmz network has the carrier-grade nat allocation assigned, so we don'tinterfere with RFC1918 private IPv4 address space, so users can use any of them (and not any of `100.64.0.0/10`, of course) diff --git a/docs/network/readme.md b/docs/network/readme.md index 99e06693d..0d554dacb 100644 --- a/docs/network/readme.md +++ b/docs/network/readme.md @@ -1,88 +1,8 @@ -# Network module - -## ZBus - -Network module is available on zbus over the following channel - -| module | object | version | -|--------|--------|---------| -| network|[network](#interface)| 0.0.1| - -## Home Directory -network keeps some data in the following locations -| directory | path| -|----|---| -| root| `/var/cache/modules/network`| - - -## Interface - -```go -//Networker is the interface for the network module -type Networker interface { - ApplyNetResource(Network) (string, error) - DeleteNetResource(Network) error - Namespace(NetID) (string, error) -} -``` - -## Zero-OS networking - -### Some First Explanations - -Zero-OS is meant to provide services in the Threefold grid, and with grid, we naturally understand that the nodes (or their hosted services) need to be reachable for external users or for each other. 
So networking in 0-OS is a big thing, even when you assume that 'the network' is ubiquitous and always there, many things need to happen correctly before having a netWORK. -For this, apart from all the other absolutely wonderful services in 0-OS, there is the network daemon. If it doesn't succeed it's bootstrap, nothing else will, and 0-OS will stop there. - -So it (the network daemon, that is) - - Configures the Node's initial network configuration, so that the Node can register itself. For now we assume that the Node is connected to a network (ethernet segment) that provides IP addresses over DHCP, be it IPv4 or IPv6, or that there is a Routing Avertisement (RA) daemon for IPv6 running on that network. - Only once it has received an IP Address, most other internal services will be able to start. ([John Gage](https://www.networkcomputing.com/cloud-infrastructure/network-computer-again) from Sun said that `The Network is the Computer`, here that is absolutely true) - - - Notifies [zinit](https://github.com/threefoldtech/zinit/blob/master/docs/readme.md) (the services orchestrator in 0-OS) that it can register the dhcp client as a permanent process on the intitially discovered NIC (Network Interface Card) and that zinit can start other processes, one of which takes care of registering the node to the grid. (more elaborate explanation about that in [identity service](../identity/readme.md). - - - Listens in on the zbus for new or updated Network Resources (NR) that get sent by the provision daemon and applies them. - -[Here some thought dumps from where we started working this out](../../specs/network/Requirements.md) - -### Jargon - -So. Let's have some abbreviations settled first: - - - #### Node : simple - TL;DR: Computer. - A Node is a computer with CPU, Memory, Disks (or SSD's, NVMe) connected to _A_ network that has Internet access. (i.e. it can reach www.google.com, just like you on your phone, at home) - That Node will, once it has received an IP address (IPv4 or IPv6), register itself when it's new, or confirm it's identity and it's online-ness (for lack of a better word). - - - #### TNo : Tenant Network object. [The gory details here](https://github.com/threefoldtech/zos/blob/master/modules/network.go) - TL;DR: The Network Description. - We named it so, because it is a datastructure that describes the __whole__ network a user can request (or setup). - That network is a virtualized overlay network. - Basically that means that transfer of data in that network *always* is encrypted, protected from prying eyes, and __resources in that network can only communicate with each other__ **unless** there is a special rule that allows access. Be it by allowing accesss through firewall rules, *and/or* through a proxy (a service that forwards requests on behalf of, and ships replies back to the client). - - - #### A Tno has an ExitPoint. - TL;DR: Any network needs to get out *somewhere*. [Some more explanation](exitpoints.md) - A Node that happens to live in an Internet Network (to differentiate from a Tenant network), more explictly, a network that is directly routable and accessible (unlike a home network), can be specified as an Exit Node. - That Node can then host Exitpoints for Tenant Networks. - Let's explain that. - Entities in a Tenant Network, where a TN being an overlay network, can only communicate with peers that are part of that network. At a certain point there is a gateway needed for this network to communicate with the 'external' world (BBI): that is an ExitPoint. 
ExitPoints can only live in Nodes designated for that purpose, namely Exit Nodes. Exit Nodes can only live in networks that are bidirectionally reachable for THE Internet (BBI). - An ExitPoint is *always* a part of a Network Resource (see below). - - - #### Network Resource: (NR) - TL;DR: the Node-local part of a TNo. - The main building block of a TNo; i.e. each service of a user in a Node lives in an NR. - Each Node hosts User services, whatever type of service that is. Every service in that specific node will always be solely part of the Tenant's Network. (read that twice). - So: A Network Resource is the thing that interconnects all other network resources of the TN (Tenant Network), and provides routing/firewalling for these interconnects, including the default route to the BBI (Big Bad Internet), aka ExitPoint. - All User services that run in a Node are in some way or another connected to the Network Resource (NR), which will provide ip packet forwarding and firewalling to all other network resources (including the Exitpoint) of the TN (Tenant Network) of the user. (read that three times, and the last time, read it slowly and out loud) - - - #### IPAM IP Adress management - TL;DR Give IP Adresses to containers attached to the NR's bridge. - When the provisioner wants to start a container that doesn't attach itself to the NR's network namespace (cool that you can do that), but instead needs to create a veth pair and attach it to the NR's preconfigured bridge, the veth end in the container needs to get an IP address in the NR's Prefix (IPv6) and subnet (IPv4). - The NR has a deterministic IPv4 subnet definition that is coupled to the 7-8th byte of the IPv6 Prefix, where it then can use an IPv4 in the /24 CIDR that is assigned to the NR. - As for the IPv6 address, you can choose to have a mac address derived IPv6 address, or/and a fixed address based on the same IPv4 address you gave to the container's interface. - Note: - - a veth pair is a concept in linux that creates 2 virtual network interfaces that are interconnected with a virtual cable. what goes in on one end of the pair, gets out on the other end, and vice-versa. - - a bridge in linux is a concept of a virtual switch that can contain virtual interfaces. When you attach an interface to a bridge, it is a virtual switch with one port. You can add as many interfaces to that virtual switch as you like. - - - +# zos networking +## Index +- [definitions of the vocabulary used in the documentation](definitions.md) +- [Introduction to networkd, the network manager of 0-OS](introduction.md) +- [Detail about the wireguard mesh used to interconnect 0-OS nodes](mesh.md) +- [Documentation for farmer on how to setup the network of their farm](setup_farm_network.md) \ No newline at end of file diff --git a/docs/network/setup_farm_network.md b/docs/network/setup_farm_network.md new file mode 100644 index 000000000..111c09c02 --- /dev/null +++ b/docs/network/setup_farm_network.md @@ -0,0 +1,106 @@ +# ZOSv2 network considerations + +Running ZOS on a node is just a matter of booting it with a USB stick, or with a dhcp/bootp/tftp server with the right configuration so that the node can start the OS. +Once it starts booting, the OS detects the NICs, and starts the network configuration. A Node can only continue it's boot process till the end when it effectively has received an IP address and a route to the Internet. Without that, the Node will retry indefinitely to obtain Internet access and not finish it's startup. 
+
+So a Node needs to be connected to a __wired__ network that provides a DHCP server and a default gateway to the Internet, be it NATed or plainly on the public network; any route to the Internet, be it IPv4 or IPv6 or both, is sufficient.
+
+For a node to be able to host user networks, we **strongly** advise a working IPv6 setup, as that is the primary IP stack we're using for the User Network's Mesh to function.
+
+## Running ZOS (v2) at home
+
+Running a ZOS Node at home is plain simple: connect it to your router, plug it into the network, insert the preconfigured USB stick containing the bootloader and the `farmer_id`, and power it on.
+You will then see it appear in the Cockpit, under your farm.
+
+## Running ZOS (v2) in a multi-node farm in a DC
+
+Multi-Node Farms, where a farmer wants to host the nodes in a datacentre, have basically the same simplicity, but the nodes can boot from a 0-Boot server that provides DHCP and also delivers the iPXE image to load, without the need for a USB stick in every Node.
+
+A 0-Boot server is not really necessary, but it helps ;-). That server has a list of the MAC addresses of the nodes, and delivers the bootloader over PXE. The farmer is responsible for setting up the network and configuring the 0-Boot server.
+
+### Necessities
+
+The Farmer needs to:
+
+- Obtain an IPv4 subnet from the provider. At least one IPv4 address per node is needed, where all IP addresses are publicly reachable.
+- Obtain an IPv6 prefix allocation from the provider. A publicly reachable `/64` will do, but a `/48` is advisable if the farmer wants to provide IPv6 transit for User Networks.
+- Have the Nodes connected on that public network with a switch so that all Nodes are publicly reachable.
+- In case of multiple NICs, also make sure the farm is properly registered in BCDB, so that the Nodes' public IP addresses are registered.
+- Properly record the MAC addresses of the Nodes, and configure the DHCP server to provide an IP address for each Node; in case of multiple NICs, also provide private IP addresses over DHCP per Node.
+- Make sure that after first boot, the Nodes are reachable.
+
+### IPv6
+
+IPv6, although a real protocol since '98, has seen reluctant adoption over the time it has existed, mostly because ISPs and carriers were reluctant to deploy it, not seeing the need after the advent of NAT and private IP space, which gave a false impression of security.
+But this month (10/2019), RIPE sent a mail to all its LIRs that the last consecutive /22 of IPv4 has been allocated. Needless to say, that makes the transition to IPv6 in 2019 of utmost importance and necessity.
+Hence, ZOS starts with IPv6, and IPv4 is merely an afterthought ;-)
+So in a nutshell: we require Farmers to have IPv6 on the Node's network.
+
+### Routing/firewalling
+
+Basically, the Nodes are self-protecting, in the sense that they provide no means at all to be accessed through listening processes. No service is active on the node itself, and User Networks function solely on an overlay.
+That also means that there is no need for a Farm admin to protect the Nodes from external access, albeit some DDoS protection might be a good idea.
+In the first phase we will still allow the Host OS (ZOS) to reply to ICMP ping requests, but that 'feature' might as well be blocked in the future, as once a Node is able to register itself, there is no real need to ever want to try to reach it.
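+
+To give an idea of what that self-protecting posture amounts to, the lines below sketch it in the same `nft` style used elsewhere in these docs. This is only an illustration of the intent, not the actual ruleset shipped with ZOS:
+
+```text
+nft add rule inet filter input ct state { established, related } accept
+nft add rule inet filter input icmp type echo-request accept
+nft add rule inet filter input icmpv6 type echo-request accept
+nft add rule inet filter input drop
+```
+
+A real ruleset would of course also have to accept IPv6 neighbour discovery and the wireguard listening ports used by the network resources hosted on the node.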
+ +### Multi-NIC Nodes + +Nodes that Farmers deploy are typically multi-NIC Nodes, where one (typically a 1GBit NIC) can be used for getting a proper DHCP server running from where the Nodes can boot, and one other NIC (1Gbit or even 10GBit), that then is used for transfers of User Data, so that ther is a clean separation, and possible injections bogus data is not possible. + +That means that there would be two networks, either by different physical switches, or by port-based VLANs in the switch (if there is only one). + +- Management NICs + The Management NIC will be used by ZOS to boot, and register itself to the GRID. Also, all communications from the Node to the Grid happens from there. +- Public NICs + +### Farmers and the grid + +A Node, being part of the Grid, has no concept of 'Farmer'. The only relationship for a Node with a Farmer is the fact that that is registered 'somewhere (TM)', and that a such workloads on a Node will be remunerated with Tokens. For the rest, a Node is a wholly stand-alone thing that participates in the Grid. + +```text + 172.16.1.0/24 + 2a02:1807:1100:10::/64 ++--------------------------------------+ +| +--------------+ | +-----------------------+ +| |Node ZOS | +-------+ | | +| | +-------------+1GBit +--------------------+ 1GBit switch | +| | | br-zos +-------+ | | +| | | | | | +| | | | | | +| | | | +------------------+----+ +| +--------------+ | | +-----------+ +| | OOB Network | | | +| | +----------+ ROUTER | +| | | | +| | | | +| | | | +| +------------+ | +----------+ | +| | Public | | | | | +| | container | | | +-----+-----+ +| | | | | | +| | | | | | +| +---+--------+ | +-------------------+--------+ | +| | | | 10GBit Switch | | +| br-pub| +-------+ | | | +| +-----+10GBit +-------------------+ | +----------> +| +-------+ | | Internet +| | | | +| | +----------------------------+ ++--------------------------------------+ + 185.69.167.128/26 Public network + 2a02:1807:1100:0::/64 + +``` + +Where the underlay part of the wireguard interfaces get instantiated in the Public container (namespace), and once created these wireuard interfacesget sent into the User Network (Network Resource), where a user can then configure the interface a he sees fit. + +The router of the farmer fulfills 2 roles: + +- NAT everything in the OOB network to the outside, so that nodes can start and register themselves, as well get tasks to execute from the BCDB. +- Route the assigned IPv4 subnet and IPv6 public prefix on the public segment, to which the public container is connected. + +As such, in case that the farmer wants to provide IPv4 public access for grid proxies, the node will need at least one (1) IPv4 address. It's free to the farmer to assign IPv4 addresses to only a part of the Nodes. +OTOH, it is quite important to have a proper IPv6 setup, because things will work out better. + +It's the Farmer's task to set up the Router and the switches. + +In a simpler setup (small number of nodes for instance), the farmer could setup a single switch and make 2 port-based VLANs to separate OOB and Public, or even wit single-nic nodes, just put them directly on the public segment, but then he will have to provide a DHCP server on the Public network. 
diff --git a/docs/network/zbus.md b/docs/network/zbus.md new file mode 100644 index 000000000..c2b7a2a96 --- /dev/null +++ b/docs/network/zbus.md @@ -0,0 +1,46 @@ +# Network module + +## ZBus + +Network module is available on zbus over the following channel + +| module | object | version | +|--------|--------|---------| +| network|[network](#interface)| 0.0.1| + +## Home Directory + +network keeps some data in the following locations +| directory | path| +|----|---| +| root| `/var/cache/modules/network`| + + +## Interface + +```go +//Networker is the interface for the network module +type Networker interface { + // Create a new network resource + CreateNR(Network) (string, error) + // Delete a network resource + DeleteNR(Network) error + + // Join a network (with network id) will create a new isolated namespace + // that is hooked to the network bridge with a veth pair, and assign it a + // new IP from the network resource range. The method return the new namespace + // name. + // The member name specifies the name of the member, and must be unique + // The NetID is the network id to join + Join(networkdID NetID, containerID string, addrs []string) (join Member, err error) + + // ZDBPrepare creates a network namespace with a macvlan interface into it + // to allow the 0-db container to be publicly accessible + // it retusn the name of the network namespace created + ZDBPrepare() (string, error) + + // Addrs return the IP addresses of interface + // if the interface is in a network namespace netns needs to be not empty + Addrs(iface string, netns string) ([]net.IP, error) +} +``` \ No newline at end of file diff --git a/docs/storage/readme.md b/docs/storage/readme.md index e12eb3ccb..0fb6d103f 100644 --- a/docs/storage/readme.md +++ b/docs/storage/readme.md @@ -1,13 +1,15 @@ # Storage Module -## ZBus +## ZBus + Storage module is available on zbus over the following channel -| module | object | version | +| module | object | version | |--------|--------|---------| | storage|[storage](#disk-object)| 0.0.1| ## Introduction + This module responsible to manage everything related with storage. In 0-OS we have 2 different storage primitives, storage pool and [0-db](https://github.com/threefoldtech/0-db). Storage pool are used when a direct disk access is required. Typical example would be a container needs to persist some data on disk. @@ -25,13 +27,16 @@ List of sub-modules: - [booting](#booting) ## On Node Booting + When the module boots: + - Make sure to mount all available pools - Scan available disks that are not used by any pool and create new pools on those disks. (all pools now are created with `RaidSingle` policy) - Try to find and mount a cache sub-volume under /var/cache. - If no cache sub-volume is available a new one is created and then mounted. ### zinit unit + The zinit unit file of the module specify the command line, test command, and the order where the services need to be booted. Storage module is a dependency for almost all other system modules, hence it has high boot presidency (calculated on boot) by zinit based on the configuration. 
@@ -47,21 +52,21 @@ test: mountpoint /var/cache Responsible to discover and prepare all the disk available on a node to be ready to use for the other sub-modules -### Interface +### DO Interface ```go // RaidProfile type type RaidProfile string const ( - // Single profile - Single RaidProfile = "single" - // Raid0 profile - Raid0 RaidProfile = "raid0" - // Raid1 profile - Raid1 RaidProfile = "raid1" - // Raid10 profile - Raid10 RaidProfile = "raid10" + // Single profile + Single RaidProfile = "single" + // Raid0 profile + Raid0 RaidProfile = "raid0" + // Raid1 profile + Raid1 RaidProfile = "raid1" + // Raid10 profile + Raid10 RaidProfile = "raid10" ) // DeviceType is the actual type of hardware that the storage device runs on, @@ -70,57 +75,57 @@ type DeviceType string // Known device types const ( - SSDDevice = "SSD" - HDDDevice = "HDD" + SSDDevice = "SSD" + HDDDevice = "HDD" ) // StoragePolicy describes the pool creation policy type StoragePolicy struct { - // Raid profile for this policy - Raid RaidProfile - // Number of disks to use in a single pool - // note that, the disks count must be valid for - // the chosen raid profile. - Disks uint8 - - // Only create this amount of storage pools. Default to 0 -> unlimited. - // The spared disks can later be used in automatic repair if a physical - // disk got corrupt or bad. - // Note that if it's set to 0 (unlimited), some disks might be spared anyway - // in case the number of disks required in the policy doesn't add up to pools - // for example, a pool of 2s on a machine with 5 disks. - MaxPools uint8 + // Raid profile for this policy + Raid RaidProfile + // Number of disks to use in a single pool + // note that, the disks count must be valid for + // the chosen raid profile. + Disks uint8 + + // Only create this amount of storage pools. Default to 0 -> unlimited. + // The spared disks can later be used in automatic repair if a physical + // disk got corrupt or bad. + // Note that if it's set to 0 (unlimited), some disks might be spared anyway + // in case the number of disks required in the policy doesn't add up to pools + // for example, a pool of 2s on a machine with 5 disks. + MaxPools uint8 } // StorageModule defines the api for storage type StorageModule interface { - // CreateFilesystem creates a filesystem with a given size. The filesystem - // is mounted, and the path to the mountpoint is returned. The filesystem - // is only attempted to be created in a pool of the given type. If no - // more space is available in such a pool, `ErrNotEnoughSpace` is returned. - // It is up to the caller to handle such a situation and decide if he wants - // to try again on a different devicetype - CreateFilesystem(name string, size uint64, poolType DeviceType) (string, error) - - // ReleaseFilesystem signals that the named filesystem is no longer needed. - // The filesystem will be unmounted and subsequently removed. - // All data contained in the filesystem will be lost, and the - // space which has been reserved for this filesystem will be reclaimed. - ReleaseFilesystem(name string) error - - // Path return the path of the mountpoint of the named filesystem - // if no volume with name exists, an empty path and an error is returned - Path(name string) (path string, err error) + // CreateFilesystem creates a filesystem with a given size. The filesystem + // is mounted, and the path to the mountpoint is returned. The filesystem + // is only attempted to be created in a pool of the given type. 
If no + // more space is available in such a pool, `ErrNotEnoughSpace` is returned. + // It is up to the caller to handle such a situation and decide if he wants + // to try again on a different devicetype + CreateFilesystem(name string, size uint64, poolType DeviceType) (string, error) + + // ReleaseFilesystem signals that the named filesystem is no longer needed. + // The filesystem will be unmounted and subsequently removed. + // All data contained in the filesystem will be lost, and the + // space which has been reserved for this filesystem will be reclaimed. + ReleaseFilesystem(name string) error + + // Path return the path of the mountpoint of the named filesystem + // if no volume with name exists, an empty path and an error is returned + Path(name string) (path string, err error) } - ``` ## 0-db object + > This object is `NOT IMPLEMENTED YET` Responsible to do the capacity planning of the 0-db on top of the disk prepare by the disk sub-module -### Interface +### 0-DB Interface ```go type ZDBNamespace struct {