diff --git a/templates/common/openstack/files/nodeip-finder.yaml b/templates/common/openstack/files/nodeip-finder.yaml
new file mode 100644
index 0000000000..19621af6fb
--- /dev/null
+++ b/templates/common/openstack/files/nodeip-finder.yaml
@@ -0,0 +1,88 @@
+filesystem: "root"
+mode: 0755
+path: "/usr/local/bin/nodeip-finder"
+contents:
+  inline: |
+    #!/usr/libexec/platform-python
+    # /* vim: set filetype=python : */
+    """Writes Kubelet and CRI-O configuration to choose the right IP address
+
+    For kubelet, a systemd drop-in that sets KUBELET_NODE_IP
+    For CRI-O, a systemd drop-in that sets CONTAINER_STREAM_ADDRESS"""
+    from importlib import util as iutil
+    from importlib import machinery as imachinery
+    from types import ModuleType
+    import os
+    import pathlib
+    import socket
+    import sys
+
+    # non_virtual_ip is installed next to this script without a .py suffix, so it
+    # has to be loaded explicitly from its path instead of with a plain import
+    loader = imachinery.SourceFileLoader(
+        'non_virtual_ip',
+        os.path.join(os.path.dirname(os.path.realpath(__file__)), 'non_virtual_ip'))
+    spec = iutil.spec_from_loader('non_virtual_ip', loader)
+    non_virtual_ip = iutil.module_from_spec(spec)
+    loader.exec_module(non_virtual_ip)
+
+
+    KUBELET_WORKAROUND_PATH = '/etc/systemd/system/kubelet.service.d/20-nodenet.conf'
+    CRIO_WORKAROUND_PATH = '/etc/systemd/system/crio.service.d/20-nodenet.conf'
+
+
+    def first_candidate_addr(api_vip: str) -> non_virtual_ip.Address:
+        """Return the first usable interface address on the subnet of api_vip
+
+        Raises SubnetNotFoundException or AddressNotFoundException (both from
+        non_virtual_ip) when no such address exists"""
+        filters = (non_virtual_ip.non_host_scope,
+                   non_virtual_ip.non_deprecated,
+                   non_virtual_ip.non_secondary)
+        iface_addrs = list(non_virtual_ip.interface_addrs(filters))
+        subnet, candidates = non_virtual_ip.vip_subnet_and_addrs_in_it(api_vip, iface_addrs)
+        sys.stderr.write('VIP Subnet %s\n' % subnet.cidr)
+
+        for addr in candidates:
+            return addr
+        raise non_virtual_ip.AddressNotFoundException()
+
+
+    def _write_dropin(path: str, variable: str, value: str) -> None:
+        """Write a systemd drop-in at path that sets variable to value"""
+        # The <unit>.service.d directory is not guaranteed to exist yet
+        pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
+        with open(path, 'w') as dropin:
+            print(f'[Service]\nEnvironment="{variable}={value}"', file=dropin)
+
+
+    def main() -> None:
+        """Pick the node IP on the API VIP subnet and hand it to kubelet and CRI-O
+
+        The VIP is taken from the first argument or, when that is missing, from
+        resolving the name in the API_INT environment variable"""
+        if len(sys.argv) > 1:
+            api_vip = sys.argv[1]
+        else:
+            api_int_name = os.getenv('API_INT')
+            try:
+                sstream_tuple = socket.getaddrinfo(api_int_name, None)[0]
+                _, _, _, _, sockaddr = sstream_tuple
+                api_vip = sockaddr[0]
+                sys.stderr.write(f'Found {api_int_name} to resolve to {api_vip}\n')
+            except socket.gaierror:
+                sys.stderr.write(f'api-int VIP not provided and failed to resolve {api_int_name}\n')
+                sys.exit(1)
+        try:
+            first: non_virtual_ip.Address = first_candidate_addr(api_vip)
+            prefixless = first.cidr.split('/')[0]
+
+            _write_dropin(KUBELET_WORKAROUND_PATH, 'KUBELET_NODE_IP', prefixless)
+            _write_dropin(CRIO_WORKAROUND_PATH, 'CONTAINER_STREAM_ADDRESS', prefixless)
+        except (non_virtual_ip.AddressNotFoundException, non_virtual_ip.SubnetNotFoundException):
+            sys.stderr.write('Failed to find suitable node ip\n')
+            sys.exit(1)
+
+
+    if __name__ == '__main__':
+        main()
diff --git a/templates/worker/00-worker/openstack/files/openstack-non-virtual-ip.yaml b/templates/common/openstack/files/openstack-non-virtual-ip.yaml
similarity index 76%
rename from templates/worker/00-worker/openstack/files/openstack-non-virtual-ip.yaml
rename to templates/common/openstack/files/openstack-non-virtual-ip.yaml
index 2408d1ae4c..57c9e7e785 100644
--- a/templates/worker/00-worker/openstack/files/openstack-non-virtual-ip.yaml
+++ b/templates/common/openstack/files/openstack-non-virtual-ip.yaml
@@ -4,6 +4,8 @@ path: "/usr/local/bin/non_virtual_ip"
 contents:
   inline: |
     #!/usr/libexec/platform-python
+    # /* vim: set filetype=python : */
+    import collections
     import itertools
     import socket
     import struct
@@ -18,26 +20,48 @@ contents:
         """
 
 
+    class AddressNotFoundException(Exception):
+        """
+        Exception raised when no Address in the systems ifaces is on the VIP subnet
+        """
+
+
     TA = TypeVar('TA', bound='Address')
 
+
     class Address:
-        def __init__(self, cidr: str, name: str, family: str, index: int = -1, scope: str = '', deprecated: bool = False) -> None:
+        def __init__(self, cidr: str, name: str, family: str, index: int = -1, scope: str = '', flags: Iterable[str] = tuple(), label: Optional[str] = None) -> None:
             self.index = index
             self.name = name
             self.family = family
             self.cidr = cidr
             self.scope = scope
-            self.deprecated = deprecated
+            self.flags = flags
+            self.label = label
 
         @classmethod
         def from_line(cls: Type[TA], line: str) -> TA:
-            items = line.split()
-            return cls(index=int(items[0][:-1]),
-                       name=items[1],
-                       family=items[2],
-                       cidr=items[3],
-                       scope=items[5],
-                       deprecated='deprecated' in items)
+            """Build an Address from one line of `ip -o addr show` output"""
+            tokens = collections.deque(line.split())
+            index = int(tokens.popleft()[:-1])
+            name = tokens.popleft()
+            family = tokens.popleft()
+            cidr = tokens.popleft()
+            # Optional pairs such as "brd <addr>" may precede the scope keyword
+            while tokens and tokens.popleft() != 'scope':
+                pass
+            scope = tokens.popleft() if tokens else ''
+            flags = []
+            label = None
+            # The token ending in a backslash closes the flags and may carry the label
+            while tokens:
+                token = tokens.popleft()
+                if token[-1] == '\\':
+                    if len(token) > 1:
+                        label = token[:-1]
+                    break
+                flags.append(token)
+            return cls(cidr, name, family, index, scope, flags, label)
 
         def __str__(self) -> str:
             return f'{self.__class__.__name__}({self.cidr}, dev={self.name})'
@@ -45,6 +69,7 @@ contents:
 
     TR = TypeVar('TR', bound='V6Route')
 
+
     class V6Route:
         def __init__(self, destination: str, dev: Optional[str] = None, proto: Optional[str] = None, metric: Optional[int] = None, pref: Optional[str] = None, via: Optional[str] = None) -> None:
             self.destination: str = destination
@@ -127,16 +152,16 @@ contents:
             if subnet_ip_int_min < vip_int < subnet_ip_int_max:
                 subnet_ip = ntoa(addr.family, subnet_ip_int_min)
                 subnet = Address(name="subnet",
-                             cidr='%s/%s' % (subnet_ip, addr.cidr.split('/')[1]),
-                             family=addr.family,
-                             scope='')
+                                 cidr='%s/%s' % (subnet_ip, addr.cidr.split('/')[1]),
+                                 family=addr.family,
+                                 scope='')
                 candidates.append(addr)
         if subnet is None:
             raise SubnetNotFoundException()
         return subnet, candidates
 
 
-    def interface_cidrs(filters: Optional[Iterable[Callable[[Address], bool]]] = None) -> Iterator[Address]:
+    def interface_addrs(filters: Optional[Iterable[Callable[[Address], bool]]] = None) -> Iterator[Address]:
         out = subprocess.check_output(["ip", "-o", "addr", "show"], encoding=sys.stdout.encoding)
         for addr in (Address.from_line(line) for line in out.splitlines()):
             if not filters or all(f(addr) for f in filters):
@@ -161,10 +186,34 @@ contents:
 
 
     def non_host_scope(addr: Address) -> bool:
-        return addr.scope != 'host'
+        """Accept addresses whose scope is not 'host', logging the rejected ones"""
+        if addr.scope == 'host':
+            sys.stderr.write(f'Filtering out {addr} due to it having host scope\n')
+            res = False
+        else:
+            res = True
+        return res
+
 
     def non_deprecated(addr: Address) -> bool:
-        return not addr.deprecated
+        """Accept addresses without the 'deprecated' flag, logging the rejected ones"""
+        if 'deprecated' in addr.flags:
+            sys.stderr.write(f'Filtering out {addr} due to it being deprecated\n')
+            res = False
+        else:
+            res = True
+        return res
+
+
+    def non_secondary(addr: Address) -> bool:
+        """Accept addresses without the 'secondary' flag, logging the rejected ones"""
+        if 'secondary' in addr.flags:
+            sys.stderr.write(f'Filtering out {addr} due to it being secondary\n')
+            res = False
+        else:
+            res = True
+        return res
+
 
     def in_subnet(subnet: Address) -> Callable[[Address], bool]:
         subnet_ip_int_min, subnet_ip_int_max = addr_subnet_int_min_max(subnet)
@@ -179,9 +228,10 @@ contents:
     def main() -> None:
         api_vip = sys.argv[1]
         vips = set(sys.argv[1:4])
-        iface_cidrs = list(interface_cidrs((non_host_scope, non_deprecated)))
+        filters = (non_host_scope, non_deprecated, non_secondary)
+        iface_addrs = list(interface_addrs(filters))
         try:
-            subnet, candidates = vip_subnet_and_addrs_in_it(api_vip, iface_cidrs)
+            subnet, candidates = vip_subnet_and_addrs_in_it(api_vip, iface_addrs)
             sys.stderr.write('VIP Subnet %s\n' % subnet.cidr)
 
             for addr in candidates:
@@ -192,5 +242,6 @@ contents:
         except SubnetNotFoundException:
             sys.exit(1)
 
+
     if __name__ == '__main__':
         main()
diff --git a/templates/common/openstack/units/nodeip-configuration.service b/templates/common/openstack/units/nodeip-configuration.service
new file mode 100644
index 0000000000..21623b2e67
--- /dev/null
+++ b/templates/common/openstack/units/nodeip-configuration.service
@@ -0,0 +1,20 @@
+name: "nodeip-configuration.service"
+enabled: true
+contents: |
+  [Unit]
+  Description=Writes IP address configuration so that kubelet and crio services select a valid node IP
+  # This only applies to VIP managing environments where the kubelet and crio IP
+  # address picking logic is flawed and may end up selecting an address from a
+  # different subnet or a deprecated address
+  Wants=network-online.target
+  After=network-online.target ignition-firstboot-complete.service
+  Before=kubelet.service crio.service
+
+  [Service]
+  # Need oneshot to delay kubelet
+  Type=oneshot
+  ExecStart=/usr/local/bin/nodeip-finder {{.Infra.Status.PlatformStatus.OpenStack.APIServerInternalIP }}
+
+  [Install]
+  WantedBy=multi-user.target
+
diff --git a/templates/master/01-master-kubelet/openstack/units/kubelet.yaml b/templates/master/01-master-kubelet/openstack/units/kubelet.yaml
new file mode 100644
index 0000000000..8889162bf0
--- /dev/null
+++ b/templates/master/01-master-kubelet/openstack/units/kubelet.yaml
@@ -0,0 +1,39 @@
+name: "kubelet.service"
+enabled: true
+contents: |
+  [Unit]
+  Description=Kubernetes Kubelet
+  Wants=rpc-statd.service network-online.target crio.service
+  After=network-online.target crio.service
+
+  [Service]
+  Type=notify
+  ExecStartPre=/bin/mkdir --parents /etc/kubernetes/manifests
+  ExecStartPre=/bin/rm -f /var/lib/kubelet/cpu_manager_state
+  Environment="KUBELET_LOG_LEVEL=3"
+  EnvironmentFile=/etc/os-release
+  EnvironmentFile=-/etc/kubernetes/kubelet-workaround
+  EnvironmentFile=-/etc/kubernetes/kubelet-env
+
+  ExecStart=/usr/bin/hyperkube \
+      kubelet \
+        --config=/etc/kubernetes/kubelet.conf \
+        --bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
+        --kubeconfig=/var/lib/kubelet/kubeconfig \
+        --container-runtime=remote \
+        --container-runtime-endpoint=/var/run/crio/crio.sock \
+        --node-labels=node-role.kubernetes.io/master,node.openshift.io/os_id=${ID} \
+        --node-ip="${KUBELET_NODE_IP}" \
+        --address="${KUBELET_NODE_IP}" \
+        --minimum-container-ttl-duration=6m0s \
+        --cloud-provider={{cloudProvider .}} \
+        --volume-plugin-dir=/etc/kubernetes/kubelet-plugins/volume/exec \
+        {{cloudConfigFlag . }} \
+        --register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
+        --v=${KUBELET_LOG_LEVEL}
+
+  Restart=always
+  RestartSec=10
+
+  [Install]
+  WantedBy=multi-user.target
diff --git a/templates/worker/01-worker-kubelet/openstack/units/kubelet.yaml b/templates/worker/01-worker-kubelet/openstack/units/kubelet.yaml
new file mode 100644
index 0000000000..07fc68f3c9
--- /dev/null
+++ b/templates/worker/01-worker-kubelet/openstack/units/kubelet.yaml
@@ -0,0 +1,38 @@
+name: "kubelet.service"
+enabled: true
+contents: |
+  [Unit]
+  Description=Kubernetes Kubelet
+  Wants=rpc-statd.service network-online.target crio.service
+  After=network-online.target crio.service
+
+  [Service]
+  Type=notify
+  ExecStartPre=/bin/mkdir --parents /etc/kubernetes/manifests
+  ExecStartPre=/bin/rm -f /var/lib/kubelet/cpu_manager_state
+  Environment="KUBELET_LOG_LEVEL=3"
+  EnvironmentFile=/etc/os-release
+  EnvironmentFile=-/etc/kubernetes/kubelet-workaround
+  EnvironmentFile=-/etc/kubernetes/kubelet-env
+
+  ExecStart=/usr/bin/hyperkube \
+      kubelet \
+        --config=/etc/kubernetes/kubelet.conf \
+        --bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
+        --kubeconfig=/var/lib/kubelet/kubeconfig \
+        --container-runtime=remote \
+        --container-runtime-endpoint=/var/run/crio/crio.sock \
+        --node-labels=node-role.kubernetes.io/worker,node.openshift.io/os_id=${ID} \
+        --node-ip="${KUBELET_NODE_IP}" \
+        --address="${KUBELET_NODE_IP}" \
+        --minimum-container-ttl-duration=6m0s \
+        --volume-plugin-dir=/etc/kubernetes/kubelet-plugins/volume/exec \
+        --cloud-provider={{cloudProvider .}} \
+        {{cloudConfigFlag . }} \
+        --v=${KUBELET_LOG_LEVEL}
+
+  Restart=always
+  RestartSec=10
+
+  [Install]
+  WantedBy=multi-user.target