Skip to content

Commit

Permalink
Merge pull request #2120 from martinkennelly/merge_12_04_24
Browse files Browse the repository at this point in the history
SDN-3931,OCPBUGS-29952,OCPBUGS-25889: Downstream merge 12 04 24
  • Loading branch information
openshift-merge-bot[bot] committed Apr 15, 2024
2 parents 9046355 + 6b8ff6e commit f4dee0f
Show file tree
Hide file tree
Showing 55 changed files with 1,295 additions and 632 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.base
Expand Up @@ -13,7 +13,7 @@ RUN dnf install -y --nodocs \
dnf clean all

ARG ovsver=3.1.0-73.el9fdp
ARG ovnver=23.09.0-112.el9fdp
ARG ovnver=23.09.0-139.el9fdp

RUN INSTALL_PKGS="iptables" && \
ovsver_short=$(echo "$ovsver" | cut -d'.' -f1,2) && \
Expand Down
2 changes: 1 addition & 1 deletion dist/images/Dockerfile.fedora
Expand Up @@ -15,7 +15,7 @@ USER root

ENV PYTHONDONTWRITEBYTECODE yes

ARG ovnver=ovn-23.09.0-112.fc39
ARG ovnver=ovn-23.09.0-139.fc39
# Automatically populated when using docker buildx
ARG TARGETPLATFORM
ARG BUILDPLATFORM
Expand Down
6 changes: 4 additions & 2 deletions dist/images/Dockerfile.ubuntu
Expand Up @@ -14,11 +14,13 @@ USER root

RUN apt-get update && apt-get install -y iproute2 curl software-properties-common util-linux

RUN echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list
RUN curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -

# Install OVS and OVN packages.
RUN apt-get update && apt-get install -y openvswitch-switch openvswitch-common ovn-central ovn-common ovn-host kubectl
RUN apt-get update && apt-get install -y openvswitch-switch openvswitch-common ovn-central ovn-common ovn-host

RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
&& install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

RUN mkdir -p /var/run/openvswitch

Expand Down
32 changes: 0 additions & 32 deletions dist/templates/ovnkube-alerts.yaml.j2
Expand Up @@ -95,38 +95,6 @@ spec:
The pod creation latency aggregated across all masters for the last
15minutes is more than 5 seconds as compared to 15 minutes before last 15 minutes.

- alert: OvnKubeHighNBCliLatency99thPercentile
expr: |
histogram_quantile(0.99,
sum by (le) (
rate(ovnkube_controller_ovn_cli_latency_seconds_bucket{
command="ovn-nbctl"}[15m])
)
) > 3
for: 10m
labels:
severity: warning
annotations:
description: |
The ovn-nbctl 99th percentile CLI latency {{ value }} aggregated
across all masters for the last 15minutes is more than 3 seconds.

- alert: OvnKubeHighSBCliLatency99thPercentile
expr: |
histogram_quantile(0.99,
sum by (le) (
rate(ovnkube_controller_ovn_cli_latency_seconds_bucket{
command="ovn-sbctl"}[15m])
)
) > 3
for: 10m
labels:
severity: warning
annotations:
description: |
The ovn-sbctl 99th percentile CLI latency {{ value }} aggregated
across all masters for the last 15minutes is more than 3 seconds.

- alert: OvnKubeHighK8sNetworkPolicyUpdateLatency99thPercentile
expr: |
histogram_quantile(0.99,
Expand Down
33 changes: 26 additions & 7 deletions docs/egress-ip.md
Expand Up @@ -115,27 +115,28 @@ COMMIT
...
```

IPRoute2 rules will look like the following - note rule with priority `6000` and also the refered table `1008`:
IPRoute2 rules will look like the following - note rule with priority `6000` and also the table `1111`:
```shell
sh-5.2# ip rule
0: from all lookup local
30: from all fwmark 0x1745ec lookup 7
6000: from 10.244.2.3 lookup 1008
6000: from 10.244.2.3 lookup 1111
32766: from all lookup main
32767: from all lookup default
```

And the default route in the correct table `1008`:
And the default route in the correct table `1111`:
```shell
sh-5.2# ip route show table 1008
sh-5.2# ip route show table 1111
default dev dummy
```

No NAT is required on the OVN primary network gateway router.
OVN-Kubernetes (ovnkube-node) takes care of adding a rule to the rule table with src IP of the pod and routed towards a
new routing table specifically created to route the traffic out the correct interface. IPTables is also altered and an entry
is created within the chain `OVN-KUBE-EGRESS-IP-Multi-NIC` for each pod to allow SNAT to occur when a src IP is match
leaving a particular interface.
new routing table specifically created to route the traffic out the correct interface. IPTables rules are also altered and an entry
is created within the chain `OVN-KUBE-EGRESS-IP-Multi-NIC` for each selected pod to allow SNAT to occur when
egress-ing a particular interface. The routing table number `1111` is generated from the interface name.
Routes within the main routing table whose output interface is the same interface used for the Egress IP are also cloned into routing table `1111`.

### Pod to node IP traffic
When a cluster networked pod matched by an egress IP tries to connect to a non-local node IP it hits the following
Expand All @@ -157,6 +158,24 @@ priority=105,pkt_mark=0x3f0,ip,in_port=2 actions=ct(commit,zone=64000,nat(src=<N
This is required to make `pod -> node IP` traffic behave the same regardless of where the pod is hosted.
Implementation details: https://github.com/ovn-org/ovn-kubernetes/commit/e2c981a42a28e6213d9daf3b4489c18dc2b84b19.

For local gateway mode, in which an Egress IP is assigned to a non-primary interface, an IP rule is added to send packets
to the main routing table at a priority higher than that of EgressIP IP rules, which are set to priority `6000`:
```shell
5999: from all fwmark 0x3f0 lookup main
```
Note: `0x3f0` is hexadecimal for `1008`. A lower IP rule priority number takes precedence over a higher one.

This ensures all traffic to node IPs will not be selected by EgressIP IP rules.
However, reply traffic will not carry the `1008` mark and would be dropped by reverse path filtering; therefore we add
iptables rules to the mangle table to save and restore the `1008` mark:
```shell
sh-5.2# iptables -t mangle -L PREROUTING
Chain PREROUTING (policy ACCEPT)
target prot opt source destination
CONNMARK all -- anywhere anywhere mark match 0x3f0 CONNMARK save
CONNMARK all -- anywhere anywhere mark match 0x0 CONNMARK restore
```

### Dealing with non SNATed traffic
Egress IP is often configured on a node different from the one hosting the affected pods.
Due to the fact that ovn-controllers on different nodes apply the changes independently,
Expand Down
1 change: 1 addition & 0 deletions docs/metrics.md
Expand Up @@ -17,6 +17,7 @@ Measurement accuracy can be impacted by other parallel processing that might be
## Change log
This list is to help notify if there are additions, changes or removals to metrics. Latest changes are at the top of this list.

- Remove ovnkube_controller_ovn_cli_latency_seconds metrics since we have moved most of the OVN DB operations to libovsdb.
- Effect of OVN IC architecture:
- Move all the metrics from subsystem "ovnkube-master" to subsystem "ovnkube-controller". The non-IC and IC deployments will each continue to have their ovnkube-master and ovnkube-controller containers running inside the ovnkube-master and ovnkube-controller pods. The metrics scraping should work seamlessly. See https://github.com/ovn-org/ovn-kubernetes/pull/3723 for details
- Move the following metrics from subsystem "master" to subsystem "clustermanager". Therefore, the following metrics are renamed.
Expand Down
20 changes: 20 additions & 0 deletions go-controller/pkg/libovsdb/ops/db_object_ids.go
Expand Up @@ -2,6 +2,7 @@ package ops

import (
"fmt"
"strings"

"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
)
Expand Down Expand Up @@ -60,6 +61,25 @@ const (
PrimaryIDKey ExternalIDKey = types.PrimaryIDKey
)

// ObjectNameKey may be used as a secondary ID in the future. To ensure easy filtering for namespaced
// objects, you can combine namespace and name in that key. To unify this process (and potential parsing of the key)
// the following 2 functions exist:
// - BuildNamespaceNameKey to combine namespace and name into one key
// - ParseNamespaceNameKey to split the key back into namespace and name

// BuildNamespaceNameKey combines a namespace and a name into a single
// "<namespace>:<name>" key. Use ParseNamespaceNameKey to split it back.
func BuildNamespaceNameKey(namespace, name string) string {
	return fmt.Sprintf("%s:%s", namespace, name)
}

// ParseNamespaceNameKey splits a "<namespace>:<name>" key (as produced by
// BuildNamespaceNameKey) into its namespace and name components.
// A key that does not contain exactly one ":" separator yields an error.
func ParseNamespaceNameKey(key string) (namespace, name string, err error) {
	parts := strings.Split(key, ":")
	if len(parts) == 2 {
		return parts[0], parts[1], nil
	}
	return "", "", fmt.Errorf("failed to parse namespaced name key %v, expected format <namespace>:<name>", key)
}

// dbIDsMap is used to make sure the same ownerType is not defined twice for the same dbObjType to avoid conflicts.
// It is filled in newObjectIDsType when registering new ObjectIDsType
var dbIDsMap = map[dbObjType]map[ownerType]bool{}
Expand Down
40 changes: 40 additions & 0 deletions go-controller/pkg/libovsdb/ops/db_object_types.go
Expand Up @@ -6,6 +6,7 @@ const (
addressSet dbObjType = iota
acl
dhcpOptions
portGroup
)

const (
Expand All @@ -32,6 +33,9 @@ const (
VirtualMachineOwnerType ownerType = "VirtualMachine"
// NetworkPolicyPortIndexOwnerType is the old version of NetworkPolicyOwnerType, kept for sync only
NetworkPolicyPortIndexOwnerType ownerType = "NetworkPolicyPortIndexOwnerType"
// ClusterOwnerType means the object is cluster-scoped and doesn't belong to any k8s objects
ClusterOwnerType ownerType = "Cluster"

// owner extra IDs, make sure to define only 1 ExternalIDKey for every string value
PriorityKey ExternalIDKey = "priority"
PolicyDirectionKey ExternalIDKey = "direction"
Expand Down Expand Up @@ -184,6 +188,7 @@ var ACLNetpolNode = newObjectIDsType(acl, NetpolNodeOwnerType, []ExternalIDKey{
// Therefore unique id for a given gressPolicy is portPolicy idx + IPBlock idx
// (empty policy and all selector-based peers ACLs will have idx=-1)
// Note: keep for backward compatibility only
// Deprecated, should only be used for sync
var ACLNetworkPolicyPortIndex = newObjectIDsType(acl, NetworkPolicyPortIndexOwnerType, []ExternalIDKey{
// policy namespace+name
ObjectNameKey,
Expand Down Expand Up @@ -241,3 +246,38 @@ var VirtualMachineDHCPOptions = newObjectIDsType(dhcpOptions, VirtualMachineOwne
// CIDR field from DHCPOptions with ":" replaced by "."
CIDRKey,
})

var PortGroupNamespace = newObjectIDsType(portGroup, NamespaceOwnerType, []ExternalIDKey{
// namespace name
ObjectNameKey,
})

// every namespace that has at least 1 network policy, has resources that are shared by all network policies
// in that namespace.
var PortGroupNetpolNamespace = newObjectIDsType(portGroup, NetpolNamespaceOwnerType, []ExternalIDKey{
// namespace
ObjectNameKey,
// in the same namespace there can be 2 default deny port groups, egress and ingress
PolicyDirectionKey,
})

var PortGroupNetworkPolicy = newObjectIDsType(portGroup, NetworkPolicyOwnerType, []ExternalIDKey{
// policy namespace+name
ObjectNameKey,
})

var PortGroupAdminNetworkPolicy = newObjectIDsType(portGroup, AdminNetworkPolicyOwnerType, []ExternalIDKey{
// ANP name
ObjectNameKey,
})

var PortGroupBaselineAdminNetworkPolicy = newObjectIDsType(portGroup, BaselineAdminNetworkPolicyOwnerType, []ExternalIDKey{
// BANP name
ObjectNameKey,
})

var PortGroupCluster = newObjectIDsType(portGroup, ClusterOwnerType, []ExternalIDKey{
// name of a global port group
// currently ClusterPortGroup and ClusterRtrPortGroup are present
ObjectNameKey,
})
24 changes: 0 additions & 24 deletions go-controller/pkg/libovsdb/ops/portgroup.go
Expand Up @@ -21,30 +21,6 @@ func FindPortGroupsWithPredicate(nbClient libovsdbclient.Client, p portGroupPred
return found, err
}

// BuildPortGroup builds a port group referencing the provided ports and ACLs
func BuildPortGroup(hashName string, ports []*nbdb.LogicalSwitchPort, acls []*nbdb.ACL, externalIds map[string]string) *nbdb.PortGroup {
	pg := &nbdb.PortGroup{
		Name:        hashName,
		ExternalIDs: externalIds,
	}

	// Only materialize the UUID slices when there is something to reference,
	// leaving the fields nil otherwise.
	if n := len(acls); n > 0 {
		aclUUIDs := make([]string, 0, n)
		for _, a := range acls {
			aclUUIDs = append(aclUUIDs, a.UUID)
		}
		pg.ACLs = aclUUIDs
	}

	if n := len(ports); n > 0 {
		portUUIDs := make([]string, 0, n)
		for _, p := range ports {
			portUUIDs = append(portUUIDs, p.UUID)
		}
		pg.Ports = portUUIDs
	}

	return pg
}

// CreateOrUpdatePortGroupsOps creates or updates the provided port groups
// returning the corresponding ops
func CreateOrUpdatePortGroupsOps(nbClient libovsdbclient.Client, ops []libovsdb.Operation, pgs ...*nbdb.PortGroup) ([]libovsdb.Operation, error) {
Expand Down
6 changes: 0 additions & 6 deletions go-controller/pkg/libovsdb/util/acl.go
Expand Up @@ -8,7 +8,6 @@ import (
libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"

v1 "k8s.io/api/core/v1"
knet "k8s.io/api/networking/v1"
Expand Down Expand Up @@ -54,11 +53,6 @@ func ACLDirectionToACLPipeline(aclDir ACLDirection) ACLPipelineType {
}
}

// HashedPortGroup hashes the provided input so it can be used as a valid
// portGroup name.
func HashedPortGroup(s string) string {
	hashed := util.HashForOVN(s)
	return hashed
}

func JoinACLName(substrings ...string) string {
return strings.Join(substrings, "_")
}
Expand Down
35 changes: 35 additions & 0 deletions go-controller/pkg/libovsdb/util/port_group.go
@@ -0,0 +1,35 @@
package util

import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
)

// GetPortGroupName returns the hashed port group name derived from the
// primary ID of the given DbObjectIDs.
func GetPortGroupName(dbIDs *ops.DbObjectIDs) string {
	primaryID := dbIDs.GetExternalIDs()[ops.PrimaryIDKey.String()]
	return util.HashForOVN(primaryID)
}

// BuildPortGroup builds a port group referencing the provided ports and ACLs.
// The group's name is the OVN hash of the primary ID carried by pgIDs, and
// its external IDs are taken from pgIDs as well.
func BuildPortGroup(pgIDs *ops.DbObjectIDs, ports []*nbdb.LogicalSwitchPort, acls []*nbdb.ACL) *nbdb.PortGroup {
	ids := pgIDs.GetExternalIDs()
	pg := &nbdb.PortGroup{
		Name:        util.HashForOVN(ids[ops.PrimaryIDKey.String()]),
		ExternalIDs: ids,
	}

	// Populate the UUID references only when present; otherwise the slice
	// fields stay nil.
	if n := len(acls); n > 0 {
		aclUUIDs := make([]string, 0, n)
		for _, a := range acls {
			aclUUIDs = append(aclUUIDs, a.UUID)
		}
		pg.ACLs = aclUUIDs
	}

	if n := len(ports); n > 0 {
		portUUIDs := make([]string, 0, n)
		for _, p := range ports {
			portUUIDs = append(portUUIDs, p.UUID)
		}
		pg.Ports = portUUIDs
	}

	return pg
}
4 changes: 2 additions & 2 deletions go-controller/pkg/metrics/ovn_db.go
Expand Up @@ -333,8 +333,8 @@ func getNBDBSockPath() (string, error) {
}

func getOvnDbVersionInfo() {
stdout, _, err := util.RunOVSDBClient("-V")
if err == nil && strings.HasPrefix(stdout, "ovsdb-client (Open vSwitch) ") {
stdout, _, err := util.RunOVNNBAppCtl("version")
if err == nil && strings.HasPrefix(stdout, "ovsdb-server (Open vSwitch) ") {
ovnDbVersion = strings.Fields(stdout)[3]
}
basePath, err := getNBDBSockPath()
Expand Down
14 changes: 0 additions & 14 deletions go-controller/pkg/metrics/ovnkube_controller.go
Expand Up @@ -61,17 +61,6 @@ var metricPodCreationLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
Buckets: prometheus.ExponentialBuckets(.1, 2, 15),
})

// metricOvnCliLatency is the duration to execute OVN commands using CLI tools ovn-nbctl or ovn-sbctl.
// It is a histogram labeled by "command" (the CLI tool that was invoked), so the
// per-tool latency distributions can be queried independently.
var metricOvnCliLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
	Namespace: MetricOvnkubeNamespace,
	Subsystem: MetricOvnkubeSubsystemController,
	Name:      "ovn_cli_latency_seconds",
	Help:      "The latency of various OVN commands. Currently, ovn-nbctl and ovn-sbctl",
	// exponential buckets starting at 100ms, doubling, 15 buckets
	Buckets: prometheus.ExponentialBuckets(.1, 2, 15)},
	// labels
	[]string{"command"},
)

// MetricResourceUpdateCount is the number of times a particular resource's UpdateFunc has been called.
var MetricResourceUpdateCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: MetricOvnkubeNamespace,
Expand Down Expand Up @@ -388,9 +377,6 @@ func RegisterOVNKubeControllerPerformance(nbClient libovsdbclient.Client) {
prometheus.MustRegister(MetricRequeueServiceCount)
prometheus.MustRegister(MetricSyncServiceCount)
prometheus.MustRegister(MetricSyncServiceLatency)
prometheus.MustRegister(metricOvnCliLatency)
// This is set to not create circular import between metrics and util package
util.MetricOvnCliLatency = metricOvnCliLatency
registerWorkqueueMetrics(MetricOvnkubeNamespace, MetricOvnkubeSubsystemController)
prometheus.MustRegister(prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Expand Down
12 changes: 9 additions & 3 deletions go-controller/pkg/metrics/ovs.go
Expand Up @@ -285,10 +285,16 @@ var metricOvsUpcallFlowLimitHit = prometheus.NewGauge(prometheus.GaugeOpts{
type ovsClient func(args ...string) (string, string, error)

func getOvsVersionInfo() {
stdout, _, err := util.RunOVSVsctl("--version")
if err == nil && strings.HasPrefix(stdout, "ovs-vsctl (Open vSwitch)") {
ovsVersion = strings.Fields(stdout)[3]
stdout, _, err := util.RunOvsVswitchdAppCtl("version")
if err != nil {
klog.Errorf("Failed to get version information: %s", err.Error())
return
}
if !strings.HasPrefix(stdout, "ovs-vswitchd (Open vSwitch)") {
klog.Errorf("Unexpected ovs-appctl version output: %s", stdout)
return
}
ovsVersion = strings.Fields(stdout)[3]
}

// ovsDatapathLookupsMetrics obtains the ovs datapath
Expand Down

0 comments on commit f4dee0f

Please sign in to comment.