diff --git a/pkg/subctl/cmd/diagnose/firewall.go b/pkg/subctl/cmd/diagnose/firewall.go index 40f34a239..bf7b2c97c 100644 --- a/pkg/subctl/cmd/diagnose/firewall.go +++ b/pkg/subctl/cmd/diagnose/firewall.go @@ -18,7 +18,13 @@ limitations under the License. package diagnose import ( + "context" + "github.com/spf13/cobra" + "github.com/submariner-io/submariner-operator/pkg/internal/cli" + "github.com/submariner-io/submariner-operator/pkg/subctl/cmd" + subv1 "github.com/submariner-io/submariner/pkg/apis/submariner.io/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "github.com/submariner-io/submariner-operator/pkg/subctl/resource" @@ -32,10 +38,10 @@ var diagnoseFirewallConfigCmd = &cobra.Command{ var validationTimeout uint -func addDiagnoseFWConfigFlags(cmd *cobra.Command) { - cmd.Flags().UintVar(&validationTimeout, "validation-timeout", 90, +func addDiagnoseFWConfigFlags(command *cobra.Command) { + command.Flags().UintVar(&validationTimeout, "validation-timeout", 90, "timeout in seconds while validating the connection attempt") - addNamespaceFlag(cmd) + addNamespaceFlag(command) } func init() { @@ -74,3 +80,77 @@ func spawnPod(client kubernetes.Interface, scheduling resource.PodScheduling, po return pod, nil } + +func getActiveGatewayNodeName(cluster *cmd.Cluster, hostname string, status *cli.Status) string { + nodes, err := cluster.KubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + LabelSelector: "submariner.io/gateway=true", + }) + if err != nil { + status.EndWithFailure("Error obtaining the Gateway Nodes in cluster %q: %v", cluster.Name, err) + return "" + } + + for _, node := range nodes.Items { + if node.Name == hostname { + return hostname + } + + // On some platforms, the nodeName does not match with the hostname. + // Submariner Endpoint stores the hostname info in the endpoint and not the nodeName. So, we spawn a + // tiny pod to read the hostname and return the corresponding node. + sPod, err := spawnSnifferPodOnNode(cluster.KubeClient, node.Name, "default", "hostname") + if err != nil { + status.EndWithFailure("Error spawning the sniffer pod on the node %q: %v", node.Name, err) + return "" + } + + defer sPod.DeletePod() + + if err = sPod.AwaitPodCompletion(); err != nil { + status.EndWithFailure("Error waiting for the sniffer pod to finish its execution on node %q: %v", node.Name, err) + return "" + } + + if sPod.PodOutput[:len(sPod.PodOutput)-1] == hostname { + return node.Name + } + } + + status.EndWithFailure("Could not find the active Gateway node %q in local cluster in cluster %q", + hostname, cluster.Name) + return "" +} + +func getLocalEndpointResource(cluster *cmd.Cluster, status *cli.Status) *subv1.Endpoint { + endpoints, err := cluster.SubmClient.SubmarinerV1().Endpoints(cmd.OperatorNamespace).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + status.EndWithFailure("Error obtaining the Endpoints in cluster %q: %v", cluster.Name, err) + return nil + } + + for _, endpoint := range endpoints.Items { + if endpoint.Spec.ClusterID == cluster.Name { + return &endpoint + } + } + + status.EndWithFailure("Could not find the local Endpoint in cluster %q", cluster.Name) + return nil +} + +func getAnyRemoteEndpointResource(cluster *cmd.Cluster, status *cli.Status) *subv1.Endpoint { + endpoints, err := cluster.SubmClient.SubmarinerV1().Endpoints(cmd.OperatorNamespace).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + status.EndWithFailure("Error obtaining the Endpoints in cluster %q: %v", cluster.Name, err) + return nil + } + + for _, endpoint := range endpoints.Items { + if endpoint.Spec.ClusterID != cluster.Name { + return &endpoint + } + } + + status.EndWithFailure("Could not find any remote Endpoint in cluster %q", cluster.Name) + return nil +} diff --git a/pkg/subctl/cmd/diagnose/firewall_vxlan.go b/pkg/subctl/cmd/diagnose/firewall_vxlan.go index be8655f01..dc02c0c54 100644 --- a/pkg/subctl/cmd/diagnose/firewall_vxlan.go +++ b/pkg/subctl/cmd/diagnose/firewall_vxlan.go @@ -76,32 +76,31 @@ func checkFWConfig(cluster *cmd.Cluster, status *cli.Status) { return } - gateways, err := cluster.GetGateways() - if err != nil { - status.QueueFailureMessage(fmt.Sprintf("Error retrieving Gateways: %v", err)) + localEndpoint := getLocalEndpointResource(cluster, status) + if localEndpoint == nil { return } - if gateways == nil { - status.QueueWarningMessage("There are no gateways detected") + remoteEndpoint := getAnyRemoteEndpointResource(cluster, status) + if localEndpoint == nil { return } - if len(gateways.Items[0].Status.Connections) == 0 { - status.QueueWarningMessage("There are no active connections to remote clusters") + gwNodeName := getActiveGatewayNodeName(cluster, localEndpoint.Spec.Hostname, status) + if gwNodeName == "" { return } podCommand := fmt.Sprintf("timeout %d %s", validationTimeout, TCPSniffVxLANCommand) - sPod, err := spawnSnifferPodOnGatewayNode(cluster.KubeClient, podNamespace, podCommand) + sPod, err := spawnSnifferPodOnNode(cluster.KubeClient, gwNodeName, podNamespace, podCommand) if err != nil { - status.QueueFailureMessage(fmt.Sprintf("Error spawning the sniffer pod on the Gateway node: %v", err)) + status.EndWithFailure("Error spawning the sniffer pod on the Gateway node: %v", err) return } defer sPod.DeletePod() - remoteClusterIP := strings.Split(gateways.Items[0].Status.Connections[0].Endpoint.Subnets[0], "/")[0] + remoteClusterIP := strings.Split(remoteEndpoint.Spec.Subnets[0], "/")[0] podCommand = fmt.Sprintf("nc -w %d %s 8080", validationTimeout/2, remoteClusterIP) cPod, err := spawnClientPodOnNonGatewayNode(cluster.KubeClient, podNamespace, podCommand) if err != nil { diff --git a/pkg/subctl/cmd/diagnose/tunnel.go b/pkg/subctl/cmd/diagnose/tunnel.go index 9e0cf2f7a..bfee7a9aa 100644 --- a/pkg/subctl/cmd/diagnose/tunnel.go +++ b/pkg/subctl/cmd/diagnose/tunnel.go @@ -18,7 +18,6 @@ limitations under the License. package diagnose import ( - "context" "fmt" "os" "strings" @@ -30,8 +29,6 @@ import ( "github.com/submariner-io/submariner-operator/pkg/subctl/cmd/utils" "github.com/submariner-io/submariner-operator/pkg/subctl/cmd/utils/restconfig" subv1 "github.com/submariner-io/submariner/pkg/apis/submariner.io/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/rest" ) @@ -108,7 +105,7 @@ func validateTunnelConfigAcrossClusters(localCfg, remoteCfg *rest.Config) bool { status := cli.NewStatus() status.Start(fmt.Sprintf("Checking if tunnels can be setup on the gateway node of cluster %q", localCluster.Name)) - localEndpoint := getEndpointResource(localCluster, status) + localEndpoint := getLocalEndpointResource(localCluster, status) if localEndpoint == nil { return false } @@ -180,45 +177,6 @@ func validateTunnelConfigAcrossClusters(localCfg, remoteCfg *rest.Config) bool { return true } -func getEndpointResource(cluster *cmd.Cluster, status *cli.Status) *subv1.Endpoint { - endpoints, err := cluster.SubmClient.SubmarinerV1().Endpoints(cmd.OperatorNamespace).List(context.TODO(), metav1.ListOptions{}) - if err != nil { - status.EndWithFailure("Error obtaining the Endpoints in cluster %q: %v", cluster.Name, err) - return nil - } - - for _, endpoint := range endpoints.Items { - if endpoint.Spec.ClusterID == cluster.Name { - return &endpoint - } - } - - status.EndWithFailure("Could not find the local Endpoint in cluster %q", cluster.Name) - return nil -} - -func getActiveGatewayNodeName(cluster *cmd.Cluster, hostname string, status *cli.Status) string { - nodes, err := cluster.KubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) - if err != nil { - status.EndWithFailure("Error obtaining the Nodes in cluster %q: %v", cluster.Name, err) - return "" - } - - for _, node := range nodes.Items { - for _, addr := range node.Status.Addresses { - if addr.Type == corev1.NodeHostName { - if strings.HasPrefix(addr.Address, hostname) { - return node.Name - } - } - } - } - - status.EndWithFailure("Could not find the active Gateway node %q in local cluster in cluster %q", - hostname, cluster.Name) - return "" -} - func getTunnelPort(submariner *v1alpha1.Submariner, endpoint *subv1.Endpoint, status *cli.Status) (int32, bool) { var tunnelPort int32 var err error