Skip to content
Permalink
Browse files

Merge pull request #65536 from gnufied/fix-flex-crashing-controller-m…

…anager

Automatic merge from submit-queue (batch tested with PRs 65299, 65524, 65154, 65329, 65536). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Make various fixes to flex tests and fix some crashes

* Fixes two controller-manager crashes when a flex plugin gets removed from flex directory.
* Also enables e2e tests to run in local clusters and other environments.
* Removes disruptive from flex e2e tests because flex can be installed in a running cluster and does not require kubelet or controller-manager restart anymore.

/sig storage

cc @verult @jsafrane 

```release-note
Fix controller-manager crashes when flex plugin is removed from flex plugin directory
```
  • Loading branch information...
Kubernetes Submit Queue
Kubernetes Submit Queue committed Jul 2, 2018
2 parents 1b75810 + 4e7c2f6 commit 92b81114f43f3ca74988194406957a5d1ffd1c5d
@@ -605,7 +605,8 @@ func (pm *VolumePluginMgr) refreshProbedPlugins() {
}
pm.probedPlugins[event.Plugin.GetPluginName()] = event.Plugin
} else if event.Op == ProbeRemove {
delete(pm.probedPlugins, event.Plugin.GetPluginName())
// Plugin is not available on ProbeRemove event, only PluginName
delete(pm.probedPlugins, event.PluginName)
} else {
glog.Errorf("Unknown Operation on PluginName: %s.",
event.Plugin.GetPluginName())
@@ -147,6 +147,7 @@ func (og *operationGenerator) GenerateVolumesAreAttachedFunc(
og.volumePluginMgr.FindPluginBySpec(volumeAttached.VolumeSpec)
if err != nil || volumePlugin == nil {
glog.Errorf(volumeAttached.GenerateErrorDetailed("VolumesAreAttached.FindPluginBySpec failed", err).Error())
continue
}
volumeSpecList, pluginExists := volumesPerPlugin[volumePlugin.GetPluginName()]
if !pluginExists {
@@ -197,7 +197,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
go controller.Run(stopCh)

By(fmt.Sprintf("Block traffic from node %s to the master", node.Name))
host := framework.GetNodeExternalIP(&node)
host, err := framework.GetNodeExternalIP(&node)
framework.ExpectNoError(err)
master := framework.GetMasterAddress(c)
defer func() {
By(fmt.Sprintf("Unblock traffic from node %s to the master", node.Name))
@@ -574,7 +575,8 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
go controller.Run(stopCh)

By(fmt.Sprintf("Block traffic from node %s to the master", node.Name))
host := framework.GetNodeExternalIP(&node)
host, err := framework.GetNodeExternalIP(&node)
framework.ExpectNoError(err)
master := framework.GetMasterAddress(c)
defer func() {
By(fmt.Sprintf("Unblock traffic from node %s to the master", node.Name))
@@ -136,7 +136,9 @@ var _ = Describe("[sig-storage] HostPath", func() {

// Create the subPath directory on the host
existing := path.Join(source.Path, subPath)
result, err := framework.SSH(fmt.Sprintf("mkdir -p %s", existing), framework.GetNodeExternalIP(&nodeList.Items[0]), framework.TestContext.Provider)
externalIP, err := framework.GetNodeExternalIP(&nodeList.Items[0])
framework.ExpectNoError(err)
result, err := framework.SSH(fmt.Sprintf("mkdir -p %s", existing), externalIP, framework.TestContext.Provider)
framework.LogSSHResult(result)
framework.ExpectNoError(err)
if result.Code != 0 {
@@ -180,7 +182,9 @@ var _ = Describe("[sig-storage] HostPath", func() {

// Create the subPath file on the host
existing := path.Join(source.Path, subPath)
result, err := framework.SSH(fmt.Sprintf("echo \"mount-tester new file\" > %s", existing), framework.GetNodeExternalIP(&nodeList.Items[0]), framework.TestContext.Provider)
externalIP, err := framework.GetNodeExternalIP(&nodeList.Items[0])
framework.ExpectNoError(err)
result, err := framework.SSH(fmt.Sprintf("echo \"mount-tester new file\" > %s", existing), externalIP, framework.TestContext.Provider)
framework.LogSSHResult(result)
framework.ExpectNoError(err)
if result.Code != 0 {
@@ -948,7 +948,10 @@ func TestHitNodesFromOutsideWithCount(externalIP string, httpPort int32, timeout
// This function executes commands on a node so it will work only for some
// environments.
func TestUnderTemporaryNetworkFailure(c clientset.Interface, ns string, node *v1.Node, testFunc func()) {
host := GetNodeExternalIP(node)
host, err := GetNodeExternalIP(node)
if err != nil {
Failf("Error getting node external ip : %v", err)
}
master := GetMasterAddress(c)
By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
defer func() {
@@ -4999,7 +4999,7 @@ func GetMasterAddress(c clientset.Interface) string {

// GetNodeExternalIP returns node external IP concatenated with port 22 for ssh
// e.g. 1.2.3.4:22
func GetNodeExternalIP(node *v1.Node) string {
func GetNodeExternalIP(node *v1.Node) (string, error) {
Logf("Getting external IP address for %s", node.Name)
host := ""
for _, a := range node.Status.Addresses {
@@ -5009,9 +5009,26 @@ func GetNodeExternalIP(node *v1.Node) string {
}
}
if host == "" {
Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
return "", fmt.Errorf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
}
return host
return host, nil
}

// GetNodeInternalIP returns node internal IP
func GetNodeInternalIP(node *v1.Node) (string, error) {
host := ""
for _, address := range node.Status.Addresses {
if address.Type == v1.NodeInternalIP {
if address.Address != "" {
host = net.JoinHostPort(address.Address, sshPort)
break
}
}
}
if host == "" {
return "", fmt.Errorf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
}
return host, nil
}

// SimpleGET executes a get on the given url, returns error if non-200 returned.
@@ -72,56 +72,58 @@ func testFlexVolume(driver string, cs clientset.Interface, config framework.Volu
// installFlex installs the driver found at filePath on the node, and restarts
// kubelet if 'restart' is true. If node is nil, installs on the master, and restarts
// controller-manager if 'restart' is true.
func installFlex(c clientset.Interface, node *v1.Node, vendor, driver, filePath string, restart bool) {
func installFlex(c clientset.Interface, node *v1.Node, vendor, driver, filePath string) {
flexDir := getFlexDir(c, node, vendor, driver)
flexFile := path.Join(flexDir, driver)

host := ""
var err error
if node != nil {
host = framework.GetNodeExternalIP(node)
host, err = framework.GetNodeExternalIP(node)
if err != nil {
host, err = framework.GetNodeInternalIP(node)
}
} else {
host = net.JoinHostPort(framework.GetMasterHost(), sshPort)
masterHostWithPort := framework.GetMasterHost()
hostName := getHostFromHostPort(masterHostWithPort)
host = net.JoinHostPort(hostName, sshPort)
}

framework.ExpectNoError(err)

cmd := fmt.Sprintf("sudo mkdir -p %s", flexDir)
sshAndLog(cmd, host)
sshAndLog(cmd, host, true /*failOnError*/)

data := generated.ReadOrDie(filePath)
cmd = fmt.Sprintf("sudo tee <<'EOF' %s\n%s\nEOF", flexFile, string(data))
sshAndLog(cmd, host)
sshAndLog(cmd, host, true /*failOnError*/)

cmd = fmt.Sprintf("sudo chmod +x %s", flexFile)
sshAndLog(cmd, host)

if !restart {
return
}

if node != nil {
err := framework.RestartKubelet(host)
framework.ExpectNoError(err)
err = framework.WaitForKubeletUp(host)
framework.ExpectNoError(err)
} else {
err := framework.RestartControllerManager()
framework.ExpectNoError(err)
err = framework.WaitForControllerManagerUp()
framework.ExpectNoError(err)
}
sshAndLog(cmd, host, true /*failOnError*/)
}

func uninstallFlex(c clientset.Interface, node *v1.Node, vendor, driver string) {
flexDir := getFlexDir(c, node, vendor, driver)

host := ""
var err error
if node != nil {
host = framework.GetNodeExternalIP(node)
host, err = framework.GetNodeExternalIP(node)
if err != nil {
host, err = framework.GetNodeInternalIP(node)
}
} else {
host = net.JoinHostPort(framework.GetMasterHost(), sshPort)
masterHostWithPort := framework.GetMasterHost()
hostName := getHostFromHostPort(masterHostWithPort)
host = net.JoinHostPort(hostName, sshPort)
}

if host == "" {
framework.Failf("Error getting node ip : %v", err)
}

cmd := fmt.Sprintf("sudo rm -r %s", flexDir)
sshAndLog(cmd, host)
sshAndLog(cmd, host, false /*failOnError*/)
}

func getFlexDir(c clientset.Interface, node *v1.Node, vendor, driver string) string {
@@ -150,11 +152,11 @@ func getFlexDir(c clientset.Interface, node *v1.Node, vendor, driver string) str
return flexDir
}

func sshAndLog(cmd, host string) {
func sshAndLog(cmd, host string, failOnError bool) {
result, err := framework.SSH(cmd, host, framework.TestContext.Provider)
framework.LogSSHResult(result)
framework.ExpectNoError(err)
if result.Code != 0 {
if result.Code != 0 && failOnError {
framework.Failf("%s returned non-zero, stderr: %s", cmd, result.Stderr)
}
}
@@ -177,7 +179,18 @@ func getNodeVersion(node *v1.Node) *versionutil.Version {
return versionutil.MustParseSemantic(node.Status.NodeInfo.KubeletVersion)
}

var _ = utils.SIGDescribe("Flexvolumes [Disruptive]", func() {
func getHostFromHostPort(hostPort string) string {
// try to split host and port
var host string
var err error
if host, _, err = net.SplitHostPort(hostPort); err != nil {
// if SplitHostPort returns an error, the entire hostport is considered as host
host = hostPort
}
return host
}

var _ = utils.SIGDescribe("Flexvolumes", func() {
f := framework.NewDefaultFramework("flexvolume")

// note that namespace deletion is handled by delete-namespace flag
@@ -189,9 +202,9 @@ var _ = utils.SIGDescribe("Flexvolumes [Disruptive]", func() {
var suffix string

BeforeEach(func() {
framework.SkipUnlessProviderIs("gce")
framework.SkipUnlessMasterOSDistroIs("gci")
framework.SkipUnlessNodeOSDistroIs("debian", "gci")
framework.SkipUnlessProviderIs("gce", "local")
framework.SkipUnlessMasterOSDistroIs("debian", "ubuntu", "gci")
framework.SkipUnlessNodeOSDistroIs("debian", "ubuntu", "gci")
framework.SkipUnlessSSHKeyPresent()

cs = f.ClientSet
@@ -211,7 +224,7 @@ var _ = utils.SIGDescribe("Flexvolumes [Disruptive]", func() {
driverInstallAs := driver + "-" + suffix

By(fmt.Sprintf("installing flexvolume %s on node %s as %s", path.Join(driverDir, driver), node.Name, driverInstallAs))
installFlex(cs, &node, "k8s", driverInstallAs, path.Join(driverDir, driver), true /* restart */)
installFlex(cs, &node, "k8s", driverInstallAs, path.Join(driverDir, driver))

testFlexVolume(driverInstallAs, cs, config, f)

@@ -229,9 +242,9 @@ var _ = utils.SIGDescribe("Flexvolumes [Disruptive]", func() {
driverInstallAs := driver + "-" + suffix

By(fmt.Sprintf("installing flexvolume %s on node %s as %s", path.Join(driverDir, driver), node.Name, driverInstallAs))
installFlex(cs, &node, "k8s", driverInstallAs, path.Join(driverDir, driver), true /* restart */)
installFlex(cs, &node, "k8s", driverInstallAs, path.Join(driverDir, driver))
By(fmt.Sprintf("installing flexvolume %s on master as %s", path.Join(driverDir, driver), driverInstallAs))
installFlex(cs, nil, "k8s", driverInstallAs, path.Join(driverDir, driver), true /* restart */)
installFlex(cs, nil, "k8s", driverInstallAs, path.Join(driverDir, driver))

testFlexVolume(driverInstallAs, cs, config, f)

@@ -245,22 +258,4 @@ var _ = utils.SIGDescribe("Flexvolumes [Disruptive]", func() {
By(fmt.Sprintf("uninstalling flexvolume %s from master", driverInstallAs))
uninstallFlex(cs, nil, "k8s", driverInstallAs)
})

It("should install plugin without kubelet restart", func() {
driver := "dummy"
driverInstallAs := driver + "-" + suffix

By(fmt.Sprintf("installing flexvolume %s on node %s as %s", path.Join(driverDir, driver), node.Name, driverInstallAs))
installFlex(cs, &node, "k8s", driverInstallAs, path.Join(driverDir, driver), false /* restart */)

testFlexVolume(driverInstallAs, cs, config, f)

By("waiting for flex client pod to terminate")
if err := f.WaitForPodTerminated(config.Prefix+"-client", ""); !apierrs.IsNotFound(err) {
framework.ExpectNoError(err, "Failed to wait client pod terminated: %v", err)
}

By(fmt.Sprintf("uninstalling flexvolume %s from node %s", driverInstallAs, node.Name))
uninstallFlex(cs, &node, "k8s", driverInstallAs)
})
})
@@ -84,13 +84,15 @@ var _ = utils.SIGDescribe("NFSPersistentVolumes[Disruptive][Flaky]", func() {
StorageClassName: &emptyStorageClass,
}
// Get the first ready node IP that is not hosting the NFS pod.
var err error
if clientNodeIP == "" {
framework.Logf("Designating test node")
nodes := framework.GetReadySchedulableNodesOrDie(c)
for _, node := range nodes.Items {
if node.Name != nfsServerPod.Spec.NodeName {
clientNode = &node
clientNodeIP = framework.GetNodeExternalIP(clientNode)
clientNodeIP, err = framework.GetNodeExternalIP(clientNode)
framework.ExpectNoError(err)
break
}
}

0 comments on commit 92b8111

Please sign in to comment.
You can’t perform that action at this time.