forked from kubernetes/kubernetes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
remote.go
226 lines (199 loc) · 8.61 KB
/
remote.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package remote
import (
"flag"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/klog"
)
var testTimeoutSeconds = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.")
var resultsDir = flag.String("results-dir", "/tmp/", "Directory to scp test results to.")
const archiveName = "e2e_node_test.tar.gz"
func CreateTestArchive(suite TestSuite, systemSpecName string) (string, error) {
klog.V(2).Infof("Building archive...")
tardir, err := ioutil.TempDir("", "node-e2e-archive")
if err != nil {
return "", fmt.Errorf("failed to create temporary directory %v.", err)
}
defer os.RemoveAll(tardir)
// Call the suite function to setup the test package.
err = suite.SetupTestPackage(tardir, systemSpecName)
if err != nil {
return "", fmt.Errorf("failed to setup test package %q: %v", tardir, err)
}
// Build the tar
out, err := exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput()
if err != nil {
return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out)
}
dir, err := os.Getwd()
if err != nil {
return "", fmt.Errorf("failed to get working directory %v.", err)
}
return filepath.Join(dir, archiveName), nil
}
// Returns the command output, whether the exit was ok, and any errors
// TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name.
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string) (string, bool, error) {
// Create the temp staging directory
klog.V(2).Infof("Staging test binaries on %q", host)
workspace := newWorkspaceDir()
// Do not sudo here, so that we can use scp to copy test archive to the directdory.
if output, err := SSHNoSudo(host, "mkdir", workspace); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to create workspace directory %q on host %q: %v output: %q", workspace, host, err, output)
}
if cleanup {
defer func() {
output, err := SSH(host, "rm", "-rf", workspace)
if err != nil {
klog.Errorf("failed to cleanup workspace %q on host %q: %v. Output:\n%s", workspace, host, err, output)
}
}()
}
// Copy the archive to the staging directory
if output, err := runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), workspace)); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to copy test archive: %v, output: %q", err, output)
}
// Extract the archive
cmd := getSSHCommand(" && ",
fmt.Sprintf("cd %s", workspace),
fmt.Sprintf("tar -xzvf ./%s", archiveName),
)
klog.V(2).Infof("Extracting tar on %q", host)
// Do not use sudo here, because `sudo tar -x` will recover the file ownership inside the tar ball, but
// we want the extracted files to be owned by the current user.
if output, err := SSHNoSudo(host, "sh", "-c", cmd); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to extract test archive: %v, output: %q", err, output)
}
// Create the test result directory.
resultDir := filepath.Join(workspace, "results")
if output, err := SSHNoSudo(host, "mkdir", resultDir); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to create test result directory %q on host %q: %v output: %q", resultDir, host, err, output)
}
klog.V(2).Infof("Running test on %q", host)
output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, *testTimeoutSeconds)
aggErrs := []error{}
// Do not log the output here, let the caller deal with the test output.
if err != nil {
aggErrs = append(aggErrs, err)
collectSystemLog(host)
}
klog.V(2).Infof("Copying test artifacts from %q", host)
scpErr := getTestArtifacts(host, workspace)
if scpErr != nil {
aggErrs = append(aggErrs, scpErr)
}
return output, len(aggErrs) == 0, utilerrors.NewAggregate(aggErrs)
}
const (
// workspaceDirPrefix is the string prefix used in the workspace directory name.
workspaceDirPrefix = "node-e2e-"
// timestampFormat is the timestamp format used in the node e2e directory name.
timestampFormat = "20060102T150405"
)
func getTimestamp() string {
return fmt.Sprintf(time.Now().Format(timestampFormat))
}
func newWorkspaceDir() string {
return filepath.Join("/tmp", workspaceDirPrefix+getTimestamp())
}
// Parses the workspace directory name and gets the timestamp part of it.
// This can later be used to name other artifacts (such as the
// kubelet-${instance}.service systemd transient service used to launch
// Kubelet) so that they can be matched to each other.
func GetTimestampFromWorkspaceDir(dir string) string {
dirTimestamp := strings.TrimPrefix(filepath.Base(dir), workspaceDirPrefix)
re := regexp.MustCompile("^\\d{8}T\\d{6}$")
if re.MatchString(dirTimestamp) {
return dirTimestamp
}
// Fallback: if we can't find that timestamp, default to using Now()
return getTimestamp()
}
func getTestArtifacts(host, testDir string) error {
logPath := filepath.Join(*resultsDir, host)
if err := os.MkdirAll(logPath, 0755); err != nil {
return fmt.Errorf("failed to create log directory %q: %v", logPath, err)
}
// Copy logs to artifacts/hostname
if _, err := runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.log", GetHostnameOrIp(host), testDir), logPath); err != nil {
return err
}
// Copy json files (if any) to artifacts.
if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
if _, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIp(host), testDir), *resultsDir); err != nil {
return err
}
}
if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/junit*", testDir)); err == nil {
// Copy junit (if any) to the top of artifacts
if _, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), *resultsDir); err != nil {
return err
}
}
return nil
}
// collectSystemLog is a temporary hack to collect system log when encountered on
// unexpected error.
func collectSystemLog(host string) {
// Encountered an unexpected error. The remote test harness may not
// have finished retrieved and stored all the logs in this case. Try
// to get some logs for debugging purposes.
// TODO: This is a best-effort, temporary hack that only works for
// journald nodes. We should have a more robust way to collect logs.
var (
logName = "system.log"
logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName)
destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName)
)
klog.V(2).Infof("Test failed unexpectedly. Attempting to retrieving system logs (only works for nodes with journald)")
// Try getting the system logs from journald and store it to a file.
// Don't reuse the original test directory on the remote host because
// it could've be been removed if the node was rebooted.
if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil {
klog.V(2).Infof("Got the system logs from journald; copying it back...")
if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil {
klog.V(2).Infof("Failed to copy the log: err: %v, output: %q", err, output)
}
} else {
klog.V(2).Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output)
}
}
// WriteLog is a temporary function to make it possible to write log
// in the runner. This is used to collect serial console log.
// TODO(random-liu): Use the log-dump script in cluster e2e.
func WriteLog(host, filename, content string) error {
logPath := filepath.Join(*resultsDir, host)
if err := os.MkdirAll(logPath, 0755); err != nil {
return fmt.Errorf("failed to create log directory %q: %v", logPath, err)
}
f, err := os.Create(filepath.Join(logPath, filename))
if err != nil {
return err
}
defer f.Close()
_, err = f.WriteString(content)
return err
}