Skip to content

Commit

Permalink
Add TFJob test to the Kfctl test; refactor workflows to start to use (#…
Browse files Browse the repository at this point in the history
…1333)

* Start adding component tests to the new kfctl workflow.

This PR adds the simple TFJob test to that workflow.

To support that we start refactoring workflows.libsonnet to make it easy to define an Argo Dag that can be nested in other workflows so we can reuse subgraphs across tests.

Related to #1325 Reusability of subgraphs in our tests.

* * Give the variables better names and add comments.
* autoformat_jsonnet should execute jsonnet format from the root of the
  git repo since it's using paths relative to the root.

* Bump the master version because 1.9.7-gke.3 is no longer supported for the master.
  • Loading branch information
jlewi authored and k8s-ci-robot committed Aug 10, 2018
1 parent 9eeb9e6 commit eababf8
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 9 deletions.
1 change: 1 addition & 0 deletions prow_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ workflows:
include_dirs:
- scripts/*
- kubeflow/*
- testing/*
params:
platform: gke
# Run tests on minikube
Expand Down
6 changes: 6 additions & 0 deletions scripts/autoformat_jsonnet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ else
fmt_files=($(git diff --name-only ${repo_name}/master -- '*.libsonnet' '*.jsonnet'))
fi

# Need to execute from root because git will return full paths.
ROOT=$(git rev-parse --show-toplevel)
pushd .
cd ${ROOT}

# 2 spaces vertical indentation
# Use double quotes for strings
# Use // for comments
Expand All @@ -85,3 +90,4 @@ do
echo "Autoformatted $f"
done

popd
2 changes: 1 addition & 1 deletion scripts/gke/deployment_manager_configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ resources:
zone: {{ properties['zone'] }}
cluster:
name: {{ CLUSTER_NAME }}
initialClusterVersion: 1.9.7-gke.3
initialClusterVersion: 1.9.7-gke.5
# We need 1.10.2 to support Stackdriver on GKE.
# loggingService: none
# monitoringService: none
Expand Down
24 changes: 19 additions & 5 deletions testing/workflows/components/kfctl_test.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ local srcDir = srcRootDir + "/kubeflow/kubeflow";
local runPath = srcDir + "/testing/workflows/run.sh";
local kfCtlPath = srcDir + "/scripts/kfctl.sh";

local kubeConfig = testDir + "/kfctl_test/.kube/kubeconfig";

// Name for the Kubeflow app.
// This needs to be unique for each test run because it is
// used to name GCP resources
// We take the suffix of the name because it should provide some random salt.
local appName = "kctl-" + std.substr(name, std.length(name) - 4, 4);
local appName = "kfctl-" + std.substr(name, std.length(name) - 4, 4);

// Directory containing the app. This is the directory
// we execute kfctl commands from
Expand All @@ -58,7 +60,7 @@ local project = "kubeflow-ci";
// step_name: Name for the template
// command: List to pass as the container command.
// We use separate kubeConfig files for separate clusters
local buildTemplate(step_name, command, working_dir=null, env_vars=[], sidecars=[], kubeConfig="config") = {
local buildTemplate(step_name, command, working_dir=null, env_vars=[], sidecars=[]) = {
name: step_name,
activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template
workingDir: working_dir,
Expand Down Expand Up @@ -92,7 +94,7 @@ local buildTemplate(step_name, command, working_dir=null, env_vars=[], sidecars=
// This way we can configure it on a single step and reuse it on subsequent steps.
// The directory should be unique for each workflow so that multiple workflows don't collide.
name: "KUBECONFIG",
value: testDir + "/kfctl_test/.kube/" + kubeConfig,
value: kubeConfig,
},
] + prowEnv + env_vars,
volumeMounts: [
Expand All @@ -113,7 +115,14 @@ local buildTemplate(step_name, command, working_dir=null, env_vars=[], sidecars=
sidecars: sidecars,
}; // buildTemplate

// Create a list of dictionary.
// Instantiate the reusable component-test sub-DAG (util.kfTests from
// workflows.libsonnet), overriding its parameters for this test run.
// Its argoDagTemplate is added as a step below and its argoTaskTemplates
// are appended to this workflow's template list.
local componentTests = util.kfTests {
name: "gke-tests",
platform: "gke",
testDir: testDir,
kubeConfig: kubeConfig,
};

// Create a list of dictionaries.
// Each item is a dictionary describing one step in the graph.
local dagTemplates = [
{
Expand Down Expand Up @@ -207,6 +216,11 @@ local dagTemplates = [
),
dependencies: ["kfctl-generate-k8s"],
},
// Run the nested tests.
{
template: componentTests.argoDagTemplate,
dependencies: ["kfctl-apply-k8s"],
},
];

// Each item is a dictionary describing one step in the graph
Expand Down Expand Up @@ -274,7 +288,7 @@ local exitDag = {
local stepTemplates = std.map(function(i) i.template
, dagTemplates) +
std.map(function(i) i.template
, exitTemplates);
, exitTemplates) + componentTests.argoTaskTemplates;


// Add a task to a dag.
Expand Down
204 changes: 203 additions & 1 deletion testing/workflows/components/workflows.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,209 @@
)
else [],

// kfTests defines an Argo DAG for running job tests to validate a Kubeflow deployment.
//
// The dag is intended to be reused as a sub workflow by other workflows.
// It is structured to allow late binding to be used to override values.
//
// Usage is as follows
//
// Define a variable and overwrite name and platform.
//
// local util = import "workflows.libsonnet";
// local tests = util.kfTests + {
// name: "gke-tests",
// platform: "gke-latest"
// }
//
// Tests contains the following variables which can be added to your argo workflow
// argoTemplates - This is a list of Argo templates. It includes an Argo template for a Dag representing the set of steps to run
// as well as the templates for the individual tasks in the dag.
// name - This is the name of the Dag template.
//
// So to add a nested workflow to your Argo graph
//
// 1. In your Argo Dag add a step that uses template tests.name
// 2. In your Argo Workflow add argoTemplates as templates.
//
// TODO(jlewi): We need to add the remaining test steps in the e2e workflow and then reuse kfTests in it.
// kfTests: a reusable Argo sub-DAG that runs component tests (currently a
// single TFJob test) against an existing Kubeflow deployment. Consumers
// override name/platform/testDir/kubeConfig and splice argoDagTemplate and
// argoTaskTemplates into their own workflow (see header comment above).
kfTests:: {
// name and platform should be given unique values.
name: "somename",
platform: "gke",

// In order to refer to objects between the current and outer-most object, we use a variable to create a name for that level:
local tests = self,

// The name for the workspace to run the steps in
stepsNamespace: "kubeflow",
// mountPath is the directory where the volume to store the test data
// should be mounted.
mountPath: "/mnt/" + "test-data-volume",

// testDir is the root directory for all data for a particular test run.
testDir: self.mountPath + "/" + self.name,
// outputDir is the directory to sync to GCS to contain the output for this job.
outputDir: self.testDir + "/output",
artifactsDir: self.outputDir + "/artifacts",
// Source directory where all repos should be checked out
srcRootDir: self.testDir + "/src",
// The directory containing the kubeflow/kubeflow repo
srcDir: self.srcRootDir + "/kubeflow/kubeflow",
image: "gcr.io/kubeflow-ci/test-worker:latest",


// value of KUBECONFIG environment variable. This should be a full path.
kubeConfig: self.testDir + "/.kube/kubeconfig",

// The name of the NFS volume claim to use for test files.
nfsVolumeClaim: "nfs-external",
// The name to use for the volume to use to contain test data.
dataVolume: "kubeflow-test-volume",
kubeflowPy: self.srcDir,
// The directory within the kubeflow_testing submodule containing
// py scripts to use.
kubeflowTestingPy: self.srcRootDir + "/kubeflow/testing/py",
tfOperatorRoot: self.srcRootDir + "/kubeflow/tf-operator",
tfOperatorPy: self.tfOperatorRoot,

// Build an Argo template to execute a particular command.
// step_name: Name for the template
// command: List to pass as the container command.
// We use separate kubeConfig files for separate clusters
buildTemplate: {
// These variables should be overwritten for every test.
// They are hidden because they shouldn't be included in the Argo template
name: "",
// NOTE(review): default is a string but the tasks below supply a list —
// presumably callers must always override this; confirm.
command:: "",
env_vars:: [],
// NOTE(review): unlike command/env_vars this field is not hidden ('::'),
// is not referenced by argoTemplate below, and is named differently from
// the 'sidecars' parameter used in kfctl_test.jsonnet — confirm intent.
side_cars: [],


activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template

local template = self,

// Actual template for Argo
argoTemplate: {
name: template.name,
container: {
command: template.command,
name: template.name,
image: tests.image,
imagePullPolicy: "Always",
env: [
{
// Add the source directories to the python path.
name: "PYTHONPATH",
value: tests.kubeflowPy + ":" + tests.kubeflowTestingPy + ":" + tests.tfOperatorPy,
},
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json",
},
{
name: "GITHUB_TOKEN",
valueFrom: {
secretKeyRef: {
name: "github-token",
key: "github_token",
},
},
},
{
// We use a directory in our NFS share to store our kube config.
// This way we can configure it on a single step and reuse it on subsequent steps.
name: "KUBECONFIG",
value: tests.kubeConfig,
},
] + template.env_vars,
volumeMounts: [
{
name: tests.dataVolume,
mountPath: tests.mountPath,
},
{
name: "github-token",
mountPath: "/secret/github-token",
},
{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
},
],
},
},
}, // buildTemplate

// tasks is a list of dictionaries from which we generate:
//
// 1. An Argo Dag
// 2. A list of Argo templates for each task in the Dag.
//
// This list is intended to be a "private" variable and not to be consumed externally
// by the workflows that are trying to nest this dag.
//
// This variable reduces the boilerplate of writing Argo Dags.
// We use tasks to construct argoTaskTemplates and argoDagTemplate
// below.
//
// In Argo we construct a Dag as follows
// 1. We define a Dag template (see argoDagTemplate below). A dag
// is a list of tasks which are triplets (name, template, dependencies)
// 2. A list of templates (argoTaskTemplates) which define the work to be
// done for each task in the Dag (e.g. run a container, run a dag etc...)
//
// argoDagTemplate is constructed by iterating over tasks and inserting tasks
// for each item. We use the same name as the template for the task.
//
// argoTaskTemplates is constructing from tasks as well.
tasks:: [
{
local v1alpha2Suffix = "-v1a2",
template: tests.buildTemplate {
name: "tfjob-test",
command: [
"python",
"-m",
"py.test_runner",
"test",
"--app_dir=" + tests.tfOperatorRoot + "/test/workflows",
"--tfjob_version=v1alpha2",
"--component=simple_tfjob_v1alpha2",
// Name is used for the test case name so it should be unique across
// all E2E tests.
"--params=name=simple-tfjob-" + tests.platform + ",namespace=" + tests.stepsNamespace,
"--junit_path=" + tests.artifactsDir + "/junit_e2e-" + tests.platform + v1alpha2Suffix + ".xml",
],
}, // run tests
// null dependencies means this task starts as soon as the Dag runs.
dependencies: null,
},
],

// An Argo template for the dag.
argoDagTemplate: {
name: tests.name,
dag: {
// Construct tasks from the templates
// we will give the steps the same name as the template
tasks: std.map(function(i) {
name: i.template.name,
template: i.template.name,
dependencies: i.dependencies,
}, tests.tasks),
},
},

// A list of templates for tasks
// doesn't include the argoDagTemplate
argoTaskTemplates: std.map(function(i) i.template.argoTemplate
, self.tasks),


argoTemplates: [self.argoDagTemplate] + self.argoTaskTemplates,
}, // kfTests

parts(namespace, name):: {
// Workflow to run the e2e test.
e2e(prow_env, bucket, platform="minikube"):
Expand All @@ -35,7 +238,6 @@
local srcDir = srcRootDir + "/kubeflow/kubeflow";
local bootstrapDir = srcDir + "/bootstrap";
local image = "gcr.io/kubeflow-ci/test-worker:latest";
local testing_image = "gcr.io/kubeflow-ci/kubeflow-testing";
local bootstrapperImage = "gcr.io/kubeflow-ci/bootstrapper:" + name;
// The last 4 digits of the name should be a unique id.
local deploymentName = "e2e-" + std.substr(name, std.length(name) - 4, 4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ params {
},
kfctl_test+: {
namespace: "kubeflow-test-infra",
name: "jlewi-kfctl-test-1308-0806-150201",
prow_env: "JOB_NAME=kfctl-test,JOB_TYPE=presubmit,REPO_NAME=kubeflow,REPO_OWNER=kubeflow,BUILD_NUMBER=0806-150201,PULL_NUMBER=1308",
name: "jlewi-kfctl-test-1308-0808-122152",
prow_env: "JOB_NAME=kfctl-test,JOB_TYPE=presubmit,REPO_NAME=kubeflow,REPO_OWNER=kubeflow,BUILD_NUMBER=0808-122152,PULL_NUMBER=1308",
},
},
}

0 comments on commit eababf8

Please sign in to comment.