From 1e77594958e346e26e4d29fa0d018729fdce1d86 Mon Sep 17 00:00:00 2001 From: Vishnu kannan Date: Fri, 28 Apr 2017 17:48:36 -0700 Subject: [PATCH 1/3] Adding an installer script that installs Nvidia drivers in Container Optimized OS Packaged the script as a docker container stored in gcr.io/google-containers A daemonset deployment is included to make it easy to consume the installer A cluster e2e has been added to test the installation daemonset along with verifying installation by using a sample CUDA application. Node e2e for GPUs updated to avoid running on nodes without GPU devices. Signed-off-by: Vishnu kannan --- cluster/gce/BUILD | 5 +- cluster/gce/config-default.sh | 2 + cluster/gce/gci/configure-helper.sh | 1 + cluster/gce/gci/nvidia-gpus/BUILD | 24 ++ cluster/gce/gci/nvidia-gpus/Dockerfile | 28 +++ cluster/gce/gci/nvidia-gpus/Makefile | 27 +++ .../nvidia-gpus/cos-installer-daemonset.yaml | 57 +++++ cluster/gce/gci/nvidia-gpus/installer.sh | 207 ++++++++++++++++++ hack/generate-bindata.sh | 3 +- test/e2e/BUILD | 1 + test/e2e/generated/BUILD | 1 + test/e2e/nvidia-gpus.go | 178 +++++++++++++++ test/e2e_node/gpus.go | 53 ++++- test/e2e_node/jenkins/gci-init-gpu.yaml | 19 ++ .../e2e_node/jenkins/image-config-serial.yaml | 7 +- test/e2e_node/runner/remote/run_remote.go | 1 + test/images/nvidia-cuda/Dockerfile | 24 ++ test/images/nvidia-cuda/Makefile | 28 +++ test/images/nvidia-cuda/README.md | 13 ++ 19 files changed, 665 insertions(+), 14 deletions(-) create mode 100644 cluster/gce/gci/nvidia-gpus/BUILD create mode 100644 cluster/gce/gci/nvidia-gpus/Dockerfile create mode 100644 cluster/gce/gci/nvidia-gpus/Makefile create mode 100644 cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml create mode 100644 cluster/gce/gci/nvidia-gpus/installer.sh create mode 100644 test/e2e/nvidia-gpus.go create mode 100644 test/e2e_node/jenkins/gci-init-gpu.yaml create mode 100644 test/images/nvidia-cuda/Dockerfile create mode 100644 test/images/nvidia-cuda/Makefile create mode 100644 test/images/nvidia-cuda/README.md diff --git a/cluster/gce/BUILD b/cluster/gce/BUILD index ccc7be8ae25f..66079dc2fcd1 100644 --- a/cluster/gce/BUILD +++ b/cluster/gce/BUILD @@ -32,7 +32,10 @@ filegroup( filegroup( name = "all-srcs", - srcs = [":package-srcs"], + srcs = [ + ":package-srcs", + "//cluster/gce/gci/nvidia-gpus:all-srcs", + ], tags = ["automanaged"], ) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 0ca7350af18b..1bcf7b05c17b 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -68,6 +68,8 @@ fi # variable. Also please update corresponding image for node e2e at: # https://github.com/kubernetes/kubernetes/blob/master/test/e2e_node/jenkins/image-config.yaml CVM_VERSION=${CVM_VERSION:-container-vm-v20170214} +# NOTE: Update the kernel commit SHA in cluster/addons/nvidia-gpus/cos-installer-daemonset.yaml +# while updating the COS version here. 
GCI_VERSION=${KUBE_GCI_VERSION:-gci-stable-56-9000-84-2} MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-} MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index 3c60b7b4961e..9c0099ed9ca1 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -1605,4 +1605,5 @@ else fi reset-motd prepare-mounter-rootfs +modprobe configs echo "Done for the configuration for kubernetes" diff --git a/cluster/gce/gci/nvidia-gpus/BUILD b/cluster/gce/gci/nvidia-gpus/BUILD new file mode 100644 index 000000000000..0f8fa04948e2 --- /dev/null +++ b/cluster/gce/gci/nvidia-gpus/BUILD @@ -0,0 +1,24 @@ +package(default_visibility = ["//visibility:public"]) + +load("@io_bazel//tools/build_defs/pkg:pkg.bzl", "pkg_tar") +load("@io_kubernetes_build//defs:build.bzl", "release_filegroup") + +filegroup( + name = "sources", + srcs = glob([ + "**/*", + ]), +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], +) diff --git a/cluster/gce/gci/nvidia-gpus/Dockerfile b/cluster/gce/gci/nvidia-gpus/Dockerfile new file mode 100644 index 000000000000..9cec8ab365d8 --- /dev/null +++ b/cluster/gce/gci/nvidia-gpus/Dockerfile @@ -0,0 +1,28 @@ +# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM ubuntu:16.04 + +# Disable prompts from apt +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get -qq update +RUN apt-get install -qq pciutils gcc g++ git make dpkg-dev bc module-init-tools curl + +RUN mkdir /lakitu-kernel +RUN git clone https://chromium.googlesource.com/chromiumos/third_party/kernel /lakitu-kernel + +ADD installer.sh /usr/bin/nvidia-installer.sh +RUN chmod a+x /usr/bin/nvidia-installer.sh +CMD ["/usr/bin/nvidia-installer.sh"] \ No newline at end of file diff --git a/cluster/gce/gci/nvidia-gpus/Makefile b/cluster/gce/gci/nvidia-gpus/Makefile new file mode 100644 index 000000000000..49a0dfc2c84f --- /dev/null +++ b/cluster/gce/gci/nvidia-gpus/Makefile @@ -0,0 +1,27 @@ +# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +TAG?=v0.1 +REGISTRY?=gcr.io/google_containers +IMAGE=cos-nvidia-driver-install + +all: container + +container: + docker build --pull -t ${REGISTRY}/${IMAGE}:${TAG} . 
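+# Typical usage (a sketch; the registry and tag values are illustrative, not CI defaults):
+#   make container push REGISTRY=gcr.io/my-project TAG=v0.2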
+ +push: + gcloud docker -- push ${REGISTRY}/${IMAGE}:${TAG} + +.PHONY: all container push diff --git a/cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml b/cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml new file mode 100644 index 000000000000..a8ecbf249620 --- /dev/null +++ b/cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml @@ -0,0 +1,57 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: cos-nvidia-installer + namespace: kube-system +spec: + template: + metadata: + labels: + name: cos-nvidia-installer + # Update the version tag here and `LAKITU_KERNEL_SHA1` while using against new COS releases. + cos-version: cos-beta-59-9460-20-0 + spec: + hostNetwork: true + hostPID: true + volumes: + - name: dev + hostPath: + path: /dev + - name: nvidia-overlay + hostPath: + path: /home/kubernetes/bin/nvidia + - name: os-release + hostPath: + path: /etc/os-release + - name: sysrq + hostPath: + path: /proc/sysrq-trigger + containers: + - image: gcr.io/google_containers/cos-nvidia-driver-install@sha256:ad83ede6e0c6d768bf7cf69a7dec972aa5e8f88778142ca46afd3286ad58cfc8 + command: ["/bin/sh", "-c"] + args: ["usr/bin/nvidia-installer.sh && sleep infinity"] + env: + - name: BASE_DIR + value: "/rootfs/nvidia" + name: nvidia-driver-installer + resources: + requests: + cpu: 0.15 + securityContext: + privileged: true + env: + # The kernel SHA1 here should correspond to the GCI_VERSION specified by default under cluster/gce/config-default.sh + - name: LAKITU_KERNEL_SHA1 + value: 26481563cb3788ad254c2bf2126b843c161c7e48 + - name: BASE_DIR + value: "/rootfs/nvidia" + volumeMounts: + - name: nvidia-overlay + mountPath: /rootfs/nvidia + - name: dev + mountPath: /dev + - name: os-release + mountPath: /rootfs/etc/os-release + - name: sysrq + mountPath: /sysrq + diff --git a/cluster/gce/gci/nvidia-gpus/installer.sh b/cluster/gce/gci/nvidia-gpus/installer.sh new file mode 100644 index 000000000000..a950d426e86f --- /dev/null +++ b/cluster/gce/gci/nvidia-gpus/installer.sh @@ -0,0 +1,207 @@ +#!/bin/bash + +# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is for dynamically installing nvidia kernel drivers in Container Optimized OS + +set -o errexit +set -o pipefail +set -x + +# The script must be run as a root. +# Prerequisites: +# +# LAKITU_KERNEL_SHA1 - The env variable is expected to be set to HEAD of the kernel version used on the host. +# BASE_DIR - Directory that is mapped to a stateful partition on host. Defaults to `/rootfs/nvidia`. +# +# The script will output the following artifacts: +# ${BASE_DIR}/lib* --> Nvidia CUDA libraries +# ${BASE_DIR}/bin/* --> Nvidia debug utilities +# ${BASE_DIR}/.cache/* --> Nvidia driver artifacts cached for idempotency. 
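+#
+# How it works, in terms of the functions below: /usr and /lib are remounted as
+# writable overlayfs layers backed by ${CACHE_DIR}, kernel sources matching the
+# running COS kernel are prepared, the NVIDIA installer builds and loads the
+# driver against them, and the resulting user-space libraries and debug
+# utilities are copied to ${BASE_DIR} on the host for other containers to mount.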
+# + +BASE_DIR=${BASE_DIR:-"/rootfs/nvidia"} +CACHE_DIR="${BASE_DIR}/.cache" +USR_WORK_DIR="${CACHE_DIR}/usr-work" +USR_WRITABLE_DIR="${CACHE_DIR}/usr-writable" +LIB_WORK_DIR="${CACHE_DIR}/lib-work" +LIB_WRITABLE_DIR="${CACHE_DIR}/lib-writable" + +LIB_OUTPUT_DIR="${BASE_DIR}/lib" +BIN_OUTPUT_DIR="${BASE_DIR}/bin" + +KERNEL_SRC_DIR="/lakitu-kernel" +NVIDIA_DRIVER_DIR="/nvidia" +NVIDIA_DRIVER_VERSION="375.26" + +# Source: https://developer.nvidia.com/cuda-downloads +NVIDIA_CUDA_URL="https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run" +NVIDIA_CUDA_MD5SUM="33e1bd980e91af4e55f3ef835c103f9b" +NVIDIA_CUDA_PKG_NAME="cuda_8.0.61_375.26_linux.run" +NVIDIA_DRIVER_PKG_NAME="NVIDIA-Linux-x86_64-375.26.run" + +check_nvidia_device() { + lspci + if ! lspci | grep -i -q NVIDIA; then + echo "No NVIDIA devices attached to this instance." + exit 0 + fi + echo "Found NVIDIA device on this instance." +} + +prepare_kernel_source() { + local kernel_git_repo="https://chromium.googlesource.com/chromiumos/third_party/kernel" + local kernel_version="$(uname -r)" + local kernel_version_stripped="$(echo ${kernel_version} | sed 's/\+//')" + + # Checkout the correct tag. + echo "Downloading kernel source at tag ${kernel_version_stripped} ..." + pushd "${KERNEL_SRC_DIR}" + # TODO: Consume KERNEL SHA1 from COS image directly. + # git checkout "tags/v${kernel_version_stripped}" + git checkout ${LAKITU_KERNEL_SHA1} + + # Prepare kernel configu and source for modules. + echo "Preparing kernel sources ..." + zcat "/proc/config.gz" > ".config" + make olddefconfig + make modules_prepare + # Done. + popd +} + +download_install_nvidia() { + local pkg_name="${NVIDIA_CUDA_PKG_NAME}" + local url="${NVIDIA_CUDA_URL}" + local log_file_name="${NVIDIA_DRIVER_DIR}/nvidia-installer.log" + + mkdir -p "${NVIDIA_DRIVER_DIR}" + pushd "${NVIDIA_DRIVER_DIR}" + + echo "Downloading Nvidia CUDA package from ${url} ..." + curl -L -s "${url}" -o "${pkg_name}" + echo "${NVIDIA_CUDA_MD5SUM} ${pkg_name}" | md5sum --check + + echo "Extracting Nvidia CUDA package ..." + sh ${pkg_name} --extract="$(pwd)" + + echo "Running the Nvidia driver installer ..." + if ! sh "${NVIDIA_DRIVER_PKG_NAME}" --kernel-source-path="${KERNEL_SRC_DIR}" --silent --accept-license --keep --log-file-name="${log_file_name}"; then + echo "Nvidia installer failed, log below:" + echo "===================================" + tail -50 "${log_file_name}" + echo "===================================" + exit 1 + fi + # Create unified memory device file. + nvidia-modprobe -c0 -u + popd +} + +unlock_loadpin_and_reboot_if_needed() { + kernel_cmdline="$(cat /proc/cmdline)" + if echo "${kernel_cmdline}" | grep -q -v "lsm.module_locking=0"; then + local -r esp_partition="/dev/sda12" + local -r mount_path="/tmp/esp" + local -r grub_cfg="efi/boot/grub.cfg" + + mkdir -p "${mount_path}" + mount "${esp_partition}" "${mount_path}" + + pushd "${mount_path}" + cp "${grub_cfg}" "${grub_cfg}.orig" + sed 's/cros_efi/cros_efi lsm.module_locking=0/g' -i "efi/boot/grub.cfg" + cat "${grub_cfg}" + popd + sync + umount "${mount_path}" + # Restart the node for loadpin to be disabled. + echo b > /sysrq + fi +} + +create_uvm_device() { + # Create unified memory device file. 
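+  # nvidia-modprobe -c0 -u loads the nvidia-uvm kernel module if needed and
+  # creates the /dev/nvidia-uvm device node that CUDA applications require.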
+ nvidia-modprobe -c0 -u +} + +verify_base_image() { + mount --bind /rootfs/etc/os-release /etc/os-release + local id="$(grep "^ID=" /etc/os-release)" + if [[ "${id#*=}" != "cos" ]]; then + echo "This installer is designed to run on Container-Optimized OS only" + exit 1 + fi +} + +setup_overlay_mounts() { + mkdir -p ${USR_WRITABLE_DIR} ${USR_WORK_DIR} ${LIB_WRITABLE_DIR} ${LIB_WORK_DIR} + mount -t overlay -o lowerdir=/usr,upperdir=${USR_WRITABLE_DIR},workdir=${USR_WORK_DIR} none /usr + mount -t overlay -o lowerdir=/lib,upperdir=${LIB_WRITABLE_DIR},workdir=${LIB_WORK_DIR} none /lib +} + +exit_if_install_not_needed() { + if nvidia-smi; then + echo "nvidia drivers already installed. Skipping installation" + post_installation_sequence + exit 0 + fi +} + +restart_kubelet() { + echo "Sending SIGTERM to kubelet" + pkill -SIGTERM kubelet +} + +# Copy user space libraries and debug utilities to a special output directory on the host. +# Make these artifacts world readable and executable. +copy_files_to_host() { + mkdir -p ${LIB_OUTPUT_DIR} ${BIN_OUTPUT_DIR} + cp -r ${USR_WRITABLE_DIR}/lib/x86_64-linux-gnu/* ${LIB_OUTPUT_DIR}/ + cp -r ${USR_WRITABLE_DIR}/bin/* ${BIN_OUTPUT_DIR}/ + chmod -R a+rx ${LIB_OUTPUT_DIR} + chmod -R a+rx ${BIN_OUTPUT_DIR} +} + +post_installation_sequence() { + create_uvm_device + # Copy nvidia user space libraries and debug tools to the host for use from other containers. + copy_files_to_host + # Restart the kubelet for it to pick up the GPU devices. + restart_kubelet +} + +main() { + # Do not run the installer unless the base image is Container Optimized OS (COS) + verify_base_image + # Do not run the installer unless a Nvidia device is found on the PCI bus + check_nvidia_device + # Setup overlay mounts to capture nvidia driver artificats in a more permanent storage on the host. + setup_overlay_mounts + # Disable a critical security feature in COS that will allow for dynamically loading Nvidia drivers + unlock_loadpin_and_reboot_if_needed + # Exit if installation is not required (for idempotency) + exit_if_install_not_needed + # Checkout kernel sources appropriate for the base image. + prepare_kernel_source + # Download, compile and install nvidia drivers. + download_install_nvidia + # Verify that the Nvidia drivers have been successfully installed. + nvidia-smi + # Perform post installation steps - copying artifacts, restarting kubelet, etc. + post_installation_sequence +} + +main "$@" diff --git a/hack/generate-bindata.sh b/hack/generate-bindata.sh index 4c843c5cdc83..99f3ec5eb261 100755 --- a/hack/generate-bindata.sh +++ b/hack/generate-bindata.sh @@ -48,7 +48,8 @@ go-bindata -nometadata -o "${BINDATA_OUTPUT}.tmp" -pkg generated \ "examples/..." \ "test/e2e/testing-manifests/..." \ "test/images/..." \ - "test/fixtures/..." + "test/fixtures/..." \ + "cluster/gce/gci/nvidia-gpus/..." 
gofmt -s -w "${BINDATA_OUTPUT}.tmp" diff --git a/test/e2e/BUILD b/test/e2e/BUILD index bb67b533f2ad..ec8df8278d8c 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -86,6 +86,7 @@ go_library( "networking.go", "networking_perf.go", "nodeoutofdisk.go", + "nvidia-gpus.go", "pod_gc.go", "podpreset.go", "pods.go", diff --git a/test/e2e/generated/BUILD b/test/e2e/generated/BUILD index dc3bd3861419..c8acf6747eb3 100644 --- a/test/e2e/generated/BUILD +++ b/test/e2e/generated/BUILD @@ -23,6 +23,7 @@ genrule( name = "bindata", srcs = [ "//examples:sources", + "//cluster/gce/gci/nvidia-gpus:sources", "//test/images:sources", "//test/fixtures:sources", "//test/e2e/testing-manifests:sources", diff --git a/test/e2e/nvidia-gpus.go b/test/e2e/nvidia-gpus.go new file mode 100644 index 000000000000..93b3a62fa881 --- /dev/null +++ b/test/e2e/nvidia-gpus.go @@ -0,0 +1,178 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "strings" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/uuid" + utilyaml "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/v1" + extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/generated" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +const ( + testPodNamePrefix = "nvidia-gpu-" + testCUDAImage = "gcr.io/google_containers/cuda-vector-add:v0.1" + cosOSImage = "Container-Optimized OS from Google" + // Nvidia driver installation can take upwards of 5 minutes. + driverInstallTimeout = 10 * time.Minute + // Nvidia COS driver installer daemonset. 
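+	// The manifest is compiled into the test bindata (see hack/generate-bindata.sh)
+	// and read back via generated.ReadOrDie, so the path is relative to the
+	// repository root rather than the test binary's working directory.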
+ cosNvidiaDriverInstallerPath = "cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml" +) + +func makeCudaAdditionTestPod() *v1.Pod { + podName := testPodNamePrefix + string(uuid.NewUUID()) + testPod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + { + Name: "vector-addition", + Image: testCUDAImage, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI), + }, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "nvidia-libraries", + MountPath: "/usr/local/nvidia/lib64", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: "nvidia-libraries", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/home/kubernetes/bin/nvidia/lib", + }, + }, + }, + }, + }, + } + return testPod +} + +func isClusterRunningCOS(f *framework.Framework) bool { + nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "getting node list") + for _, node := range nodeList.Items { + if !strings.Contains(node.Status.NodeInfo.OSImage, cosOSImage) { + return false + } + } + return true +} + +func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool { + framework.Logf("Getting list of Nodes from API server") + nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "getting node list") + for _, node := range nodeList.Items { + if node.Spec.Unschedulable { + continue + } + if node.Status.Capacity.NvidiaGPU().Value() == 0 { + framework.Logf("Nvidia GPUs not available on Node: %q", node.Name) + return false + } + } + framework.Logf("Nvidia GPUs exist on all schedulable nodes") + return true +} + +func getGPUsAvailable(f *framework.Framework) int64 { + nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "getting node list") + var gpusAvailable int64 + for _, node := range nodeList.Items { + gpusAvailable += node.Status.Capacity.NvidiaGPU().Value() + } + return gpusAvailable +} + +func testNvidiaGPUsOnCOS(f *framework.Framework) { + // Skip the test if the base image is not COS. + // TODO: Add support for other base images. + // CUDA apps require host mounts which is not portable across base images (yet). + framework.Logf("Checking base image") + if !isClusterRunningCOS(f) { + Skip("Nvidia GPU tests are supproted only on Container Optimized OS image currently") + } + framework.Logf("Cluster is running on COS. Proceeding with test") + // GPU drivers might have already been installed. + if !areGPUsAvailableOnAllSchedulableNodes(f) { + // Install Nvidia Drivers. + ds := dsFromManifest(cosNvidiaDriverInstallerPath) + ds.Namespace = f.Namespace.Name + _, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds) + framework.ExpectNoError(err, "failed to create daemonset") + framework.Logf("Successfully created daemonset to install Nvidia drivers. 
Waiting for drivers to be installed and GPUs to be available in Node Capacity...") + // Wait for Nvidia GPUs to be available on nodes + Eventually(func() bool { + return areGPUsAvailableOnAllSchedulableNodes(f) + }, driverInstallTimeout, time.Second).Should(BeTrue()) + } + framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app") + podList := []*v1.Pod{} + for i := int64(0); i < getGPUsAvailable(f); i++ { + podList = append(podList, f.PodClient().Create(makeCudaAdditionTestPod())) + } + framework.Logf("Wait for all test pods to succeed") + // Wait for all pods to succeed + for _, po := range podList { + f.PodClient().WaitForSuccess(po.Name, 5*time.Minute) + } +} + +// dsFromManifest reads a .json/yaml file and returns the daemonset in it. +func dsFromManifest(fileName string) *extensions.DaemonSet { + var controller extensions.DaemonSet + framework.Logf("Parsing ds from %v", fileName) + data := generated.ReadOrDie(fileName) + + json, err := utilyaml.ToJSON(data) + Expect(err).NotTo(HaveOccurred()) + + Expect(runtime.DecodeInto(api.Codecs.UniversalDecoder(), json, &controller)).NotTo(HaveOccurred()) + return &controller +} + +var _ = framework.KubeDescribe("[Feature:GPU]", func() { + f := framework.NewDefaultFramework("gpus") + It("run Nvidia GPU tests on Container Optimized OS only", func() { + testNvidiaGPUsOnCOS(f) + }) +}) diff --git a/test/e2e_node/gpus.go b/test/e2e_node/gpus.go index 26d943da4cc4..4baa75f34d55 100644 --- a/test/e2e_node/gpus.go +++ b/test/e2e_node/gpus.go @@ -18,6 +18,7 @@ package e2e_node import ( "fmt" + "os/exec" "time" "k8s.io/apimachinery/pkg/api/resource" @@ -33,11 +34,49 @@ import ( const acceleratorsFeatureGate = "Accelerators=true" +func getGPUsAvailable(f *framework.Framework) int64 { + nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "getting node list") + var gpusAvailable int64 + for _, node := range nodeList.Items { + gpusAvailable += node.Status.Capacity.NvidiaGPU().Value() + } + return gpusAvailable +} + +func gpusExistOnAllNodes(f *framework.Framework) bool { + nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "getting node list") + for _, node := range nodeList.Items { + if node.Name == "kubernetes-master" { + continue + } + if node.Status.Capacity.NvidiaGPU().Value() == 0 { + return false + } + } + return true +} + +func checkIfNvidiaGPUsExistOnNode() bool { + // Cannot use `lspci` because it is not installed on all distros by default. + err := exec.Command("/bin/sh", "-c", "find /sys/devices/pci* -type f | grep vendor | xargs cat | grep 0x10de").Run() + if err != nil { + framework.Logf("check for nvidia GPUs failed. Got Error: %v", err) + return false + } + return true +} + // Serial because the test updates kubelet configuration. var _ = framework.KubeDescribe("GPU [Serial]", func() { f := framework.NewDefaultFramework("gpu-test") Context("attempt to use GPUs if available", func() { It("setup the node and create pods to test gpus", func() { + By("ensuring that Nvidia GPUs exist on the node") + if !checkIfNvidiaGPUsExistOnNode() { + Skip("Nvidia GPUs do not exist on the node. 
Skipping test.") + } By("ensuring that dynamic kubelet configuration is enabled") enabled, err := isKubeletConfigEnabled(f) framework.ExpectNoError(err) @@ -65,19 +104,11 @@ var _ = framework.KubeDescribe("GPU [Serial]", func() { } framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) - By("Getting the local node object from the api server") - nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{}) - framework.ExpectNoError(err, "getting node list") - Expect(len(nodeList.Items)).To(Equal(1)) - node := nodeList.Items[0] - gpusAvailable := node.Status.Capacity.NvidiaGPU() - By("Skipping the test if GPUs aren't available") - if gpusAvailable.IsZero() { - Skip("No GPUs available on local node. Skipping test.") - } + By("Waiting for GPUs to become available on the local node") + Eventually(gpusExistOnAllNodes(f), 10*time.Minute, time.Second).Should(BeTrue()) By("Creating a pod that will consume all GPUs") - podSuccess := makePod(gpusAvailable.Value(), "gpus-success") + podSuccess := makePod(getGPUsAvailable(f), "gpus-success") podSuccess = f.PodClient().CreateSync(podSuccess) By("Checking the containers in the pod had restarted at-least twice successfully thereby ensuring GPUs are reused") diff --git a/test/e2e_node/jenkins/gci-init-gpu.yaml b/test/e2e_node/jenkins/gci-init-gpu.yaml new file mode 100644 index 000000000000..3119e8b6c41a --- /dev/null +++ b/test/e2e_node/jenkins/gci-init-gpu.yaml @@ -0,0 +1,19 @@ +#cloud-config + +runcmd: + - mount /tmp /tmp -o remount,exec,suid + - usermod -a -G docker jenkins + - mkdir -p /var/lib/kubelet + - mkdir -p /home/kubernetes/containerized_mounter/rootfs + - mount --bind /home/kubernetes/containerized_mounter/ /home/kubernetes/containerized_mounter/ + - mount -o remount, exec /home/kubernetes/containerized_mounter/ + - wget https://storage.googleapis.com/kubernetes-release/gci-mounter/mounter.tar -O /tmp/mounter.tar + - tar xvf /tmp/mounter.tar -C /home/kubernetes/containerized_mounter/rootfs + - mkdir -p /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet + - mount --rbind /var/lib/kubelet /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet + - mount --make-rshared /home/kubernetes/containerized_mounter/rootfs/var/lib/kubelet + - mount --bind /proc /home/kubernetes/containerized_mounter/rootfs/proc + - mount --bind /dev /home/kubernetes/containerized_mounter/rootfs/dev + - rm /tmp/mounter.tar + - modprobe configs + - docker run -v /dev:/dev -v /home/kubernetes/bin/nvidia:/rootfs/nvidia -v /etc/os-release:/rootfs/etc/os-release -v /proc/sysrq-trigger:/sysrq -e LAKITU_KERNEL_SHA1=2fdf6034a0fae9794d80e4d218e237771224ba8f -e BASE_DIR=/rootfs/nvidia --privileged gcr.io/google_containers/cos-nvidia-driver-install@sha256:ad83ede6e0c6d768bf7cf69a7dec972aa5e8f88778142ca46afd3286ad58cfc8 diff --git a/test/e2e_node/jenkins/image-config-serial.yaml b/test/e2e_node/jenkins/image-config-serial.yaml index 30baceb13e4d..71ccbcfd8275 100644 --- a/test/e2e_node/jenkins/image-config-serial.yaml +++ b/test/e2e_node/jenkins/image-config-serial.yaml @@ -25,4 +25,9 @@ images: gci: image_regex: gci-stable-56-9000-84-2 # docker 1.11.2 project: google-containers - metadata: "user-data Date: Sat, 20 May 2017 05:23:39 -0700 Subject: [PATCH 2/3] Update COS version to m59 Signed-off-by: Vishnu kannan --- cluster/gce/config-default.sh | 2 +- cluster/gce/config-test.sh | 2 +- cluster/gce/util.sh | 39 ++++++++++++------- cluster/kubemark/gce/config-default.sh | 2 +- .../jenkins/benchmark/benchmark-config.yaml | 6 +-- 
.../e2e_node/jenkins/image-config-serial.yaml | 4 +- test/e2e_node/jenkins/image-config.yaml | 2 +- test/kubemark/gce/util.sh | 9 ++++- 8 files changed, 41 insertions(+), 25 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 1bcf7b05c17b..12f6f8fcd77f 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -70,7 +70,7 @@ fi CVM_VERSION=${CVM_VERSION:-container-vm-v20170214} # NOTE: Update the kernel commit SHA in cluster/addons/nvidia-gpus/cos-installer-daemonset.yaml # while updating the COS version here. -GCI_VERSION=${KUBE_GCI_VERSION:-gci-stable-56-9000-84-2} +GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0} MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-} MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 2ba8163799b8..bcc3c102957c 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -67,7 +67,7 @@ fi # variable. Also please update corresponding image for node e2e at: # https://github.com/kubernetes/kubernetes/blob/master/test/e2e_node/jenkins/image-config.yaml CVM_VERSION=${CVM_VERSION:-container-vm-v20170214} -GCI_VERSION=${KUBE_GCI_VERSION:-gci-stable-56-9000-84-2} +GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0} MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-} MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index a6d497b73065..173c866d2118 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -38,27 +38,36 @@ else fi if [[ "${MASTER_OS_DISTRIBUTION}" == "gci" ]]; then - # If the master image is not set, we use the latest GCI image. - # Otherwise, we respect whatever is set by the user. - MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${GCI_VERSION}} - MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} + DEFAULT_GCI_PROJECT=google-containers + if [[ "${GCI_VERSION}" == "cos"* ]]; then + DEFAULT_GCI_PROJECT=cos-cloud + fi + MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-${DEFAULT_GCI_PROJECT}} + # If the master image is not set, we use the latest GCI image. + # Otherwise, we respect whatever is set by the user. + MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${GCI_VERSION}} elif [[ "${MASTER_OS_DISTRIBUTION}" == "debian" ]]; then - MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${CVM_VERSION}} - MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} + MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${CVM_VERSION}} + MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} fi # Sets node image based on the specified os distro. Currently this function only # supports gci and debian. function set-node-image() { - if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then - # If the node image is not set, we use the latest GCI image. - # Otherwise, we respect whatever is set by the user. - NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${GCI_VERSION}} - NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers} - elif [[ "${NODE_OS_DISTRIBUTION}" == "debian" ]]; then - NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} - NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers} - fi + if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then + DEFAULT_GCI_PROJECT=google-containers + if [[ "${GCI_VERSION}" == "cos"* ]]; then + DEFAULT_GCI_PROJECT=cos-cloud + fi + + # If the node image is not set, we use the latest GCI image. 
+ # Otherwise, we respect whatever is set by the user. + NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${GCI_VERSION}} + NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-${DEFAULT_GCI_PROJECT}} + elif [[ "${NODE_OS_DISTRIBUTION}" == "debian" ]]; then + NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} + NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers} + fi } set-node-image diff --git a/cluster/kubemark/gce/config-default.sh b/cluster/kubemark/gce/config-default.sh index 8aee0259322c..1fc342e91853 100644 --- a/cluster/kubemark/gce/config-default.sh +++ b/cluster/kubemark/gce/config-default.sh @@ -36,7 +36,7 @@ PREEMPTIBLE_NODE=${PREEMPTIBLE_NODE:-false} MASTER_OS_DISTRIBUTION=${KUBE_MASTER_OS_DISTRIBUTION:-gci} NODE_OS_DISTRIBUTION=${KUBE_NODE_OS_DISTRIBUTION:-debian} -MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-gci-stable-56-9000-84-2} +MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-cos-beta-59-9460-20-0} MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} NETWORK=${KUBE_GCE_NETWORK:-default} diff --git a/test/e2e_node/jenkins/benchmark/benchmark-config.yaml b/test/e2e_node/jenkins/benchmark/benchmark-config.yaml index 883f4247e311..95d1ee2f2815 100644 --- a/test/e2e_node/jenkins/benchmark/benchmark-config.yaml +++ b/test/e2e_node/jenkins/benchmark/benchmark-config.yaml @@ -49,21 +49,21 @@ images: tests: - 'resource tracking for 105 pods per node \[Benchmark\]' gci-resource1: - image: gci-stable-56-9000-84-2 + image: cos-beta-59-9460-20-0 project: google-containers machine: n1-standard-1 metadata: "user-data Date: Sat, 20 May 2017 21:21:23 -0700 Subject: [PATCH 3/3] update default project to cos-cloud in gce configs Signed-off-by: Vishnu kannan --- cluster/gce/config-default.sh | 4 ++-- cluster/gce/config-test.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 12f6f8fcd77f..aee2134ef910 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -72,9 +72,9 @@ CVM_VERSION=${CVM_VERSION:-container-vm-v20170214} # while updating the COS version here. GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0} MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-} -MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} +MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud} NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} -NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers} +NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud} CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker} RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0} RKT_STAGE1_IMAGE=${KUBE_RKT_STAGE1_IMAGE:-coreos.com/rkt/stage1-coreos} diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index bcc3c102957c..a87f5fe110b4 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -69,9 +69,9 @@ fi CVM_VERSION=${CVM_VERSION:-container-vm-v20170214} GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0} MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-} -MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers} +MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud} NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}} -NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers} +NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud} CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker} GCI_DOCKER_VERSION=${KUBE_GCI_DOCKER_VERSION:-} RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0}
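
Manual verification sketch (supplementary to the patches above; the pod and file
names are illustrative): on a GCE cluster whose nodes run the COS image
configured here, the installer daemonset and the sample CUDA image can be
exercised directly with kubectl. The pod spec mirrors the hostPath mount used by
the cluster e2e test in test/e2e/nvidia-gpus.go.

  # Deploy the driver installer; the manifest targets the kube-system namespace.
  kubectl create -f cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml

  # Once installation finishes, GPUs should show up in node capacity.
  kubectl get nodes -o yaml | grep "alpha.kubernetes.io/nvidia-gpu"

A minimal pod that consumes one GPU and runs the vector-add sample to completion
(save as cuda-vector-add.yaml, then `kubectl create -f cuda-vector-add.yaml`):

  apiVersion: v1
  kind: Pod
  metadata:
    name: cuda-vector-add
  spec:
    restartPolicy: Never
    containers:
    - name: vector-addition
      image: gcr.io/google_containers/cuda-vector-add:v0.1
      resources:
        limits:
          alpha.kubernetes.io/nvidia-gpu: 1
      volumeMounts:
      - name: nvidia-libraries
        mountPath: /usr/local/nvidia/lib64
    volumes:
    - name: nvidia-libraries
      hostPath:
        path: /home/kubernetes/bin/nvidia/lib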