Enable "kick the tires" support for Nvidia GPUs in COS #45136

Merged: 3 commits, May 23, 2017
cluster/gce/BUILD: 5 changes (4 additions, 1 deletion)
@@ -32,7 +32,10 @@ filegroup(

 filegroup(
     name = "all-srcs",
-    srcs = [":package-srcs"],
+    srcs = [
+        ":package-srcs",
+        "//cluster/gce/gci/nvidia-gpus:all-srcs",
+    ],
     tags = ["automanaged"],
 )

cluster/gce/config-default.sh: 8 changes (5 additions, 3 deletions)
@@ -68,11 +68,13 @@ fi
 # variable. Also please update corresponding image for node e2e at:
 # https://github.com/kubernetes/kubernetes/blob/master/test/e2e_node/jenkins/image-config.yaml
 CVM_VERSION=${CVM_VERSION:-container-vm-v20170214}
-GCI_VERSION=${KUBE_GCI_VERSION:-gci-stable-56-9000-84-2}
+# NOTE: Update the kernel commit SHA in cluster/addons/nvidia-gpus/cos-installer-daemonset.yaml
+# while updating the COS version here.
+GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0}
 MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-}
-MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
+MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
 NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}}
-NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
+NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud}
 CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker}
 RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0}
 RKT_STAGE1_IMAGE=${KUBE_RKT_STAGE1_IMAGE:-coreos.com/rkt/stage1-coreos}
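(For context, these defaults can be overridden through the corresponding KUBE_* environment variables at cluster bring-up. A minimal sketch, assuming the commands are run from the repository root via cluster/kube-up.sh; the values shown are the new defaults from this file:)

    # Bring up a GCE cluster using the COS image for the nodes as well.
    export KUBE_GCI_VERSION=cos-beta-59-9460-20-0
    export KUBE_GCE_NODE_IMAGE=cos-beta-59-9460-20-0
    export KUBE_GCE_NODE_PROJECT=cos-cloud
    export KUBE_GCE_MASTER_PROJECT=cos-cloud
    ./cluster/kube-up.sh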
cluster/gce/config-test.sh: 6 changes (3 additions, 3 deletions)
@@ -67,11 +67,11 @@ fi
 # variable. Also please update corresponding image for node e2e at:
 # https://github.com/kubernetes/kubernetes/blob/master/test/e2e_node/jenkins/image-config.yaml
 CVM_VERSION=${CVM_VERSION:-container-vm-v20170214}
-GCI_VERSION=${KUBE_GCI_VERSION:-gci-stable-56-9000-84-2}
+GCI_VERSION=${KUBE_GCI_VERSION:-cos-beta-59-9460-20-0}
 MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-}
-MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
+MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
 NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}}
-NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
+NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud}
 CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker}
 GCI_DOCKER_VERSION=${KUBE_GCI_DOCKER_VERSION:-}
 RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0}
cluster/gce/gci/configure-helper.sh: 1 change (1 addition, 0 deletions)
@@ -1605,4 +1605,5 @@ else
 fi
 reset-motd
 prepare-mounter-rootfs
+modprobe configs
Contributor:
For IKCONFIG? Note it is not listed in the GKE node image spec https://docs.google.com/document/d/1qmiJOuLYqjJZF-PTfn-xvTbLvzTgFw8gMcWavX7qiQ0/edit

So we may have to double check our images to ensure they are built with this module.
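(For reference, a quick check on a node would look roughly like this; it assumes root access on the node and that IKCONFIG is built as the `configs` module:)

    # Load the configs module, then confirm the kernel config is exposed via procfs.
    modprobe configs
    zcat /proc/config.gz | grep CONFIG_IKCONFIG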

Contributor Author:
Good point. How are folks expected to discover the doc you posted? Can you add an entry for /proc/configz?

Contributor Author:
Oh, Nvidia driver installation may not require configs on all distros. I'm not sure about that.

Contributor:
@dchen1107 is the person to talk to about changing the node spec

echo "Done for the configuration for kubernetes"
cluster/gce/gci/nvidia-gpus/BUILD: 24 changes (24 additions, 0 deletions)
@@ -0,0 +1,24 @@
package(default_visibility = ["//visibility:public"])

load("@io_bazel//tools/build_defs/pkg:pkg.bzl", "pkg_tar")
load("@io_kubernetes_build//defs:build.bzl", "release_filegroup")

filegroup(
    name = "sources",
    srcs = glob([
        "**/*",
    ]),
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
)
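(A quick way to confirm the new package is wired into the parent target; a sketch, assuming a Bazel-enabled checkout:)

    # The nvidia-gpus package should appear in the transitive sources of //cluster/gce.
    bazel query 'deps(//cluster/gce:all-srcs)' | grep nvidia-gpus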
cluster/gce/gci/nvidia-gpus/Dockerfile: 28 changes (28 additions, 0 deletions)
@@ -0,0 +1,28 @@
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:16.04

# Disable prompts from apt
ENV DEBIAN_FRONTEND noninteractive

RUN apt-get -qq update
RUN apt-get install -qq pciutils gcc g++ git make dpkg-dev bc module-init-tools curl

RUN mkdir /lakitu-kernel
RUN git clone https://chromium.googlesource.com/chromiumos/third_party/kernel /lakitu-kernel

ADD installer.sh /usr/bin/nvidia-installer.sh
RUN chmod a+x /usr/bin/nvidia-installer.sh
CMD ["/usr/bin/nvidia-installer.sh"]
cluster/gce/gci/nvidia-gpus/Makefile: 27 changes (27 additions, 0 deletions)
@@ -0,0 +1,27 @@
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

TAG?=v0.1
REGISTRY?=gcr.io/google_containers
IMAGE=cos-nvidia-driver-install

all: container

container:
	docker build --pull -t ${REGISTRY}/${IMAGE}:${TAG} .
Contributor:
consider adding the arch you build for to the container name, e.g. -amd64

Contributor Author:
This is currently only meant for COS, which is restricted to amd64 on GCP. Do you think more code is useful?

Contributor:
It doesn't take much to future-proof the name. No need to actually make cross build work.


push:
	gcloud docker -- push ${REGISTRY}/${IMAGE}:${TAG}

.PHONY: all container push
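(Typical usage, a sketch; the registry shown is a placeholder for a project you can push to:)

    # Build the installer image and push it with an explicit registry and tag.
    make container push REGISTRY=gcr.io/my-project TAG=v0.1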
cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml: 57 changes (57 additions, 0 deletions)
@@ -0,0 +1,57 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: cos-nvidia-installer
  namespace: kube-system
spec:
  template:
    metadata:
      labels:
        name: cos-nvidia-installer
        # Update the version tag here and `LAKITU_KERNEL_SHA1` while using against new COS releases.
        cos-version: cos-beta-59-9460-20-0
    spec:
      hostNetwork: true
      hostPID: true
      volumes:
      - name: dev
        hostPath:
          path: /dev
      - name: nvidia-overlay
        hostPath:
          path: /home/kubernetes/bin/nvidia
      - name: os-release
        hostPath:
          path: /etc/os-release
      - name: sysrq
        hostPath:
          path: /proc/sysrq-trigger
      containers:
      - image: gcr.io/google_containers/cos-nvidia-driver-install@sha256:ad83ede6e0c6d768bf7cf69a7dec972aa5e8f88778142ca46afd3286ad58cfc8
        command: ["/bin/sh", "-c"]
        args: ["usr/bin/nvidia-installer.sh && sleep infinity"]
        env:
        - name: BASE_DIR
          value: "/rootfs/nvidia"
        name: nvidia-driver-installer
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true
        env:
        # The kernel SHA1 here should correspond to the GCI_VERSION specified by default under cluster/gce/config-default.sh
        - name: LAKITU_KERNEL_SHA1
Contributor:
could you please add a TODO to determine the SHA1 programmatically at runtime? I have some changes under review to make it possible on COS images.

Contributor Author:
Yeah. It's there in the installer script.
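(For reference, the commented-out checkout in prepare_kernel_source already sketches that direction; roughly, assuming the COS kernel tags track `uname -r`:)

    # Derive the checkout ref from the running kernel instead of a hard-coded SHA.
    kernel_version_stripped="$(uname -r | sed 's/+//')"
    git -C /lakitu-kernel checkout "tags/v${kernel_version_stripped}"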

          value: 26481563cb3788ad254c2bf2126b843c161c7e48
        - name: BASE_DIR
          value: "/rootfs/nvidia"
        volumeMounts:
        - name: nvidia-overlay
          mountPath: /rootfs/nvidia
        - name: dev
          mountPath: /dev
        - name: os-release
          mountPath: /rootfs/etc/os-release
        - name: sysrq
          mountPath: /sysrq
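(Deploying and inspecting the installer, a sketch; assumes kubectl access to the cluster, and `<installer-pod>` is a placeholder:)

    kubectl create -f cluster/gce/gci/nvidia-gpus/cos-installer-daemonset.yaml
    kubectl --namespace=kube-system get pods -l name=cos-nvidia-installer
    kubectl --namespace=kube-system logs <installer-pod> -c nvidia-driver-installer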

cluster/gce/gci/nvidia-gpus/installer.sh: 207 changes (207 additions, 0 deletions)
@@ -0,0 +1,207 @@
#!/bin/bash

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is for dynamically installing nvidia kernel drivers in Container Optimized OS

set -o errexit
set -o pipefail
set -x

# The script must be run as root.
# Prerequisites:
#
# LAKITU_KERNEL_SHA1 - This env variable is expected to be set to the commit SHA (HEAD) of the kernel version used on the host.
# BASE_DIR - Directory that is mapped to a stateful partition on the host. Defaults to `/rootfs/nvidia`.
#
# The script will output the following artifacts:
# ${BASE_DIR}/lib* --> Nvidia CUDA libraries
# ${BASE_DIR}/bin/* --> Nvidia debug utilities
# ${BASE_DIR}/.cache/* --> Nvidia driver artifacts cached for idempotency.
#
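# Example invocation (for illustration; the SHA shown is the one pinned in cos-installer-daemonset.yaml):
#   LAKITU_KERNEL_SHA1=26481563cb3788ad254c2bf2126b843c161c7e48 BASE_DIR=/rootfs/nvidia /usr/bin/nvidia-installer.sh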

BASE_DIR=${BASE_DIR:-"/rootfs/nvidia"}
CACHE_DIR="${BASE_DIR}/.cache"
USR_WORK_DIR="${CACHE_DIR}/usr-work"
USR_WRITABLE_DIR="${CACHE_DIR}/usr-writable"
LIB_WORK_DIR="${CACHE_DIR}/lib-work"
LIB_WRITABLE_DIR="${CACHE_DIR}/lib-writable"

LIB_OUTPUT_DIR="${BASE_DIR}/lib"
BIN_OUTPUT_DIR="${BASE_DIR}/bin"

KERNEL_SRC_DIR="/lakitu-kernel"
NVIDIA_DRIVER_DIR="/nvidia"
NVIDIA_DRIVER_VERSION="375.26"

# Source: https://developer.nvidia.com/cuda-downloads
NVIDIA_CUDA_URL="https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run"
NVIDIA_CUDA_MD5SUM="33e1bd980e91af4e55f3ef835c103f9b"
NVIDIA_CUDA_PKG_NAME="cuda_8.0.61_375.26_linux.run"
NVIDIA_DRIVER_PKG_NAME="NVIDIA-Linux-x86_64-375.26.run"

check_nvidia_device() {
lspci
Contributor:
Maybe worth grepping for NVIDIA devices here too, for less output?

Contributor Author:
I can optimize it by wrapping around a command, but I actually like verbose output.

Contributor:
fair enough

if ! lspci | grep -i -q NVIDIA; then
echo "No NVIDIA devices attached to this instance."
exit 0
fi
echo "Found NVIDIA device on this instance."
}

prepare_kernel_source() {
local kernel_git_repo="https://chromium.googlesource.com/chromiumos/third_party/kernel"
local kernel_version="$(uname -r)"
local kernel_version_stripped="$(echo ${kernel_version} | sed 's/\+//')"

# Checkout the correct tag.
echo "Downloading kernel source at tag ${kernel_version_stripped} ..."
pushd "${KERNEL_SRC_DIR}"
# TODO: Consume KERNEL SHA1 from COS image directly.
# git checkout "tags/v${kernel_version_stripped}"
git checkout ${LAKITU_KERNEL_SHA1}

# Prepare kernel configu and source for modules.
Contributor:
s/configu/config

echo "Preparing kernel sources ..."
zcat "/proc/config.gz" > ".config"
make olddefconfig
make modules_prepare
# Done.
popd
}

download_install_nvidia() {
local pkg_name="${NVIDIA_CUDA_PKG_NAME}"
local url="${NVIDIA_CUDA_URL}"
local log_file_name="${NVIDIA_DRIVER_DIR}/nvidia-installer.log"

mkdir -p "${NVIDIA_DRIVER_DIR}"
pushd "${NVIDIA_DRIVER_DIR}"

echo "Downloading Nvidia CUDA package from ${url} ..."
curl -L -s "${url}" -o "${pkg_name}"
echo "${NVIDIA_CUDA_MD5SUM} ${pkg_name}" | md5sum --check

echo "Extracting Nvidia CUDA package ..."
sh ${pkg_name} --extract="$(pwd)"

echo "Running the Nvidia driver installer ..."
if ! sh "${NVIDIA_DRIVER_PKG_NAME}" --kernel-source-path="${KERNEL_SRC_DIR}" --silent --accept-license --keep --log-file-name="${log_file_name}"; then
echo "Nvidia installer failed, log below:"
Contributor:
Maybe also print where people can find the full log file

Contributor Author:
All the commands run by this script will be printed (set -x), so the tail command below will display the log file name.

Contributor:
ok

echo "==================================="
tail -50 "${log_file_name}"
echo "==================================="
exit 1
fi
# Create unified memory device file.
nvidia-modprobe -c0 -u
popd
}

unlock_loadpin_and_reboot_if_needed() {
kernel_cmdline="$(cat /proc/cmdline)"
if echo "${kernel_cmdline}" | grep -q -v "lsm.module_locking=0"; then
local -r esp_partition="/dev/sda12"
local -r mount_path="/tmp/esp"
local -r grub_cfg="efi/boot/grub.cfg"

mkdir -p "${mount_path}"
mount "${esp_partition}" "${mount_path}"

pushd "${mount_path}"
cp "${grub_cfg}" "${grub_cfg}.orig"
sed 's/cros_efi/cros_efi lsm.module_locking=0/g' -i "efi/boot/grub.cfg"
cat "${grub_cfg}"
popd
sync
umount "${mount_path}"
# Restart the node for loadpin to be disabled.
echo b > /sysrq
fi
}

create_uvm_device() {
# Create unified memory device file.
nvidia-modprobe -c0 -u
}

verify_base_image() {
mount --bind /rootfs/etc/os-release /etc/os-release
local id="$(grep "^ID=" /etc/os-release)"
if [[ "${id#*=}" != "cos" ]]; then
echo "This installer is designed to run on Container-Optimized OS only"
exit 1
fi
}

setup_overlay_mounts() {
mkdir -p ${USR_WRITABLE_DIR} ${USR_WORK_DIR} ${LIB_WRITABLE_DIR} ${LIB_WORK_DIR}
mount -t overlay -o lowerdir=/usr,upperdir=${USR_WRITABLE_DIR},workdir=${USR_WORK_DIR} none /usr
mount -t overlay -o lowerdir=/lib,upperdir=${LIB_WRITABLE_DIR},workdir=${LIB_WORK_DIR} none /lib
}

exit_if_install_not_needed() {
if nvidia-smi; then
echo "nvidia drivers already installed. Skipping installation"
post_installation_sequence
Contributor:
nitpick: the leading whitespace is inconsistent in the if block

Contributor Author:
Done

exit 0
fi
}

restart_kubelet() {
echo "Sending SIGTERM to kubelet"
Member:
Why do we need to restart the kubelet during the installation? This kind of dependency could be error-prone: deploying and managing this nvidia-gpus DaemonSet depends on the kubelet, but in the middle of running the DaemonSet (installer.sh), the kubelet is restarted.

Contributor Author:
This is necessary for the kubelet to pick up the GPUs; the kubelet cannot support hotplugging GPUs for various reasons. We tried doing it with just PCI-based data and it is proving to be hard.
Ideally we would reboot the entire node, but we haven't gotten there yet.
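(Once the kubelet comes back, the GPUs should show up in the node's capacity; a sketch, where the exact alpha resource name is an assumption from that release era:)

    kubectl describe node <gpu-node> | grep -i nvidia-gpu
    # Expect a non-zero count under Capacity, e.g. "alpha.kubernetes.io/nvidia-gpu: 1".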

pkill -SIGTERM kubelet
}

# Copy user space libraries and debug utilities to a special output directory on the host.
# Make these artifacts world readable and executable.
copy_files_to_host() {
mkdir -p ${LIB_OUTPUT_DIR} ${BIN_OUTPUT_DIR}
cp -r ${USR_WRITABLE_DIR}/lib/x86_64-linux-gnu/* ${LIB_OUTPUT_DIR}/
cp -r ${USR_WRITABLE_DIR}/bin/* ${BIN_OUTPUT_DIR}/
chmod -R a+rx ${LIB_OUTPUT_DIR}
chmod -R a+rx ${BIN_OUTPUT_DIR}
}

post_installation_sequence() {
create_uvm_device
# Copy nvidia user space libraries and debug tools to the host for use from other containers.
copy_files_to_host
# Restart the kubelet for it to pick up the GPU devices.
restart_kubelet
}

main() {
# Do not run the installer unless the base image is Container Optimized OS (COS)
verify_base_image
# Do not run the installer unless an Nvidia device is found on the PCI bus
check_nvidia_device
# Set up overlay mounts to capture nvidia driver artifacts in more permanent storage on the host.
setup_overlay_mounts
# Disable a COS kernel security feature (module locking / LoadPin) so that the Nvidia drivers can be loaded dynamically
unlock_loadpin_and_reboot_if_needed
# Exit if installation is not required (for idempotency)
exit_if_install_not_needed
# Checkout kernel sources appropriate for the base image.
prepare_kernel_source
# Download, compile and install nvidia drivers.
download_install_nvidia
# Verify that the Nvidia drivers have been successfully installed.
nvidia-smi
# Perform post installation steps - copying artifacts, restarting kubelet, etc.
post_installation_sequence
}

main "$@"