From de88ad178bcf3486886d419ff5aeff53235eedec Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 18 Jan 2023 14:31:59 +0000 Subject: [PATCH 01/39] Add basic ansible configuration for bazel and installing apt pkgs --- docker/experimental-ansible/Dockerfile | 11 ++++++++++ .../playbooks/.ansible-lint | 4 ++++ .../playbooks/config/apt.yaml | 9 ++++++++ .../experimental-ansible/playbooks/dev.yaml | 22 +++++++++++++++++++ .../playbooks/roles/bazel/defaults/main.yaml | 1 + .../playbooks/roles/bazel/tasks/main.yaml | 5 +++++ .../roles/build_deps/tasks/main.yaml | 6 +++++ .../roles/init_repos/tasks/main.yaml | 0 8 files changed, 58 insertions(+) create mode 100644 docker/experimental-ansible/Dockerfile create mode 100644 docker/experimental-ansible/playbooks/.ansible-lint create mode 100644 docker/experimental-ansible/playbooks/config/apt.yaml create mode 100644 docker/experimental-ansible/playbooks/dev.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml diff --git a/docker/experimental-ansible/Dockerfile b/docker/experimental-ansible/Dockerfile new file mode 100644 index 000000000000..caf44bb03d4a --- /dev/null +++ b/docker/experimental-ansible/Dockerfile @@ -0,0 +1,11 @@ +ARG python_version=3.8 +ARG debian_version=buster + +FROM python:${python_version}-${debian_version} AS build + +RUN pip install ansible + +COPY ansible /ansible +WORKDIR /ansible + +RUN ansible-playbook dev.yaml \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/.ansible-lint b/docker/experimental-ansible/playbooks/.ansible-lint new file mode 100644 index 000000000000..a4632065083f --- /dev/null +++ b/docker/experimental-ansible/playbooks/.ansible-lint @@ -0,0 +1,4 @@ 
+--- +# .ansible-lint + +profile: moderate \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/playbooks/config/apt.yaml new file mode 100644 index 000000000000..5cec0ff9074a --- /dev/null +++ b/docker/experimental-ansible/playbooks/config/apt.yaml @@ -0,0 +1,9 @@ +apt_packages: + build_amd64: + - clang-11 + - vim + + build_aarch64: + - scons + - gcc-10 + - g++-10 diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml new file mode 100644 index 000000000000..63ca4a158593 --- /dev/null +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -0,0 +1,22 @@ +- name: "Install build dependencies" + hosts: 127.0.0.1 + connection: local + vars_prompt: + - name: stage_env + prompt: "Stage and env type (possible options: build_aarch64, build_amd64)" + private: false + + pre_tasks: + - name: Include vars of config/apt.yaml + ansible.builtin.include_vars: + file: config/apt.yaml + name: apt + + roles: + - role: bazel + vars: + some_var: "123" + + - role: build_deps + vars: + apt_packages: "{{ apt.apt_packages[stage_env] }}" diff --git a/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml b/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml new file mode 100644 index 000000000000..88eac45a311a --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml @@ -0,0 +1 @@ +bazelisk_version: 1.11.0 diff --git a/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml new file mode 100644 index 000000000000..4331a45d9082 --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml @@ -0,0 +1,5 @@ +- name: "Download bazelisk" + ansible.builtin.get_url: + url: "https://github.com/bazelbuild/bazelisk/releases/download/v{{ bazelisk_version }}/bazelisk-linux-amd64" + dest: /usr/local/bin/bazel + mode: 
'u=rxw,g=rw,o=r' diff --git a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml new file mode 100644 index 000000000000..f0bbe7949b93 --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml @@ -0,0 +1,6 @@ +- name: Install required apt packages + ansible.builtin.apt: + name: "{{ item }}" + update_cache: true + cache_valid_time: "{{ 60 * 60 * 12 }}" # 12h, set in seconds + loop: "{{ apt_packages }}" diff --git a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml new file mode 100644 index 000000000000..e69de29bb2d1 From 24e4baff5aab284328af5b2088876f2abc7a5a76 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 18 Jan 2023 16:16:02 +0000 Subject: [PATCH 02/39] Add apt repos and some signing keys --- .../playbooks/config/apt.yaml | 27 ++++++++++++------- .../playbooks/config/common.yaml | 3 +++ .../experimental-ansible/playbooks/dev.yaml | 16 +++++++---- .../roles/build_deps/tasks/main.yaml | 3 +-- .../roles/init_repos/tasks/main.yaml | 11 ++++++++ 5 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 docker/experimental-ansible/playbooks/config/common.yaml diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/playbooks/config/apt.yaml index 5cec0ff9074a..46bb93e5109d 100644 --- a/docker/experimental-ansible/playbooks/config/apt.yaml +++ b/docker/experimental-ansible/playbooks/config/apt.yaml @@ -1,9 +1,18 @@ -apt_packages: - build_amd64: - - clang-11 - - vim - - build_aarch64: - - scons - - gcc-10 - - g++-10 +apt: + pkgs: + common: + + build_amd64: + - clang-11 + + build_aarch64: + - scons + - gcc-10 + - g++-10 + + signing_keys: + - https://apt.llvm.org/llvm-snapshot.gpg.key + + repos: + - "deb http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ 
clang_version }} main" + - "deb-src http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/config/common.yaml b/docker/experimental-ansible/playbooks/config/common.yaml new file mode 100644 index 000000000000..aaf1ff0fd1ff --- /dev/null +++ b/docker/experimental-ansible/playbooks/config/common.yaml @@ -0,0 +1,3 @@ +cuda_repo: ubuntu1804 +debian_version: buster +clang_version: 11 \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index 63ca4a158593..baa78c7514e5 100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -7,16 +7,22 @@ private: false pre_tasks: - - name: Include vars of config/apt.yaml + - name: Include vars from config/common.yaml + ansible.builtin.include_vars: + file: config/common.yaml + + - name: Include vars from config/apt.yaml ansible.builtin.include_vars: file: config/apt.yaml - name: apt roles: - - role: bazel + - bazel + + - role: init_repos vars: - some_var: "123" + apt_keys: "{{ apt.signing_keys }}" - role: build_deps vars: - apt_packages: "{{ apt.apt_packages[stage_env] }}" + apt_pkgs: "{{ apt.pkgs[stage_env] }}" + apt_repos: "{{ apt.repos }}" diff --git a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml index f0bbe7949b93..3401ef44420c 100644 --- a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml @@ -2,5 +2,4 @@ ansible.builtin.apt: name: "{{ item }}" update_cache: true - cache_valid_time: "{{ 60 * 60 * 12 }}" # 12h, set in seconds - loop: "{{ apt_packages }}" + loop: "{{ apt_pkgs }}" diff --git a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml 
b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml index e69de29bb2d1..ce82b8f756e9 100644 --- a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml @@ -0,0 +1,11 @@ +- name: Add apt keys + ansible.builtin.apt_key: + url: "{{ item }}" + state: present + loop: "{{ apt_keys }}" + +- name: Add apt repositories into sources list + ansible.builtin.apt_repository: + repo: "{{ item }}" + state: present + loop: "{{ apt_repos }}" From a514d19d8ede3a3a948e8f88367cbc7b1ba183d4 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Thu, 19 Jan 2023 12:10:36 +0000 Subject: [PATCH 03/39] Add pip packages --- .../playbooks/config/pip.yaml | 41 +++++++++++++++++++ .../experimental-ansible/playbooks/dev.yaml | 31 ++++++++++---- .../playbooks/roles/bazel/tasks/main.yaml | 2 +- .../roles/build_deps/tasks/main.yaml | 14 ++++++- 4 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 docker/experimental-ansible/playbooks/config/pip.yaml diff --git a/docker/experimental-ansible/playbooks/config/pip.yaml b/docker/experimental-ansible/playbooks/config/pip.yaml new file mode 100644 index 000000000000..aae12147e5e1 --- /dev/null +++ b/docker/experimental-ansible/playbooks/config/pip.yaml @@ -0,0 +1,41 @@ +pip: + pkgs: + common: + - astunparse + - cffi + - cloud-tpu-client + - cmake + - coverage + - dataclasses + - expecttest==0.1.3 + - future + - git-archive-all + - google-api-python-client + - google-cloud-storage + - hypothesis + - lark-parser + - ninja + - numpy + - oauth2client + - pyyaml + - requests + - setuptools + - six + - tensorboard + - tensorboardX + - tqdm + - typing + - typing_extensions + + build_amd64: + - mkl + - mkl-include + + build_aarch64: + + pkgs_nodeps: + common: + + build_amd64: + + build_aarch64: diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index baa78c7514e5..472b998fb36c 
100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -7,13 +7,14 @@ private: false pre_tasks: - - name: Include vars from config/common.yaml + - name: "Include vars from config/{{ item }}" ansible.builtin.include_vars: - file: config/common.yaml - - - name: Include vars from config/apt.yaml - ansible.builtin.include_vars: - file: config/apt.yaml + file: "config/{{ item }}" + loop: + # common.yaml should be the first as other config files depend on it. + - common.yaml + - apt.yaml + - pip.yaml roles: - bazel @@ -24,5 +25,21 @@ - role: build_deps vars: - apt_pkgs: "{{ apt.pkgs[stage_env] }}" + # If apt.pkgs.common is defined, but not set to anything + # it cannot be concatenated with a list. Use default(v, [], true) to + # set `v` to an empty array if it evaluates to false. + # See https://jinja.palletsprojects.com/en/2.11.x/templates/#default + + apt_pkgs: "{{ + apt.pkgs.common | default([], true) + apt.pkgs[stage_env] | default([], true) + }}" + apt_repos: "{{ apt.repos }}" + + pip_pkgs: "{{ + pip.pkgs.common | default([], true) + pip.pkgs[stage_env] | default([], true) + }}" + + pip_pkgs_nodeps: "{{ + pip.pkgs_nodeps.common | default([], true) + pip.pkgs_nodeps[stage_env] | default([], true) + }}" diff --git a/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml index 4331a45d9082..aad6bc3332bc 100644 --- a/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml @@ -1,4 +1,4 @@ -- name: "Download bazelisk" +- name: "Download bazelisk v{{ bazelisk_version }}" ansible.builtin.get_url: url: "https://github.com/bazelbuild/bazelisk/releases/download/v{{ bazelisk_version }}/bazelisk-linux-amd64" dest: /usr/local/bin/bazel diff --git a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml 
b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml index 3401ef44420c..571f3fb2e864 100644 --- a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml @@ -1,5 +1,17 @@ -- name: Install required apt packages +- name: Install apt packages ansible.builtin.apt: name: "{{ item }}" update_cache: true loop: "{{ apt_pkgs }}" + +- name: Install pip packages + ansible.builtin.pip: + name: "{{ item }}" + loop: "{{ pip_pkgs }}" + + +- name: Install pip packages without deps (--no-deps) + ansible.builtin.pip: + name: "{{ item }}" + extra_args: "--no-deps" + loop: "{{ pip_pkgs_nodeps }}" From 4e8cfb24b158d5caf126a5e7d723d9d6697d4122 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Thu, 19 Jan 2023 13:43:44 +0000 Subject: [PATCH 04/39] Don't use apt-key for adding repo keys --- .../playbooks/config/apt.yaml | 14 ++++++++++++-- .../playbooks/roles/bazel/defaults/main.yaml | 2 +- .../playbooks/roles/init_repos/tasks/main.yaml | 18 +++++++++++++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/playbooks/config/apt.yaml index 46bb93e5109d..c25684e8197f 100644 --- a/docker/experimental-ansible/playbooks/config/apt.yaml +++ b/docker/experimental-ansible/playbooks/config/apt.yaml @@ -10,9 +10,19 @@ apt: - gcc-10 - g++-10 + # Specify objects with string fields `url` and `keyring`. + # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu. 
signing_keys: - - https://apt.llvm.org/llvm-snapshot.gpg.key + - url: https://apt.llvm.org/llvm-snapshot.gpg.key + keyring: /usr/share/keyrings/llvm.pgp + - url: https://packages.cloud.google.com/apt/doc/apt-key.gpg + keyring: /usr/share/keyrings/cloud.google.gpg + - url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/3bf863cc.pub" + keyring: /usr/share/keyrings/cuda.pgp repos: - "deb http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" - - "deb-src http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" \ No newline at end of file + - "deb-src http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" + # signed-by path should match the Google Cloud keyring path above. + - "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" + - "deb https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /" \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml b/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml index 88eac45a311a..6ddadb8b6863 100644 --- a/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml @@ -1 +1 @@ -bazelisk_version: 1.11.0 +bazelisk_version: 1.15.0 diff --git a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml index ce82b8f756e9..def3311d8e6a 100644 --- a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml @@ -1,11 +1,23 @@ - name: Add apt keys - ansible.builtin.apt_key: - url: "{{ item }}" - state: present + ansible.builtin.get_url: + url: "{{ item.url 
}}" + dest: "{{ item.keyring }}" + mode: 'u=rw,g=r,o=r' + # ansible.builtin.apt_key: + # url: "{{ item if item is string else item.url }}" + # keyring: "{{ None if item is string else item.keyring }}" + # state: present loop: "{{ apt_keys }}" + register: apt_key - name: Add apt repositories into sources list ansible.builtin.apt_repository: repo: "{{ item }}" state: present loop: "{{ apt_repos }}" + register: apt_repo + +- name: Update apt cache + apt: + update_cache: true + when: apt_key.changed or apt_repo.changed From 1be9ba75fd8c05b6c0eab8d6a28fda7d1ac00a3a Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Thu, 19 Jan 2023 13:43:44 +0000 Subject: [PATCH 05/39] Don't use apt-key for adding repo keys --- .../playbooks/roles/init_repos/tasks/main.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml index def3311d8e6a..3209bf4bfe99 100644 --- a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml @@ -1,23 +1,21 @@ - name: Add apt keys + # Don't use apt-key for adding repo keys since it's deprecated. + # Instead place gpg and pgp files in /usr/share/keyrings/ (debian, ubuntu). 
ansible.builtin.get_url: url: "{{ item.url }}" dest: "{{ item.keyring }}" mode: 'u=rw,g=r,o=r' - # ansible.builtin.apt_key: - # url: "{{ item if item is string else item.url }}" - # keyring: "{{ None if item is string else item.keyring }}" - # state: present loop: "{{ apt_keys }}" - register: apt_key + register: add_apt_key - name: Add apt repositories into sources list ansible.builtin.apt_repository: repo: "{{ item }}" state: present loop: "{{ apt_repos }}" - register: apt_repo + register: add_apt_repo - name: Update apt cache apt: update_cache: true - when: apt_key.changed or apt_repo.changed + when: add_apt_key.changed or add_apt_repo.changed From 9613b090431d4acb3f8cfaf23daa786069e991da Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 20 Jan 2023 10:36:55 +0000 Subject: [PATCH 06/39] Add fetch_srcs role for fetching PyTorch and XLA repos --- .../playbooks/config/apt.yaml | 20 ++++++++++++++++++ .../config/{common.yaml => vars.yaml} | 0 .../experimental-ansible/playbooks/dev.yaml | 21 ++++++++++--------- .../roles/build_deps/tasks/main.yaml | 17 --------------- .../roles/fetch_srcs/defaults/main.yaml | 3 +++ .../roles/fetch_srcs/tasks/main.yaml | 21 +++++++++++++++++++ .../tasks/main.yaml | 18 ++++++++++++++++ 7 files changed, 73 insertions(+), 27 deletions(-) rename docker/experimental-ansible/playbooks/config/{common.yaml => vars.yaml} (100%) delete mode 100644 docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml create mode 100644 docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml rename docker/experimental-ansible/playbooks/roles/{init_repos => install_deps}/tasks/main.yaml (61%) diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/playbooks/config/apt.yaml index c25684e8197f..01adea2e7200 100644 --- a/docker/experimental-ansible/playbooks/config/apt.yaml +++ 
b/docker/experimental-ansible/playbooks/config/apt.yaml @@ -1,6 +1,26 @@ apt: pkgs: common: + - ccache + - curl + - git + - gnupg + - libomp5 + - libopenblas-dev + - ninja-build + - procps + - python3-pip + - rename + - vim + - wget + + accelerator: + build_cuda: + - cuda-libraries-11-2 + - cuda-toolkit-11-2 + - cuda-minimal-build-11-2 + - libcudnn8=8.1.1.33-1+cuda11.2 + - libcudnn8-dev=8.1.1.33-1+cuda11.2 build_amd64: - clang-11 diff --git a/docker/experimental-ansible/playbooks/config/common.yaml b/docker/experimental-ansible/playbooks/config/vars.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/config/common.yaml rename to docker/experimental-ansible/playbooks/config/vars.yaml diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index 472b998fb36c..3cb003375030 100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -11,25 +11,22 @@ ansible.builtin.include_vars: file: "config/{{ item }}" loop: - # common.yaml should be the first as other config files depend on it. - - common.yaml + # vars.yaml should be the first as other config files depend on it. + - vars.yaml - apt.yaml - pip.yaml roles: - bazel - - role: init_repos + - role: install_deps vars: apt_keys: "{{ apt.signing_keys }}" - - role: build_deps - vars: - # If apt.pkgs.common is defined, but not set to anything - # it cannot be concatenated with a list. Use default(v, [], true) to - # set `v` to an empty array if it evaluates to false. - # See https://jinja.palletsprojects.com/en/2.11.x/templates/#default - + # If a variable (like `apt.pkgs.common`) is defined, but not set to + # anything it cannot be concatenated with a list. + # Use `v | default([], true)` to set `v` to an empty array if it evaluates to false. + # See https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.default. 
apt_pkgs: "{{ apt.pkgs.common | default([], true) + apt.pkgs[stage_env] | default([], true) }}" @@ -43,3 +40,7 @@ pip_pkgs_nodeps: "{{ pip.pkgs_nodeps.common | default([], true) + pip.pkgs_nodeps[stage_env] | default([], true) }}" + + - role: fetch_srcs + vars: + src_root: "/src" diff --git a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml deleted file mode 100644 index 571f3fb2e864..000000000000 --- a/docker/experimental-ansible/playbooks/roles/build_deps/tasks/main.yaml +++ /dev/null @@ -1,17 +0,0 @@ -- name: Install apt packages - ansible.builtin.apt: - name: "{{ item }}" - update_cache: true - loop: "{{ apt_pkgs }}" - -- name: Install pip packages - ansible.builtin.pip: - name: "{{ item }}" - loop: "{{ pip_pkgs }}" - - -- name: Install pip packages without deps (--no-deps) - ansible.builtin.pip: - name: "{{ item }}" - extra_args: "--no-deps" - loop: "{{ pip_pkgs_nodeps }}" diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml b/docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml new file mode 100644 index 000000000000..54b409da6a22 --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml @@ -0,0 +1,3 @@ +# See https://docs.ansible.com/ansible/latest/collections/ansible/builtin/git_module.html#parameter-version +pytorch_git_rev: HEAD +xla_git_rev: HEAD diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml new file mode 100644 index 000000000000..2c0df21ad87c --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml @@ -0,0 +1,21 @@ +- name: "Create source root directory at {{ src_root }}" + ansible.builtin.file: + path: "{{ src_root }}" + state: directory + mode: '0755' + +- name: Clone PyTorch and XLA repos + ansible.builtin.git: + repo: "{{ 
item.repo }}" + dest: "{{ item.dest }}" + version: "{{ item.version }}" + depth: 1 + bare: true + loop: + - repo: https://github.com/pytorch/pytorch + dest: "{{ (src_root, 'pytorch') | path_join }}" + version: "{{ pytorch_git_rev }}" + + - repo: https://github.com/pytorch/xla + dest: "{{ (src_root, 'pytorch/xla') | path_join }}" + version: "{{ xla_git_rev }}" diff --git a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml similarity index 61% rename from docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml rename to docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml index 3209bf4bfe99..8de47730873e 100644 --- a/docker/experimental-ansible/playbooks/roles/init_repos/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml @@ -19,3 +19,21 @@ apt: update_cache: true when: add_apt_key.changed or add_apt_repo.changed + +- name: Install apt packages + ansible.builtin.apt: + name: "{{ item }}" + update_cache: true + loop: "{{ apt_pkgs }}" + +- name: Install pip packages + ansible.builtin.pip: + name: "{{ item }}" + loop: "{{ pip_pkgs }}" + + +- name: Install pip packages without deps (--no-deps) + ansible.builtin.pip: + name: "{{ item }}" + extra_args: "--no-deps" + loop: "{{ pip_pkgs_nodeps }}" From 3187aed3c2b88d292cb187374cc5e09012d11b85 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 20 Jan 2023 11:35:27 +0000 Subject: [PATCH 07/39] Add patches application --- .../roles/fetch_srcs/tasks/main.yaml | 20 +++++++++++++++++-- .../roles/install_deps/tasks/main.yaml | 10 +++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml index 2c0df21ad87c..246055b343ad 100644 --- 
a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml @@ -4,13 +4,12 @@ state: directory mode: '0755' -- name: Clone PyTorch and XLA repos +- name: "Clone git repo {{ item.repo }}" ansible.builtin.git: repo: "{{ item.repo }}" dest: "{{ item.dest }}" version: "{{ item.version }}" depth: 1 - bare: true loop: - repo: https://github.com/pytorch/pytorch dest: "{{ (src_root, 'pytorch') | path_join }}" @@ -19,3 +18,20 @@ - repo: https://github.com/pytorch/xla dest: "{{ (src_root, 'pytorch/xla') | path_join }}" version: "{{ xla_git_rev }}" + +- name: Find *.diff files in pytorch/xla/tf_patches + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/xla/tf_patches') | path_join }}" + pattern: "*.diff" + register: tf_patches + +- name: Apply patches to Tensorflow + ansible.posix.patch: + src: "{{ item }}" + # Use source file on the target machine instead of the one where + # the playbook is located. Has no effect when the target machine is + # localhost. 
+ remote_src: true + strip: 1 + basedir: "{{ (src_root, 'pytorch/xla/third_party/tensorflow') | path_join }}" + loop: "{{ tf_patches.files | map(attribute='path') }}" diff --git a/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml index 8de47730873e..5782b3544c20 100644 --- a/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml @@ -22,18 +22,14 @@ - name: Install apt packages ansible.builtin.apt: - name: "{{ item }}" + name: "{{ apt_pkgs }}" update_cache: true - loop: "{{ apt_pkgs }}" - name: Install pip packages ansible.builtin.pip: - name: "{{ item }}" - loop: "{{ pip_pkgs }}" - + name: "{{ pip_pkgs }}" - name: Install pip packages without deps (--no-deps) ansible.builtin.pip: - name: "{{ item }}" + name: "{{ pip_pkgs_nodeps }}" extra_args: "--no-deps" - loop: "{{ pip_pkgs_nodeps }}" From 6f59512155a22b086b5198ac671800c85fca3ee1 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 20 Jan 2023 13:02:41 +0000 Subject: [PATCH 08/39] Add role for compling PyTorch and XLA sources --- .../experimental-ansible/playbooks/.ansible-lint | 4 +++- .../playbooks/config/vars.yaml | 8 +++++++- docker/experimental-ansible/playbooks/dev.yaml | 4 ++++ .../playbooks/roles/build_srcs/tasks/main.yaml | 15 +++++++++++++++ .../playbooks/roles/fetch_srcs/tasks/main.yaml | 2 +- 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml diff --git a/docker/experimental-ansible/playbooks/.ansible-lint b/docker/experimental-ansible/playbooks/.ansible-lint index a4632065083f..a8661e612016 100644 --- a/docker/experimental-ansible/playbooks/.ansible-lint +++ b/docker/experimental-ansible/playbooks/.ansible-lint @@ -1,4 +1,6 @@ --- # .ansible-lint -profile: moderate \ No newline at end of file +profile: moderate 
+skip_list: + - schema[tasks] \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/config/vars.yaml b/docker/experimental-ansible/playbooks/config/vars.yaml index aaf1ff0fd1ff..c0a3a95033fa 100644 --- a/docker/experimental-ansible/playbooks/config/vars.yaml +++ b/docker/experimental-ansible/playbooks/config/vars.yaml @@ -1,3 +1,9 @@ cuda_repo: ubuntu1804 debian_version: buster -clang_version: 11 \ No newline at end of file +clang_version: 11 +# PyTorch and PyTorch/XLA wheel version. +package_version: 1.14 +tpu_vm: +cuda: 1 +tf_cuda_compute_capabilities: 7.0,7.5,8.0 +build_cpp_tests: 0 \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index 3cb003375030..e67ceb2e6ada 100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -44,3 +44,7 @@ - role: fetch_srcs vars: src_root: "/src" + + - role: build_srcs + vars: + src_root: "/src" diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml new file mode 100644 index 000000000000..d89b16b02be8 --- /dev/null +++ b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml @@ -0,0 +1,15 @@ +- name: Build XLA computation client library + ansible.builtin.command: + cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 + chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + +- name: Build PyTorch/XLA + ansible.builtin.command: + cmd: python setup.py bdist_wheel + chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + environment: + PYTORCH_BUILD_VERSION: "{{ package_version }}" + TPUVM_MODE: "{{ tpu_vm }}" + XLA_CUDA: "{{ cuda }}" + TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" + BUILD_CPP_TESTS: "{{ build_cpp_tests }}" diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml 
b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml index 246055b343ad..98a661afa6df 100644 --- a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml @@ -4,7 +4,7 @@ state: directory mode: '0755' -- name: "Clone git repo {{ item.repo }}" +- name: "Clone git PyTorch and XLA git repos" ansible.builtin.git: repo: "{{ item.repo }}" dest: "{{ item.dest }}" From 50eafb3607d052969167e52e762eb467a2c7f085 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 20 Jan 2023 16:30:19 +0000 Subject: [PATCH 09/39] WIP in build srcs --- .../playbooks/config/vars.yaml | 6 +- .../roles/build_srcs/tasks/main.yaml | 58 +++++++++++++++++-- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/docker/experimental-ansible/playbooks/config/vars.yaml b/docker/experimental-ansible/playbooks/config/vars.yaml index c0a3a95033fa..ea90d28e9cda 100644 --- a/docker/experimental-ansible/playbooks/config/vars.yaml +++ b/docker/experimental-ansible/playbooks/config/vars.yaml @@ -3,7 +3,7 @@ debian_version: buster clang_version: 11 # PyTorch and PyTorch/XLA wheel version. 
package_version: 1.14 -tpu_vm: -cuda: 1 +tpu_vm: 1 +cuda: 0 tf_cuda_compute_capabilities: 7.0,7.5,8.0 -build_cpp_tests: 0 \ No newline at end of file +build_cpp_tests: 0 diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml index d89b16b02be8..c9017ea2994e 100644 --- a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml @@ -1,11 +1,36 @@ -- name: Build XLA computation client library +- name: Build PyTorch ansible.builtin.command: - cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 - chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + cmd: python setup.py bdist_wheel + chdir: "{{ (src_root, 'pytorch') | path_join }}" + creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}" + environment: + TPUVM_MODE: "{{ tpu_vm }}" + XLA_CUDA: "{{ cuda }}" + USE_CUDA: 0 + TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" + BUILD_CPP_TESTS: "{{ build_cpp_tests }}" + CC: "clang-{{ clang_version }}" + CXX: "clang++-{{ clang_version }}" + PYTORCH_BUILD_NUMBER: 1 + PYTORCH_BUILD_VERSION: "{{ package_version }}" + # TODO: make this env dependent + ARCH: amd64 + ACCELERATOR: "tpu" + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + +- name: Find *.whl files in pytorch/dist + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/dist') | path_join }}" + pattern: "*.whl" + register: pytorch_wheels + +- name: Install PyTorch wheels + ansible.builtin.pip: + name: "{{ pytorch_wheels.files | map(attribute='path') }}" -- name: Build PyTorch/XLA +- name: Build XLA computation client library ansible.builtin.command: - cmd: python setup.py bdist_wheel + cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" environment: PYTORCH_BUILD_VERSION: "{{ package_version }}" @@ -13,3 +38,26 @@ XLA_CUDA: "{{ 
cuda }}" TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" BUILD_CPP_TESTS: "{{ build_cpp_tests }}" + CC: "clang-{{ clang_version }}" + CXX: "clang++-{{ clang_version }}" + # TODO: make this env dependent + ARCH: amd64 + ACCELERATOR: "tpu" + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + +# - name: Build PyTorch/XLA +# ansible.builtin.command: +# cmd: python setup.py bdist_wheel +# chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" +# environment: +# PYTORCH_BUILD_VERSION: "{{ package_version }}" +# TPUVM_MODE: "{{ tpu_vm }}" +# XLA_CUDA: "{{ cuda }}" +# TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" +# BUILD_CPP_TESTS: "{{ build_cpp_tests }}" +# CC: "clang-{{ clang_version }}" +# CXX: "clang++-{{ clang_version }}" +# # TODO: make this env dependent +# ARCH: amd64 +# ACCELERATOR: "tpu" +# LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" From f211dce8248c3492dc2513338f3463a6607de3ac Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 10:57:03 +0000 Subject: [PATCH 10/39] Succesfully build XLA --- .../playbooks/config/vars.yaml | 2 +- .../roles/build_srcs/tasks/main.yaml | 34 ++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docker/experimental-ansible/playbooks/config/vars.yaml b/docker/experimental-ansible/playbooks/config/vars.yaml index ea90d28e9cda..d1a2ced4c5f7 100644 --- a/docker/experimental-ansible/playbooks/config/vars.yaml +++ b/docker/experimental-ansible/playbooks/config/vars.yaml @@ -1,7 +1,7 @@ cuda_repo: ubuntu1804 debian_version: buster clang_version: 11 -# PyTorch and PyTorch/XLA wheel version. +# PyTorch and PyTorch/XLA wheel versions. 
package_version: 1.14 tpu_vm: 1 cuda: 0 diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml index c9017ea2994e..5673d36a291c 100644 --- a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml @@ -40,24 +40,26 @@ BUILD_CPP_TESTS: "{{ build_cpp_tests }}" CC: "clang-{{ clang_version }}" CXX: "clang++-{{ clang_version }}" + XLA_SANDBOX_BUILD: 1 # TODO: make this env dependent ARCH: amd64 ACCELERATOR: "tpu" LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" -# - name: Build PyTorch/XLA -# ansible.builtin.command: -# cmd: python setup.py bdist_wheel -# chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" -# environment: -# PYTORCH_BUILD_VERSION: "{{ package_version }}" -# TPUVM_MODE: "{{ tpu_vm }}" -# XLA_CUDA: "{{ cuda }}" -# TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" -# BUILD_CPP_TESTS: "{{ build_cpp_tests }}" -# CC: "clang-{{ clang_version }}" -# CXX: "clang++-{{ clang_version }}" -# # TODO: make this env dependent -# ARCH: amd64 -# ACCELERATOR: "tpu" -# LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" +- name: Build PyTorch/XLA + ansible.builtin.command: + cmd: python setup.py bdist_wheel + chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" + environment: + PYTORCH_BUILD_VERSION: "{{ package_version }}" + TPUVM_MODE: "{{ tpu_vm }}" + XLA_CUDA: "{{ cuda }}" + TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" + BUILD_CPP_TESTS: "{{ build_cpp_tests }}" + CC: "clang-{{ clang_version }}" + CXX: "clang++-{{ clang_version }}" + XLA_SANDBOX_BUILD: 1 + # TODO: make this env dependent + ARCH: amd64 + ACCELERATOR: "tpu" + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" From 0890c5840a20269ac0cb5250f51187b3d777340f Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 14:57:54 +0000 Subject: [PATCH 11/39] Clean-up and 
merge env variables; Separate stage; arch and accelerator params --- .../playbooks/config/apt.yaml | 15 +++-- .../playbooks/config/env.yaml | 26 +++++++++ .../playbooks/config/pip.yaml | 2 +- .../playbooks/config/vars.yaml | 6 +- .../experimental-ansible/playbooks/dev.yaml | 30 ++++++++-- .../roles/build_srcs/tasks/main.yaml | 56 +++++-------------- 6 files changed, 75 insertions(+), 60 deletions(-) create mode 100644 docker/experimental-ansible/playbooks/config/env.yaml diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/playbooks/config/apt.yaml index 01adea2e7200..2d31923c19c5 100644 --- a/docker/experimental-ansible/playbooks/config/apt.yaml +++ b/docker/experimental-ansible/playbooks/config/apt.yaml @@ -1,6 +1,6 @@ apt: pkgs: - common: + build_common: - ccache - curl - git @@ -14,13 +14,12 @@ apt: - vim - wget - accelerator: - build_cuda: - - cuda-libraries-11-2 - - cuda-toolkit-11-2 - - cuda-minimal-build-11-2 - - libcudnn8=8.1.1.33-1+cuda11.2 - - libcudnn8-dev=8.1.1.33-1+cuda11.2 + build_cuda: + - cuda-libraries-11-2 + - cuda-toolkit-11-2 + - cuda-minimal-build-11-2 + - libcudnn8=8.1.1.33-1+cuda11.2 + - libcudnn8-dev=8.1.1.33-1+cuda11.2 build_amd64: - clang-11 diff --git a/docker/experimental-ansible/playbooks/config/env.yaml b/docker/experimental-ansible/playbooks/config/env.yaml new file mode 100644 index 000000000000..427ce2ef03a5 --- /dev/null +++ b/docker/experimental-ansible/playbooks/config/env.yaml @@ -0,0 +1,26 @@ +# Specified environment variables for building PyTorch, XLA libs. +# Specify only variables required for scripts used during building. +# Variables needed to execute ansible tasks specify in vars.yaml. +env: + common: + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + # See explictyly to 0 as setup.py defualt this flag to true if unset. 
+ BUILD_CPP_TESTS: 0 + CC: "clang-{{ clang_version }}" + CXX: "clang++-{{ clang_version }}" + PYTORCH_BUILD_NUMBER: 1 + PYTORCH_BUILD_VERSION: "{{ package_version }}" + + amd64: + ARCH: amd64 + + aarch64: + + cuda: + TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0 + XLA_CUDA: 1 + + tpu: + ACCELERATOR: tpu + TPUVM_MODE: 1 + diff --git a/docker/experimental-ansible/playbooks/config/pip.yaml b/docker/experimental-ansible/playbooks/config/pip.yaml index aae12147e5e1..fdf81e8825dc 100644 --- a/docker/experimental-ansible/playbooks/config/pip.yaml +++ b/docker/experimental-ansible/playbooks/config/pip.yaml @@ -1,6 +1,6 @@ pip: pkgs: - common: + build_common: - astunparse - cffi - cloud-tpu-client diff --git a/docker/experimental-ansible/playbooks/config/vars.yaml b/docker/experimental-ansible/playbooks/config/vars.yaml index d1a2ced4c5f7..8e5ddac762fd 100644 --- a/docker/experimental-ansible/playbooks/config/vars.yaml +++ b/docker/experimental-ansible/playbooks/config/vars.yaml @@ -2,8 +2,4 @@ cuda_repo: ubuntu1804 debian_version: buster clang_version: 11 # PyTorch and PyTorch/XLA wheel versions. 
-package_version: 1.14 -tpu_vm: 1 -cuda: 0 -tf_cuda_compute_capabilities: 7.0,7.5,8.0 -build_cpp_tests: 0 +package_version: 1.14 \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index e67ceb2e6ada..f990eb34f6da 100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -2,8 +2,16 @@ hosts: 127.0.0.1 connection: local vars_prompt: - - name: stage_env - prompt: "Stage and env type (possible options: build_aarch64, build_amd64)" + - name: stage + prompt: "Stage (accepted values: build, release)" + private: false + + - name: arch + prompt: "Complete steps for a given architecture (accepted values: aarch64, amd64)" + private: false + + - name: accelerator + prompt: "Accelerator type (accepted values: tpu, cuda)" private: false pre_tasks: @@ -15,6 +23,7 @@ - vars.yaml - apt.yaml - pip.yaml + - env.yaml roles: - bazel @@ -28,17 +37,23 @@ # Use `v | default([], true)` to set `v` to an empty array if it evaluates to false. # See https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.default. 
apt_pkgs: "{{ - apt.pkgs.common | default([], true) + apt.pkgs[stage_env] | default([], true) + apt.pkgs[stage + '_common'] | default([], true) + + apt.pkgs[stage + '_' + arch] | default([], true) + + apt.pkgs[stage + '_' + accelerator] | default([], true) }}" apt_repos: "{{ apt.repos }}" pip_pkgs: "{{ - pip.pkgs.common | default([], true) + pip.pkgs[stage_env] | default([], true) + pip.pkgs[stage + '_common'] | default([], true) + + pip.pkgs[stage + '_' + arch] | default([], true) + + pip.pkgs[stage + '_' + accelerator] | default([], true) }}" pip_pkgs_nodeps: "{{ - pip.pkgs_nodeps.common | default([], true) + pip.pkgs_nodeps[stage_env] | default([], true) + pip.pkgs_nodeps[stage + '_common'] | default([], true) + + pip.pkgs_nodeps[stage + '_' + arch] | default([], true) + + pip.pkgs_nodeps[stage + '_' + accelerator] | default([], true) }}" - role: fetch_srcs @@ -48,3 +63,8 @@ - role: build_srcs vars: src_root: "/src" + env: "{{ + env.common | default([], true) | + combine(env[arch] | default([], true)) | + combine(env[accelerator] | default([], true)) + }}" diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml index 5673d36a291c..40280b49c294 100644 --- a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml @@ -3,22 +3,10 @@ cmd: python setup.py bdist_wheel chdir: "{{ (src_root, 'pytorch') | path_join }}" creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}" - environment: - TPUVM_MODE: "{{ tpu_vm }}" - XLA_CUDA: "{{ cuda }}" - USE_CUDA: 0 - TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" - BUILD_CPP_TESTS: "{{ build_cpp_tests }}" - CC: "clang-{{ clang_version }}" - CXX: "clang++-{{ clang_version }}" - PYTORCH_BUILD_NUMBER: 1 - PYTORCH_BUILD_VERSION: "{{ package_version }}" - # TODO: make this env dependent - ARCH: amd64 - ACCELERATOR: "tpu" 
- LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + # Set `USE_CUDA=0` as PyTorch cannot be used with GPU in eager and XLA mode. + environment: "{{ env | combine({'USE_CUDA': 0}) }}" -- name: Find *.whl files in pytorch/dist +- name: Find PyTorch *.whl files in pytorch/dist ansible.builtin.find: path: "{{ (src_root, 'pytorch/dist') | path_join }}" pattern: "*.whl" @@ -32,34 +20,20 @@ ansible.builtin.command: cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" - environment: - PYTORCH_BUILD_VERSION: "{{ package_version }}" - TPUVM_MODE: "{{ tpu_vm }}" - XLA_CUDA: "{{ cuda }}" - TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" - BUILD_CPP_TESTS: "{{ build_cpp_tests }}" - CC: "clang-{{ clang_version }}" - CXX: "clang++-{{ clang_version }}" - XLA_SANDBOX_BUILD: 1 - # TODO: make this env dependent - ARCH: amd64 - ACCELERATOR: "tpu" - LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + environment: "{{ env }}" - name: Build PyTorch/XLA ansible.builtin.command: cmd: python setup.py bdist_wheel chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" - environment: - PYTORCH_BUILD_VERSION: "{{ package_version }}" - TPUVM_MODE: "{{ tpu_vm }}" - XLA_CUDA: "{{ cuda }}" - TF_CUDA_COMPUTE_CAPABILITIES: "{{ tf_cuda_compute_capabilities }}" - BUILD_CPP_TESTS: "{{ build_cpp_tests }}" - CC: "clang-{{ clang_version }}" - CXX: "clang++-{{ clang_version }}" - XLA_SANDBOX_BUILD: 1 - # TODO: make this env dependent - ARCH: amd64 - ACCELERATOR: "tpu" - LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + environment: "{{ env }}" + +- name: Find XLA *.whl files in pytorch/xla/dist + ansible.builtin.find: + path: "{{ (src_root, 'pytorch/xla/dist') | path_join }}" + pattern: "*.whl" + register: xla_wheels + +- name: Install XLA wheels + ansible.builtin.pip: + name: "{{ xla_wheels.files | map(attribute='path') }}" From d9591e505d6117a8f4b031b21d355802ce4b386e Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: 
Mon, 23 Jan 2023 15:11:10 +0000 Subject: [PATCH 12/39] Fix passing env variables; Add missing XLA_SANDBOX_BUILD --- docker/experimental-ansible/playbooks/config/env.yaml | 1 + docker/experimental-ansible/playbooks/dev.yaml | 8 ++++---- .../playbooks/roles/build_srcs/tasks/main.yaml | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docker/experimental-ansible/playbooks/config/env.yaml b/docker/experimental-ansible/playbooks/config/env.yaml index 427ce2ef03a5..18c7a2cf358a 100644 --- a/docker/experimental-ansible/playbooks/config/env.yaml +++ b/docker/experimental-ansible/playbooks/config/env.yaml @@ -10,6 +10,7 @@ env: CXX: "clang++-{{ clang_version }}" PYTORCH_BUILD_NUMBER: 1 PYTORCH_BUILD_VERSION: "{{ package_version }}" + XLA_SANDBOX_BUILD: 1 amd64: ARCH: amd64 diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/playbooks/dev.yaml index f990eb34f6da..1531a3b162a3 100644 --- a/docker/experimental-ansible/playbooks/dev.yaml +++ b/docker/experimental-ansible/playbooks/dev.yaml @@ -63,8 +63,8 @@ - role: build_srcs vars: src_root: "/src" - env: "{{ - env.common | default([], true) | - combine(env[arch] | default([], true)) | - combine(env[accelerator] | default([], true)) + env_vars: "{{ + env.common | default({}, true) | + combine(env[arch] | default({}, true)) | + combine(env[accelerator] | default({}, true)) }}" diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml index 40280b49c294..b305e99cdc7b 100644 --- a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml @@ -4,7 +4,7 @@ chdir: "{{ (src_root, 'pytorch') | path_join }}" creates: "{{ (src_root, 'pytorch/dist/*.whl') | path_join }}" # Set `USE_CUDA=0` as PyTorch cannot be used with GPU in eager and XLA mode. 
- environment: "{{ env | combine({'USE_CUDA': 0}) }}" + environment: "{{ env_vars | combine({'USE_CUDA': 0}) }}" - name: Find PyTorch *.whl files in pytorch/dist ansible.builtin.find: @@ -20,13 +20,13 @@ ansible.builtin.command: cmd: bash build_torch_xla_libs.sh -O -D_GLIBCXX_USE_CXX11_ABI=1 chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" - environment: "{{ env }}" + environment: "{{ env_vars }}" - name: Build PyTorch/XLA ansible.builtin.command: cmd: python setup.py bdist_wheel chdir: "{{ (src_root, 'pytorch/xla') | path_join }}" - environment: "{{ env }}" + environment: "{{ env_vars }}" - name: Find XLA *.whl files in pytorch/xla/dist ansible.builtin.find: From 50cf442a348d3aca4b8408d21a5ada5b8dae91b5 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 15:13:25 +0000 Subject: [PATCH 13/39] Rename playbooks dir to ansible --- docker/experimental-ansible/Dockerfile | 4 ++-- .../experimental-ansible/{playbooks => ansible}/.ansible-lint | 0 .../{playbooks => ansible}/config/apt.yaml | 0 .../{playbooks => ansible}/config/env.yaml | 0 .../{playbooks => ansible}/config/pip.yaml | 0 .../{playbooks => ansible}/config/vars.yaml | 0 .../{playbooks/dev.yaml => ansible/playbook.yaml} | 0 .../{playbooks => ansible}/roles/bazel/defaults/main.yaml | 0 .../{playbooks => ansible}/roles/bazel/tasks/main.yaml | 0 .../{playbooks => ansible}/roles/build_srcs/tasks/main.yaml | 0 .../roles/fetch_srcs/defaults/main.yaml | 0 .../{playbooks => ansible}/roles/fetch_srcs/tasks/main.yaml | 0 .../{playbooks => ansible}/roles/install_deps/tasks/main.yaml | 0 13 files changed, 2 insertions(+), 2 deletions(-) rename docker/experimental-ansible/{playbooks => ansible}/.ansible-lint (100%) rename docker/experimental-ansible/{playbooks => ansible}/config/apt.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/config/env.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/config/pip.yaml (100%) rename docker/experimental-ansible/{playbooks => 
ansible}/config/vars.yaml (100%) rename docker/experimental-ansible/{playbooks/dev.yaml => ansible/playbook.yaml} (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/bazel/defaults/main.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/bazel/tasks/main.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/build_srcs/tasks/main.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/fetch_srcs/defaults/main.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/fetch_srcs/tasks/main.yaml (100%) rename docker/experimental-ansible/{playbooks => ansible}/roles/install_deps/tasks/main.yaml (100%) diff --git a/docker/experimental-ansible/Dockerfile b/docker/experimental-ansible/Dockerfile index caf44bb03d4a..2e8947161b43 100644 --- a/docker/experimental-ansible/Dockerfile +++ b/docker/experimental-ansible/Dockerfile @@ -6,6 +6,6 @@ FROM python:${python_version}-${debian_version} AS build RUN pip install ansible COPY ansible /ansible -WORKDIR /ansible +WORKDIR /ansible -RUN ansible-playbook dev.yaml \ No newline at end of file +RUN ansible-playbook -v playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" \ No newline at end of file diff --git a/docker/experimental-ansible/playbooks/.ansible-lint b/docker/experimental-ansible/ansible/.ansible-lint similarity index 100% rename from docker/experimental-ansible/playbooks/.ansible-lint rename to docker/experimental-ansible/ansible/.ansible-lint diff --git a/docker/experimental-ansible/playbooks/config/apt.yaml b/docker/experimental-ansible/ansible/config/apt.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/config/apt.yaml rename to docker/experimental-ansible/ansible/config/apt.yaml diff --git a/docker/experimental-ansible/playbooks/config/env.yaml b/docker/experimental-ansible/ansible/config/env.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/config/env.yaml 
rename to docker/experimental-ansible/ansible/config/env.yaml diff --git a/docker/experimental-ansible/playbooks/config/pip.yaml b/docker/experimental-ansible/ansible/config/pip.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/config/pip.yaml rename to docker/experimental-ansible/ansible/config/pip.yaml diff --git a/docker/experimental-ansible/playbooks/config/vars.yaml b/docker/experimental-ansible/ansible/config/vars.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/config/vars.yaml rename to docker/experimental-ansible/ansible/config/vars.yaml diff --git a/docker/experimental-ansible/playbooks/dev.yaml b/docker/experimental-ansible/ansible/playbook.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/dev.yaml rename to docker/experimental-ansible/ansible/playbook.yaml diff --git a/docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml b/docker/experimental-ansible/ansible/roles/bazel/defaults/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/bazel/defaults/main.yaml rename to docker/experimental-ansible/ansible/roles/bazel/defaults/main.yaml diff --git a/docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/bazel/tasks/main.yaml rename to docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml diff --git a/docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/build_srcs/tasks/main.yaml rename to docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml 
b/docker/experimental-ansible/ansible/roles/fetch_srcs/defaults/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/fetch_srcs/defaults/main.yaml rename to docker/experimental-ansible/ansible/roles/fetch_srcs/defaults/main.yaml diff --git a/docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/fetch_srcs/tasks/main.yaml rename to docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml diff --git a/docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/install_deps/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/playbooks/roles/install_deps/tasks/main.yaml rename to docker/experimental-ansible/ansible/roles/install_deps/tasks/main.yaml From 56dfbe2a0cbdfd57614b6211566e23bc65c3398e Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 15:25:33 +0000 Subject: [PATCH 14/39] Add cloudbuild file that uses ansible playbook --- docker/experimental-ansible/cloudbuild.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 docker/experimental-ansible/cloudbuild.yaml diff --git a/docker/experimental-ansible/cloudbuild.yaml b/docker/experimental-ansible/cloudbuild.yaml new file mode 100644 index 000000000000..53f820a9802e --- /dev/null +++ b/docker/experimental-ansible/cloudbuild.yaml @@ -0,0 +1,15 @@ +steps: +- name: 'gcr.io/cloud-builders/docker' + args: + - build + - -t=us-central2-docker.pkg.dev/$PROJECT_ID/docker-repo/toolchain_tpu:latest + - '.' 
+images: +- us-central2-docker.pkg.dev/$PROJECT_ID/docker-repo/toolchain_tpu:latest + +options: + pool: + name: 'projects/core-ml-engprod-build-farm/locations/europe-west1/workerPools/compilerfarm' + dynamic_substitutions: true + substitution_option: 'ALLOW_LOOSE' +timeout: 24000s From a319f14b6805d1104358096ceeedf7550861d628 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 15:39:29 +0000 Subject: [PATCH 15/39] Add 'signed-by' to all apt repos --- docker/experimental-ansible/ansible/config/apt.yaml | 8 ++++---- docker/experimental-ansible/ansible/config/pip.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/experimental-ansible/ansible/config/apt.yaml b/docker/experimental-ansible/ansible/config/apt.yaml index 2d31923c19c5..1d9089246367 100644 --- a/docker/experimental-ansible/ansible/config/apt.yaml +++ b/docker/experimental-ansible/ansible/config/apt.yaml @@ -40,8 +40,8 @@ apt: keyring: /usr/share/keyrings/cuda.pgp repos: - - "deb http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" - - "deb-src http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" - # signed-by path should match the Google Cloud keyring path above. + # signed-by path should match the corresponding keyring path above. 
+ - "deb [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" + - "deb-src [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" - "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" - - "deb https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /" \ No newline at end of file + - "deb [signed-by=/usr/share/keyrings/cuda.pgp] https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /" \ No newline at end of file diff --git a/docker/experimental-ansible/ansible/config/pip.yaml b/docker/experimental-ansible/ansible/config/pip.yaml index fdf81e8825dc..4ced1056cb9e 100644 --- a/docker/experimental-ansible/ansible/config/pip.yaml +++ b/docker/experimental-ansible/ansible/config/pip.yaml @@ -34,7 +34,7 @@ pip: build_aarch64: pkgs_nodeps: - common: + build_common: build_amd64: From 31d6c22e9e466ac15ff0914335a55ebb1f194c9b Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 15:44:40 +0000 Subject: [PATCH 16/39] Add placeholders for release config vars --- docker/experimental-ansible/ansible/config/apt.yaml | 6 ++++++ docker/experimental-ansible/ansible/config/pip.yaml | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docker/experimental-ansible/ansible/config/apt.yaml b/docker/experimental-ansible/ansible/config/apt.yaml index 1d9089246367..e9803b67b885 100644 --- a/docker/experimental-ansible/ansible/config/apt.yaml +++ b/docker/experimental-ansible/ansible/config/apt.yaml @@ -29,6 +29,12 @@ apt: - gcc-10 - g++-10 + release_common: # TODO + + release_cuda: + + release_amd64: + # Specify objects with string fields `url` and `keyring`. # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu. 
signing_keys: diff --git a/docker/experimental-ansible/ansible/config/pip.yaml b/docker/experimental-ansible/ansible/config/pip.yaml index 4ced1056cb9e..6add492e0208 100644 --- a/docker/experimental-ansible/ansible/config/pip.yaml +++ b/docker/experimental-ansible/ansible/config/pip.yaml @@ -33,9 +33,11 @@ pip: build_aarch64: - pkgs_nodeps: - build_common: + release_common: - build_amd64: + release_tpu: - build_aarch64: + release_amd64: + + pkgs_nodeps: + release_common: From d298a416b0c3da58015d4ad3c3118331a4133e53 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 16:10:18 +0000 Subject: [PATCH 17/39] Add release build --- docker/experimental-ansible/Dockerfile | 7 ++++++- .../experimental-ansible/ansible/config/apt.yaml | 15 ++++++++++++--- .../experimental-ansible/ansible/config/pip.yaml | 9 +++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docker/experimental-ansible/Dockerfile b/docker/experimental-ansible/Dockerfile index 2e8947161b43..3500c6eef68b 100644 --- a/docker/experimental-ansible/Dockerfile +++ b/docker/experimental-ansible/Dockerfile @@ -8,4 +8,9 @@ RUN pip install ansible COPY ansible /ansible WORKDIR /ansible -RUN ansible-playbook -v playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" \ No newline at end of file +RUN ansible-playbook -v playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" + +FROM build AS release + +WORKDIR /ansible +RUN ansible-playbook -v playbook.yaml -e "stage=release arch=amd64 accelerator=tpu" diff --git a/docker/experimental-ansible/ansible/config/apt.yaml b/docker/experimental-ansible/ansible/config/apt.yaml index e9803b67b885..6cb1e2d2c822 100644 --- a/docker/experimental-ansible/ansible/config/apt.yaml +++ b/docker/experimental-ansible/ansible/config/apt.yaml @@ -29,11 +29,20 @@ apt: - gcc-10 - g++-10 - release_common: # TODO + release_common: + - curl + - git + - gnupg + - google-cloud-cli + - libgomp1 + - libomp5 + - libopenblas-base + - patch release_cuda: - - 
release_amd64: + - cuda-libraries-11-2 + - cuda-minimal-build-11-2 + - libcudnn8=8.1.1.33-1+cuda11.2 # Specify objects with string fields `url` and `keyring`. # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu. diff --git a/docker/experimental-ansible/ansible/config/pip.yaml b/docker/experimental-ansible/ansible/config/pip.yaml index 6add492e0208..45c8ef00e498 100644 --- a/docker/experimental-ansible/ansible/config/pip.yaml +++ b/docker/experimental-ansible/ansible/config/pip.yaml @@ -34,10 +34,15 @@ pip: build_aarch64: release_common: + - numpy + - pyyaml + - mkl + - mkl-include release_tpu: - - release_amd64: + - torch_xla[tpuvm] pkgs_nodeps: release_common: + - torchvision + - pillow From c59b3cb9ffd750d5918e927b3a1dd117a2ed4301 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 23 Jan 2023 17:16:17 +0000 Subject: [PATCH 18/39] Disable verbose ansible in docker build --- docker/experimental-ansible/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/experimental-ansible/Dockerfile b/docker/experimental-ansible/Dockerfile index 3500c6eef68b..5ff0204ef15f 100644 --- a/docker/experimental-ansible/Dockerfile +++ b/docker/experimental-ansible/Dockerfile @@ -8,9 +8,9 @@ RUN pip install ansible COPY ansible /ansible WORKDIR /ansible -RUN ansible-playbook -v playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" +RUN ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" FROM build AS release WORKDIR /ansible -RUN ansible-playbook -v playbook.yaml -e "stage=release arch=amd64 accelerator=tpu" +RUN ansible-playbook playbook.yaml -e "stage=release arch=amd64 accelerator=tpu" From 96fbb5f8c602f458fc1f74072d2382b53ff98bac Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 24 Jan 2023 10:27:12 +0000 Subject: [PATCH 19/39] Add ansible config file and enable displaying tasks duration --- docker/experimental-ansible/ansible/ansible.cfg | 3 +++ 1 file changed, 3 insertions(+) create 
mode 100644 docker/experimental-ansible/ansible/ansible.cfg diff --git a/docker/experimental-ansible/ansible/ansible.cfg b/docker/experimental-ansible/ansible/ansible.cfg new file mode 100644 index 000000000000..839924ec2de9 --- /dev/null +++ b/docker/experimental-ansible/ansible/ansible.cfg @@ -0,0 +1,3 @@ +[defaults] +# Displays tasks execution duration. +callbacks_enabled = profile_tasks From 2be17cd3aac55d7ed20ac9540837bd10b0d2de8d Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 24 Jan 2023 11:23:31 +0000 Subject: [PATCH 20/39] Add TORCH_XLA_VERSION env variable, which is used when building XLA --- docker/experimental-ansible/ansible/config/env.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/experimental-ansible/ansible/config/env.yaml b/docker/experimental-ansible/ansible/config/env.yaml index 18c7a2cf358a..4065b80a0a44 100644 --- a/docker/experimental-ansible/ansible/config/env.yaml +++ b/docker/experimental-ansible/ansible/config/env.yaml @@ -9,6 +9,7 @@ env: CC: "clang-{{ clang_version }}" CXX: "clang++-{{ clang_version }}" PYTORCH_BUILD_NUMBER: 1 + TORCH_XLA_VERSION: "{{ package_version }}" PYTORCH_BUILD_VERSION: "{{ package_version }}" XLA_SANDBOX_BUILD: 1 From 642bc8e9143a662b9e6b3b359a313bd34f3f108d Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 24 Jan 2023 13:06:23 +0000 Subject: [PATCH 21/39] Disable Ansible warnings about no inventory; Force git clone; revert to previous version of PyTorch and XLA repos --- docker/experimental-ansible/ansible/ansible.cfg | 8 ++++++++ docker/experimental-ansible/ansible/playbook.yaml | 2 ++ .../ansible/roles/fetch_srcs/tasks/main.yaml | 1 + 3 files changed, 11 insertions(+) diff --git a/docker/experimental-ansible/ansible/ansible.cfg b/docker/experimental-ansible/ansible/ansible.cfg index 839924ec2de9..b6c86aec01f0 100644 --- a/docker/experimental-ansible/ansible/ansible.cfg +++ b/docker/experimental-ansible/ansible/ansible.cfg @@ -1,3 +1,11 @@ +# See 
https://docs.ansible.com/ansible/latest/reference_appendices/config.html +# for various configuration options. + [defaults] # Displays tasks execution duration. callbacks_enabled = profile_tasks +# The playbooks is only run on the implicit localhost. +# Silence warning about empty hosts inventory. +localhost_warning = False +# Silence warning about no inventory. +inventory_unparsed_warning = False \ No newline at end of file diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental-ansible/ansible/playbook.yaml index 1531a3b162a3..4aacc6dc1a38 100644 --- a/docker/experimental-ansible/ansible/playbook.yaml +++ b/docker/experimental-ansible/ansible/playbook.yaml @@ -59,6 +59,8 @@ - role: fetch_srcs vars: src_root: "/src" + pytorch_git_rev: "afe6ea884fa21d424d614efff6bd3a75ec4357af" + xla_git_rev: "aff3513234fe344bbf159af5e376e5a45792f5dc" - role: build_srcs vars: diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml index 98a661afa6df..3c7695d0e9a9 100644 --- a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml @@ -10,6 +10,7 @@ dest: "{{ item.dest }}" version: "{{ item.version }}" depth: 1 + force: true loop: - repo: https://github.com/pytorch/pytorch dest: "{{ (src_root, 'pytorch') | path_join }}" From 28fa5cdc70e3a1c29c2668dd818e5ad316975660 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 24 Jan 2023 14:01:07 +0000 Subject: [PATCH 22/39] Add basic tests for bazel and fetch_srcs roles --- docker/experimental-ansible/ansible/ansible.cfg | 3 +++ docker/experimental-ansible/ansible/playbook.yaml | 2 +- .../ansible/roles/bazel/tasks/main.yaml | 5 +++++ .../ansible/roles/bazel/tasks/tests.yaml | 3 +++ .../ansible/roles/fetch_srcs/tasks/main.yaml | 5 +++++ .../ansible/roles/fetch_srcs/tasks/tests.yaml | 13 +++++++++++++ 6 files changed, 30 
insertions(+), 1 deletion(-) create mode 100644 docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml create mode 100644 docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml diff --git a/docker/experimental-ansible/ansible/ansible.cfg b/docker/experimental-ansible/ansible/ansible.cfg index b6c86aec01f0..cb7519265802 100644 --- a/docker/experimental-ansible/ansible/ansible.cfg +++ b/docker/experimental-ansible/ansible/ansible.cfg @@ -7,5 +7,8 @@ callbacks_enabled = profile_tasks # The playbooks is only run on the implicit localhost. # Silence warning about empty hosts inventory. localhost_warning = False + +[inventory] # Silence warning about no inventory. +# This option is available since Ansible 2.14 (available only with Python 3.9+). inventory_unparsed_warning = False \ No newline at end of file diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental-ansible/ansible/playbook.yaml index 4aacc6dc1a38..727867b76c62 100644 --- a/docker/experimental-ansible/ansible/playbook.yaml +++ b/docker/experimental-ansible/ansible/playbook.yaml @@ -1,5 +1,5 @@ - name: "Install build dependencies" - hosts: 127.0.0.1 + hosts: localhost connection: local vars_prompt: - name: stage diff --git a/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml index aad6bc3332bc..038a5a1cefa5 100644 --- a/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml @@ -3,3 +3,8 @@ url: "https://github.com/bazelbuild/bazelisk/releases/download/v{{ bazelisk_version }}/bazelisk-linux-amd64" dest: /usr/local/bin/bazel mode: 'u=rxw,g=rw,o=r' + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml new file mode 100644 index 
000000000000..4cb19f772109 --- /dev/null +++ b/docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml @@ -0,0 +1,3 @@ +- name: "Bazel --version runs succesfully" + ansible.builtin.command: + cmd: bazel --version diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml index 3c7695d0e9a9..d8eb34834c9e 100644 --- a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml @@ -36,3 +36,8 @@ strip: 1 basedir: "{{ (src_root, 'pytorch/xla/third_party/tensorflow') | path_join }}" loop: "{{ tf_patches.files | map(attribute='path') }}" + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml new file mode 100644 index 000000000000..3c5ebde5146b --- /dev/null +++ b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml @@ -0,0 +1,13 @@ +- name: Check that setup.py files are present in XLA and PyTorch repos + ansible.builtin.stat: + path: "{{ item }}" + register: _res + loop: + - "{{ (src_root, 'pytorch/setup.py') | path_join }}" + - "{{ (src_root, 'pytorch/xla/setup.py') | path_join }}" + +- name: Assert stat results + ansible.builtin.assert: + that: "{{ item.stat.exists }}" + fail_msg: "{{ item.item }} doesn't exist" + loop: "{{ _res.results }}" From bcda96e8a2c36bfd7d37ed87bd23a7c2bd52ceee Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 24 Jan 2023 14:49:06 +0000 Subject: [PATCH 23/39] Add import tests for build_srcs --- docker/experimental-ansible/ansible/playbook.yaml | 4 ++-- .../ansible/roles/build_srcs/tasks/main.yaml | 5 +++++ .../ansible/roles/build_srcs/tasks/tests.yaml | 11 +++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 
docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental-ansible/ansible/playbook.yaml index 727867b76c62..07c4b29ddf46 100644 --- a/docker/experimental-ansible/ansible/playbook.yaml +++ b/docker/experimental-ansible/ansible/playbook.yaml @@ -59,8 +59,8 @@ - role: fetch_srcs vars: src_root: "/src" - pytorch_git_rev: "afe6ea884fa21d424d614efff6bd3a75ec4357af" - xla_git_rev: "aff3513234fe344bbf159af5e376e5a45792f5dc" + pytorch_git_rev: "44b7a0b7efff24c8110ebc4e0651b1a59e44e27b" + xla_git_rev: "5c54655a8d43f48d973b664c20afaa91c7278574" - role: build_srcs vars: diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml index b305e99cdc7b..4709c19cd173 100644 --- a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml @@ -37,3 +37,8 @@ - name: Install XLA wheels ansible.builtin.pip: name: "{{ xla_wheels.files | map(attribute='path') }}" + +- name: "Tests" + include_tasks: tests.yaml + tags: + - tests diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml new file mode 100644 index 000000000000..236550ea83d9 --- /dev/null +++ b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml @@ -0,0 +1,11 @@ +- name: "Check that various import statements work" + ansible.builtin.command: + cmd: "{{ item }}" + loop: + - python -c "import torchgen" + - python -c "import torch" + # Currently doesn't work due to: + # ImportError: /usr/local/lib/python3.8/site-packages/_XLAC.cpython-38-x86_64-linux-gnu.so: + # undefined symbol: _ZNK3c1010TensorImpl18compute_contiguousENS0_8identityINS_7SymBoolEEE" + # - python -c "import torch_xla" + # - python -c "import torch_xla.core.xla_model" From 
ea4d548ab6e18101e52f67a609d42328c3058374 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 27 Jan 2023 11:06:39 +0000 Subject: [PATCH 24/39] Set git versions for which imports work --- docker/experimental-ansible/ansible/playbook.yaml | 4 ++-- .../ansible/roles/build_srcs/tasks/main.yaml | 2 ++ .../ansible/roles/build_srcs/tasks/tests.yaml | 7 ++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental-ansible/ansible/playbook.yaml index 07c4b29ddf46..f3ea2e25e4ca 100644 --- a/docker/experimental-ansible/ansible/playbook.yaml +++ b/docker/experimental-ansible/ansible/playbook.yaml @@ -59,8 +59,8 @@ - role: fetch_srcs vars: src_root: "/src" - pytorch_git_rev: "44b7a0b7efff24c8110ebc4e0651b1a59e44e27b" - xla_git_rev: "5c54655a8d43f48d973b664c20afaa91c7278574" + pytorch_git_rev: 5e9fa0a8fc87f9a626f144bb5527da0426ac384b + xla_git_rev: 6acc0c9cc7aa0738ec792ddd4780d14f66d6bd8c - role: build_srcs vars: diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml index 4709c19cd173..667ee43f9219 100644 --- a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml @@ -15,6 +15,7 @@ - name: Install PyTorch wheels ansible.builtin.pip: name: "{{ pytorch_wheels.files | map(attribute='path') }}" + state: "forcereinstall" - name: Build XLA computation client library ansible.builtin.command: @@ -37,6 +38,7 @@ - name: Install XLA wheels ansible.builtin.pip: name: "{{ xla_wheels.files | map(attribute='path') }}" + state: "forcereinstall" - name: "Tests" include_tasks: tests.yaml diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml index 236550ea83d9..7d27577bc5cb 100644 --- 
a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml +++ b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml @@ -4,8 +4,5 @@ loop: - python -c "import torchgen" - python -c "import torch" - # Currently doesn't work due to: - # ImportError: /usr/local/lib/python3.8/site-packages/_XLAC.cpython-38-x86_64-linux-gnu.so: - # undefined symbol: _ZNK3c1010TensorImpl18compute_contiguousENS0_8identityINS_7SymBoolEEE" - # - python -c "import torch_xla" - # - python -c "import torch_xla.core.xla_model" + - python -c "import torch_xla" + - python -c "import torch_xla.core.xla_model" From a79495d44029146a24d63d5a799efdfbafdfd894 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Fri, 27 Jan 2023 14:12:11 +0000 Subject: [PATCH 25/39] Pass env vars to imports test --- .../ansible/roles/build_srcs/tasks/tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml index 7d27577bc5cb..9e925700ddeb 100644 --- a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml +++ b/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml @@ -1,6 +1,7 @@ - name: "Check that various import statements work" ansible.builtin.command: cmd: "{{ item }}" + environment: "{{ env_vars | combine({'USE_CUDA': 0}) }}" loop: - python -c "import torchgen" - python -c "import torch" From 271c90cc0ff073f5e1e8a9dad430cf5c604ddb92 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 30 Jan 2023 12:53:22 +0000 Subject: [PATCH 26/39] Add configure_env role and apply minor cleanup --- .../ansible/config/env.yaml | 24 +++++++++++++++---- .../ansible/playbook.yaml | 15 +++++++++--- .../roles/configure_env/tasks/main.yaml | 8 +++++++ .../ansible/roles/fetch_srcs/tasks/tests.yaml | 4 ++-- 4 files changed, 41 insertions(+), 10 deletions(-) create mode 100644 
docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml diff --git a/docker/experimental-ansible/ansible/config/env.yaml b/docker/experimental-ansible/ansible/config/env.yaml index 4065b80a0a44..c4458dd112d9 100644 --- a/docker/experimental-ansible/ansible/config/env.yaml +++ b/docker/experimental-ansible/ansible/config/env.yaml @@ -1,10 +1,24 @@ -# Specified environment variables for building PyTorch, XLA libs. -# Specify only variables required for scripts used during building. -# Variables needed to execute ansible tasks specify in vars.yaml. -env: +# Variables that will be stored in /etc/environment file. They'll be accessible +# for all processes on the host. +runtime_env: + common: + CC: "clang-{{ clang_version }}" + CXX: "clang++-{{ clang_version }}" + LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" + + tpu: + ACCELERATOR: tpu + TPUVM_MODE: 1 + + cuda: + TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0 + XLA_CUDA: 1 + +# Variables that will be passed to shell environment only for building PyTorch and XLA libs. +build_env: common: LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib" - # See explictyly to 0 as setup.py defualt this flag to true if unset. + # Set explicitly to 0 as setup.py defaults this flag to true if unset. 
BUILD_CPP_TESTS: 0 CC: "clang-{{ clang_version }}" CXX: "clang++-{{ clang_version }}" diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental-ansible/ansible/playbook.yaml index f3ea2e25e4ca..d4265e805557 100644 --- a/docker/experimental-ansible/ansible/playbook.yaml +++ b/docker/experimental-ansible/ansible/playbook.yaml @@ -66,7 +66,16 @@ vars: src_root: "/src" env_vars: "{{ - env.common | default({}, true) | - combine(env[arch] | default({}, true)) | - combine(env[accelerator] | default({}, true)) + build_env.common | default({}, true) | + combine(build_env[arch] | default({}, true)) | + combine(build_env[accelerator] | default({}, true)) }}" + + - role: configure_env + vars: + env_vars: "{{ + runtime_env.common | default({}, true) | + combine(runtime_env[arch] | default({}, true)) | + combine(runtime_env[accelerator] | default({}, true)) + }}" + when: stage == "release" diff --git a/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml new file mode 100644 index 000000000000..b8585c2501ca --- /dev/null +++ b/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml @@ -0,0 +1,8 @@ +- name: Set environment variables required during runtime in /etc/environment + ansible.builtin.lineinfile: + path: /etc/environment + # Match existing entry for a given env variable and replace it with + # a new value. 
+ regexp: "^{{ item }}=" + line: "{{ item }}={{ env_vars[item] }}" + loop: "{{ env_vars.keys() | list }}" diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml index 3c5ebde5146b..f3c341197dc3 100644 --- a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml +++ b/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml @@ -1,4 +1,4 @@ -- name: Check that setup.py files are present in XLA and PyTorch repos +- name: Retrieve status of setup.py files in XLA and PyTorch repos ansible.builtin.stat: path: "{{ item }}" register: _res @@ -6,7 +6,7 @@ - "{{ (src_root, 'pytorch/setup.py') | path_join }}" - "{{ (src_root, 'pytorch/xla/setup.py') | path_join }}" -- name: Assert stat results +- name: Assert that setup.py files exist ansible.builtin.assert: that: "{{ item.stat.exists }}" fail_msg: "{{ item.item }} doesn't exist" From b88d16978ce1c106950ee077900bf06f8b41285d Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 30 Jan 2023 14:21:59 +0000 Subject: [PATCH 27/39] Don't replace existing env var entries in /etc/environment --- .../ansible/roles/configure_env/tasks/main.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml b/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml index b8585c2501ca..949ad0d02bdb 100644 --- a/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml +++ b/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml @@ -1,8 +1,5 @@ -- name: Set environment variables required during runtime in /etc/environment +- name: Append environment variables required during runtime to /etc/environment ansible.builtin.lineinfile: path: /etc/environment - # Match existing entry for a given env variable and replace it with - # a new value. 
- regexp: "^{{ item }}=" line: "{{ item }}={{ env_vars[item] }}" loop: "{{ env_vars.keys() | list }}" From b6892c197c8882523c6e4a03f85535e0eb65d5b6 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Mon, 30 Jan 2023 14:54:02 +0000 Subject: [PATCH 28/39] Move ansible dir to /docker/experimental --- docker/experimental-ansible/Dockerfile | 16 ---------------- docker/experimental-ansible/cloudbuild.yaml | 15 --------------- .../ansible/.ansible-lint | 0 .../ansible/ansible.cfg | 0 .../ansible/config/apt.yaml | 0 .../ansible/config/env.yaml | 0 .../ansible/config/pip.yaml | 0 .../ansible/config/vars.yaml | 0 .../ansible/playbook.yaml | 0 .../ansible/roles/bazel/defaults/main.yaml | 0 .../ansible/roles/bazel/tasks/main.yaml | 0 .../ansible/roles/bazel/tasks/tests.yaml | 0 .../ansible/roles/build_srcs/tasks/main.yaml | 0 .../ansible/roles/build_srcs/tasks/tests.yaml | 0 .../ansible/roles/configure_env/tasks/main.yaml | 0 .../ansible/roles/fetch_srcs/defaults/main.yaml | 0 .../ansible/roles/fetch_srcs/tasks/main.yaml | 0 .../ansible/roles/fetch_srcs/tasks/tests.yaml | 0 .../ansible/roles/install_deps/tasks/main.yaml | 0 19 files changed, 31 deletions(-) delete mode 100644 docker/experimental-ansible/Dockerfile delete mode 100644 docker/experimental-ansible/cloudbuild.yaml rename docker/{experimental-ansible => experimental}/ansible/.ansible-lint (100%) rename docker/{experimental-ansible => experimental}/ansible/ansible.cfg (100%) rename docker/{experimental-ansible => experimental}/ansible/config/apt.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/config/env.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/config/pip.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/config/vars.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/playbook.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/bazel/defaults/main.yaml (100%) rename 
docker/{experimental-ansible => experimental}/ansible/roles/bazel/tasks/main.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/bazel/tasks/tests.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/build_srcs/tasks/main.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/build_srcs/tasks/tests.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/configure_env/tasks/main.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/fetch_srcs/defaults/main.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/fetch_srcs/tasks/main.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/fetch_srcs/tasks/tests.yaml (100%) rename docker/{experimental-ansible => experimental}/ansible/roles/install_deps/tasks/main.yaml (100%) diff --git a/docker/experimental-ansible/Dockerfile b/docker/experimental-ansible/Dockerfile deleted file mode 100644 index 5ff0204ef15f..000000000000 --- a/docker/experimental-ansible/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -ARG python_version=3.8 -ARG debian_version=buster - -FROM python:${python_version}-${debian_version} AS build - -RUN pip install ansible - -COPY ansible /ansible -WORKDIR /ansible - -RUN ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu" - -FROM build AS release - -WORKDIR /ansible -RUN ansible-playbook playbook.yaml -e "stage=release arch=amd64 accelerator=tpu" diff --git a/docker/experimental-ansible/cloudbuild.yaml b/docker/experimental-ansible/cloudbuild.yaml deleted file mode 100644 index 53f820a9802e..000000000000 --- a/docker/experimental-ansible/cloudbuild.yaml +++ /dev/null @@ -1,15 +0,0 @@ -steps: -- name: 'gcr.io/cloud-builders/docker' - args: - - build - - -t=us-central2-docker.pkg.dev/$PROJECT_ID/docker-repo/toolchain_tpu:latest - - '.' 
-images: -- us-central2-docker.pkg.dev/$PROJECT_ID/docker-repo/toolchain_tpu:latest - -options: - pool: - name: 'projects/core-ml-engprod-build-farm/locations/europe-west1/workerPools/compilerfarm' - dynamic_substitutions: true - substitution_option: 'ALLOW_LOOSE' -timeout: 24000s diff --git a/docker/experimental-ansible/ansible/.ansible-lint b/docker/experimental/ansible/.ansible-lint similarity index 100% rename from docker/experimental-ansible/ansible/.ansible-lint rename to docker/experimental/ansible/.ansible-lint diff --git a/docker/experimental-ansible/ansible/ansible.cfg b/docker/experimental/ansible/ansible.cfg similarity index 100% rename from docker/experimental-ansible/ansible/ansible.cfg rename to docker/experimental/ansible/ansible.cfg diff --git a/docker/experimental-ansible/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml similarity index 100% rename from docker/experimental-ansible/ansible/config/apt.yaml rename to docker/experimental/ansible/config/apt.yaml diff --git a/docker/experimental-ansible/ansible/config/env.yaml b/docker/experimental/ansible/config/env.yaml similarity index 100% rename from docker/experimental-ansible/ansible/config/env.yaml rename to docker/experimental/ansible/config/env.yaml diff --git a/docker/experimental-ansible/ansible/config/pip.yaml b/docker/experimental/ansible/config/pip.yaml similarity index 100% rename from docker/experimental-ansible/ansible/config/pip.yaml rename to docker/experimental/ansible/config/pip.yaml diff --git a/docker/experimental-ansible/ansible/config/vars.yaml b/docker/experimental/ansible/config/vars.yaml similarity index 100% rename from docker/experimental-ansible/ansible/config/vars.yaml rename to docker/experimental/ansible/config/vars.yaml diff --git a/docker/experimental-ansible/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml similarity index 100% rename from docker/experimental-ansible/ansible/playbook.yaml rename to 
docker/experimental/ansible/playbook.yaml diff --git a/docker/experimental-ansible/ansible/roles/bazel/defaults/main.yaml b/docker/experimental/ansible/roles/bazel/defaults/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/bazel/defaults/main.yaml rename to docker/experimental/ansible/roles/bazel/defaults/main.yaml diff --git a/docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml b/docker/experimental/ansible/roles/bazel/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/bazel/tasks/main.yaml rename to docker/experimental/ansible/roles/bazel/tasks/main.yaml diff --git a/docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml b/docker/experimental/ansible/roles/bazel/tasks/tests.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/bazel/tasks/tests.yaml rename to docker/experimental/ansible/roles/bazel/tasks/tests.yaml diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml b/docker/experimental/ansible/roles/build_srcs/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/build_srcs/tasks/main.yaml rename to docker/experimental/ansible/roles/build_srcs/tasks/main.yaml diff --git a/docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml b/docker/experimental/ansible/roles/build_srcs/tasks/tests.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/build_srcs/tasks/tests.yaml rename to docker/experimental/ansible/roles/build_srcs/tasks/tests.yaml diff --git a/docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml b/docker/experimental/ansible/roles/configure_env/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/configure_env/tasks/main.yaml rename to docker/experimental/ansible/roles/configure_env/tasks/main.yaml diff --git 
a/docker/experimental-ansible/ansible/roles/fetch_srcs/defaults/main.yaml b/docker/experimental/ansible/roles/fetch_srcs/defaults/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/fetch_srcs/defaults/main.yaml rename to docker/experimental/ansible/roles/fetch_srcs/defaults/main.yaml diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml b/docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/main.yaml rename to docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml diff --git a/docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml b/docker/experimental/ansible/roles/fetch_srcs/tasks/tests.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/fetch_srcs/tasks/tests.yaml rename to docker/experimental/ansible/roles/fetch_srcs/tasks/tests.yaml diff --git a/docker/experimental-ansible/ansible/roles/install_deps/tasks/main.yaml b/docker/experimental/ansible/roles/install_deps/tasks/main.yaml similarity index 100% rename from docker/experimental-ansible/ansible/roles/install_deps/tasks/main.yaml rename to docker/experimental/ansible/roles/install_deps/tasks/main.yaml From 074bd49e7b5b0d197862e69fcb4cab81660c0ce1 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 31 Jan 2023 14:58:02 +0000 Subject: [PATCH 29/39] Remove vars_prompt so that the playbook is not interactive --- docker/experimental/ansible/playbook.yaml | 31 ++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/docker/experimental/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml index d4265e805557..b03e61edeeef 100644 --- a/docker/experimental/ansible/playbook.yaml +++ b/docker/experimental/ansible/playbook.yaml @@ -1,20 +1,27 @@ - name: "Install build dependencies" hosts: localhost connection: local - vars_prompt: - - name: stage - prompt: "Stage 
(accepted values: build, release)" - private: false - - - name: arch - prompt: "Complete steps for a given architecture (accepted values: aarch64, amd64)" - private: false - - - name: accelerator - prompt: "Accelerator type (accepted values: tpu, cuda)" - private: false + # The playbook requires passing 3 variables explicitly: + # - stage: build or release. Different packages are installed depending on + # the chosen stage. + # - arch: aarch64 or amd64. Architecture of the built image and wheels. + # - accelerator: tpu or cuda. Available accelerator. pre_tasks: + - name: "Validate required variables" + ansible.builtin.assert: + that: "{{ lookup('ansible.builtin.vars', item.name) is regex(item.pattern) }}" + fail_msg: | + "Variable '{{ item.name }}' doesn't match pattern '{{ item.pattern }}'" + "Pass required variables with -e option: --e \"{{ item.name }}=\"" + loop: + - name: stage + pattern: ^(build|release)$ + - name: arch + pattern: ^(aarch64|amd64)$ + - name: accelerator + pattern: ^(tpu|cuda)$ + - name: "Include vars from config/{{ item }}" ansible.builtin.include_vars: file: "config/{{ item }}" From e673729bac8b691d9515f4b71384adc05977978f Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 31 Jan 2023 14:58:50 +0000 Subject: [PATCH 30/39] Shorten variable validation error message --- docker/experimental/ansible/playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/experimental/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml index b03e61edeeef..15b08048928b 100644 --- a/docker/experimental/ansible/playbook.yaml +++ b/docker/experimental/ansible/playbook.yaml @@ -13,7 +13,7 @@ that: "{{ lookup('ansible.builtin.vars', item.name) is regex(item.pattern) }}" fail_msg: | "Variable '{{ item.name }}' doesn't match pattern '{{ item.pattern }}'" - "Pass required variables with -e option: --e \"{{ item.name }}=\"" + "Pass the required variable with: --e \"{{ item.name }}=\"" loop: - name: stage pattern: 
^(build|release)$ From 281c4aac203cd509020c6f7dbebc58235dd2ccb5 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 31 Jan 2023 16:02:37 +0000 Subject: [PATCH 31/39] Add readme file; cleanup some variables --- docker/experimental/ansible/README.md | 53 ++++++++++++++++++++ docker/experimental/ansible/config/apt.yaml | 5 +- docker/experimental/ansible/config/env.yaml | 6 +-- docker/experimental/ansible/config/pip.yaml | 4 ++ docker/experimental/ansible/config/vars.yaml | 6 ++- docker/experimental/ansible/playbook.yaml | 6 +-- 6 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 docker/experimental/ansible/README.md diff --git a/docker/experimental/ansible/README.md b/docker/experimental/ansible/README.md new file mode 100644 index 000000000000..acfd35123985 --- /dev/null +++ b/docker/experimental/ansible/README.md @@ -0,0 +1,53 @@ +# Ansible playbook + +This ansible playbook will perform the following actions on the localhost: + * install required pip and apt packages, depending on the specified stage, + architecture and accelerator (see [apt.yaml](config/apt.yaml) and [pip.yaml](config/pip.yaml)). + * fetch bazel (bazelisk version configured via `bazelisk_version` in [roles/bazel/defaults/main.yaml](roles/bazel/defaults/main.yaml)), + * set required environment variables (see [env.yaml](config/env.yaml)), + * build and install PyTorch and XLA wheels, + * apply infrastructure tests (see `*/tests.yaml` files in [roles](roles)). + +## Prerequisites + +* Python 3.8+ +* Ansible. Install with `pip install ansible`. + +## Running + +The playbook requires explicitly passing 3 variables that configure playbook +behavior (installed pip/apt packages and set environment variables): +* `stage`: build or release. Different packages are installed depending on + the chosen stage. +* `arch`: aarch64 or amd64. Architecture of the built image and wheels. +* `accelerator`: tpu or cuda. Available accelerator. + +The variables can be passed through `-e` flag: `-e "<name>=<value>"`.
+ +Example: `ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu"` + +## Config structure + +The playbook configuration is split into 4 files, one per logical system. +The configuration is simply loaded as playbook variables which are then passed +to specific roles and tasks. +Only variables in [config/env.yaml](config/env.yaml) are passed as env variables. + +* [apt.yaml](config/apt.yaml) - specifies apt packages for each stage and architecture or accelerator. + Packages shared between all architectures and accelerators in a given stage + are specified in `*_common`. They are appended to any architecture specific list. + + This config also contains a list of required apt repos and signing keys. + These variables are mainly consumed by the [install_deps](roles/install_deps/tasks/main.yaml) role. + +* [pip.yaml](config/pip.yaml) - similarly to apt.yaml, lists pip packages per stage and arch / accelerator. + In both pip and apt config files stage and arch / accelerator are + concatenated together and specified under one key (e.g. build_amd64, release_tpu). + +* [env.yaml](config/env.yaml) - contains Ansible variables that are passed as env variables when + building PyTorch and XLA (`build_env`). Variables in `release_env` are saved in `/etc/environment` (executed for the `release` stage). + +* [vars.yaml](config/vars.yaml) - Ansible variables used in other config files and throughout the playbook. + Not associated with any particular system. + +Variables from these config files are dynamically loaded (during playbook execution), see [playbook.yaml](playbook.yaml). diff --git a/docker/experimental/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml index 6cb1e2d2c822..dc5455e741f7 100644 --- a/docker/experimental/ansible/config/apt.yaml +++ b/docker/experimental/ansible/config/apt.yaml @@ -1,3 +1,4 @@ +# Contains lists of apt packages for each stage (build|release) and arch or accelerator.
apt: pkgs: build_common: @@ -56,7 +57,7 @@ apt: repos: # signed-by path should match the corresponding keyring path above. - - "deb [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" - - "deb-src [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ debian_version }}/ llvm-toolchain-{{ debian_version }}-{{ clang_version }} main" + - "deb [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main" + - "deb-src [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main" - "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" - "deb [signed-by=/usr/share/keyrings/cuda.pgp] https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /" \ No newline at end of file diff --git a/docker/experimental/ansible/config/env.yaml b/docker/experimental/ansible/config/env.yaml index c4458dd112d9..ce1e53d004cd 100644 --- a/docker/experimental/ansible/config/env.yaml +++ b/docker/experimental/ansible/config/env.yaml @@ -1,6 +1,6 @@ -# Variables that will be stored in /etc/environment file. They'll be accessible -# for all processes on the host. -runtime_env: +# Variables that will be stored in /etc/environment file for the release stage. +# They'll be accessible for all processes on the host. +release_env: common: CC: "clang-{{ clang_version }}" CXX: "clang++-{{ clang_version }}" diff --git a/docker/experimental/ansible/config/pip.yaml b/docker/experimental/ansible/config/pip.yaml index 45c8ef00e498..249d82a3c628 100644 --- a/docker/experimental/ansible/config/pip.yaml +++ b/docker/experimental/ansible/config/pip.yaml @@ -1,5 +1,7 @@ +# Contains lists of pip packages for each stage (build|release) and arch or accelerator. 
pip: pkgs: + # Shared between all architectures and accelerators for the build stage. build_common: - astunparse - cffi @@ -33,6 +35,7 @@ pip: build_aarch64: + # Shared between all architectures and accelerators for the release stage. release_common: - numpy - pyyaml @@ -42,6 +45,7 @@ pip: release_tpu: - torch_xla[tpuvm] + # Packages that will be installed with the `--no-deps` flag. pkgs_nodeps: release_common: - torchvision diff --git a/docker/experimental/ansible/config/vars.yaml b/docker/experimental/ansible/config/vars.yaml index 8e5ddac762fd..00d537fff2bd 100644 --- a/docker/experimental/ansible/config/vars.yaml +++ b/docker/experimental/ansible/config/vars.yaml @@ -1,5 +1,7 @@ +# Used for fetching cuda from the right repo, see apt.yaml. cuda_repo: ubuntu1804 -debian_version: buster +# Used for fetching clang from the right repo, see apt.yaml. +llvm_debian_repo: buster clang_version: 11 # PyTorch and PyTorch/XLA wheel versions. -package_version: 1.14 \ No newline at end of file +package_version: 2.0 \ No newline at end of file diff --git a/docker/experimental/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml index 15b08048928b..e9eb2927040d 100644 --- a/docker/experimental/ansible/playbook.yaml +++ b/docker/experimental/ansible/playbook.yaml @@ -81,8 +81,8 @@ - role: configure_env vars: env_vars: "{{ - runtime_env.common | default({}, true) | - combine(runtime_env[arch] | default({}, true)) | - combine(runtime_env[accelerator] | default({}, true)) + release_env.common | default({}, true) | + combine(release_env[arch] | default({}, true)) | + combine(release_env[accelerator] | default({}, true)) }}" when: stage == "release" From c7ded27fb0499d4b96a74649dcdc066edcfb223f Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 31 Jan 2023 16:12:39 +0000 Subject: [PATCH 32/39] Change git revisions to head --- docker/experimental/ansible/README.md | 11 ++++++++--- docker/experimental/ansible/playbook.yaml | 4 ++-- 2 files changed, 10 insertions(+), 5
deletions(-) diff --git a/docker/experimental/ansible/README.md b/docker/experimental/ansible/README.md index acfd35123985..b0b901157df1 100644 --- a/docker/experimental/ansible/README.md +++ b/docker/experimental/ansible/README.md @@ -2,8 +2,11 @@ This ansible playbook will perform the following actions on the localhost: * install required pip and apt packages, depending on the specified stage, - architecture and accelerator (see [apt.yaml](config/apt.yaml) and [pip.yaml](config/pip.yaml)). + architecture and accelerator (see [apt.yaml](config/apt.yaml) and + [pip.yaml](config/pip.yaml)). * fetch bazel (version configured in [vars.yaml](config/vars.yaml)), + * fetch PyTorch and XLA sources at master (or specific revisions, + see role `fetch_srcs` in [playbook.yaml]). * set required environment variables (see [env.yaml](config/env.yaml)), * build and install PyTorch and XLA wheels, * apply infrastructure tests (see `*/tests.yaml` files in [roles](roles)). @@ -33,7 +36,8 @@ The configuration is simply loaded as playbook variables which are then passed to specific roles and tasks. Only variables in [config/env.yaml](config/env.yaml) are passed as env variables. -* [apt.yaml](config/apt.yaml) - specifies apt packages for each stage and architecture or accelerator. +* [apt.yaml](config/apt.yaml) - specifies apt packages for each stage and + architecture or accelerator. Packages shared between all architectures and accelerators in a given stage are specified in `*_common`. They are appended to any architecture specific list. @@ -50,4 +54,5 @@ Only variables in [config/env.yaml](config/env.yaml) are passed as env variables * [vars.yaml](config/vars.yaml) - Ansible variables used in other config files and throughout the playbook. Not associated with any particular system. -Variables from these config files are dynamically loaded (during playbook execution), see [playbook.yaml](playbook.yaml). 
+Variables from these config files are dynamically loaded (during playbook execution), +see [playbook.yaml](playbook.yaml). diff --git a/docker/experimental/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml index e9eb2927040d..5509a6f359e2 100644 --- a/docker/experimental/ansible/playbook.yaml +++ b/docker/experimental/ansible/playbook.yaml @@ -66,8 +66,8 @@ - role: fetch_srcs vars: src_root: "/src" - pytorch_git_rev: 5e9fa0a8fc87f9a626f144bb5527da0426ac384b - xla_git_rev: 6acc0c9cc7aa0738ec792ddd4780d14f66d6bd8c + pytorch_git_rev: HEAD + xla_git_rev: HEAD - role: build_srcs vars: From c053a5bc44bbe8778f23ec30aaa9351f70789281 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Tue, 31 Jan 2023 16:20:26 +0000 Subject: [PATCH 33/39] Remove variable from task name that's not substituted --- docker/experimental/ansible/playbook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/experimental/ansible/playbook.yaml b/docker/experimental/ansible/playbook.yaml index 5509a6f359e2..7d89fc335383 100644 --- a/docker/experimental/ansible/playbook.yaml +++ b/docker/experimental/ansible/playbook.yaml @@ -22,7 +22,7 @@ - name: accelerator pattern: ^(tpu|cuda)$ - - name: "Include vars from config/{{ item }}" + - name: "Include vars from config files" ansible.builtin.include_vars: file: "config/{{ item }}" loop: From 0d6c8a7fc653f79cca3e76bc07f4975b009150bb Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Thu, 9 Feb 2023 06:20:54 +0000 Subject: [PATCH 34/39] Fix link formatting in README.md --- docker/experimental/ansible/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/experimental/ansible/README.md b/docker/experimental/ansible/README.md index b0b901157df1..9094f645de30 100644 --- a/docker/experimental/ansible/README.md +++ b/docker/experimental/ansible/README.md @@ -6,7 +6,7 @@ This ansible playbook will perform the following actions on the localhost: [pip.yaml](config/pip.yaml)). 
* fetch bazel (version configured in [vars.yaml](config/vars.yaml)), * fetch PyTorch and XLA sources at master (or specific revisions, - see role `fetch_srcs` in [playbook.yaml]). + see role `fetch_srcs` in [playbook.yaml](playbook.yaml)). * set required environment variables (see [env.yaml](config/env.yaml)), * build and install PyTorch and XLA wheels, * apply infrastructure tests (see `*/tests.yaml` files in [roles](roles)). From a5d36decb16d892791f1ea1226fbefec60874f41 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 15 Feb 2023 10:19:09 +0000 Subject: [PATCH 35/39] Append env variables to bashrc and zshrc instead of /etc/environment --- .../ansible/roles/configure_env/tasks/main.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docker/experimental/ansible/roles/configure_env/tasks/main.yaml b/docker/experimental/ansible/roles/configure_env/tasks/main.yaml index 949ad0d02bdb..807912019d56 100644 --- a/docker/experimental/ansible/roles/configure_env/tasks/main.yaml +++ b/docker/experimental/ansible/roles/configure_env/tasks/main.yaml @@ -1,5 +1,13 @@ -- name: Append environment variables required during runtime to /etc/environment +- name: Append environment variables required during runtime to ~/.bashrc ansible.builtin.lineinfile: - path: /etc/environment - line: "{{ item }}={{ env_vars[item] }}" + path: ~/.bashrc + line: "export {{ item }}={{ env_vars[item] }}" + create: true + loop: "{{ env_vars.keys() | list }}" + +- name: Append environment variables required during runtime to ~/.zshrc + ansible.builtin.lineinfile: + path: ~/.zshrc + line: "export {{ item }}={{ env_vars[item] }}" + create: true loop: "{{ env_vars.keys() | list }}" From 666df60d0817a88d2a7f8ec92fba5187398c044d Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 15 Feb 2023 10:59:04 +0000 Subject: [PATCH 36/39] Bump cuda packages version; add sympy --- docker/experimental/ansible/config/apt.yaml | 16 ++++++++-------- 
docker/experimental/ansible/config/pip.yaml | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docker/experimental/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml index dc5455e741f7..d3ceb36699d5 100644 --- a/docker/experimental/ansible/config/apt.yaml +++ b/docker/experimental/ansible/config/apt.yaml @@ -16,11 +16,11 @@ apt: - wget build_cuda: - - cuda-libraries-11-2 - - cuda-toolkit-11-2 - - cuda-minimal-build-11-2 - - libcudnn8=8.1.1.33-1+cuda11.2 - - libcudnn8-dev=8.1.1.33-1+cuda11.2 + - cuda-libraries-11-8 + - cuda-toolkit-11-8 + - cuda-minimal-build-11-8 + - libcudnn8=8.1.1.33-1+cuda11.8 + - libcudnn8-dev=8.1.1.33-1+cuda11.8 build_amd64: - clang-11 @@ -41,9 +41,9 @@ apt: - patch release_cuda: - - cuda-libraries-11-2 - - cuda-minimal-build-11-2 - - libcudnn8=8.1.1.33-1+cuda11.2 + - cuda-libraries-11-8 + - cuda-minimal-build-11-8 + - libcudnn8=8.1.1.33-1+cuda11.8 # Specify objects with string fields `url` and `keyring`. # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu. 
diff --git a/docker/experimental/ansible/config/pip.yaml b/docker/experimental/ansible/config/pip.yaml index 249d82a3c628..add0fb0e221d 100644 --- a/docker/experimental/ansible/config/pip.yaml +++ b/docker/experimental/ansible/config/pip.yaml @@ -28,6 +28,7 @@ pip: - tqdm - typing - typing_extensions + - sympy build_amd64: - mkl From 13b36a312d4fb76acda0065a3d7aca26a9aa4fb8 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 15 Feb 2023 11:18:33 +0000 Subject: [PATCH 37/39] Downgrade to clang-10 --- docker/experimental/ansible/config/apt.yaml | 4 +++- docker/experimental/ansible/config/vars.yaml | 2 +- docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docker/experimental/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml index d3ceb36699d5..3d24f59e78e0 100644 --- a/docker/experimental/ansible/config/apt.yaml +++ b/docker/experimental/ansible/config/apt.yaml @@ -7,6 +7,7 @@ apt: - git - gnupg - libomp5 + - libomp5-7 - libopenblas-dev - ninja-build - procps @@ -23,7 +24,7 @@ apt: - libcudnn8-dev=8.1.1.33-1+cuda11.8 build_amd64: - - clang-11 + - "clang-{{ clang_version }}" build_aarch64: - scons @@ -37,6 +38,7 @@ apt: - google-cloud-cli - libgomp1 - libomp5 + - libomp5-7 - libopenblas-base - patch diff --git a/docker/experimental/ansible/config/vars.yaml b/docker/experimental/ansible/config/vars.yaml index 00d537fff2bd..4afb567aacb2 100644 --- a/docker/experimental/ansible/config/vars.yaml +++ b/docker/experimental/ansible/config/vars.yaml @@ -2,6 +2,6 @@ cuda_repo: ubuntu1804 # Used for fetching clang from the right repo, see apt.yaml. llvm_debian_repo: buster -clang_version: 11 +clang_version: 10 # PyTorch and PyTorch/XLA wheel versions. 
package_version: 2.0 \ No newline at end of file diff --git a/docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml b/docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml index d8eb34834c9e..929a2404ac65 100644 --- a/docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml +++ b/docker/experimental/ansible/roles/fetch_srcs/tasks/main.yaml @@ -36,6 +36,7 @@ strip: 1 basedir: "{{ (src_root, 'pytorch/xla/third_party/tensorflow') | path_join }}" loop: "{{ tf_patches.files | map(attribute='path') }}" + ignore_errors: true - name: "Tests" include_tasks: tests.yaml From 00ce2bfc91c519c0e39febcc8356f7182ebd5208 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 15 Feb 2023 13:26:55 +0000 Subject: [PATCH 38/39] Remove libomp5 --- docker/experimental/ansible/config/apt.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker/experimental/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml index 3d24f59e78e0..971e4f496c28 100644 --- a/docker/experimental/ansible/config/apt.yaml +++ b/docker/experimental/ansible/config/apt.yaml @@ -6,8 +6,6 @@ apt: - curl - git - gnupg - - libomp5 - - libomp5-7 - libopenblas-dev - ninja-build - procps @@ -37,8 +35,6 @@ apt: - gnupg - google-cloud-cli - libgomp1 - - libomp5 - - libomp5-7 - libopenblas-base - patch From 7124a8cf60d33ef8a08dbdb32882db9a09ada797 Mon Sep 17 00:00:00 2001 From: Mateusz Lewko Date: Wed, 15 Feb 2023 13:59:54 +0000 Subject: [PATCH 39/39] Set correct version for libcudnn8 --- docker/experimental/ansible/config/apt.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/experimental/ansible/config/apt.yaml b/docker/experimental/ansible/config/apt.yaml index 971e4f496c28..97ce1755f234 100644 --- a/docker/experimental/ansible/config/apt.yaml +++ b/docker/experimental/ansible/config/apt.yaml @@ -18,8 +18,8 @@ apt: - cuda-libraries-11-8 - cuda-toolkit-11-8 - cuda-minimal-build-11-8 - - libcudnn8=8.1.1.33-1+cuda11.8 - - 
libcudnn8-dev=8.1.1.33-1+cuda11.8 + - libcudnn8=8.8.0.121-1+cuda11.8 + - libcudnn8-dev=8.8.0.121-1+cuda11.8 build_amd64: - "clang-{{ clang_version }}" @@ -41,7 +41,7 @@ apt: release_cuda: - cuda-libraries-11-8 - cuda-minimal-build-11-8 - - libcudnn8=8.1.1.33-1+cuda11.8 + - libcudnn8=8.8.0.121-1+cuda11.8 # Specify objects with string fields `url` and `keyring`. # The keyring path should start with /usr/share/keyrings/ for debian and ubuntu.