Skip to content

Commit

Permalink
Update on "[JIT][WIP] memorization memory planning"
Browse files Browse the repository at this point in the history
This PR extends memory planning strategies to support memory allocations and frees collected using the `MemoryTracingAllocator` (which follows the pattern from kineto). These plans can then be deployed using `MemoryPlanningAllocator` in combination with `prim::PreAllocateTensor` ops (inserted into the graph) to appropriately give out slices of the initially allocated region.


Differential Revision: [D30769097](https://our.internmc.facebook.com/intern/diff/D30769097)

[ghstack-poisoned]
  • Loading branch information
makslevental committed Nov 3, 2021
2 parents ae1bc3a + bb91f36 commit 20114a8
Show file tree
Hide file tree
Showing 2,448 changed files with 157,086 additions and 58,281 deletions.
2 changes: 1 addition & 1 deletion .azure_pipelines/job_templates/prepare-build-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ steps:
curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
displayName: Install sccache and randomtemp
condition: not(eq(variables.CUDA_VERSION, ''))
Expand Down
4 changes: 1 addition & 3 deletions .azure_pipelines/job_templates/set-environment-variables.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,7 @@ steps:
Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
displayName: Set MKL, sccache and randomtemp environment variables
# View current environment variables
Expand Down
3 changes: 2 additions & 1 deletion .circleci/cimodel/data/binary_build_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def get_processor_arch_name(gpu_version):
],
)),
windows=(
[v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS],
# Stop building Win+CU102, see https://github.com/pytorch/pytorch/issues/65648
[v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS and v != "cuda102"],
OrderedDict(
wheel=dimensions.STANDARD_PYTHON_VERSIONS,
conda=dimensions.STANDARD_PYTHON_VERSIONS,
Expand Down
2 changes: 1 addition & 1 deletion .circleci/cimodel/data/dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
]

ROCM_VERSIONS = [
"4.0.1",
"4.1",
"4.2",
"4.3.1",
]

ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
Expand Down
23 changes: 0 additions & 23 deletions .circleci/cimodel/data/pytorch_build_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,6 @@
]),
]),
# TODO: bring back libtorch test
("7", [X("3.6")]),
]),
("clang", [
("7", [
("3.6", [
("asan", [
(True, [
("shard_test", [XImportant(True)]),
]),
]),
("onnx", [XImportant(True)]),
]),
]),
]),
("cuda", [
("10.2", [
Expand Down Expand Up @@ -52,7 +39,6 @@
("9", [
("3.6", [
("xla", [XImportant(True)]),
("vulkan", [XImportant(True)]),
]),
]),
]),
Expand Down Expand Up @@ -145,7 +131,6 @@ def child_constructor(self):
"build_only": BuildOnlyConfigNode,
"shard_test": ShardTestConfigNode,
"cuda_gcc_override": CudaGccOverrideConfigNode,
"coverage": CoverageConfigNode,
"pure_torch": PureTorchConfigNode,
"slow_gradcheck": SlowGradcheckConfigNode,
}
Expand Down Expand Up @@ -289,14 +274,6 @@ def child_constructor(self):
return ImportantConfigNode


class CoverageConfigNode(TreeConfigNode):
    """Tree config node that records its node name as the ``is_coverage`` property."""

    def init2(self, node_name):
        """Store ``node_name`` under the ``"is_coverage"`` key of ``self.props``."""
        self.props["is_coverage"] = node_name

    def child_constructor(self):
        """Return ``ExperimentalFeatureConfigNode``.

        NOTE(review): presumably the caller uses the returned class to build
        this node's children -- confirm against ``TreeConfigNode``.
        """
        return ExperimentalFeatureConfigNode


class ImportantConfigNode(TreeConfigNode):
def modify_label(self, label):
return "IMPORTANT=" + str(label)
Expand Down
27 changes: 0 additions & 27 deletions .circleci/cimodel/data/pytorch_build_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ def instantiate_configs(only_slow_gradcheck):
compiler_version = fc.find_prop("compiler_version")
is_xla = fc.find_prop("is_xla") or False
is_asan = fc.find_prop("is_asan") or False
is_coverage = fc.find_prop("is_coverage") or False
is_noarch = fc.find_prop("is_noarch") or False
is_onnx = fc.find_prop("is_onnx") or False
is_pure_torch = fc.find_prop("is_pure_torch") or False
Expand Down Expand Up @@ -284,10 +283,6 @@ def instantiate_configs(only_slow_gradcheck):
python_version = fc.find_prop("pyver")
parms_list[0] = fc.find_prop("abbreviated_pyver")

if is_coverage:
parms_list_ignored_for_docker_image.append("coverage")
python_version = fc.find_prop("pyver")

if is_noarch:
parms_list_ignored_for_docker_image.append("noarch")

Expand Down Expand Up @@ -357,28 +352,6 @@ def instantiate_configs(only_slow_gradcheck):
tags_list=RC_PATTERN)
c.dependent_tests = gen_docs_configs(c)

if (
compiler_name != "clang"
and not rocm_version
and not is_libtorch
and not is_vulkan
and not is_pure_torch
and not is_noarch
and not is_slow_gradcheck
and not only_slow_gradcheck
and not build_only
):
distributed_test = Conf(
c.gen_build_name("") + "distributed",
[],
is_xla=False,
restrict_phases=["test"],
is_libtorch=False,
is_important=True,
parent_build=c,
)
c.dependent_tests.append(distributed_test)

config_list.append(c)

return config_list
Expand Down
6 changes: 0 additions & 6 deletions .circleci/cimodel/data/simple/android_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,6 @@ def gen_tree(self):
["pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build"],
is_master_only=False,
is_pr_only=True),
AndroidGradleJob(
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
"pytorch_android_gradle_custom_build_single",
[DOCKER_REQUIREMENT_NDK],
is_master_only=False,
is_pr_only=True),
AndroidGradleJob(
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
"pytorch_android_gradle_custom_build_single",
Expand Down
10 changes: 5 additions & 5 deletions .circleci/cimodel/data/simple/binary_smoketest.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ def gen_tree(self):
),
SmoketestJob(
"binary_windows_build",
["wheel", "3.7", "cu102"],
["wheel", "3.7", "cu113"],
None,
"binary_windows_wheel_3_7_cu102_build",
"binary_windows_wheel_3_7_cu113_build",
is_master_only=True,
),

Expand All @@ -144,11 +144,11 @@ def gen_tree(self):
),
SmoketestJob(
"binary_windows_test",
["wheel", "3.7", "cu102"],
["wheel", "3.7", "cu113"],
None,
"binary_windows_wheel_3_7_cu102_test",
"binary_windows_wheel_3_7_cu113_test",
is_master_only=True,
requires=["binary_windows_wheel_3_7_cu102_build"],
requires=["binary_windows_wheel_3_7_cu113_build"],
extra_props={
"executor": "windows-with-nvidia-gpu",
},
Expand Down
27 changes: 5 additions & 22 deletions .circleci/cimodel/data/simple/docker_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,21 @@
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


# TODO: make this generated from a matrix rather than just a static list
# NOTE: All hardcoded docker image builds have been migrated to GHA
IMAGE_NAMES = [
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
"pytorch-linux-bionic-py3.6-clang9",
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
"pytorch-linux-bionic-py3.8-gcc9",
"pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
"pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
"pytorch-linux-xenial-py3-clang5-asan",
"pytorch-linux-xenial-py3-clang7-asan",
"pytorch-linux-xenial-py3-clang7-onnx",
"pytorch-linux-xenial-py3.8",
"pytorch-linux-xenial-py3.6-clang7",
"pytorch-linux-xenial-py3.6-gcc5.4", # this one is used in doc builds
"pytorch-linux-xenial-py3.6-gcc7.2",
"pytorch-linux-xenial-py3.6-gcc7",
"pytorch-linux-bionic-rocm4.1-py3.6",
"pytorch-linux-bionic-rocm4.2-py3.6",
"pytorch-linux-bionic-rocm4.3.1-py3.6",
]

# This entry should be an element from the list above
# This should contain the image matching the "slow_gradcheck" entry in
# pytorch_build_data.py
SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"

def get_workflow_jobs(only_slow_gradcheck=False):
def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
"""Generates a list of docker image build definitions"""
ret = []
for image_name in IMAGE_NAMES:
for image_name in images:
if image_name.startswith('docker-'):
image_name = image_name.lstrip('docker-')
if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
continue

Expand Down
6 changes: 6 additions & 0 deletions .circleci/cimodel/data/simple/ios_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ def gen_tree(self):
IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={
"op_list": "mobilenetv2.yaml",
"lite_interpreter": miniutils.quote(str(int(True)))}),
IOSJob(XCODE_VERSION, ArchVariant("x86_64", "coreml"), is_org_member_context=False, extra_props={
"use_coreml": miniutils.quote(str(int(True))),
"lite_interpreter": miniutils.quote(str(int(True)))}),
IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
"use_coreml": miniutils.quote(str(int(True))),
"lite_interpreter": miniutils.quote(str(int(True)))}),
]


Expand Down
33 changes: 0 additions & 33 deletions .circleci/cimodel/data/simple/mobile_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@

import cimodel.lib.miniutils as miniutils
import cimodel.data.simple.util.branch_filters
from cimodel.data.simple.util.docker_constants import (
DOCKER_IMAGE_ASAN,
DOCKER_REQUIREMENT_ASAN,
DOCKER_IMAGE_NDK,
DOCKER_REQUIREMENT_NDK
)


class MobileJob:
Expand Down Expand Up @@ -52,33 +46,6 @@ def gen_tree(self):


WORKFLOW_DATA = [
MobileJob(
DOCKER_IMAGE_ASAN,
[DOCKER_REQUIREMENT_ASAN],
["build"]
),

# Use LLVM-DEV toolchain in android-ndk-r19c docker image
MobileJob(
DOCKER_IMAGE_NDK,
[DOCKER_REQUIREMENT_NDK],
["custom", "build", "dynamic"]
),

MobileJob(
DOCKER_IMAGE_NDK,
[DOCKER_REQUIREMENT_NDK],
["custom", "build", "static"]
),

# Use LLVM-DEV toolchain in android-ndk-r19c docker image
# Most of this CI is already covered by "mobile-custom-build-dynamic" job
MobileJob(
DOCKER_IMAGE_NDK,
[DOCKER_REQUIREMENT_NDK],
["code", "analysis"],
True
),
]


Expand Down
Loading

0 comments on commit 20114a8

Please sign in to comment.