From 167962e8ea1553b695c5d879155d1dc29542d0a1 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 1 Aug 2023 17:21:22 -0700 Subject: [PATCH 1/4] all the llamas --- .../canary_models/llama_v2_13b/__init__.py | 15 +++++++++++++++ .../canary_models/llama_v2_13b/install.py | 9 +++++++++ .../canary_models/llama_v2_13b/metadata.yaml | 12 ++++++++++++ .../canary_models/llama_v2_70b/__init__.py | 15 +++++++++++++++ .../canary_models/llama_v2_70b/install.py | 9 +++++++++ .../canary_models/llama_v2_70b/metadata.yaml | 12 ++++++++++++ .../canary_models/llama_v2_7b/__init__.py | 15 +++++++++++++++ .../canary_models/llama_v2_7b/install.py | 9 +++++++++ .../canary_models/llama_v2_7b/metadata.yaml | 12 ++++++++++++ .../util/framework/huggingface/model_factory.py | 3 +++ 10 files changed, 111 insertions(+) create mode 100644 torchbenchmark/canary_models/llama_v2_13b/__init__.py create mode 100644 torchbenchmark/canary_models/llama_v2_13b/install.py create mode 100644 torchbenchmark/canary_models/llama_v2_13b/metadata.yaml create mode 100644 torchbenchmark/canary_models/llama_v2_70b/__init__.py create mode 100644 torchbenchmark/canary_models/llama_v2_70b/install.py create mode 100644 torchbenchmark/canary_models/llama_v2_70b/metadata.yaml create mode 100644 torchbenchmark/canary_models/llama_v2_7b/__init__.py create mode 100644 torchbenchmark/canary_models/llama_v2_7b/install.py create mode 100644 torchbenchmark/canary_models/llama_v2_7b/metadata.yaml diff --git a/torchbenchmark/canary_models/llama_v2_13b/__init__.py b/torchbenchmark/canary_models/llama_v2_13b/__init__.py new file mode 100644 index 0000000000..7b05fae202 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_13b/__init__.py @@ -0,0 +1,15 @@ +from torchbenchmark.tasks import NLP +from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin + +class Model(HuggingFaceModel, HuggingFaceAuthMixin): + task = NLP.LANGUAGE_MODELING + DEFAULT_TRAIN_BSIZE = 1 + 
DEFAULT_EVAL_BSIZE = 1 + DEEPCOPY = False + + def __init__(self, test, device, batch_size=None, extra_args=[]): + HuggingFaceAuthMixin.__init__(self) + super().__init__(name="llama_v2_13b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) + + def train(self): + return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") diff --git a/torchbenchmark/canary_models/llama_v2_13b/install.py b/torchbenchmark/canary_models/llama_v2_13b/install.py new file mode 100644 index 0000000000..e22f9518b2 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_13b/install.py @@ -0,0 +1,9 @@ + +import subprocess +import sys +import os +from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model + +if __name__ == '__main__': + model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) + cache_model(model_name) diff --git a/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml new file mode 100644 index 0000000000..81a62e29bb --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_13b/metadata.yaml @@ -0,0 +1,12 @@ +devices: + NVIDIA A100-SXM4-40GB: + eval_batch_size: 1 +eval_benchmark: false +eval_deterministic: false +eval_nograd: true +not_implemented: +- device: cpu +- device: cuda + test: train +train_benchmark: false +train_deterministic: false diff --git a/torchbenchmark/canary_models/llama_v2_70b/__init__.py b/torchbenchmark/canary_models/llama_v2_70b/__init__.py new file mode 100644 index 0000000000..f3da4a7805 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_70b/__init__.py @@ -0,0 +1,15 @@ +from torchbenchmark.tasks import NLP +from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin + +class Model(HuggingFaceModel, HuggingFaceAuthMixin): + task = NLP.LANGUAGE_MODELING + DEFAULT_TRAIN_BSIZE = 1 + DEFAULT_EVAL_BSIZE = 1 + DEEPCOPY = False + + def __init__(self, test, 
device, batch_size=None, extra_args=[]): + HuggingFaceAuthMixin.__init__(self) + super().__init__(name="llama_v2_70b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) + + def train(self): + return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") diff --git a/torchbenchmark/canary_models/llama_v2_70b/install.py b/torchbenchmark/canary_models/llama_v2_70b/install.py new file mode 100644 index 0000000000..e22f9518b2 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_70b/install.py @@ -0,0 +1,9 @@ + +import subprocess +import sys +import os +from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model + +if __name__ == '__main__': + model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) + cache_model(model_name) diff --git a/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml new file mode 100644 index 0000000000..81a62e29bb --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_70b/metadata.yaml @@ -0,0 +1,12 @@ +devices: + NVIDIA A100-SXM4-40GB: + eval_batch_size: 1 +eval_benchmark: false +eval_deterministic: false +eval_nograd: true +not_implemented: +- device: cpu +- device: cuda + test: train +train_benchmark: false +train_deterministic: false diff --git a/torchbenchmark/canary_models/llama_v2_7b/__init__.py b/torchbenchmark/canary_models/llama_v2_7b/__init__.py new file mode 100644 index 0000000000..e5b4dd9cf8 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_7b/__init__.py @@ -0,0 +1,15 @@ +from torchbenchmark.tasks import NLP +from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel, HuggingFaceAuthMixin + +class Model(HuggingFaceModel, HuggingFaceAuthMixin): + task = NLP.LANGUAGE_MODELING + DEFAULT_TRAIN_BSIZE = 1 + DEFAULT_EVAL_BSIZE = 1 + DEEPCOPY = False + + def __init__(self, test, device, batch_size=None, extra_args=[]): + 
HuggingFaceAuthMixin.__init__(self) + super().__init__(name="llama_v2_7b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) + + def train(self): + return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") diff --git a/torchbenchmark/canary_models/llama_v2_7b/install.py b/torchbenchmark/canary_models/llama_v2_7b/install.py new file mode 100644 index 0000000000..e22f9518b2 --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_7b/install.py @@ -0,0 +1,9 @@ + +import subprocess +import sys +import os +from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model + +if __name__ == '__main__': + model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) + cache_model(model_name) diff --git a/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml b/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml new file mode 100644 index 0000000000..81a62e29bb --- /dev/null +++ b/torchbenchmark/canary_models/llama_v2_7b/metadata.yaml @@ -0,0 +1,12 @@ +devices: + NVIDIA A100-SXM4-40GB: + eval_batch_size: 1 +eval_benchmark: false +eval_deterministic: false +eval_nograd: true +not_implemented: +- device: cpu +- device: cuda + test: train +train_benchmark: false +train_deterministic: false diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py index 3327ad72db..d317483a67 100644 --- a/torchbenchmark/util/framework/huggingface/model_factory.py +++ b/torchbenchmark/util/framework/huggingface/model_factory.py @@ -30,6 +30,9 @@ 'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'), # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real 'llama_v2_7b_16h' : (512,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'), + 'llama_v2_7b' : (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")', 'AutoModelForCausalLM'), + 'llama_v2_13b' 
: (512,512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-13b-hf")', 'AutoModelForCausalLM'), + 'llama_v2_70b' : (512, 512, 'AutoConfig.from_pretrained("meta-llama/Llama-2-70b-hf")', 'AutoModelForCausalLM'), } cpu_input_slice = { From ed7015359cc80dccf24d40bb6580e348e90dbd45 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 1 Aug 2023 17:29:33 -0700 Subject: [PATCH 2/4] add fsdp comment --- .../canary_models/llama_v2_13b/__init__.py | 2 +- .../canary_models/llama_v2_70b/__init__.py | 4 +- torchbenchmark/models/imagebind/__init__.py | 38 +++++++++++++++++++ torchbenchmark/models/imagebind/install.py | 23 +++++++++++ torchbenchmark/models/imagebind/origin | 1 + .../models/imagebind/requirements.txt | 13 +++++++ 6 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 torchbenchmark/models/imagebind/__init__.py create mode 100644 torchbenchmark/models/imagebind/install.py create mode 100644 torchbenchmark/models/imagebind/origin create mode 100644 torchbenchmark/models/imagebind/requirements.txt diff --git a/torchbenchmark/canary_models/llama_v2_13b/__init__.py b/torchbenchmark/canary_models/llama_v2_13b/__init__.py index 7b05fae202..42678f629a 100644 --- a/torchbenchmark/canary_models/llama_v2_13b/__init__.py +++ b/torchbenchmark/canary_models/llama_v2_13b/__init__.py @@ -12,4 +12,4 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): super().__init__(name="llama_v2_13b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) def train(self): - return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") + raise NotImplementedError("FSDP should implement a training loop") diff --git a/torchbenchmark/canary_models/llama_v2_70b/__init__.py b/torchbenchmark/canary_models/llama_v2_70b/__init__.py index f3da4a7805..66bd49bed4 100644 --- a/torchbenchmark/canary_models/llama_v2_70b/__init__.py +++ b/torchbenchmark/canary_models/llama_v2_70b/__init__.py @@ -11,5 +11,7 @@ def __init__(self, test, device, 
batch_size=None, extra_args=[]): HuggingFaceAuthMixin.__init__(self) super().__init__(name="llama_v2_70b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) + def train(self): - return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") + raise NotImplementedError("FSDP should implement a training loop") + diff --git a/torchbenchmark/models/imagebind/__init__.py b/torchbenchmark/models/imagebind/__init__.py new file mode 100644 index 0000000000..2b24e029b5 --- /dev/null +++ b/torchbenchmark/models/imagebind/__init__.py @@ -0,0 +1,38 @@ +import data +import torch +from models import imagebind_model +from models.imagebind_model import ModalityType + +text_list=["A dog.", "A car", "A bird"] +image_paths=[".assets/dog_image.jpg", ".assets/car_image.jpg", ".assets/bird_image.jpg"] +audio_paths=[".assets/dog_audio.wav", ".assets/car_audio.wav", ".assets/bird_audio.wav"] + +device = "cuda:0" if torch.cuda.is_available() else "cpu" + +# Instantiate model +model = imagebind_model.imagebind_huge(pretrained=True) +model.eval() +model.to(device) + +# Load data +inputs = { + ModalityType.TEXT: data.load_and_transform_text(text_list, device), + ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device), + ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, device), +} + +with torch.no_grad(): + embeddings = model(inputs) + +print( + "Vision x Text: ", + torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1), +) +print( + "Audio x Text: ", + torch.softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, dim=-1), +) +print( + "Vision x Audio: ", + torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, dim=-1), +) \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/install.py b/torchbenchmark/models/imagebind/install.py new file mode 100644 index 0000000000..86cb5c2dc9 --- /dev/null +++ 
b/torchbenchmark/models/imagebind/install.py @@ -0,0 +1,23 @@ +import os +import subprocess +import sys + +def pip_install_requirements(): + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt']) + +def download_checkpoint(): + subprocess.check_call(['wget', '-P', '.data', 'https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth']) + +def download_data(): + subprocess.check_call(['wget', '-P', '.data', 'https://github.com/facebookresearch/segment-anything/raw/main/notebooks/images/truck.jpg']) + +if __name__ == '__main__': + pip_install_requirements() + + # Create .data folder in the script's directory + data_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.data') + os.makedirs(data_folder, exist_ok=True) + + # Download checkpoint and data files to the .data folder + download_checkpoint() + download_data() \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/origin b/torchbenchmark/models/imagebind/origin new file mode 100644 index 0000000000..a308fe8528 --- /dev/null +++ b/torchbenchmark/models/imagebind/origin @@ -0,0 +1 @@ +origin https://github.com/facebookresearch/ImageBind \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/requirements.txt b/torchbenchmark/models/imagebind/requirements.txt new file mode 100644 index 0000000000..7ab8c8aa68 --- /dev/null +++ b/torchbenchmark/models/imagebind/requirements.txt @@ -0,0 +1,13 @@ +pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@28fe037d212663c6a24f373b94cc5d478c8c1a1d +timm==0.6.7 +ftfy +regex +einops +fvcore +decord==0.6.0 +iopath +numpy +matplotlib +types-regex +mayavi +cartopy \ No newline at end of file From 5b8d4dad42efea38da0edb9cdd9335875fd64d2a Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 1 Aug 2023 17:30:38 -0700 Subject: [PATCH 3/4] bla --- torchbenchmark/canary_models/llama_v2_7b/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/torchbenchmark/canary_models/llama_v2_7b/__init__.py b/torchbenchmark/canary_models/llama_v2_7b/__init__.py index e5b4dd9cf8..731a01ba83 100644 --- a/torchbenchmark/canary_models/llama_v2_7b/__init__.py +++ b/torchbenchmark/canary_models/llama_v2_7b/__init__.py @@ -11,5 +11,7 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): HuggingFaceAuthMixin.__init__(self) super().__init__(name="llama_v2_7b", test=test, device=device, batch_size=batch_size, extra_args=extra_args) + def train(self): - return NotImplementedError("7b LLAMA model will OOM on CI GPU machines") + raise NotImplementedError("FSDP should implement a training loop") + From 65f651513185d8e1cc2142669b546ed9b7e96f98 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 1 Aug 2023 21:19:24 -0700 Subject: [PATCH 4/4] remove imagebind --- torchbenchmark/models/imagebind/__init__.py | 38 ------------------- torchbenchmark/models/imagebind/install.py | 23 ----------- torchbenchmark/models/imagebind/origin | 1 - .../models/imagebind/requirements.txt | 13 ------- 4 files changed, 75 deletions(-) delete mode 100644 torchbenchmark/models/imagebind/__init__.py delete mode 100644 torchbenchmark/models/imagebind/install.py delete mode 100644 torchbenchmark/models/imagebind/origin delete mode 100644 torchbenchmark/models/imagebind/requirements.txt diff --git a/torchbenchmark/models/imagebind/__init__.py b/torchbenchmark/models/imagebind/__init__.py deleted file mode 100644 index 2b24e029b5..0000000000 --- a/torchbenchmark/models/imagebind/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -import data -import torch -from models import imagebind_model -from models.imagebind_model import ModalityType - -text_list=["A dog.", "A car", "A bird"] -image_paths=[".assets/dog_image.jpg", ".assets/car_image.jpg", ".assets/bird_image.jpg"] -audio_paths=[".assets/dog_audio.wav", ".assets/car_audio.wav", ".assets/bird_audio.wav"] - -device = "cuda:0" if torch.cuda.is_available() else "cpu" - -# Instantiate model 
-model = imagebind_model.imagebind_huge(pretrained=True) -model.eval() -model.to(device) - -# Load data -inputs = { - ModalityType.TEXT: data.load_and_transform_text(text_list, device), - ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device), - ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, device), -} - -with torch.no_grad(): - embeddings = model(inputs) - -print( - "Vision x Text: ", - torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1), -) -print( - "Audio x Text: ", - torch.softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, dim=-1), -) -print( - "Vision x Audio: ", - torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, dim=-1), -) \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/install.py b/torchbenchmark/models/imagebind/install.py deleted file mode 100644 index 86cb5c2dc9..0000000000 --- a/torchbenchmark/models/imagebind/install.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import subprocess -import sys - -def pip_install_requirements(): - subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt']) - -def download_checkpoint(): - subprocess.check_call(['wget', '-P', '.data', 'https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth']) - -def download_data(): - subprocess.check_call(['wget', '-P', '.data', 'https://github.com/facebookresearch/segment-anything/raw/main/notebooks/images/truck.jpg']) - -if __name__ == '__main__': - pip_install_requirements() - - # Create .data folder in the script's directory - data_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.data') - os.makedirs(data_folder, exist_ok=True) - - # Download checkpoint and data files to the .data folder - download_checkpoint() - download_data() \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/origin b/torchbenchmark/models/imagebind/origin deleted file mode 
100644 index a308fe8528..0000000000 --- a/torchbenchmark/models/imagebind/origin +++ /dev/null @@ -1 +0,0 @@ -origin https://github.com/facebookresearch/ImageBind \ No newline at end of file diff --git a/torchbenchmark/models/imagebind/requirements.txt b/torchbenchmark/models/imagebind/requirements.txt deleted file mode 100644 index 7ab8c8aa68..0000000000 --- a/torchbenchmark/models/imagebind/requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ -pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@28fe037d212663c6a24f373b94cc5d478c8c1a1d -timm==0.6.7 -ftfy -regex -einops -fvcore -decord==0.6.0 -iopath -numpy -matplotlib -types-regex -mayavi -cartopy \ No newline at end of file