[ONNX] Add huggingface models into CI tests
[ONNX] Add transformers models into the no-runtime test of the FX exporter

ghstack-source-id: 84f2b39baf9f4a44e2ce4c6c50c4a0364ea7b8eb
Pull Request resolved: #107247
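
All of the new tests follow the same "no runtime" pattern: instantiate the model under torch.onnx.enable_fake_mode() so that weights are FakeTensors and no real memory is allocated, export with torch.onnx.dynamo_export, and validate the resulting proto without ever executing it. A minimal sketch of that pattern, using the same GPT-2 setup as the first new test below (assumes transformers and a recent PyTorch nightly are installed):

```python
import onnx
import torch
from transformers import GPT2Config, GPT2Model

config = GPT2Config()
with torch.onnx.enable_fake_mode() as fake_context:
    # Weights and inputs created here are FakeTensors: shapes and dtypes
    # are tracked, but no real storage is allocated, so even very large
    # checkpoints fit in CI.
    model = GPT2Model(config).eval()
    input_ids = torch.randint(0, config.vocab_size, (4, 256))

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
    model, input_ids=input_ids, export_options=export_options
)
# "No runtime": check the proto and run shape inference; never ONNX Runtime.
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)
```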
titaiwangms committed Aug 15, 2023
1 parent f76250f commit d3be05e
Showing 3 changed files with 237 additions and 15 deletions.
6 changes: 6 additions & 0 deletions .ci/docker/common/install_onnx.sh
@@ -37,6 +37,12 @@ pip_install onnxscript-preview==0.1.0.dev20230809 --no-deps
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2");' > "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'transformers.AutoModel.from_pretrained("bigscience/bloom-560m"); transformers.AutoTokenizer.from_pretrained("bigscience/bloom-560m");' >> "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'transformers.AutoModelForCausalLM.from_pretrained("mosaicml/mpt-7b"); transformers.AutoTokenizer.from_pretrained("mosaicml/mpt-7b");' >> "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'transformers.AutoModel.from_pretrained("openai/whisper-tiny"); transformers.WhisperConfig.from_pretrained("openai/whisper-tiny"); transformers.WhisperProcessor.from_pretrained("openai/whisper-tiny");' >> "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'transformers.AutoModel.from_pretrained("google/flan-t5-small"); transformers.AutoTokenizer.from_pretrained("google/flan-t5-small");' >> "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'transformers.AutoModel.from_pretrained("databricks/dolly-v2-3b"); transformers.AutoTokenizer.from_pretrained("databricks/dolly-v2-3b");' >> "${IMPORT_SCRIPT_FILENAME}"
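
Because each echo after the first appends to the same file, the generated /tmp/onnx_import_script.py ends up as plain sequential Python, roughly as sketched below; the import transformers on the first line covers all subsequent statements, and running it populates the default cache at ~/.cache/huggingface/hub:

```python
import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2");
transformers.AutoModel.from_pretrained("bigscience/bloom-560m"); transformers.AutoTokenizer.from_pretrained("bigscience/bloom-560m");
transformers.AutoModelForCausalLM.from_pretrained("mosaicml/mpt-7b"); transformers.AutoTokenizer.from_pretrained("mosaicml/mpt-7b");
transformers.AutoModel.from_pretrained("openai/whisper-tiny"); transformers.WhisperConfig.from_pretrained("openai/whisper-tiny"); transformers.WhisperProcessor.from_pretrained("openai/whisper-tiny");
transformers.AutoModel.from_pretrained("google/flan-t5-small"); transformers.AutoTokenizer.from_pretrained("google/flan-t5-small");
transformers.AutoModel.from_pretrained("databricks/dolly-v2-3b"); transformers.AutoTokenizer.from_pretrained("databricks/dolly-v2-3b");
```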


# Need a PyTorch version for transformers to work
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
183 changes: 181 additions & 2 deletions test/onnx/test_fx_to_onnx.py
@@ -406,10 +406,83 @@ def forward(self, x):
fake_model, real_x, export_options=export_options
)

def test_fake_tensor_mode_huggingface_gpt2(self):
from transformers import GPT2Config, GPT2Model # type: ignore[import]

config = GPT2Config()
batch, seq = 4, 256

with torch.onnx.enable_fake_mode() as fake_context:
model = GPT2Model(config).eval()
input_ids = torch.randint(0, config.vocab_size, (batch, seq))
attention_mask = torch.ones(batch, seq, dtype=torch.bool)
position_ids = torch.arange(0, seq, dtype=torch.long)
position_ids = position_ids.unsqueeze(0).view(-1, seq)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model,
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
export_options=export_options,
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

def test_fake_tensor_mode_huggingface_bigscience_bloom(self):
from transformers import BloomConfig, BloomModel # type: ignore[import]

config = BloomConfig()
batch, seq = 4, 256

with torch.onnx.enable_fake_mode() as fake_context:
model = BloomModel(config).eval()
input_ids = torch.randint(0, config.vocab_size, (batch, seq))
attention_mask = torch.ones(batch, seq, dtype=torch.bool)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model,
input_ids=input_ids,
attention_mask=attention_mask,
export_options=export_options,
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

def test_fake_tensor_mode_huggingface_open_llama(self):
from transformers import OpenLlamaConfig, OpenLlamaModel # type: ignore[import]

config = OpenLlamaConfig()
batch, seq = 4, 256

with torch.onnx.enable_fake_mode() as fake_context:
model = OpenLlamaModel(config).eval()
input_ids = torch.randint(0, config.vocab_size, (batch, seq))
attention_mask = torch.ones(batch, seq, dtype=torch.bool)
position_ids = torch.arange(0, seq, dtype=torch.long)
position_ids = position_ids.unsqueeze(0).view(-1, seq)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model,
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
export_options=export_options,
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

# TODO: From Config/Model
@pytorch_test_common.xfail(
"SymFloat in OnnxFunction attribute is not supported yet."
)
def test_fake_tensor_mode_huggingface_databricks_dolly_v2_3b(self):
from transformers import AutoModel, AutoTokenizer # type: ignore[import]

model_name = "databricks/dolly-v2-3b"
with torch.onnx.enable_fake_mode() as fake_context:
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer("Hello world!", return_tensors="pt")
@@ -422,6 +495,112 @@ def test_fake_tensor_mode_huggingface_bigscience__bloom_560m(self):
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

# TODO: From Config/Model
def test_fake_tensor_mode_huggingface_google_flan_t5_small(self):
from transformers import AutoModel, AutoTokenizer # type: ignore[import]

model_name = "google/flan-t5-small"
with torch.onnx.enable_fake_mode() as fake_context:
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

decoder_input_ids = tokenizer(
"Studies show that", return_tensors="pt"
).input_ids # Batch size 1
# preprocess: prepend decoder_input_ids with the start token, which is the pad token for T5Model.
# This is not needed for torch's T5ForConditionalGeneration, which does this internally using the labels arg.
decoder_input_ids = model._shift_right(decoder_input_ids)
inputs = tokenizer("Hello world!", return_tensors="pt")
inputs["decoder_input_ids"] = decoder_input_ids

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model, **inputs, export_options=export_options
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

# TODO: From Config/Model
def test_fake_tensor_mode_huggingface_openai_whisper_tiny(self):
from datasets import load_dataset # type: ignore[import]
from transformers import ( # type: ignore[import]
AutoModel,
WhisperConfig,
WhisperProcessor,
)

model_name = "openai/whisper-tiny"
with torch.onnx.enable_fake_mode() as fake_context:
config = WhisperConfig.from_pretrained(model_name)
processor = WhisperProcessor.from_pretrained(model_name)
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
)
input_features = processor(
[ds[0]["audio"]["array"]], return_tensors="pt"
).input_features
decoder_input_ids = torch.tensor([[1, 1]]) * config.decoder_start_token_id

model = AutoModel.from_pretrained(model_name)
export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model,
input_features,
decoder_input_ids=decoder_input_ids,
export_options=export_options,
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

@pytorch_test_common.xfail(
"AssertionError: Mutating module attribute seq_len_cached during export."
"self.seq_len_cached = seq_len"
)
def test_fake_tensor_mode_huggingface_tiiuae_falcon(self):
from transformers import FalconConfig, FalconModel # type: ignore[import]

config = FalconConfig()
batch, seq = 4, 256

with torch.onnx.enable_fake_mode() as fake_context:
model = FalconModel(config).eval()
input_ids = torch.randint(0, config.vocab_size, (batch, seq))
attention_mask = torch.ones(batch, seq, dtype=torch.bool)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model,
input_ids=input_ids,
attention_mask=attention_mask,
export_options=export_options,
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)

@pytorch_test_common.xfail(
"torch._dynamo.exc.UserError: Dynamic control flow is not supported at the moment."
"Please use functorch.experimental.control_flow.cond to explicitly capture the control flow."
"if attention_mask is not None and attention_mask[:, 0].sum() != attention_mask.shape[0] and self.training:"
)
def test_fake_tensor_mode_huggingface_mosaicml_mpt_7b(self):
from transformers import ( # type: ignore[import]
AutoModelForCausalLM,
AutoTokenizer,
)

model_name = "mosaicml/mpt-7b"
with torch.onnx.enable_fake_mode() as fake_context:
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer("Hello world!", return_tensors="pt")
model = AutoModelForCausalLM.from_pretrained(model_name)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
export_output = torch.onnx.dynamo_export(
model, **inputs, export_options=export_options
)
onnx.checker.check_model(export_output.model_proto)
onnx.shape_inference.infer_shapes(export_output.model_proto)


if __name__ == "__main__":
common_utils.run_tests()
63 changes: 50 additions & 13 deletions test/onnx/test_fx_to_onnx_with_onnxruntime.py
@@ -14,7 +14,6 @@
import pytorch_test_common
import torch
import torch.onnx
from torch import nn

from torch._subclasses import fake_tensor
@@ -556,20 +555,57 @@ def func(x):
)

def test_gpt2_tiny(self):
from transformers.models.gpt2.configuration_gpt2 import GPT2Config
from transformers.models.gpt2.modeling_gpt2 import GPT2Model

device = "cpu"
# Model
config = GPT2Config(
num_hidden_layers=4,
vocab_size=8096,
hidden_size=16,
intermediate_size=16,
max_position_embeddings=512,
num_attention_heads=2,
hidden_dropout_prob=0.0,
attention_dropout_prob=0.0,
)
model = GPT2Model(config).to(device).eval()

# Encoded inputs
batch, seq = 2, 128
input_ids = torch.randint(0, 8096, (batch, seq)).to(device)
attention_mask = torch.ones(batch, seq, dtype=torch.bool).to(device)
position_ids = torch.arange(0, seq, dtype=torch.long).to(device)
position_ids = position_ids.unsqueeze(0).view(-1, seq)

# Another encoded inputs to test dynamic shapes
another_batch, another_seq = 3, 256
another_input_ids = torch.randint(0, 8096, (another_batch, another_seq)).to(
device
)
another_attention_mask = torch.ones(
another_batch, another_seq, dtype=torch.bool
).to(device)
another_position_ids = torch.arange(0, another_seq, dtype=torch.long).to(device)
another_position_ids = another_position_ids.unsqueeze(0).view(-1, another_seq)

self.run_test_with_fx_to_onnx_exporter_and_onnx_runtime(
model,
(input_ids,),
input_kwargs={
"attention_mask": attention_mask,
"position_ids": position_ids,
},
additional_test_inputs=[
(
(another_input_ids,),
{
"attention_mask": another_attention_mask,
"position_ids": another_position_ids,
},
)
],
)

def test_prims_device_put(self):
@@ -636,7 +672,8 @@ def _test_fx_symbolic_tracer_large_scale_exporter(
with ctx, ftm:
# Toy model with parameters and buffers as FakeTensor's.
fake_model = create_model()
model_state_dict = torch.load(tmp_file.name)
fake_model.load_state_dict(model_state_dict)
# Toy inputs as FakeTensor's.
fake_args = create_args()
# Export ONNX model without initializers while ctx.paths records
