Apply mlagility to create ONNX model in CI #605

Merged (57 commits) on Jun 20, 2023

Commits (57)
a5a82ef
apply mlagility to create ONNX model in CI
jcwchen Apr 27, 2023
9db5fb2
correct path
jcwchen Apr 27, 2023
65c5d7e
which benchit
jcwchen Apr 27, 2023
d913609
cd mlagility
jcwchen Apr 27, 2023
ccbaf72
pip install -e .
jcwchen Apr 27, 2023
b24081d
add models
jcwchen Apr 27, 2023
acf2326
../mlagility_models/alexnet_torch_hub_2891f54c/onnx/
jcwchen Apr 27, 2023
2731ca4
git diff --exit-code -- .
jcwchen Apr 27, 2023
398b63d
add opset 17 models
jcwchen May 2, 2023
f6c54a1
add faster-rcnn in CIs
jcwchen May 2, 2023
0c06ef3
cd
jcwchen May 2, 2023
ac5f5ec
use 18 instead of 17
jcwchen May 2, 2023
05c495d
Merge branch 'jcw/add-mlagility' of https://github.com/jcwchen/models…
jcwchen May 2, 2023
1a583ec
test opset_version 18
jcwchen May 2, 2023
1f1db90
git diff subdirectory
jcwchen May 3, 2023
500b542
torch==2.0.0 torchvision==0.15.1
jcwchen May 3, 2023
ab889ae
run_mlagility.py
jcwchen May 4, 2023
0380b2f
use better path to store .onnx
jcwchen May 5, 2023
6ff1014
print exception
jcwchen May 6, 2023
db69afb
copy
jcwchen May 6, 2023
a0f0339
+base_name
jcwchen May 6, 2023
dd41d77
correct path from mlagility
jcwchen May 6, 2023
2795d20
add another way to upload new model
jcwchen Jun 6, 2023
37cf55e
validate_model
jcwchen Jun 6, 2023
cea6c52
import os.path as osp
jcwchen Jun 6, 2023
19f6bc3
correct path
jcwchen Jun 6, 2023
934765a
from pathlib import Path
jcwchen Jun 6, 2023
a6f9618
pip install -r
jcwchen Jun 6, 2023
fa5d290
[]
jcwchen Jun 6, 2023
00e4e01
fix
jcwchen Jun 6, 2023
5edb28f
dir_path
jcwchen Jun 6, 2023
388806f
correct path
jcwchen Jun 6, 2023
943fc34
add bart.py
jcwchen Jun 7, 2023
d789edd
change name
jcwchen Jun 7, 2023
3b34a19
update path
jcwchen Jun 7, 2023
ff87370
bart
jcwchen Jun 7, 2023
28c7da1
not os.path.exists(model_name)
jcwchen Jun 8, 2023
d755004
"18"
jcwchen Jun 8, 2023
9eff439
correct
jcwchen Jun 8, 2023
e0f0ff0
change path
jcwchen Jun 14, 2023
358887a
add new-models to CI trigger
jcwchen Jun 14, 2023
bec94e5
add test_data_set
jcwchen Jun 14, 2023
2e99292
add .yml
jcwchen Jun 14, 2023
9a1c97a
ls
jcwchen Jun 14, 2023
6192ee7
stdout=sys.stdout
jcwchen Jun 14, 2023
53b3c98
change commit
jcwchen Jun 14, 2023
0158b32
ls .cache
jcwchen Jun 14, 2023
6196319
ls .cache/alexnet_torch_hub_7df2a577/onnx
jcwchen Jun 14, 2023
2f6d3ee
ls
jcwchen Jun 14, 2023
cfeacf1
stdout=sys.stdout
jcwchen Jun 14, 2023
38d44af
torch==2.0.0
jcwchen Jun 14, 2023
d7d3d27
mlagility_models_dir
jcwchen Jun 14, 2023
7af0369
new config
jcwchen Jun 14, 2023
355c6c3
add 3 more models
jcwchen Jun 15, 2023
4dae47a
add config
jcwchen Jun 15, 2023
0e1a570
remove bert and gpt2 for now
jcwchen Jun 15, 2023
0ef4ef4
clean path
jcwchen Jun 15, 2023
5 changes: 2 additions & 3 deletions .github/workflows/codeql.yml
@@ -13,10 +13,9 @@ name: "CodeQL"

on:
  push:
-    branches: [ "main" ]
+    branches: [ main, new-models]
  pull_request:
-    # The branches below must be a subset of the branches above
-    branches: [ "main" ]
+    branches: [ main, new-models]
  schedule:
    - cron: '31 11 * * 4'

13 changes: 4 additions & 9 deletions .github/workflows/linux_ci.yml
@@ -3,12 +3,11 @@

name: Linux CI

-# Triggers the workflow on push or pull request events but only for the main branch
on:
  push:
-    branches: [ main ]
+    branches: [ main, new-models]
  pull_request:
-    branches: [ main ]
+    branches: [ main, new-models]

jobs:
  # This workflow contains a single job called "build"
@@ -43,10 +42,6 @@ jobs:
        python workflow_scripts/generate_onnx_hub_manifest.py --target diff --drop
        git diff --exit-code -- ONNX_HUB_MANIFEST.json || { echo 'Please use "python workflow_scripts/generate_onnx_hub_manifest.py --target diff" to update ONNX_HUB_MANIFEST.json.' ; exit 1; }

-    - name: Test new models by onnx
+    - name: Test new models by onnx and onnxruntime
      run: |
-        python workflow_scripts/test_models.py --target onnx --drop
-
-    - name: Test new models by onnxruntime
-      run: |
-        python workflow_scripts/test_models.py --target onnxruntime --drop
+        python workflow_scripts/test_models.py --target all --drop
39 changes: 39 additions & 0 deletions .github/workflows/mlagility_validation.yml
@@ -0,0 +1,39 @@
name: Validate created ONNX model from mlagility

on:
  push:
    branches: [ main, new-models]
  pull_request:
    branches: [ main, new-models]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.8']

    steps:
      - uses: actions/checkout@v3
        name: Checkout repo
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniconda-version: "latest"
          activate-environment: mla
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies and mlagility
        run: |
          python -m pip install --upgrade pip
          python -m pip install onnx onnxruntime requests py-cpuinfo

Review comment (Contributor):
Line 36 installs both onnx and onnxruntime. Do we need a separate pip install here?

Reply (Member Author):
I guess you are talking about this requirements file: https://github.com/groq/mlagility/blob/main/models/requirements.txt, but actually onnx and onnxruntime are not there?

Reply (Contributor):
Installation of MLAgility here installs onnx and onnxruntime.

          # Print CPU info for debugging ONNX Runtime inference difference
          python -m cpuinfo
          git clone https://github.com/groq/mlagility.git
          cd mlagility
          pip install -r models/requirements.txt
          pip install -e .

      - name: Validate created ONNX model from mlagility
        run: |
          pip install -r models/mlagility/requirements.txt
          python workflow_scripts/run_mlagility.py
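
To ground the dependency discussion above, here is a minimal sanity-check sketch (not part of this PR; purely illustrative) confirming that onnx and onnxruntime are importable once the "Install dependencies and mlagility" step has run:

# Illustrative sketch only: after installing mlagility, both onnx and
# onnxruntime should be importable, as the reviewer notes above.
import importlib

for pkg in ("onnx", "onnxruntime"):
    mod = importlib.import_module(pkg)  # raises ImportError if the package is missing
    print(pkg, getattr(mod, "__version__", "unknown"))
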
13 changes: 4 additions & 9 deletions .github/workflows/windows_ci.yml
@@ -3,12 +3,11 @@

name: Windows CI

-# Triggers the workflow on push or pull request events but only for the main branch
on:
  push:
-    branches: [ main ]
+    branches: [ main, new-models]
  pull_request:
-    branches: [ main ]
+    branches: [ main, new-models]

jobs:
  # This workflow contains a single job called "build"
@@ -37,10 +36,6 @@ jobs:
        # Print CPU info for debugging ONNX Runtime inference difference
        python -m cpuinfo

-    - name: Test new models by onnx
+    - name: Test new models by onnx and onnxruntime
      run: |
-        python workflow_scripts/test_models.py --target onnx --drop
-
-    - name: Test new models by onnxruntime
-      run: |
-        python workflow_scripts/test_models.py --target onnxruntime --drop
+        python workflow_scripts/test_models.py --target all --drop
3 changes: 3 additions & 0 deletions models/mlagility/alexnet/alexnet-18.onnx
Git LFS file not shown
3 changes: 3 additions & 0 deletions models/mlagility/alexnet/test_data_set_0/input_0.pb
Git LFS file not shown
3 changes: 3 additions & 0 deletions models/mlagility/alexnet/test_data_set_0/output_0.pb
Git LFS file not shown
Git LFS files not shown (5 additional LFS-tracked files)
2 changes: 2 additions & 0 deletions models/mlagility/requirements.txt
@@ -0,0 +1,2 @@
torch==2.0.0
torchvision==0.15.1

Review comment on lines +1 to +2 (Contributor):
These requirements are also covered by mlagility. Do we need a separate requirements file here?

Reply (Member Author):
IIUC, mlagility is still using torch<=1.14.0 according to https://github.com/groq/mlagility/blob/main/models/requirements.txt. ONNX needs PyTorch 2.0 here to convert opset_version 18.

Reply (Contributor):
MLAgility includes a setup.py that installs the base dependencies and a requirements.txt that installs additional packages for the mlagility/models. The setup.py installs torch>=1.12.1 here, which should install torch 2.0.
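
For context on the opset constraint discussed above, here is a minimal, hypothetical export sketch (not part of this PR; the model choice and output file name are illustrative): the opset_version=18 argument is what requires the torch==2.0.0 pinned in this requirements file.

# Illustrative sketch only: exporting a torchvision model at ONNX opset 18,
# which needs the torch==2.0.0 / torchvision==0.15.1 pinned above.
import torch
import torchvision

model = torchvision.models.alexnet(weights=None).eval()
dummy_input = torch.randn(1, 3, 224, 224)

# torch 2.0 accepts opset_version=18; older torch releases cap the exporter at a lower opset.
torch.onnx.export(model, dummy_input, "alexnet-op18-example.onnx", opset_version=18)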

3 changes: 3 additions & 0 deletions models/mlagility/resnet50/resnet50-18.onnx
Git LFS file not shown
3 changes: 3 additions & 0 deletions models/mlagility/resnet50/test_data_set_0/input_0.pb
Git LFS file not shown
3 changes: 3 additions & 0 deletions models/mlagility/resnet50/test_data_set_0/output_0.pb
Git LFS file not shown
9 changes: 8 additions & 1 deletion workflow_scripts/check_model.py
@@ -16,7 +16,7 @@ def has_vnni_support():

def run_onnx_checker(model_path):
    model = onnx.load(model_path)
-    onnx.checker.check_model(model)
+    onnx.checker.check_model(model, full_check=True)


def ort_skip_reason(model_path):
@@ -66,3 +66,10 @@ def run_backend_ort(model_path, test_data_set=None, tar_gz_path=None):
    ort_test_dir_utils.run_test_dir(test_dir_from_tar)
    # remove the produced test_dir from ORT
    test_utils.remove_onnxruntime_test_dir()
+
+def run_backend_ort_with_data(model_path):
+    skip_reason = ort_skip_reason(model_path)
+    if skip_reason:
+        print(skip_reason)
+        return
+    ort_test_dir_utils.run_test_dir(model_path)
6 changes: 6 additions & 0 deletions workflow_scripts/config.py
@@ -0,0 +1,6 @@
models_info = [
    # (script_path, model_name, model_zoo_directory)
    ("torch_hub/alexnet.py", "alexnet_torch_hub_7df2a577", "alexnet"),
    ("torch_hub/resnet50.py", "resnet50_torch_hub_31acb52e", "resnet50"),
    ("torchvision/fasterrcnn_resnet50_fpn_v2.py", "fasterrcnn_resnet50_fpn_v2_torchvision_ec445cac", "fasterrcnn_resnet50_fpn_v2"),
]
39 changes: 39 additions & 0 deletions workflow_scripts/run_mlagility.py
@@ -0,0 +1,39 @@
import config
import os.path as osp
from pathlib import Path
import shutil
import subprocess
import sys


ZOO_OPSET_VERSION = "18"
base_name = f"-op{ZOO_OPSET_VERSION}-base.onnx"
cwd_path = Path.cwd()
mlagility_root = "mlagility/models"
mlagility_models_dir = "models/mlagility"
cache_converted_dir = ".cache"

errors = 0

for script_path, model_name, model_zoo_dir in config.models_info:
    try:
        print(f"----------------Checking {model_zoo_dir}----------------")
        final_model_path = osp.join(mlagility_models_dir, model_zoo_dir, f"{model_zoo_dir}-{ZOO_OPSET_VERSION}.onnx")
        subprocess.run(["benchit", osp.join(mlagility_root, script_path), "--cache-dir", cache_converted_dir,
                        "--onnx-opset", ZOO_OPSET_VERSION, "--export-only"],
                       cwd=cwd_path, stdout=sys.stdout,
                       stderr=sys.stderr)
        shutil.copy(osp.join(cache_converted_dir, model_name, "onnx", model_name + base_name), final_model_path)
        subprocess.run(["git", "diff", "--exit-code", "--", final_model_path],
                       cwd=cwd_path, stdout=sys.stdout,
                       stderr=sys.stderr)
        print(f"Successfully checked {model_zoo_dir}.")
    except Exception as e:
        errors += 1
        print(f"Failed to check {model_zoo_dir} because of {e}.")

if errors > 0:
    print(f"All {len(config.models_info)} model(s) have been checked, but {errors} model(s) failed.")
    sys.exit(1)
else:
    print(f"All {len(config.models_info)} model(s) have been checked.")
29 changes: 27 additions & 2 deletions workflow_scripts/test_models.py
@@ -15,7 +15,7 @@

def get_all_models():
    model_list = []
-    for directory in ["text", "vision"]:
+    for directory in ["text", "vision", "models"]:
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith(tar_ext_name) or file.endswith(onnx_ext_name):
@@ -100,10 +100,35 @@ def main():
                print("[PASS] {} is checked by onnx. ".format(model_name))
            # check uploaded standalone ONNX model by ONNX
            elif onnx_ext_name in model_name:
-                test_utils.pull_lfs_file(model_path)
+                if args.target == "onnx" or args.target == "all":
+                    test_utils.pull_lfs_file(model_path)
                check_model.run_onnx_checker(model_path)
                print("[PASS] {} is checked by onnx. ".format(model_name))
+                if args.target == "onnxruntime" or args.target == "all":
+                    try:
+                        # git lfs pull those test_data_set_* folders
+                        root_dir = Path(model_path).parent
+                        for _, dirs, _ in os.walk(root_dir):
+                            for dir in dirs:
+                                if "test_data_set_" in dir:
+                                    test_data_set_dir = os.path.join(root_dir, dir)
+                                    for _, _, files in os.walk(test_data_set_dir):
+                                        for file in files:
+                                            if file.endswith(".pb"):
+                                                test_utils.pull_lfs_file(os.path.join(test_data_set_dir, file))
+                        check_model.run_backend_ort_with_data(model_path)
+                        print("[PASS] {} is checked by onnxruntime. ".format(model_name))
+                    except Exception as e:
+                        if not args.create:
+                            raise
+                        else:
+                            print("Warning: original test data for {} is broken: {}".format(model_path, e))
+                            test_utils.remove_onnxruntime_test_dir()
+                            if (not model_name.endswith("-int8.onnx") and not model_name.endswith("-qdq.onnx")) or check_model.has_vnni_support():
+                                check_model.run_backend_ort(model_path, None, model_path)
+                            else:
+                                print("Skip quantized models because their test_data_set was created in avx512vnni machines. ")
+                            print("[PASS] {} is checked by onnxruntime. ".format(model_name))

        except Exception as e:
            print("[FAIL] {}: {}".format(model_name, e))