From dd953929a5d88b13027e8707de09aa6e87053778 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 00:12:29 -0700
Subject: [PATCH 01/11] Fix eval sanity check CI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 55 +++++++++++++++++++-------------------
 torchchat/usages/eval.py   |  4 +--
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 839d3b9b2..237f5b9cf 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -123,6 +123,7 @@ jobs:
           bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
       - name: Run validation
         run: |
+          # @NOCOMMIT Debug
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           pushd ${TORCHCHAT_ROOT}
           bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
@@ -164,39 +165,37 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
 
   test-cpu-eval-sanity-check-float32:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-cpu
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
       fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    env:
-      TORCHCHAT_ROOT: ${{ github.workspace }}
-      REPO_NAME: ${{ matrix.repo_name }}
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v3
-      - name: Setup Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.10.11'
-      - name: Print machine info
-        run: |
-          echo "$(uname -a)"
-      - name: Install dependencies
-        run: |
-          ./install/install_requirements.sh
-          pip3 list
-          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-      - name: Download checkpoints
-        run: |
-          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
-      - name: Run validation
-        run: |
-          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-          pushd ${TORCHCHAT_ROOT}
-          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
-          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
+    with:
+      runner: linux.4xlarge
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+
+        echo "::group::Install dependencies"
+        ./install/install_requirements.sh
+        pip3 list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+
+        echo "::group::Download checkpoint"
+        export REPO_NAME=${{ matrix.repo_name }}
+        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        echo "::endgroup::"
+
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+        echo "::endgroup::"
+
+        echo "::group::Run eval"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
+        echo "::endgroup::"
 
   gather-models-gpu:
     runs-on: ubuntu-22.04
diff --git a/torchchat/usages/eval.py b/torchchat/usages/eval.py
index 5993c3781..9fec815bd 100644
--- a/torchchat/usages/eval.py
+++ b/torchchat/usages/eval.py
@@ -10,6 +10,8 @@
 import torch._dynamo.config
 import torch._inductor.config
 
+import lm_eval  # noqa
+
 from torchchat.cli.builder import (
     _initialize_model,
     _initialize_tokenizer,
@@ -28,8 +30,6 @@
 torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
 
-import lm_eval
-
 from lm_eval.evaluator import evaluate
 from lm_eval.models.huggingface import HFLM as eval_wrapper
 from lm_eval.tasks import get_task_dict

From b6e3db1eb53d1fcdea7af8483f5aaed1057feed4 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 01:19:08 -0700
Subject: [PATCH 02/11] Fixate psutil to 6.0.0

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 install/install_requirements.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index b483acae4..d3b19d6b7 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -103,5 +103,5 @@ fi
 
 (
   set -x
-  $PIP_EXECUTABLE install lm-eval=="0.4.2"
+  $PIP_EXECUTABLE install lm-eval=="0.4.2" psutil=="6.0.0"
 )

From 775fd60ac834b7f91e6e84ff954ca7517f090666 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 01:34:31 -0700
Subject: [PATCH 03/11] Pre-import evaluate

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 torchchat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchchat.py b/torchchat.py
index 9f85f0692..c3e49e050 100644
--- a/torchchat.py
+++ b/torchchat.py
@@ -8,6 +8,7 @@
 import logging
 import subprocess
 import sys
+import evaluate  # noqa
 
 from torchchat.cli.cli import (
     add_arguments_for_verb,

From 2f6acd27c0bbb3f24a635fb40239e5dc5b0abeed Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 09:23:48 -0700
Subject: [PATCH 04/11] Force install evaluate

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 install/install_requirements.sh | 2 +-
 torchchat.py                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index d3b19d6b7..7031c7afa 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -103,5 +103,5 @@ fi
 
 (
   set -x
-  $PIP_EXECUTABLE install lm-eval=="0.4.2" psutil=="6.0.0"
+  $PIP_EXECUTABLE install lm-eval=="0.4.2" evaluate=="0.4.3" psutil=="6.0.0"
 )
diff --git a/torchchat.py b/torchchat.py
index c3e49e050..a84fdac20 100644
--- a/torchchat.py
+++ b/torchchat.py
@@ -8,7 +8,7 @@
 import logging
 import subprocess
 import sys
-import evaluate  # noqa
+import lm_eval  # noqa
 
 from torchchat.cli.cli import (
     add_arguments_for_verb,

From b9d7bd2793aa7ca798a47d298ed6c89d57f1306a Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 10:19:44 -0700
Subject: [PATCH 05/11] Revert test-cpu-eval-sanity-check-float32 changes since
 it breaks AOTI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 install/install_requirements.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index 7031c7afa..afc7e898d 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -103,5 +103,5 @@ fi
 
 (
   set -x
-  $PIP_EXECUTABLE install lm-eval=="0.4.2" evaluate=="0.4.3" psutil=="6.0.0"
+  $PIP_EXECUTABLE install evaluate=="0.4.3" lm-eval=="0.4.2" "numpy>=1.17,<2.0" psutil=="6.0.0"
 )

From 1d6c13dfdbfd32421e1541336d4a2fe7a8a579e8 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 10:20:55 -0700
Subject: [PATCH 06/11] Remove debug log

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 54 ++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 237f5b9cf..335b4da5e 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -123,7 +123,6 @@ jobs:
           bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
       - name: Run validation
         run: |
-          # @NOCOMMIT Debug
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           pushd ${TORCHCHAT_ROOT}
           bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
@@ -171,31 +170,34 @@ jobs:
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
       fail-fast: false
-    with:
-      runner: linux.4xlarge
-      script: |
-        echo "::group::Print machine info"
-        uname -a
-        echo "::endgroup::"
-
-        echo "::group::Install dependencies"
-        ./install/install_requirements.sh
-        pip3 list
-        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-        echo "::endgroup::"
-
-        echo "::group::Download checkpoint"
-        export REPO_NAME=${{ matrix.repo_name }}
-        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
-        echo "::endgroup::"
-
-        echo "::group::Convert checkpoint"
-        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
-        echo "::endgroup::"
-
-        echo "::group::Run eval"
-        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
-        echo "::endgroup::"
+    runs-on: ${{ matrix.runner }}
+    env:
+      TORCHCHAT_ROOT: ${{ github.workspace }}
+      REPO_NAME: ${{ matrix.repo_name }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10.11'
+      - name: Print machine info
+        run: |
+          echo "$(uname -a)"
+      - name: Install dependencies
+        run: |
+          ./install/install_requirements.sh
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+      - name: Download checkpoints
+        run: |
+          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
+      - name: Run validation
+        run: |
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+          pushd ${TORCHCHAT_ROOT}
+          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
+          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
 
   gather-models-gpu:
     runs-on: ubuntu-22.04

From a8c1e1ffb456e188ea5b07e6e10997161c0cea3e Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 10:22:00 -0700
Subject: [PATCH 07/11] Cleanup

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 335b4da5e..839d3b9b2 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -164,7 +164,6 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
 
   test-cpu-eval-sanity-check-float32:
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-cpu
     strategy:

From 477833286e0e82efce0807f358d2038737ccc195 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 10:27:12 -0700
Subject: [PATCH 08/11] Remove redundant import

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 torchchat.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/torchchat.py b/torchchat.py
index a84fdac20..9f85f0692 100644
--- a/torchchat.py
+++ b/torchchat.py
@@ -8,7 +8,6 @@
 import logging
 import subprocess
 import sys
-import lm_eval  # noqa
 
 from torchchat.cli.cli import (
     add_arguments_for_verb,

From 1f7748da0e19d0738204170d7234336b853f5616 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 10:31:23 -0700
Subject: [PATCH 09/11] Ok can't remove import lm_eval. Move it inside eval
 condition

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 install/install_requirements.sh       | 2 +-
 torchchat/model.py                    | 1 +
 torchchat/usages/eval.py              | 4 ++--
 torchchat/utils/scripts/install_et.sh | 5 +++++
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index afc7e898d..fa921cc69 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -103,5 +103,5 @@ fi
 
 (
   set -x
-  $PIP_EXECUTABLE install evaluate=="0.4.3" lm-eval=="0.4.2" "numpy>=1.17,<2.0" psutil=="6.0.0"
+  $PIP_EXECUTABLE install evaluate=="0.4.3" lm-eval=="0.4.2" psutil=="6.0.0"
 )
diff --git a/torchchat/model.py b/torchchat/model.py
index ab0bc7e21..313f5f45a 100644
--- a/torchchat/model.py
+++ b/torchchat/model.py
@@ -31,6 +31,7 @@
 )
 from torch.nn import functional as F
 
+import lm_eval  # noqa
 from torchtune.models.clip import clip_vision_encoder
 from torchtune.models.llama3_1._component_builders import llama3_1 as llama3_1_builder
 from torchtune.models.llama3_2_vision._component_builders import (
diff --git a/torchchat/usages/eval.py b/torchchat/usages/eval.py
index 9fec815bd..5993c3781 100644
--- a/torchchat/usages/eval.py
+++ b/torchchat/usages/eval.py
@@ -10,8 +10,6 @@
 import torch._dynamo.config
 import torch._inductor.config
 
-import lm_eval  # noqa
-
 from torchchat.cli.builder import (
     _initialize_model,
     _initialize_tokenizer,
@@ -30,6 +28,8 @@
 torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
 
+import lm_eval
+
 from lm_eval.evaluator import evaluate
 from lm_eval.models.huggingface import HFLM as eval_wrapper
 from lm_eval.tasks import get_task_dict
diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh
index 1d8c6e2b2..04db3b287 100755
--- a/torchchat/utils/scripts/install_et.sh
+++ b/torchchat/utils/scripts/install_et.sh
@@ -20,4 +20,9 @@ find_cmake_prefix_path
 clone_executorch
 install_executorch_libs $ENABLE_ET_PYBIND
 install_executorch_python_libs $ENABLE_ET_PYBIND
+# TODO: figure out the root cause of the "AttributeError: module 'evaluate'
+# has no attribute 'utils'" error from evaluate CI jobs and remove
+# `import lm_eval` from torchchat/model.py since it requires a specific version
+# of numpy.
+pip install numpy=='1.26.4'
 popd

From a602e23ca0efdbcb1507b3172437422beb18fd06 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 12:04:02 -0700
Subject: [PATCH 10/11] Add file hash to cache key

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 4 ++--
 torchchat/model.py         | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 839d3b9b2..ea4cc60a1 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -904,9 +904,9 @@ jobs:
           echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
       - name: Load or install ET
         id: install-et
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         env:
-          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}
+          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('torchchat/utils/scripts/install_et.sh') }}
         with:
           path: ./et-build
           key: ${{env.cache-key}}
diff --git a/torchchat/model.py b/torchchat/model.py
index 313f5f45a..0f60ea318 100644
--- a/torchchat/model.py
+++ b/torchchat/model.py
@@ -30,8 +30,9 @@
     SequenceParallel,
 )
 from torch.nn import functional as F
-
-import lm_eval  # noqa
+# TODO: remove this after we figure out where in torchtune an `evaluate` module
+# is being imported, which is being confused with huggingface's `evaluate`.
+import lm_eval  # noqa 
 from torchtune.models.clip import clip_vision_encoder
 from torchtune.models.llama3_1._component_builders import llama3_1 as llama3_1_builder
 from torchtune.models.llama3_2_vision._component_builders import (

From 0917c14d8dc969470b968b95d01f2636da3965f6 Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Thu, 26 Sep 2024 13:20:29 -0700
Subject: [PATCH 11/11] Update cache key for runner-et

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index ea4cc60a1..3e92ed9c0 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -905,13 +905,11 @@ jobs:
       - name: Load or install ET
         id: install-et
         uses: actions/cache@v4
-        env:
-          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('torchchat/utils/scripts/install_et.sh') }}
         with:
-          path: ./et-build
-          key: ${{env.cache-key}}
-          restore-keys: |
-            ${{env.cache-key}}
+          path: |
+            ./et-build
+            ./torchchat/utils/scripts
+          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
       - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
         continue-on-error: true
         run: |