From 0d07492a57c15eaf9fbc7111c203c1fc3ad43777 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Fri, 12 Sep 2025 11:50:36 -0700 Subject: [PATCH 1/9] init --- .github/workflows/_link_check.yml | 24 +++++++++++++++++++ scripts/lint_file_size.sh | 38 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 scripts/lint_file_size.sh diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index aadd6c07420..a84d3c93274 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -55,3 +55,27 @@ jobs: echo "Or add \`@lint-ignore\` somewhere on the same line as the reference you want to skip checking." exit 1 } + + lint-file-size: + if: ${{ github.event_name == 'pull_request' }} + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + runner: linux.2xlarge + docker-image: ci-image:executorch-ubuntu-22.04-linter + submodules: false + fetch-depth: 0 + ref: ${{ inputs.ref }} + timeout: 30 + script: | + ./scripts/lint_file_size.sh $( + if [ "${{ github.event_name }}" = "pull_request" ]; then + echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" + else + echo "${{ github.event.before }}" "${{ github.sha }}" + fi + ) || { + echo + echo "File size lint failed: some files exceed the 1 MB limit." + echo "If you really need large files, consider using Git LFS or storing them elsewhere." + exit 1 + } diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh new file mode 100644 index 00000000000..a42ce6ca455 --- /dev/null +++ b/scripts/lint_file_size.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -euo pipefail + +# Default max file size in bytes (1 MB) +MAX_SIZE=$((1024 * 1024)) +status=0 + +green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' + +if [ $# -eq 2 ]; then + base=$1 + head=$2 + echo "Checking changed files between $base...$head" + files=$(git diff --name-only "$base...$head") +else + echo "Checking all files in repository" + files=$(git ls-files) +fi + +for file in $files; do + if [ -f "$file" ]; then + size=$(wc -c <"$file") + if [ "$size" -gt "$MAX_SIZE" ]; then + echo -e "${red}FAIL${reset} $file (${cyan}${size} bytes${reset}) exceeds ${MAX_SIZE} bytes" + status=1 + else + echo -e "${green}OK${reset} $file (${size} bytes)" + fi + fi +done + +exit $status From d33dc1d18dbeefa19f51b9132ba83421fa730cc1 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Fri, 12 Sep 2025 11:57:54 -0700 Subject: [PATCH 2/9] set different file size limit for pictures --- scripts/lint_file_size.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh index a42ce6ca455..4b07ec5b208 100644 --- a/scripts/lint_file_size.sh +++ b/scripts/lint_file_size.sh @@ -8,7 +8,12 @@ set -euo pipefail # Default max file size in bytes (1 MB) -MAX_SIZE=$((1024 * 1024)) +if [[ "$filepath" =~ \.(png|jpg|jpeg|gif|svg)$ ]]; then + MAX_SIZE=$((5 * 1024 * 1024)) # 5 MB limit for pictures +else + MAX_SIZE=$((1 * 1024 * 1024)) # 1 MB for others +fi + status=0 green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' From 25372731c4cfd3d262d6ba415a960ccc0f47660b Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Fri, 12 Sep 2025 12:12:45 -0700 Subject: [PATCH 3/9] increase size limit --- scripts/lint_file_size.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh index 4b07ec5b208..fd246153b2c 100644 --- a/scripts/lint_file_size.sh +++ b/scripts/lint_file_size.sh @@ -7,13 +7,6 @@ set -euo pipefail -# Default max file size in bytes (1 MB) -if [[ "$filepath" =~ \.(png|jpg|jpeg|gif|svg)$ ]]; then - MAX_SIZE=$((5 * 1024 * 1024)) # 5 MB limit for pictures -else - MAX_SIZE=$((1 * 1024 * 1024)) # 1 MB for others -fi - status=0 green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' @@ -30,6 +23,13 @@ fi for file in $files; do if [ -f "$file" ]; then + # Set size limit depending on extension + if [[ "$file" =~ \.(png|jpg|jpeg|gif|svg|mp3|mp4)$ ]]; then + MAX_SIZE=$((8 * 1024 * 1024)) # 5 MB for pictures + else + MAX_SIZE=$((1 * 1024 * 1024)) # 1 MB for others + fi + size=$(wc -c <"$file") if [ "$size" -gt "$MAX_SIZE" ]; then echo -e "${red}FAIL${reset} $file (${cyan}${size} bytes${reset}) exceeds ${MAX_SIZE} bytes" From 6febe36bf6912a5423a6fd934b5dcf04afbbb2a3 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Mon, 15 Sep 2025 13:29:35 -0700 Subject: [PATCH 4/9] add exception list --- .github/workflows/_link_check.yml | 1 + scripts/lint_file_size.sh | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index a84d3c93274..76ec66ffb5b 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -77,5 +77,6 @@ jobs: echo echo "File size lint failed: some files exceed the 1 MB limit." echo "If you really need large files, consider using Git LFS or storing them elsewhere." + echo "If you really need to get unblocked and check in the file, can add it to the EXCEPTIONS list in scripts/lint_file_size.sh." exit 1 } diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh index fd246153b2c..9547bc569b3 100644 --- a/scripts/lint_file_size.sh +++ b/scripts/lint_file_size.sh @@ -11,6 +11,27 @@ status=0 green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' +# List of files to skip (relative paths) +EXCEPTIONS=( + "examples/models/llama/params/demo_rand_params.pth" + "examples/models/llama/tokenizer/test/resources/test_tiktoken_tokenizer.model" + "examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte" + # Following needs to be clean up + "examples/mediatek/models/llm_models/weights/Llama-3.2-1B-Instruct/tokenizer.json" + "examples/mediatek/models/llm_models/weights/Llama-3.2-3B-Instruct/tokenizer.json" + "examples/mediatek/models/llm_models/weights/llama3-8B-instruct/tokenizer.json" +) + +is_exception() { + local f=$1 + for ex in "${EXCEPTIONS[@]}"; do + if [[ "$f" == "$ex" ]]; then + return 0 + fi + done + return 1 +} + if [ $# -eq 2 ]; then base=$1 head=$2 @@ -22,6 +43,10 @@ else fi for file in $files; do + if is_exception "$file"; then + echo -e "${cyan}SKIP${reset} $file (in exception list)" + continue + fi if [ -f "$file" ]; then # Set size limit depending on extension if [[ "$file" =~ \.(png|jpg|jpeg|gif|svg|mp3|mp4)$ ]]; then From cfbadfa95a35583a337785d7d12c69a183469ac9 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Mon, 15 Sep 2025 14:12:26 -0700 Subject: [PATCH 5/9] add sleep for debugging --- .github/workflows/_link_check.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index 76ec66ffb5b..d4fa61c6ac8 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -67,6 +67,7 @@ jobs: ref: ${{ inputs.ref }} timeout: 30 script: | + sleep 900 ./scripts/lint_file_size.sh $( if [ "${{ github.event_name }}" = "pull_request" ]; then echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" From b746e9327b6a3c2b898eda6e130919dc9274d3a4 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Mon, 15 Sep 2025 14:19:21 -0700 Subject: [PATCH 6/9] add permission --- .github/workflows/_link_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index d4fa61c6ac8..89b3655986c 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -67,7 +67,7 @@ jobs: ref: ${{ inputs.ref }} timeout: 30 script: | - sleep 900 + chmod +x ./scripts/lint_file_size.sh ./scripts/lint_file_size.sh $( if [ "${{ github.event_name }}" = "pull_request" ]; then echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" From 5bba482655cd4ce004d2a1eabce35b21f4c69915 Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Mon, 15 Sep 2025 14:32:29 -0700 Subject: [PATCH 7/9] add comments for explaining the rules --- scripts/lint_file_size.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh index 9547bc569b3..78adf341ae6 100644 --- a/scripts/lint_file_size.sh +++ b/scripts/lint_file_size.sh @@ -11,6 +11,11 @@ status=0 green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' +# Following is the rules for the file size linting: +# 1. For all files, the file size can't be larger than 1MB +# 2. For images/vidoes, the files size can't be larger than 7MB +# 3. There is an exception list defined in the script if it's really needed + # List of files to skip (relative paths) EXCEPTIONS=( "examples/models/llama/params/demo_rand_params.pth" From 8e846bfa96e46c6b1c9e533d9775779f04d819cd Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Tue, 16 Sep 2025 15:29:55 -0700 Subject: [PATCH 8/9] typos and update linux machine --- .github/workflows/_link_check.yml | 2 +- scripts/lint_file_size.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index 89b3655986c..7c08613bd78 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -60,7 +60,7 @@ jobs: if: ${{ github.event_name == 'pull_request' }} uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: - runner: linux.2xlarge + runner: linux.large docker-image: ci-image:executorch-ubuntu-22.04-linter submodules: false fetch-depth: 0 diff --git a/scripts/lint_file_size.sh b/scripts/lint_file_size.sh index 78adf341ae6..e1d666fe6b6 100644 --- a/scripts/lint_file_size.sh +++ b/scripts/lint_file_size.sh @@ -13,7 +13,7 @@ green='\e[1;32m'; red='\e[1;31m'; cyan='\e[1;36m'; reset='\e[0m' # Following is the rules for the file size linting: # 1. For all files, the file size can't be larger than 1MB -# 2. For images/vidoes, the files size can't be larger than 7MB +# 2. For images/videos, the file size can't be larger than 8MB # 3. There is an exception list defined in the script if it's really needed # List of files to skip (relative paths) @@ -55,7 +55,7 @@ for file in $files; do if [ -f "$file" ]; then # Set size limit depending on extension if [[ "$file" =~ \.(png|jpg|jpeg|gif|svg|mp3|mp4)$ ]]; then - MAX_SIZE=$((8 * 1024 * 1024)) # 5 MB for pictures + MAX_SIZE=$((8 * 1024 * 1024)) # 8 MB for pictures else MAX_SIZE=$((1 * 1024 * 1024)) # 1 MB for others fi From 4d3f6221e449189a6196e62e737a6cc97795d59e Mon Sep 17 00:00:00 2001 From: Chen Lai Date: Tue, 16 Sep 2025 15:37:24 -0700 Subject: [PATCH 9/9] use linux 2xlarge --- .github/workflows/_link_check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index 7c08613bd78..89b3655986c 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -60,7 +60,7 @@ jobs: if: ${{ github.event_name == 'pull_request' }} uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: - runner: linux.large + runner: linux.2xlarge docker-image: ci-image:executorch-ubuntu-22.04-linter submodules: false fetch-depth: 0