# trace [run_process_replay] #3006
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Benchmark workflow: runs on every push to one of the branches listed below.
name: Benchmarks

on:
  push:
    branches:
      - master
      - update_benchmark
jobs:
  # Benchmark suite executed on a self-hosted macOS runner. Most steps tee
  # their output into a text file that the final step uploads as an artifact.
  testmacbenchmark:
    name: Mac Benchmark
    runs-on: [self-hosted, macOS]
    defaults:
      run:
        # pipefail makes `python3 ... | tee file` report the python exit status
        # instead of tee's. Note that errexit (-e) is NOT enabled here.
        shell: bash -o pipefail {0}
    # Only run when the repository is owned by 'tinygrad'.
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4

      # Link weights, datasets and the applegpu disassembler from the runner's
      # home directory (~/tinygrad) into the fresh checkout.
      - name: Symlink models and datasets
        run: |
          mkdir -p weights
          ln -s ~/tinygrad/extra/disassemblers/applegpu extra/disassemblers/applegpu
          ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt
          ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
          ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
          ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz

      # On master: wipe $HOME/traces and export TRACE_PATH (keyed by the pushed
      # commit SHA) and SAVE_TRACE=1 to all later steps through $GITHUB_ENV.
      # NOTE(review): presumably the benchmark scripts write traces when
      # SAVE_TRACE is set; the consumer is not visible in this file.
      - name: Update process replay reference (master only)
        if: github.ref == 'refs/heads/master'
        run: |
          export TRACE_PATH="$HOME/traces/$GITHUB_SHA" && echo "TRACE_PATH=$TRACE_PATH" >> "$GITHUB_ENV"
          rm -rf "$HOME/traces"
          echo "SAVE_TRACE=1" >> "$GITHUB_ENV"

      # When the head commit message contains '[run_process_replay]': point
      # TRACE_PATH at a pinned commit SHA, wipe $HOME/traces and set SAVE_TRACE=1.
      # If the master step above also ran, the TRACE_PATH written here is the
      # later entry in $GITHUB_ENV.
      # `rm -rf` (not `rm -rd`) so a missing traces directory is not reported
      # as an error, matching the master step above.
      - name: Setup process replay
        if: contains(github.event.head_commit.message, '[run_process_replay]')
        run: |
          export TRACE_PATH="$HOME/traces/a921f3317f644c208c5bd6dbe1ed813eff4ab315" && echo "TRACE_PATH=$TRACE_PATH" >> "$GITHUB_ENV"
          rm -rf "$HOME/traces"
          #rm -rf "$TRACE_PATH"
          echo "SAVE_TRACE=1" >> "$GITHUB_ENV"

      - name: Run Stable Diffusion
        run: JIT=2 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt

      - name: Run model inference benchmark
        run: METAL=1 python3 test/external/external_model_benchmark.py

      - name: Test speed vs torch
        run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt

      - name: Test tensor cores
        run: METAL=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded

      - name: Run Tensor Core GEMM
        run: |
          DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
          DEBUG=2 HALF=1 python3 extra/gemm/simple_matmul.py | tee matmul_half.txt

      - name: Fuzz Padded Tensor Core GEMM
        run: METAL=1 M_START=6 M_STOP=10 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=6 K_STOP=24 K_STEP=1 TC_OPT=2 DEBUG=2 python3 ./extra/gemm/fuzz_matmul.py

      - name: Run LLaMA
        run: |
          JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
          JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt

      - name: Run LLaMA with BEAM
        run: JIT=1 BEAM=2 CACHELEVEL=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt

      - name: Run LLaMA 7B on 4 (virtual) GPUs
        run: JIT=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_four_gpu.txt

      - name: Run GPT2
        run: |
          JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
          JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt

      - name: Run GPT2 w HALF
        run: JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt

      - name: Run GPT2 w HALF/BEAM
        run: JIT=1 HALF=1 BEAM=2 CACHELEVEL=0 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt

      - name: Train MNIST
        run: time PYTHONPATH=. TARGET_EVAL_ACC_PCT=97.3 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt

      - name: Run 10 CIFAR training steps
        run: JIT=2 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt

      - name: Run 10 CIFAR training steps w HALF
        run: JIT=2 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt

      # - name: Run 10 CIFAR training steps w BF16
      #   run: STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt

      - name: Run 10 CIFAR training steps w winograd
        run: JIT=2 WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt

      # Same commit-message gate as "Setup process replay" above.
      - name: Run process replay tests
        if: contains(github.event.head_commit.message, '[run_process_replay]')
        run: python3 test/external/replay_benchmarks.py

      # Collect the benchmark outputs produced by the steps above.
      # NOTE(review): train_cifar_bf16.txt is listed although the BF16 step that
      # would write it is commented out; the entry is kept so re-enabling that
      # step needs no change here.
      - uses: actions/upload-artifact@v4
        with:
          name: Speed (Mac)
          path: |
            onnx_inference_speed.csv
            torch_speed.txt
            llama_unjitted.txt
            llama_jitted.txt
            llama_beam.txt
            llama_four_gpu.txt
            gpt2_unjitted.txt
            gpt2_jitted.txt
            gpt2_half.txt
            gpt2_half_beam.txt
            matmul.txt
            matmul_half.txt
            sd.txt
            beautiful_mnist.txt
            train_cifar.txt
            train_cifar_half.txt
            train_cifar_bf16.txt
            train_cifar_wino.txt