-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
commit e5010caebc5a135e40464a06432a5cf1fc965203 Author: Ritwik Das <ritdas@microsoft.com> Date: Mon Jun 27 23:32:49 2022 +0000 Merged PR 2721: Remove unnecessary logging in benchmarks Remove unnecessary logging in benchmarks commit e0c5945d3ef218a5be858bc0934274793972abdb Author: Lisa Ong <onglisa@microsoft.com> Date: Tue Jun 21 01:12:02 2022 +0000 Merged PR 2674: Support emitting runtime array sizes in the Value DSL * Minimum set of changes to support runtime sizes in the Value DSL without transformations * Add a ScalarDimension type (name TBC) which is aliased to Scalar * Support variable ends in MemoryLayout, ScheduledLoopOp, RangeValueAnalysis * Use mlir::ShapedType::kDynamicSize and mlir::ShapedType::kDynamicStrideOrOffset as sentinel values, following the pattern in MemRefOps, TensorOps, etc. * TODO: E2E verification in the next PR * TODO: Python DSL changes in the next PR Output of mlir-translate for the runtime_sizes_all case, where %21, %22 and %23 are the runtime sizes for M, N, and K: ``` define void @NestMatMul(float* %0, float* %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, float* %7, float* %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, float* %14, float* %15, i64 %16, i64 %17, i64 %18, i64 %19, i64 %20, i64 %21, i64 %22, i64 %23) !dbg !3 { br label %25, !dbg !7 25: ; preds = %57, %24 %26 = phi i64 [ %58, %57 ], [ 0, %24 ] %27 = icmp slt i64 %26, %21, !dbg !9 br i1 %27, label %28, label %59, !dbg !10 28: ; preds = %25 br label %29, !dbg !11 29: ; preds = %55, %28 %30 = phi i64 [ %56, %55 ], [ 0, %28 ] %31 = icmp slt i64 %30, %22, !dbg !12 br i1 %31, label %32, label %57, !dbg !13 32: ; preds = %29 br label %33, !dbg !14 33: ; preds = %36, %32 %34 = phi i64 [ %54, %36 ], [ 0, %32 ] %35 = icmp slt i64 %34, %23, !dbg !15 br i1 %35, label %36, label %55, !dbg !16 36: ; preds = %33 %37 = mul i64 %26, %5, !dbg !17 %38 = add i64 %37, %34, !dbg !18 %39 = getelementptr float, float* %1, i64 %38, !dbg !19 %40 = load float, float* %39, align 4, !dbg !20 %41 = mul 
i64 %34, %12, !dbg !21 %42 = add i64 %41, %30, !dbg !22 %43 = getelementptr float, float* %8, i64 %42, !dbg !23 %44 = load float, float* %43, align 4, !dbg !24 %45 = fmul float %40, %44, !dbg !25 %46 = mul i64 %26, %19, !dbg !26 %47 = add i64 %46, %30, !dbg !27 %48 = getelementptr float, float* %15, i64 %47, !dbg !28 %49 = load float, float* %48, align 4, !dbg !29 %50 = fadd float %49, %45, !dbg !30 %51 = mul i64 %26, %19, !dbg !31 %52 = add i64 %51, %30, !dbg !32 %53 = getelementptr float, float* %15, i64 %52, !dbg !33 store float %50, float* %53, align 4, !dbg !34 %54 = add i64 %34, 1, !dbg !35 br label %33, !dbg !36 55: ; preds = %33 %56 = add i64 %30, 1, !dbg !37 br label %29, !dbg !38 57: ; preds = %29 %58 = add i64 %26, 1, !dbg !39 br label %25, !dbg !40 59: ; preds = %25 ret void, !dbg !41 } ``` Related work items: #3716, #3717 commit 51a07e5c60009c47c3b375b402ac96f47619ca8f Author: Ritwik Das <ritdas@microsoft.com> Date: Tue Jun 21 00:18:02 2022 +0000 Merged PR 2682: Add nvidia device optimized sizes and some benchmark fixes Add nvidia dev opt sizes and some bench fixes commit 6325b5e5bc68136d29e4a65d657699a4e781214d Author: Ritwik Das <ritdas@microsoft.com> Date: Sat Jun 18 17:59:50 2022 +0000 Merged PR 2676: Add automated weekly rocm baseline benchmark https://intelligentdevices.visualstudio.com/ELL/_build/results?buildId=41316&view=logs&j=4f7f213a-5f0f-58b0-1189-99ef12faf0d8&t=687344d2-d6b6-5d8c-dd9d-6aab558fd96c https://intelligentdevices.visualstudio.com/ELL/_build/results?buildId=41314&view=logs&j=4f7f213a-5f0f-58b0-1189-99ef12faf0d8 commit 940e599ff7026e7c41cb1b2566eec44d70709e96 Author: Ritwik Das <ritdas@microsoft.com> Date: Fri Jun 17 16:34:22 2022 +0000 Merged PR 2673: Add automated weekly baseline benchmarks on Nvidia GPU
- Loading branch information
Lisa Ong
committed
Jun 30, 2022
1 parent
2459eb8
commit e5d75a6
Showing
66 changed files
with
1,471 additions
and
812 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Scheduled baseline benchmark pipeline: builds the project, then runs the GPU
# benchmark tool against cuBLAS and CUTLASS on an NVIDIA RTX A6000 agent.
schedules:
  # Azure Pipelines evaluates cron expressions in UTC: this is 00:00 every Saturday.
  # NOTE(review): the display name says "Sat_5PM", but 00:00 UTC Saturday is
  # Friday 5 PM US Pacific (PDT) — confirm which time was intended.
  - cron: "0 0 * * 6"
    displayName: Sat_5PM
    branches:
      include:
        - main

# Scheduled runs only; no CI trigger on push.
trigger: none

jobs:
  - job: "CUDA_Benchmarking_Baseline"
    timeoutInMinutes: 360

    pool:
      name: LinuxNVGPUPool
      demands:
        - Target.Model -equals NVIDIA_RTX_A6000

    steps:
      - bash: |
          sudo sysctl -w kernel.core_pattern="$(Build.SourcesDirectory)/build/core-%e-%s-%u-%g-%p-%t.dump"
          ulimit -c unlimited
          python -m pip install -U pip
          python -m pip install -r $(Build.SourcesDirectory)/requirements.txt
          python -m pip install -r $(Build.SourcesDirectory)/tools/benchmarkers/requirements.txt
          python -m pip install -U cmake
          # -p / -sfn keep these idempotent when the agent is reused and the
          # cache directory or link is left over from a previous run.
          echo "mkdir -p $HOME/.ccache"
          mkdir -p "$HOME/.ccache"
          echo "ln -sfn $HOME/.ccache $(System.DefaultWorkingDirectory)/ccache"
          ln -sfn "$HOME/.ccache" "$(System.DefaultWorkingDirectory)/ccache"
          conan remote add accera $(CONAN_REMOTE)
          # Read the password from the mapped environment variable instead of
          # inlining the secret into the generated script text.
          conan user -p "$CONAN_PWD" -r accera $(CONAN_USERNAME)
        displayName: Install prereqs
        env:
          CONAN_PWD: $(CONAN_PWD)

      - bash: |
          git submodule init
          git submodule update
          ./external/vcpkg/bootstrap-vcpkg.sh
          ./external/vcpkg/vcpkg install catch2 tomlplusplus
        displayName: Update vcpkg dependencies
        workingDirectory: "$(Build.SourcesDirectory)"

      - bash: |
          python ./setup.py build
        displayName: Python build
        workingDirectory: "$(Build.SourcesDirectory)"

      - bash: |
          ninja -C $(Build.SourcesDirectory)/build/temp.linux-x86_64-3.8 cublas_gemm
        displayName: Cublas build
        workingDirectory: "$(Build.SourcesDirectory)"

      - bash: |
          # Resource limits are per-process and each step runs in its own shell,
          # so the core-dump limit must be raised in the step that runs the benchmarks.
          ulimit -c unlimited
          export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8

          # run_benchmark <type> <input csv>: one cuBLAS comparison run.
          run_benchmark() {
            python gpu_benchmark_tool.py --type "$1" --target 'NVidia RTX A6000' \
              --branch $(Build.SourceBranch) \
              --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results \
              --upload official_build_container_DO_NOT_UPLOAD_HERE --janitor True --verbose True \
              --cublas $(Build.SourcesDirectory)/build/temp.linux-x86_64-3.8/tools/benchmarkers/cublas/cublas_gemm \
              --input "$2"
          }

          # Same order as before: all "h" runs first, then all "s" runs.
          for input in gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv; do
            run_benchmark h "$input"
          done
          for input in gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv; do
            run_benchmark s "$input"
          done
        displayName: Run CUBLAS benchmarks
        workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
        env:
          ACCOUNT_KEY: $(ACCOUNT_KEY)

      - bash: |
          # Remove any checkout left behind by an earlier failed run; git clone
          # refuses to clone into an existing non-empty directory.
          rm -rf cutlass
          # NOTE(review): this builds whatever the default branch currently points
          # at; consider pinning a tag so the baseline is reproducible.
          git clone --depth 1 https://github.com/NVIDIA/cutlass.git
          cd cutlass && mkdir build && cd build
          export CUDACXX=`which nvcc`
          cmake .. -DCUTLASS_NVCC_ARCHS=86 -DCUTLASS_LIBRARY_KERNELS=all
          # Bound parallelism to the core count; a bare -j places no limit on
          # the number of concurrent compile jobs.
          make cutlass_profiler -j"`nproc`"
        displayName: CUTLASS build

      - bash: |
          # Resource limits are per-process and each step runs in its own shell,
          # so the core-dump limit must be raised in the step that runs the benchmarks.
          ulimit -c unlimited
          export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8

          # run_benchmark <type> <input csv>: one CUTLASS comparison run.
          run_benchmark() {
            python gpu_benchmark_tool.py --type "$1" --target 'NVidia RTX A6000' \
              --branch $(Build.SourceBranch) \
              --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results \
              --upload official_build_container_DO_NOT_UPLOAD_HERE --janitor True --verbose True \
              --cutlass $(System.DefaultWorkingDirectory)/cutlass/build/tools/profiler/cutlass_profiler \
              --input "$2"
          }

          # Same order as before: all "h" runs first, then all "s" runs.
          for input in gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv; do
            run_benchmark h "$input"
          done
          for input in gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv; do
            run_benchmark s "$input"
          done
        displayName: Run CUTLASS benchmarks
        workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
        env:
          ACCOUNT_KEY: $(ACCOUNT_KEY)

      - bash: |
          rm -rf cutlass
        displayName: Cleanup CUTLASS build dir
        # Run even when an earlier step failed so the checkout does not linger on the agent.
        condition: always()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.