-
Notifications
You must be signed in to change notification settings - Fork 129
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
151 changed files
with
5,317 additions
and
10,825 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
{ | ||
"build": { | ||
"dockerfile": "../Dockerfile" | ||
"dockerfile": "../Dockerfile.dev" | ||
}, | ||
"runArgs": [ | ||
"--gpus", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# syntax=docker/dockerfile:1
# Dev image layered on top of the published LoRAX runtime image.
# NOTE(review): `latest` is unpinned, so builds are not reproducible —
# pin a version tag or digest when one is available.
FROM ghcr.io/predibase/lorax:latest AS base

# Bring in the gRPC proto definitions and the Python server sources so
# they can be synced/rebuilt inside the container.
COPY proto proto
COPY server server
# server/Makefile is already included by `COPY server server`; the extra
# COPY only re-busts the cache when the Makefile itself changes.
COPY server/Makefile server/Makefile

# Final image
FROM base

# --chmod sets the execute bit in the same layer, replacing the original
# COPY + `RUN chmod +x` pairs (fewer layers, same result).
COPY --chmod=755 container-entrypoint.sh entrypoint.sh
COPY --chmod=755 sync.sh sync.sh

# Launch the launcher binary shipped in the base image (exec form so it
# runs as PID 1 and receives SIGTERM); the shell entrypoint is kept here,
# commented out, as an alternative.
# ENTRYPOINT ["./entrypoint.sh"]
ENTRYPOINT ["lorax-launcher"]
CMD ["--json-output"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Build and install the AWQ CUDA kernels (awq_inference_engine).
# Fork that adds only the correct stream to this kernel in order
# to make cuda graphs work.
awq_commit := bd1dc2d5254345cc76ab71894651fb821275bdd4

# Fresh clone of the fork; any stale checkout is discarded first.
awq:
	rm -rf llm-awq
	git clone https://github.com/huggingface/llm-awq

# Pin the tree to the commit above, then compile the extension in-tree.
build-awq: awq
	cd llm-awq/ && git fetch && git checkout $(awq_commit)
	cd llm-awq/awq/kernels && python setup.py build

# Uninstall any previously installed engine (ignoring "not installed")
# so the freshly built kernels take effect.
install-awq: build-awq
	pip uninstall awq_inference_engine -y || true
	cd llm-awq/awq/kernels && python setup.py install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,29 @@ | ||
flash_att_v2_commit := 2c3baba4a63c4007c8a132c5380edc9430f88a22 | ||
flash_att_v2_commit_cuda := v2.5.8 | ||
flash_att_v2_commit_rocm := 2554f490101742ccdc56620a938f847f61754be6 | ||
|
||
flash-attention-v2: | ||
# Clone flash attention | ||
pip install packaging | ||
git clone https://github.com/HazyResearch/flash-attention.git flash-attention-v2 | ||
|
||
build-flash-attention-v2: flash-attention-v2 | ||
cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit) | ||
flash-attention-v2-cuda: | ||
# Clone flash attention | ||
pip install -U packaging ninja --no-cache-dir | ||
git clone https://github.com/Dao-AILab/flash-attention.git flash-attention-v2 | ||
|
||
build-flash-attention-v2-cuda: flash-attention-v2-cuda | ||
cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_cuda) | ||
cd flash-attention-v2 && git submodule update --init --recursive | ||
cd flash-attention-v2 && python setup.py build | ||
|
||
# install-flash-attention-v2: build-flash-attention-v2 | ||
# cd flash-attention-v2 && python setup.py install | ||
install-flash-attention-v2-cuda: build-flash-attention-v2-cuda | ||
cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install | ||
|
||
flash-attention-v2-rocm: | ||
# Clone flash attention | ||
pip install -U packaging ninja --no-cache-dir | ||
git clone https://github.com/ROCm/flash-attention.git flash-attention-v2 | ||
|
||
build-flash-attention-v2-rocm: flash-attention-v2-rocm | ||
cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm) | ||
cd flash-attention-v2 && git submodule update --init --recursive | ||
cd flash-attention-v2 && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build | ||
|
||
# Install from pip because the target commit is actually a release commit | ||
# and the pip wheels do not require nvcc to be installed. | ||
# Reference: https://github.com/Dao-AILab/flash-attention/issues/509 | ||
install-flash-attention-v2: | ||
FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn==2.3.0 --no-build-isolation | ||
install-flash-attention-v2-rocm: build-flash-attention-v2-rocm | ||
cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,25 @@ | ||
vllm_commit := 6d592eb430a37a7f8f5f9beb2dbc014bf3aa76bc | ||
|
||
vllm: | ||
vllm-cuda: | ||
# Clone vllm | ||
git clone https://github.com/vllm-project/vllm.git | ||
pip install -U ninja packaging --no-cache-dir | ||
git clone https://github.com/Narsil/vllm.git vllm | ||
|
||
build-vllm: vllm | ||
cd vllm && git fetch && git checkout $(vllm_commit) | ||
build-vllm-cuda: vllm-cuda | ||
cd vllm && git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa | ||
cd vllm && python setup.py build | ||
|
||
install-vllm: build-vllm | ||
install-vllm-cuda: build-vllm-cuda | ||
pip uninstall vllm -y || true | ||
cd vllm && python setup.py install | ||
|
||
vllm-rocm: | ||
# Clone vllm | ||
pip install -U ninja packaging --no-cache-dir | ||
git clone https://github.com/fxmarty/rocm-vllm.git vllm | ||
|
||
build-vllm-rocm: vllm-rocm | ||
cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479 | ||
cd vllm && PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install | ||
|
||
install-vllm-rocm: build-vllm-rocm | ||
pip uninstall vllm -y || true | ||
cd vllm && python setup.py install | ||
cd vllm && python setup.py install |
Oops, something went wrong.