Skip to content

Commit

Permalink
add llama2-7b and llama2-70b examples (#146)
Browse files Browse the repository at this point in the history
* Prevent OOM of the gcsfuse sidecar
* Improve performance of the gcsfuse sidecar
  • Loading branch information
samos123 committed Aug 5, 2023
1 parent daa1a5d commit 01f48f1
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ kind: Model
metadata:
name: falcon-7b-instruct-k8s-custom-prompt
spec:
image:
name: substratusai/model-trainer-huggingface
image: substratusai/model-trainer-huggingface
baseModel:
name: falcon-7b-instruct
trainingDataset:
Expand Down
13 changes: 13 additions & 0 deletions examples/llama2-70b/base-model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Model resource that imports the Llama 2 70B base weights from Hugging Face
# using the model-loader-huggingface image.
apiVersion: substratus.ai/v1
kind: Model
metadata:
  name: llama-2-70b
spec:
  image: substratusai/model-loader-huggingface
  params:
    # Hugging Face repository to load.
    name: meta-llama/Llama-2-70b-hf
    # ${HUGGINGFACE_TOKEN} is a placeholder and must be replaced with your
    # own Hugging Face token before applying. For example:
    #   export HUGGINGFACE_TOKEN=replace-me-with-your-token
    #   cat base-model.yaml | envsubst | kubectl apply -f -
    # The value is quoted so the substituted token is always read as a string
    # (an empty expansion becomes "" instead of null).
    hugging_face_hub_token: "${HUGGINGFACE_TOKEN}"
12 changes: 12 additions & 0 deletions examples/llama2-70b/server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Server resource that serves the llama-2-70b Model with the
# model-server-basaran image.
apiVersion: substratus.ai/v1
kind: Server
metadata:
  name: llama-2-70b
spec:
  image: substratusai/model-server-basaran
  # Name of the Model resource to serve.
  model:
    name: llama-2-70b
  # GPUs requested for the serving workload.
  resources:
    gpu:
      type: nvidia-a100
      count: 2
13 changes: 13 additions & 0 deletions examples/llama2-7b/base-model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Model resource that imports the Llama 2 7B base weights from Hugging Face
# using the model-loader-huggingface image.
apiVersion: substratus.ai/v1
kind: Model
metadata:
  name: llama-2-7b
spec:
  image: substratusai/model-loader-huggingface
  params:
    # Hugging Face repository to load.
    name: meta-llama/Llama-2-7b-hf
    # ${HUGGINGFACE_TOKEN} is a placeholder and must be replaced with your
    # own Hugging Face token before applying. For example:
    #   export HUGGINGFACE_TOKEN=replace-me-with-your-token
    #   cat base-model.yaml | envsubst | kubectl apply -f -
    # The value is quoted so the substituted token is always read as a string
    # (an empty expansion becomes "" instead of null).
    hugging_face_hub_token: "${HUGGINGFACE_TOKEN}"
20 changes: 20 additions & 0 deletions examples/llama2-7b/finetuned-model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Model resource that fine-tunes the llama-2-7b base Model on the
# k8s-instructions Dataset using the model-trainer-huggingface image.
apiVersion: substratus.ai/v1
kind: Model
metadata:
  name: llama-2-7b-k8s
spec:
  image: substratusai/model-trainer-huggingface
  # Model resource used as the starting point for training.
  baseModel:
    name: llama-2-7b
  # Dataset resource to train on.
  trainingDataset:
    name: k8s-instructions
  params:
    # See HuggingFace transformers.TrainingArguments for all parameters
    # https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments
    num_train_epochs: 1
    # Save a checkpoint every 5 steps for a dataset with ~70 steps total
    save_steps: 5
  # GPUs requested for the training workload.
  resources:
    gpu:
      count: 4
      type: nvidia-l4
12 changes: 12 additions & 0 deletions examples/llama2-7b/finetuned-server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Server resource that serves the fine-tuned llama-2-7b-k8s Model with the
# model-server-basaran image.
apiVersion: substratus.ai/v1
kind: Server
metadata:
  name: llama-2-7b-k8s
spec:
  image: substratusai/model-server-basaran
  # Name of the Model resource to serve.
  model:
    name: llama-2-7b-k8s
  # GPUs requested for the serving workload.
  resources:
    gpu:
      type: nvidia-l4
      count: 1
12 changes: 12 additions & 0 deletions examples/llama2-7b/server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Server resource that serves the llama-2-7b base Model with the
# model-server-basaran image.
apiVersion: substratus.ai/v1
kind: Server
metadata:
  name: llama-2-7b
spec:
  image: substratusai/model-server-basaran
  # Name of the Model resource to serve.
  model:
    name: llama-2-7b
  # GPUs requested for the serving workload.
  resources:
    gpu:
      type: nvidia-l4
      count: 1
3 changes: 3 additions & 0 deletions internal/cloud/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ func (gcp *GCP) MountBucket(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodS
podMetadata.Annotations = map[string]string{}
}
podMetadata.Annotations["gke-gcsfuse/volumes"] = "true"
podMetadata.Annotations["gke-gcsfuse/cpu-limit"] = "2"
podMetadata.Annotations["gke-gcsfuse/memory-limit"] = "800Mi"
podMetadata.Annotations["gke-gcsfuse/ephemeral-storage-limit"] = "100Gi"

var bktURL *BucketURL
if statusURL := obj.GetStatusArtifacts().URL; statusURL != "" {
Expand Down

0 comments on commit 01f48f1

Please sign in to comment.