add llama2-7b and llama2-70b examples (#146)

* Prevent OOM of gcsfuse sidecar
* Improve performance of GCS fuse sidecar
substratusai · Aug 5, 2023 · 01f48f1 · 01f48f1
1 parent daa1a5d
commit 01f48f1
Show file tree

Hide file tree

Showing 8 changed files with 86 additions and 2 deletions.
diff --git a/examples/falcon-7b-instruct/finetuned-model-custom-prompt.yaml b/examples/falcon-7b-instruct/finetuned-model-custom-prompt.yaml
@@ -3,8 +3,7 @@ kind: Model
 metadata:
   name: falcon-7b-instruct-k8s-custom-prompt
 spec:
-  image:
-    name: substratusai/model-trainer-huggingface
+  image: substratusai/model-trainer-huggingface
   baseModel:
     name: falcon-7b-instruct
   trainingDataset:

diff --git a/examples/llama2-70b/base-model.yaml b/examples/llama2-70b/base-model.yaml
@@ -0,0 +1,13 @@
+apiVersion: substratus.ai/v1
+kind: Model
+metadata:
+  name: llama-2-70b
+spec:
+  image: substratusai/model-loader-huggingface
+  params:
+    name: meta-llama/Llama-2-70b-hf
+    # Replace ${HUGGINGFACE_TOKEN} below with your own Hugging Face token.
+    # For example, to substitute it from an environment variable, run:
+    #   export HUGGINGFACE_TOKEN=replace-me-with-your-token
+    #   cat base-model.yaml | envsubst | kubectl apply -f -
+    hugging_face_hub_token: ${HUGGINGFACE_TOKEN}
diff --git a/examples/llama2-70b/server.yaml b/examples/llama2-70b/server.yaml
@@ -0,0 +1,12 @@
+apiVersion: substratus.ai/v1
+kind: Server
+metadata:
+  name: llama-2-70b
+spec:
+  image: substratusai/model-server-basaran
+  model:
+    name: llama-2-70b
+  resources:
+    gpu:
+      type: nvidia-a100
+      count: 2
diff --git a/examples/llama2-7b/base-model.yaml b/examples/llama2-7b/base-model.yaml
@@ -0,0 +1,13 @@
+apiVersion: substratus.ai/v1
+kind: Model
+metadata:
+  name: llama-2-7b
+spec:
+  image: substratusai/model-loader-huggingface
+  params:
+    name: meta-llama/Llama-2-7b-hf
+    # Replace ${HUGGINGFACE_TOKEN} below with your own Hugging Face token.
+    # For example, to substitute it from an environment variable, run:
+    #   export HUGGINGFACE_TOKEN=replace-me-with-your-token
+    #   cat base-model.yaml | envsubst | kubectl apply -f -
+    hugging_face_hub_token: ${HUGGINGFACE_TOKEN}
diff --git a/examples/llama2-7b/finetuned-model.yaml b/examples/llama2-7b/finetuned-model.yaml
@@ -0,0 +1,20 @@
+apiVersion: substratus.ai/v1
+kind: Model
+metadata:
+  name: llama-2-7b-k8s
+spec:
+  image: substratusai/model-trainer-huggingface
+  baseModel:
+    name: llama-2-7b
+  trainingDataset:
+    name: k8s-instructions
+  params:
+    # See HuggingFace transformers.TrainingArguments for all parameters
+    # https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments
+    num_train_epochs: 1
+    # Save a checkpoint every 5 steps, for a dataset with ~70 steps in total
+    save_steps: 5
+  resources:
+    gpu:
+      count: 4
+      type: nvidia-l4
diff --git a/examples/llama2-7b/finetuned-server.yaml b/examples/llama2-7b/finetuned-server.yaml
@@ -0,0 +1,12 @@
+apiVersion: substratus.ai/v1
+kind: Server
+metadata:
+  name: llama-2-7b-k8s
+spec:
+  image: substratusai/model-server-basaran
+  model:
+    name: llama-2-7b-k8s
+  resources:
+    gpu:
+      type: nvidia-l4
+      count: 1
diff --git a/examples/llama2-7b/server.yaml b/examples/llama2-7b/server.yaml
@@ -0,0 +1,12 @@
+apiVersion: substratus.ai/v1
+kind: Server
+metadata:
+  name: llama-2-7b
+spec:
+  image: substratusai/model-server-basaran
+  model:
+    name: llama-2-7b
+  resources:
+    gpu:
+      type: nvidia-l4
+      count: 1
diff --git a/internal/cloud/gcp.go b/internal/cloud/gcp.go
@@ -68,6 +68,9 @@ func (gcp *GCP) MountBucket(podMetadata *metav1.ObjectMeta, podSpec *corev1.PodS
 		podMetadata.Annotations = map[string]string{}
 	}
 	podMetadata.Annotations["gke-gcsfuse/volumes"] = "true"
+	podMetadata.Annotations["gke-gcsfuse/cpu-limit"] = "2"
+	podMetadata.Annotations["gke-gcsfuse/memory-limit"] = "800Mi"
+	podMetadata.Annotations["gke-gcsfuse/ephemeral-storage-limit"] = "100Gi"
 
 	var bktURL *BucketURL
 	if statusURL := obj.GetStatusArtifacts().URL; statusURL != "" {