Commit

First commit
lucataco committed Jun 25, 2023
0 parents commit b435853
Showing 4 changed files with 59 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
__pycache__
.autocog
.cog
cache
19 changes: 19 additions & 0 deletions README.md
@@ -0,0 +1,19 @@
# bigcode/tiny_starcoder_py COG

A [Cog](https://github.com/replicate/cog) wrapper for [bigcode/tiny_starcoder_py](https://huggingface.co/bigcode/tiny_starcoder_py).

## Run

`cog build -t tiny-starcoder`

`docker run -d -p 5000:5000 --gpus all tiny-starcoder`
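
With the Cog CLI installed, you can also run a one-off prediction without starting the server (a convenience, not part of this repo's scripts):

`cog predict -i prompt="def print_hello_world():"`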

## Test

### Input

`curl http://localhost:5000/predictions -X POST -H 'Content-Type: application/json' -d '{"input": {"prompt":"def print_hello_world():"}}'`

### Output

`{"input":{"prompt":"def print_hello_world():","max_new_tokens":20},"output":"def print_hello_world():\n print(\"Hello World!\")\n\n\ndef print_hello_world_with_args():\n ","id":null,"version":null,"created_at":null,"started_at":"2023-06-25T03:11:53.088704+00:00","completed_at":"2023-06-25T03:11:54.356038+00:00","logs":"","error":null,"status":"succeeded","metrics":{"predict_time":1.267334},"output_file_prefix":null,"webhook":null,"webhook_events_filter":["completed","start","output","logs"]}`
11 changes: 11 additions & 0 deletions cog.yaml
@@ -0,0 +1,11 @@
build:
  gpu: true
  cuda: "11.7"
  python_version: "3.10"
  python_packages:
    - "torch==1.13.1"
    - "transformers==4.30.2"
    - "accelerate==0.20.3"
    - "bitsandbytes==0.39.1"

predict: "predict.py:Predictor"
25 changes: 25 additions & 0 deletions predict.py
@@ -0,0 +1,25 @@
from cog import BasePredictor, Input
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda"


class Predictor(BasePredictor):
    def setup(self):
        # Load the tokenizer and model once at startup; weights are cached
        # locally under ./cache so repeat runs skip the download.
        name = "bigcode/tiny_starcoder_py"
        self.tokenizer = AutoTokenizer.from_pretrained(name, cache_dir="cache")
        self.model = AutoModelForCausalLM.from_pretrained(name, cache_dir="cache").to(device)

    def predict(
        self,
        prompt: str = Input(description="Instruction for the model"),
        max_new_tokens: int = Input(description="Max tokens to generate", default=20),
    ) -> str:
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to(device)
        outputs = self.model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # Decode the full sequence (prompt plus completion); special tokens
        # are left in the returned string.
        output = self.tokenizer.decode(outputs[0])
        return output
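
For local debugging outside of Cog's HTTP server, a minimal smoke test might look like the sketch below. This is hypothetical and not part of the commit; it assumes the file layout above and a CUDA-capable GPU, since `device` is hard-coded to `"cuda"`.

```python
# Hypothetical smoke test -- not part of this commit.
# Assumes predict.py is importable and a CUDA GPU is available.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # downloads/loads bigcode/tiny_starcoder_py into ./cache
completion = predictor.predict(
    prompt="def print_hello_world():",
    max_new_tokens=20,
)
print(completion)
```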
