Commit

First commit
lucataco committed Jun 25, 2023
0 parents commit b435853
Showing 4 changed files with 59 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
__pycache__
.autocog
.cog
cache
19 changes: 19 additions & 0 deletions README.md
@@ -0,0 +1,19 @@
# bigcode/tiny_starcoder_py COG

A [Cog](https://github.com/replicate/cog) wrapper for [bigcode/tiny_starcoder_py](https://huggingface.co/bigcode/tiny_starcoder_py).

## Run

`cog build -t tiny-starcoder`

`docker run -d -p 5000:5000 --gpus all tiny-starcoder`
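
With the Cog CLI installed, you can also run a one-off prediction without starting the server (a convenience, not part of this repo's scripts):

`cog predict -i prompt="def print_hello_world():"`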

## Test

### Input

`curl http://localhost:5000/predictions -X POST -H 'Content-Type: application/json' -d '{"input": {"prompt":"def print_hello_world():"}}'`

### Output

`{"input":{"prompt":"def print_hello_world():","max_new_tokens":20},"output":"def print_hello_world():\n print(\"Hello World!\")\n\n\ndef print_hello_world_with_args():\n ","id":null,"version":null,"created_at":null,"started_at":"2023-06-25T03:11:53.088704+00:00","completed_at":"2023-06-25T03:11:54.356038+00:00","logs":"","error":null,"status":"succeeded","metrics":{"predict_time":1.267334},"output_file_prefix":null,"webhook":null,"webhook_events_filter":["completed","start","output","logs"]}`
11 changes: 11 additions & 0 deletions cog.yaml
@@ -0,0 +1,11 @@
build:
  gpu: true
  cuda: "11.7"
  python_version: "3.10"
  python_packages:
    - "torch==1.13.1"
    - "transformers==4.30.2"
    - "accelerate==0.20.3"
    - "bitsandbytes==0.39.1"

predict: "predict.py:Predictor"
25 changes: 25 additions & 0 deletions predict.py
@@ -0,0 +1,25 @@
from cog import BasePredictor, Input
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda"


class Predictor(BasePredictor):
    def setup(self):
        # Load the tokenizer and model once at startup; weights are cached
        # locally under ./cache so repeat runs skip the download.
        name = "bigcode/tiny_starcoder_py"
        self.tokenizer = AutoTokenizer.from_pretrained(name, cache_dir="cache")
        self.model = AutoModelForCausalLM.from_pretrained(name, cache_dir="cache").to(device)

    def predict(
        self,
        prompt: str = Input(description="Instruction for the model"),
        max_new_tokens: int = Input(description="Max tokens to generate", default=20),
    ) -> str:
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to(device)
        outputs = self.model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # Decode the full sequence (prompt plus completion); special tokens
        # are left in the returned string.
        output = self.tokenizer.decode(outputs[0])
        return output
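
For local debugging outside of Cog's HTTP server, a minimal smoke test might look like the sketch below. This is hypothetical and not part of the commit; it assumes the file layout above and a CUDA-capable GPU, since `device` is hard-coded to `"cuda"`.

```python
# Hypothetical smoke test -- not part of this commit.
# Assumes predict.py is importable and a CUDA GPU is available.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # downloads/loads bigcode/tiny_starcoder_py into ./cache
completion = predictor.predict(
    prompt="def print_hello_world():",
    max_new_tokens=20,
)
print(completion)
```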
