
Commit 030638e: First commit
lucataco committed Sep 7, 2023
Showing 7 changed files with 137 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
__pycache__
.cog
model-cache
token-cache
SimSun.ttf
11 changes: 11 additions & 0 deletions README.md
@@ -0,0 +1,11 @@
# QwenLM/Qwen-VL-Chat Cog model

This is an implementation of [QwenLM/Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) as a Cog model. [Cog packages machine learning models as standard containers.](https://github.com/replicate/cog)

First, download the pre-trained weights:

    cog run script/download-weights

Then, you can run predictions:

    cog predict -i image=@poster.jpeg -i prompt="What is the name of the movie in the poster?"
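
Any local image can be passed the same way; for example, with a hypothetical file my_photo.jpg:

    cog predict -i image=@my_photo.jpg -i prompt="Describe this image."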
18 changes: 18 additions & 0 deletions cog.yaml
@@ -0,0 +1,18 @@
# Configuration for Cog
build:
  gpu: true
  cuda: "11.8"
  python_version: "3.10"
  python_packages:
    - "torch==2.0.0"
    - "torchvision"
    - "accelerate==0.22.0"
    - "transformers"
    - "optimum"
    - "matplotlib"
    - "tiktoken"
    - "einops"
    - "transformers_stream_generator"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
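
With this config, the container image can also be built ahead of time (optional; cog predict builds it on demand) using a tag name of your choosing:

    cog build -t qwen-vl-chat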
32 changes: 32 additions & 0 deletions hf.py
@@ -0,0 +1,32 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

MODEL_NAME = "Qwen/Qwen-VL-Chat"
MODEL_CACHE = "model-cache"
TOKEN_CACHE = "token-cache"

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,  # required: Qwen-VL-Chat ships a custom tokenizer
    cache_dir=TOKEN_CACHE
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=MODEL_CACHE
).to("cuda")

model.generation_config = GenerationConfig.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True
)

query = tokenizer.from_list_format([
    {'image': 'poster.jpeg'},
    {'text': 'What is the name of the movie in the poster?'},
])

response, history = model.chat(tokenizer, query=query, history=None)
print(response)
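
model.chat also returns the running conversation history; a minimal sketch of a follow-up turn, assuming the chat API provided by the Qwen remote code accepts that history back:

# Sketch only (not part of this commit): reuse the returned history for a second turn
followup, history = model.chat(tokenizer, 'Who directed it?', history=history)
print(followup)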
Binary file added poster.jpeg
45 changes: 45 additions & 0 deletions predict.py
@@ -0,0 +1,45 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

from cog import BasePredictor, Input, Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

MODEL_NAME = "Qwen/Qwen-VL-Chat"
MODEL_CACHE = "model-cache"
TOKEN_CACHE = "token-cache"

class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=TOKEN_CACHE
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=MODEL_CACHE
        )
        model.generation_config = GenerationConfig.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=MODEL_CACHE
        )
        self.model = model.to("cuda")

    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(description="Question", default="What is the name of the movie in the poster?"),
    ) -> str:
        """Run a single prediction on the model"""
        query = self.tokenizer.from_list_format([
            {'image': str(image)},
            {'text': prompt},
        ])

        response, history = self.model.chat(tokenizer=self.tokenizer, query=query, history=None)
        return response
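
For a quick local check outside of cog predict, a minimal sketch, assuming the weights are already in the cache folders and a CUDA GPU is available:

# Sketch only (not part of this commit): call the Predictor directly
from cog import Path
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads the tokenizer and model onto the GPU
print(predictor.predict(
    image=Path("poster.jpeg"),
    prompt="What is the name of the movie in the poster?",
))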
26 changes: 26 additions & 0 deletions script/download-weights
@@ -0,0 +1,26 @@
#!/usr/bin/env python

# Run this before you deploy it on Replicate
import os
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer

# append project directory to path so predict.py can be imported
sys.path.append('.')
from predict import MODEL_NAME, MODEL_CACHE, TOKEN_CACHE

# Make cache folders
if not os.path.exists(MODEL_CACHE):
    os.makedirs(MODEL_CACHE)
if not os.path.exists(TOKEN_CACHE):
    os.makedirs(TOKEN_CACHE)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=TOKEN_CACHE
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=MODEL_CACHE
)
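
Note: for cog run script/download-weights to work, the script needs to be executable; if it is not, running chmod +x script/download-weights on the host should fix that.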
