
Commit 030638e: First commit
lucataco committed Sep 7, 2023
Showing 7 changed files with 137 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
__pycache__
.cog
model-cache
token-cache
SimSun.ttf
11 changes: 11 additions & 0 deletions README.md
@@ -0,0 +1,11 @@
# QwenLM/Qwen-VL-Chat Cog model

This is an implementation of [QwenLM/Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) as a Cog model. [Cog packages machine learning models as standard containers.](https://github.com/replicate/cog)

First, download the pre-trained weights:

    cog run script/download-weights

Then, you can run predictions:

    cog predict -i image=@poster.jpeg -i prompt="What is the name of the movie in the poster?"
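
Any local image can be passed the same way; for example, with a hypothetical file my_photo.jpg:

    cog predict -i image=@my_photo.jpg -i prompt="Describe this image."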
18 changes: 18 additions & 0 deletions cog.yaml
@@ -0,0 +1,18 @@
# Configuration for Cog
build:
  gpu: true
  cuda: "11.8"
  python_version: "3.10"
  python_packages:
    - "torch==2.0.0"
    - "torchvision"
    - "accelerate==0.22.0"
    - "transformers"
    - "optimum"
    - "matplotlib"
    - "tiktoken"
    - "einops"
    - "transformers_stream_generator"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
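
With this config, the container image can also be built ahead of time (optional; cog predict builds it on demand) using a tag name of your choosing:

    cog build -t qwen-vl-chat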
32 changes: 32 additions & 0 deletions hf.py
@@ -0,0 +1,32 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

MODEL_NAME = "Qwen/Qwen-VL-Chat"
MODEL_CACHE = "model-cache"
TOKEN_CACHE = "token-cache"

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,  # required: Qwen-VL-Chat ships a custom tokenizer
    cache_dir=TOKEN_CACHE
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=MODEL_CACHE
).to("cuda")

model.generation_config = GenerationConfig.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True
)

query = tokenizer.from_list_format([
    {'image': 'poster.jpeg'},
    {'text': 'What is the name of the movie in the poster?'},
])

response, history = model.chat(tokenizer, query=query, history=None)
print(response)
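
model.chat also returns the running conversation history; a minimal sketch of a follow-up turn, assuming the chat API provided by the Qwen remote code accepts that history back:

# Sketch only (not part of this commit): reuse the returned history for a second turn
followup, history = model.chat(tokenizer, 'Who directed it?', history=history)
print(followup)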
Binary file added poster.jpeg
45 changes: 45 additions & 0 deletions predict.py
@@ -0,0 +1,45 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

from cog import BasePredictor, Input, Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

MODEL_NAME = "Qwen/Qwen-VL-Chat"
MODEL_CACHE = "model-cache"
TOKEN_CACHE = "token-cache"

class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=TOKEN_CACHE
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=MODEL_CACHE
        )
        model.generation_config = GenerationConfig.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            cache_dir=MODEL_CACHE
        )
        self.model = model.to("cuda")

    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(description="Question", default="What is the name of the movie in the poster?"),
    ) -> str:
        """Run a single prediction on the model"""
        query = self.tokenizer.from_list_format([
            {'image': str(image)},
            {'text': prompt},
        ])

        response, history = self.model.chat(tokenizer=self.tokenizer, query=query, history=None)
        return response
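
For a quick local check outside of cog predict, a minimal sketch, assuming the weights are already in the cache folders and a CUDA GPU is available:

# Sketch only (not part of this commit): call the Predictor directly
from cog import Path
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads the tokenizer and model onto the GPU
print(predictor.predict(
    image=Path("poster.jpeg"),
    prompt="What is the name of the movie in the poster?",
))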
26 changes: 26 additions & 0 deletions script/download-weights
@@ -0,0 +1,26 @@
#!/usr/bin/env python

# Run this before you deploy it on Replicate
import os
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer

# append project directory to path so predict.py can be imported
sys.path.append('.')
from predict import MODEL_NAME, MODEL_CACHE, TOKEN_CACHE

# Make cache folders
if not os.path.exists(MODEL_CACHE):
    os.makedirs(MODEL_CACHE)
if not os.path.exists(TOKEN_CACHE):
    os.makedirs(TOKEN_CACHE)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=TOKEN_CACHE
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=MODEL_CACHE
)
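
Note: for cog run script/download-weights to work, the script needs to be executable; if it is not, running chmod +x script/download-weights on the host should fix that.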
