In [1]:
# Pinned runtime stack for the model server that is written to app.py later in this notebook.
# NOTE(review): flash_attn is the only package in this cell without a version pin;
# pin it once a known-good version has been confirmed.
!pip install torch==2.1.2
!pip install torchvision==0.16.2
!pip install transformers==4.40.0
!pip install sentencepiece==0.1.99
# Installed last, after torch — presumably because its build needs torch to be present; TODO confirm.
!pip install flash_attn



In [2]:
# Extra runtime packages: bitsandbytes, plus the newest available accelerate release.
# NOTE(review): presumably both are needed to load the int4 checkpoint used by app.py — confirm.
!pip install bitsandbytes
!pip install --upgrade accelerate



In [4]:
# Install the ngrok agent on this Colab instance from ngrok's apt repository:
#   1. add ngrok's signing key to apt's trusted keys,
#   2. register the ngrok package source,
#   3. refresh the package index and install the `ngrok` package.
# apt-get with -y is used so the cell never waits on a confirmation prompt during "Run all".
!curl -sSL https://ngrok-agent.s3.amazonaws.com/ngrok.asc | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null && echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | sudo tee /etc/apt/sources.list.d/ngrok.list && sudo apt-get update && sudo apt-get install -y ngrok

deb https://ngrok-agent.s3.amazonaws.com buster main
[33m0% [Working][0m            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:7 https://ngrok-agent.s3.amazonaws.com buster InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Ign:10 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:12 https://r2u.stat.illinois.edu/ubuntu jammy Release
Reading package lists... Done
Building dependency tree... Done
Reading state info

In [5]:
# Build llama-cpp-python from source with the CMake options below.
# The variables are given inline on the pip command because every `!` line runs in its
# own shell: a separate `!set "VAR=value"` line (Windows cmd syntax) never reaches pip.
# NOTE(review): -DLLAMA_OPENBLAS=on is kept unchanged; confirm it is still the option name
# recognised by the llama-cpp-python release that gets installed (the package is unpinned).
!CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir



In [6]:
# Web-server dependencies for app.py.
# - "fastapi[all]" is quoted so the shell cannot interpret [all] as a glob pattern.
# - transformers carries the same pin as the install at the top of the notebook, so this
#   cell can never pull in a different release.
!pip install "fastapi[all]" uvicorn python-multipart transformers==4.40.0 pydantic tensorflow
# Create an ngrok account to obtain an authtoken: https://dashboard.ngrok.com/signup



In [7]:
# Register the ngrok authtoken without storing it in the notebook.
# The token is taken from the NGROK_AUTHTOKEN environment variable when set, otherwise
# it is requested through a hidden prompt.
# SECURITY: an authtoken was previously hardcoded in this cell — treat it as leaked and
# revoke it in the ngrok dashboard.
import os
from getpass import getpass

NGROK_AUTHTOKEN = (os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")).strip()
if not NGROK_AUTHTOKEN:
    raise ValueError("An ngrok authtoken is required to open the tunnel.")
!ngrok authtoken {NGROK_AUTHTOKEN}

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
%%writefile app.py

from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import io
from pydantic import BaseModel
from typing import Optional
import torch


# Hugging Face repo that provides both the model weights and the tokenizer.
MODEL_ID = 'openbmb/MiniCPM-V-2_6-int4'

app = FastAPI()

# Loaded once at import time, so every request handler shares the same instances.
# trust_remote_code=True allows the custom model code shipped in the repo to run.
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.eval()  # evaluation mode: the server only runs inference

class TextInput(BaseModel):
    """Request body accepted by the /generate_text endpoint."""
    # Text that is placed into the single user message sent to the model.
    inputs: str

@app.post("/generate_text")
async def generate_text(data: TextInput):
    """Answer a text-only prompt with the loaded model.

    Args:
        data: Request body; ``data.inputs`` becomes the content of one user message.

    Returns:
        JSONResponse of the form ``{"generated_text": <model reply>}``.

    Raises:
        HTTPException: status 500 carrying ``str(error)`` when the model call or
            building the response fails.
    """
    conversation = [{'role': 'user', 'content': [data.inputs]}]
    try:
        reply = model.chat(image=None, msgs=conversation, tokenizer=tokenizer)
        payload = JSONResponse(content={"generated_text": reply})
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
    return payload

@app.post("/generate_text_image")
async def generate_text_image(file: UploadFile = File(...), prompt: Optional[str] = None):
    """Answer a prompt about an uploaded image with the loaded model.

    Args:
        file: Uploaded image; its bytes are decoded with PIL and converted to RGB.
        prompt: Optional text sent together with the image. When omitted, only the
            image is placed in the user message.

    Returns:
        JSONResponse of the form ``{"generated_text": <model reply>}``.

    Raises:
        HTTPException: status 400 when the upload cannot be decoded as an image;
            status 500 carrying ``str(error)`` for any other failure.
    """
    try:
        image_content = await file.read()
        try:
            image = Image.open(io.BytesIO(image_content)).convert('RGB')
        except OSError as e:
            # PIL reports unreadable or truncated image data as OSError subclasses;
            # that is a bad request from the client, not a server fault.
            raise HTTPException(status_code=400, detail=f"Invalid image upload: {e}") from e

        # Never hand a None entry to the model when no prompt was supplied.
        content = [image] if prompt is None else [image, prompt]
        msgs = [{'role': 'user', 'content': content}]
        response = model.chat(image=None, msgs=msgs, tokenizer=tokenizer)
        return JSONResponse(content={"generated_text": response})
    except HTTPException:
        # Keep the deliberate 400 above from being rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/")
async def root():
    """Liveness probe: returns a fixed status message without touching the model."""
    status = {"message": " Model is running "}
    return status

Overwriting app.py


In [9]:
# Launch the FastAPI app with uvicorn as a background process (trailing `&`), so this
# cell returns immediately; stdout and stderr are redirected to server.log.
# Importing app.py loads the model, which starts the model download on first run,
# so expect roughly 5 minutes before the server answers requests.
# `ls` only lists the working directory (app.py should be present).
!ls
!uvicorn app:app --host 0.0.0.0 --port 8000 > server.log 2>&1 &



app.py	__pycache__  sample_data  server.log


In [11]:
# Poll the health endpoint until the server answers.
# While the model is still downloading/loading, the port refuses connections, so curl
# retries on "connection refused" every 10 s, up to 60 times (~10 minutes), instead of
# failing on the first attempt.
# If it still fails, check server.log for the startup error.
!curl -sS --retry 60 --retry-delay 10 --retry-connrefused localhost:8000

{"message":" Model is running "}

In [12]:
# Launch the ngrok agent in the background; it exposes local port 8000 through a public URL.
from IPython import get_ipython

ipython_shell = get_ipython()
ipython_shell.system_raw('ngrok http 8000 &')

In [13]:
# Print the public URL of the first tunnel listed by ngrok's local API (port 4040).
# If this fails, make sure the email on your ngrok account is verified, then re-run
# the previous cell followed by this one.
!curl -s http://localhost:4040/api/tunnels | python3 -c "import json, sys; tunnels = json.load(sys.stdin)['tunnels']; print(tunnels[0]['public_url'])"

https://478a-35-240-247-214.ngrok-free.app


In [14]:
# NOTE(review): nothing visible in this notebook imports PyQt5 — presumably it is meant
# for a separate desktop client; confirm it is needed here, and pin a version if so.
!pip install PyQt5

