In [7]:
from transformers import  AutoProcessor, AutoModelForImageTextToText, AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
import torch

In [21]:
# ---------------------------------------------------------------------------
# Notebook configuration: paths, model ids, prompts and generation limits.
# ---------------------------------------------------------------------------

# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
LOCAL_CACHE = "D:/Hackathon/notebook"

VLM_MODEL_PATH = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
TRANSLATOR_PATH = "facebook/nllb-200-distilled-600M"

# Language codes for the translator model
# (presumably the codes expected by TRANSLATOR_PATH -- TODO confirm).
TARGET_LANG = "pol_Latn"
SOURCE_LANG = "eng_Latn"

# Prompt and token budget for the free-text description of the found item.
DESC_PROMPT = "Can you describe with details how the lost item shown on this image looks like? Focus on the item, do not describe the background."
DESC_MAX_RESPONSE_LENGTH = 512
# Backward-compatible alias: the misspelled name is referenced by other cells.
DESC_MAX_RESPONSE_LENTGH = DESC_MAX_RESPONSE_LENGTH

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
dtype = torch.bfloat16

# Category names as one comma-separated string; `classes_insert` is the
# repr of the split list, used verbatim inside the verbose prompt below.
CLASSES = 'DOCUMENTS_AND_WALLETS, ELECTRONICS, CLOTHES_AND_ACCESSORIES, KEYS, JEWELRY_AND_WATCHES, CASH, OTHERS'
classes_insert = str(CLASSES.split(', '))

# Classification prompt, variant 1: categories given as a Python-style list.
# Kept under its own name so it is no longer silently overwritten.
CL_PROMPT_VERBOSE = f"A lost item has been found. Your job is to determine what kind of object is it. The only allowed categories are {classes_insert}. Reply only with class index from the list, starting from index 0."
CL_MAX_RESPONSE_LENGTH = 32
# Backward-compatible alias for the original (misspelled) constant name.
CL_MAX_RESPONSE_LENTGH = CL_MAX_RESPONSE_LENGTH
print(CL_PROMPT_VERBOSE)

# Classification prompt, variant 2: explicit numbered menu.
CL_PROMPT_NUMBERED = '''
Classify this lost item into one of the categories below. Reply only with the number:

0 – Documents/Wallets
1 – Electronics
2 – Clothes/Accessories
3 – Keys
4 – Jewelry/Watches
5 – Cash
6 – Other
'''

# The numbered variant is the active classification prompt (this matches the
# value CL_PROMPT ended up with in the original cell).
CL_PROMPT = CL_PROMPT_NUMBERED
print(CL_PROMPT)

cuda
A lost item has been found. Your job is to determine what kind of object is it. The only allowed categories are ['DOCUMENTS_AND_WALLETS', 'ELECTRONICS', 'CLOTHES_AND_ACCESSORIES', 'KEYS', 'JEWELRY_AND_WATCHES', 'CASH', 'OTHERS']. Reply only with class index from the list, starting from index 0.

Classify this lost item into one of the categories below. Reply only with the number:

0 – Documents/Wallets
1 – Electronics
2 – Clothes/Accessories
3 – Keys
4 – Jewelry/Watches
5 – Cash
6 – Other



In [9]:
# Load the processor and the vision-language model from the same checkpoint.
# Both downloads go to LOCAL_CACHE so the weights end up next to the processor
# files instead of in the default Hugging Face cache.
processor = AutoProcessor.from_pretrained(VLM_MODEL_PATH, cache_dir=LOCAL_CACHE)

vlm_model = AutoModelForImageTextToText.from_pretrained(
    VLM_MODEL_PATH,
    cache_dir=LOCAL_CACHE,
    dtype=dtype,
).to(device)  # move the weights to the device selected in the config cell

Fetching 2 files: 100%|███████████████████████████████████████████████| 2/2 [05:19<00:00, 159.57s/it]
Loading checkpoint shards: 100%|███████████████████████████████████████| 2/2 [00:19<00:00,  9.94s/it]


In [16]:
def inference_vlm(image_path: str, prompt: str, max_length: int):
    """Ask the vision-language model a question about a single image.

    Uses the module-level ``processor`` and ``vlm_model`` objects.

    Args:
        image_path: Path of the image, passed to the chat template as the
            ``path`` of an ``image`` content item.
        prompt: Text instruction sent together with the image.
        max_length: Upper bound on newly generated tokens
            (forwarded as ``max_new_tokens``).

    Returns:
        The decoded model reply for the first (only) sequence, with special
        tokens removed and surrounding whitespace stripped.
    """
    # A single user turn carrying the image followed by the text prompt.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "path": image_path},
            {"type": "text", "text": prompt},
        ],
    }

    model_inputs = processor.apply_chat_template(
        [user_turn],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    # Inputs must live on the same device as the model.
    model_inputs = model_inputs.to(vlm_model.device)

    # Sampling is disabled, so repeated calls use the same decoding settings.
    sequences = vlm_model.generate(
        **model_inputs,
        do_sample=False,
        max_new_tokens=max_length,
    )

    # The generated sequences start with the prompt tokens; drop them so only
    # the newly produced part is decoded.
    n_prompt_tokens = model_inputs["input_ids"].shape[-1]
    new_tokens = sequences[:, n_prompt_tokens:]

    decoded = processor.batch_decode(new_tokens, skip_special_tokens=True)
    return decoded[0].strip()

In [22]:
# Smoke test: describe a sample image (path is relative to the working directory).
inference_vlm(
    image_path="rower.png",
    prompt=DESC_PROMPT,
    max_length=DESC_MAX_RESPONSE_LENTGH,
)

'The lost item is a black bicycle with a black seat, black handlebars, and black tires. It has a black frame and a black basket on the back. The bicycle is standing on a dirt path in a forest.'

In [None]:
import os

from langchain_openai import ChatOpenAI

# Subscription key for the API gateway. It is read from the environment so the
# secret never has to be typed into (or committed with) the notebook; when the
# variable is unset the value is an empty string, as in the original cell.
api_key = os.environ.get("PLLUM_API_KEY", "")
base_url = "https://apim-pllum-tst-pcn.azure-api.net/vllm/v1"
model_name = "CYFRAGOVPL/pllum-12b-nc-chat-250715"

llm = ChatOpenAI(
    model=model_name,
    api_key="EMPTY",              # required by the client but unused
    base_url=base_url,
    temperature=0.7,
    max_tokens=300,
    # The gateway subscription key travels in this header.
    default_headers={
        "Ocp-Apim-Subscription-Key": api_key
    }
)

# Ask the model to translate the English description into Polish, as JSON.
response = llm.invoke("Przetłumacz na polski. Zwróć w formacie json {}: The lost item is a black bicycle with a black seat, black handlebars, and black tires. It has a black frame and a black basket on the back. The bicycle is standing on a dirt path in a forest.")
print(response.content)


Przedstawiam tekst przetłumaczony na polski:

{
  "Polish": "Zgubiony przedmiot to czarny rower z czarnym siodełkiem, czarną kierownicą i czarnymi oponami. Ma czarną ramę i czarny koszyk z tyłu. Rower stoi na ścieżce w lesie."
}


In [29]:
!pip install langchain-openai

Defaulting to user installation because normal site-packages is not writeable
