In [1]:
from transformers import (
    Mask2FormerForUniversalSegmentation, Mask2FormerImageProcessor
)
from config import VIS_LABEL_MAP as LABEL_COLORS_LIST
from utils import (
    draw_segmentation_map, 
    image_overlay,
    predict
)

import argparse
import cv2
import os
import glob

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Take inputs from the user
image_path = input("Enter the path to the input image file: ").strip()
device = input("Enter the compute device (cpu or cuda): ").strip() or "cuda:0"
model_path = input("Enter the path to the pretrained model: ").strip() or "outputs3/final_model"

Enter the path to the input image file:  /home/sharma.shris/VisualQA/Dataset/valid_images/083.png
Enter the compute device (cpu or cuda):  
Enter the path to the pretrained model:  


In [3]:
imgsz_input = input("Enter the width and height for resizing (e.g., 512 512), or press Enter to skip: ").strip()
imgsz = list(map(int, imgsz_input.split())) if imgsz_input else None

# Output directory
out_dir = 'outputs3/model_test_infer'
os.makedirs(out_dir, exist_ok=True)

Enter the width and height for resizing (e.g., 512 512), or press Enter to skip:  400 400


In [4]:
processor = Mask2FormerImageProcessor()
model = Mask2FormerForUniversalSegmentation.from_pretrained(model_path)
model.to(device).eval()

# Read the input image
image = cv2.imread(image_path)

if image is None:
    raise ValueError(f"Could not read the image from path: {image_path}")

# Resize image if specified
if imgsz is not None:
    image = cv2.resize(image, (imgsz[0], imgsz[1]))

# Convert to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Predict labels
labels = predict(model, processor, image, device)

# Generate segmentation map
seg_map = draw_segmentation_map(
    labels.cpu(), LABEL_COLORS_LIST
)

# Overlay segmentation map on the image
outputs = image_overlay(image, seg_map)

# Save result
image_name = os.path.basename(image_path)
save_path = os.path.join(out_dir, image_name)
cv2.imwrite(save_path, outputs)

print(f"Processed image saved at: {save_path}")

Processed image saved at: outputs3/model_test_infer/083.png


In [5]:
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# default: Load the model on the available device(s)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.float16, device_map="auto"
)

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46
Loading checkpoint shards: 100%|██████████| 5/5 [02:24<00:00, 28.81s/it]


In [6]:
messages = [
    {
        "role": "user",
        "content": [
            {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_000.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'manipulated front' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_004.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'manipulated front' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_032.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'scratch head' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_047.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'scratch head' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_049.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'scratch neck' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_054.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Image of a defective screw with defect 'scratch neck' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_079.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'thread side' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_087.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'thread side' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_101.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'thread top' highlighted with red color"
    },
    {
        "type": "image",
        "image": "/home/sharma.shris/VisualQA/Dataset/segmented_images/overlay_109.png",
        "resized_height": 400,
        "resized_width": 400,
    },
    {
        "type": "text",
        "text": "Example: Image of a defective screw with defect 'thread top' highlighted with red color"
    },
    {
        "type": "image",
        "image": image_path,
    },
    {
        "type": "text",
        "text": "Analyze the given screw image. What defect is highlighted in red? Use one of the following defect categories: 'manipulated front', 'scratch head', 'scratch neck', 'thread side', 'thread top'."
    },
        ],
    }
]

# Preparation for inference
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
inputs = inputs.to("cuda")

In [7]:
generated_ids = model.generate(**inputs, max_new_tokens=128, temperature=0.7, top_p=0.8, top_k=50)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text)

["The defect highlighted in red in the given screw image is 'thread top'."]
