# Предобработка изображений

In [None]:
import os
import shutil

import numpy as np
import cv2
import matplotlib.pyplot as plt
import random
from tqdm import tqdm

import torch

from groundingdino.util.inference import load_model, load_image, predict

In [None]:
# Choose the torch device: CUDA when available, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Input locations; NEW_IMAGES_DIR holds the sub-folder the labelling loop reads from.
IMAGES_DIR = "../data_sirius"
NEW_IMAGES_DIR = "../data_processed"

# Grounding DINO model config; download from
# https://github.com/IDEA-Research/GroundingDINO/tree/main/groundingdino/config
CONFIG_PATH = "../notebooks/grounding_dino/GroundingDINO_SwinT_OGC.py"

# Grounding DINO checkpoint; download from
# https://huggingface.co/ShilongLiu/GroundingDINO/blob/main/groundingdino_swint_ogc.pth
WEIGHTS_PATH = "../notebooks/grounding_dino/groundingdino_swint_ogc.pth"

# Root of the generated dataset (positive/ and negative/ sub-folders are written here).
OUT_DIR = "../data"

## Zero-shot Grounding DINO
https://huggingface.co/ShilongLiu/GroundingDINO

In [None]:
# !pip install torch torchvision torchaudio
# !pip install git+https://github.com/IDEA-Research/GroundingDINO.git
# !pip install supervision opencv-python pillow matplotlib

In [None]:
# Text prompt handed to process_image, which forwards it to predict() as the caption.
PROMPT = "bank logo with stylized T letter inside a shield emblem"

# Build the Grounding DINO model from the config and checkpoint paths;
# device=DEVICE passes the torch device selected above.
model = load_model(CONFIG_PATH, WEIGHTS_PATH, device=DEVICE)

In [None]:
def denormalize_boxes(boxes, W, H):
    """Convert normalized center-format boxes into integer pixel corners.

    Args:
        boxes: Iterable of ``(cx, cy, w, h)`` boxes whose values are fractions
            of the image width/height.
        W: Image width in pixels.
        H: Image height in pixels.

    Returns:
        ``np.ndarray`` holding one ``[x1, y1, x2, y2]`` row per input box.
        Coordinates are truncated with ``int()`` and are not clipped to the
        image bounds.
    """

    def to_corners(box):
        cx, cy, w, h = box
        half_w = w / 2
        half_h = h / 2
        left = int((cx - half_w) * W)
        top = int((cy - half_h) * H)
        right = int((cx + half_w) * W)
        bottom = int((cy + half_h) * H)
        return [left, top, right, bottom]

    return np.array([to_corners(box) for box in boxes])

def show_boxes(img_path, boxes, logits, phrases):
    """Display an image with its detection boxes and captions drawn on top.

    Args:
        img_path: Path of the image file to read with OpenCV.
        boxes: NumPy array of ``[x1, y1, x2, y2]`` pixel boxes (cast to int
            before drawing).
        logits: Per-box confidence scores, printed with two decimals.
        phrases: Per-box text labels, printed before the score.
    """
    bgr = cv2.imread(img_path)
    # OpenCV loads BGR; matplotlib expects RGB.
    canvas = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    green = (0, 255, 0)

    detections = zip(boxes.astype(int), logits, phrases)
    for corners, score, phrase in detections:
        x1, y1, x2, y2 = corners
        caption = f"{phrase} {score:.2f}"
        # Place the caption just above the box, clamped so it never leaves the image top.
        anchor = (x1, max(0, y1 - 5))
        cv2.rectangle(canvas, (x1, y1), (x2, y2), green, 2)
        cv2.putText(canvas, caption, anchor, cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)

    plt.figure(figsize=(8, 8))
    plt.imshow(canvas)
    plt.axis("off")
    plt.show()


def save_yolo(label_path, boxes):
    """Write detection boxes to a YOLO-format label file.

    Each box becomes one line ``0 cx cy w h`` (class id 0, six decimals per
    value); lines are joined with ``\\n`` and no trailing newline is written.
    An empty ``boxes`` produces an empty file.

    Args:
        label_path: Destination ``.txt`` path. Missing parent directories are
            created; a bare file name (no directory part) is written to the
            current working directory.
        boxes: Iterable of ``(cx, cy, w, h)`` boxes, written as-is.
    """
    lines = [f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}" for (cx, cy, w, h) in boxes]

    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually has a directory component.
    parent = os.path.dirname(label_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(label_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
        
        
def process_image(img_path, model, promt, idx, box_thr=0.4, text_thr=0.25, device="cpu"):
    """Run Grounding DINO on one image and file it as a positive or negative sample.

    When the detector returns no boxes the image is copied to
    ``OUT_DIR/negative``. Otherwise the image is copied to
    ``OUT_DIR/positive/images`` and its boxes are written as a YOLO label
    file to ``OUT_DIR/positive/labels``. Output files are named from ``idx``
    zero-padded to five digits.

    Args:
        img_path: Path of the source image.
        model: Model object passed through to ``predict``.
        promt: Text caption describing the object to detect.
        idx: Integer used to build the output file names.
        box_thr: Box confidence threshold forwarded to ``predict``.
        text_thr: Text confidence threshold forwarded to ``predict``.
        device: Torch device string forwarded to ``predict``.

    Returns:
        None in both the positive and the negative case.
    """
    _, image = load_image(img_path)

    # Boxes are treated as normalized (cx, cy, w, h) and written to the label
    # file unchanged; the scores and phrases are not needed for saving.
    boxes, _logits, _phrases = predict(
        model=model,
        image=image,
        caption=promt,
        box_threshold=box_thr,
        text_threshold=text_thr,
        device=device,
    )

    # NOTE(review): the source file is copied byte-for-byte under a ".jpg"
    # name even when it is a PNG; kept as-is so existing datasets stay
    # consistent — confirm downstream readers detect the format by content.
    new_img_name = f"{idx:05d}.jpg"

    if boxes.shape[0] == 0:
        neg_dir = f"{OUT_DIR}/negative"
        # The negative folder was never created before, so the first
        # detection-free image crashed shutil.copy on a fresh output tree.
        os.makedirs(neg_dir, exist_ok=True)
        shutil.copy(img_path, os.path.join(neg_dir, new_img_name))
        print(f"[{os.path.basename(img_path)}] NEGATIVE → {new_img_name}")
        return None

    new_label_name = f"{idx:05d}.txt"

    img_out = os.path.join(f"{OUT_DIR}/positive/images", new_img_name)
    label_out = os.path.join(f"{OUT_DIR}/positive/labels", new_label_name)
    os.makedirs(os.path.dirname(img_out), exist_ok=True)
    os.makedirs(os.path.dirname(label_out), exist_ok=True)

    shutil.copy(img_path, img_out)
    save_yolo(label_out, boxes)

    # For visual inspection, convert the boxes with denormalize_boxes and pass
    # them to show_boxes together with the scores and phrases.
    print(f"[{os.path.basename(img_path)}] POSITIVE → {new_img_name}, {new_label_name}")
    return None

Prompt variants tried (in order): shield with letter T -> T bank -> T logo

In [None]:
# Folder with the images to run through the detector.
src_dir = f'{NEW_IMAGES_DIR}/positive_t-bank'

# os.listdir returns entries in arbitrary order; sort so that the idx -> file
# mapping (and therefore every output file name) is reproducible across runs.
all_images = sorted(
    f for f in os.listdir(src_dir) if f.lower().endswith((".jpg", ".jpeg", ".png"))
)

# Output numbering starts at 1; each image is filed as positive or negative.
for idx, fname in enumerate(tqdm(all_images, desc="Processing images"), start=1):
    img_path = os.path.join(src_dir, fname)
    process_image(img_path, model, PROMPT, idx, device=DEVICE)