# Ground Truth Generation for Building Segmentation

Ансамбль Grounded SAM + YOLOv8 с TTA для сегментации зданий (Inria Aerial Dataset).


In [1]:
from __future__ import annotations

import random
from dataclasses import dataclass, field
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from sam2.sam2_image_predictor import SAM2ImagePredictor
from skimage.morphology import remove_small_holes, remove_small_objects
from tqdm import tqdm
from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor
from ultralytics import YOLO

# Seed every RNG in use (Python, NumPy, PyTorch) so that runs are reproducible
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Choose the compute device once; the segmentors below read DEVICE
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Device: {DEVICE}")
if DEVICE == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}, {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

  from .autonotebook import tqdm as notebook_tqdm


Device: cuda
GPU: Tesla V100-SXM2-16GB, 16.9 GB


## Configuration

In [2]:
@dataclass
class Config:
    """All tunable settings of the mask-generation pipeline plus derived data paths.

    Every directory property is derived from ``project_root``, which defaults
    to the parent of the current working directory.
    """
    # Run mode: debug validates on a few train images, otherwise masks are
    # generated for the test set
    debug: bool = False
    debug_samples: int = 10
    project_root: Path = field(default_factory=lambda: Path.cwd().parent)

    # Tiling: tile edge and overlap between neighbouring tiles, in pixels
    # (768 was chosen as a context/detail trade-off)
    tile_size: int = 768
    overlap: int = 192

    # Test-time augmentation: multi-scale + flips + rotations
    use_flips: bool = True
    use_rotations: bool = True  # +90°, -90°, 180°
    scales: tuple[float, ...] = (0.75, 0.85, 1.0, 1.15, 1.25)  # wide scale range, aimed at recall

    # Ensemble: per-model weights and the binarization threshold (recall-oriented)
    grounded_sam_weight: float = 0.50
    yolo_weight: float = 0.50
    threshold: float = 0.12  # lowered to raise recall

    # Post-processing
    min_building_area: int = 50  # lowered so small buildings survive
    max_hole_area: int = 600  # raised so larger holes get filled
    morph_kernel: int = 5  # raised to connect fragments better
    use_edge_refinement: bool = True  # bilateral filtering of the probability map
    bilateral_d: int = 9
    bilateral_sigma_color: float = 75.0
    bilateral_sigma_space: float = 75.0

    # Model identifiers (Hugging Face repositories)
    gdino_model: str = "IDEA-Research/grounding-dino-base"
    sam2_model: str = "facebook/sam2.1-hiera-large"
    yolo_model: str = "keremberke/yolov8m-building-segmentation"

    @property
    def data_dir(self) -> Path:
        """Raw data root: ``<project_root>/data/raw``."""
        return self.project_root.joinpath("data", "raw")

    @property
    def train_images(self) -> Path:
        """Training images: ``<data_dir>/train/images``."""
        return self.data_dir.joinpath("train", "images")

    @property
    def train_gt(self) -> Path:
        """Reference masks for the training images: ``<data_dir>/train/gt``."""
        return self.data_dir.joinpath("train", "gt")

    @property
    def test_images(self) -> Path:
        """Test images: ``<data_dir>/test/images``."""
        return self.data_dir.joinpath("test", "images")

    @property
    def test_gt(self) -> Path:
        """Output folder for generated test masks: ``<project_root>/data/processed/test/gt``."""
        return self.project_root.joinpath("data", "processed", "test", "gt")

    @property
    def val_gt(self) -> Path:
        """Output folder for masks generated during validation: ``<data_dir>/train/gt_val``."""
        return self.data_dir.joinpath("train", "gt_val")


# Instantiate the configuration and make sure both mask output folders exist
cfg = Config()
cfg.test_gt.mkdir(parents=True, exist_ok=True)
cfg.val_gt.mkdir(parents=True, exist_ok=True)

# Report the active mode and the image folder that mode reads from
print(f"Mode: {'DEBUG' if cfg.debug else 'PRODUCTION'}")
print(f"Images: {cfg.train_images if cfg.debug else cfg.test_images}")

Mode: PRODUCTION
Images: /home/user/code/dls_project/data/raw/test/images


## Segmentors

In [3]:
class GroundedSAMSegmentor:
    """Grounding DINO box detection followed by SAM 2.1 mask prediction for buildings."""

    # Broad text prompt, intended to improve recall
    TEXT_PROMPT = "building . house . roof . warehouse . shed . garage . barn . residential building . commercial building . structure . facility . construction . apartment . villa . cottage . hut . cabin ."
    BOX_THRESHOLD = 0.08  # lowered to raise recall
    TEXT_THRESHOLD = 0.08

    def __init__(self, gdino_model: str, sam_model: str):
        """Load the Grounding DINO processor/model and the SAM2 predictor onto DEVICE."""
        self.processor = AutoProcessor.from_pretrained(gdino_model, use_fast=True)
        detector = AutoModelForZeroShotObjectDetection.from_pretrained(gdino_model)
        self.gdino = detector.to(DEVICE)
        self.gdino.eval()
        # The SAM2 predictor is given its device explicitly
        self.sam = SAM2ImagePredictor.from_pretrained(sam_model, device=DEVICE)
        print("Loaded: Grounded SAM")

    def predict(self, image: np.ndarray) -> np.ndarray:
        """Return a float32 score map with the same height/width as ``image``.

        Each detected box is segmented by SAM; the best of SAM's candidate
        masks is weighted by ``detection score * predicted IoU`` and combined
        into the map with a per-pixel maximum. An all-zero map is returned
        when nothing is detected.
        """
        height, width = image.shape[:2]
        prob_map = np.zeros((height, width), dtype=np.float32)

        # Step 1: text-prompted box detection
        inputs = self.processor(images=image, text=self.TEXT_PROMPT, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = self.gdino(**inputs)

        detections = self.processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids,
            threshold=self.BOX_THRESHOLD,
            text_threshold=self.TEXT_THRESHOLD,
            target_sizes=[(height, width)]
        )[0]

        boxes = detections["boxes"].cpu().numpy()
        scores = detections["scores"].cpu().numpy()
        if len(boxes) == 0:
            return prob_map

        # Step 2: box-prompted segmentation, 8 boxes at a time to limit memory use
        with torch.inference_mode():
            self.sam.set_image(image)

            for start in range(0, len(boxes), 8):
                stop = start + 8
                chunk_boxes, chunk_scores = boxes[start:stop], scores[start:stop]

                masks, iou_scores, _ = self.sam.predict(box=chunk_boxes, multimask_output=True)

                # A single box comes back without the leading per-box axis; add it
                if len(chunk_boxes) == 1:
                    masks = masks[np.newaxis, ...]
                    iou_scores = iou_scores[np.newaxis, ...]

                for box_idx in range(len(chunk_scores)):
                    best = iou_scores[box_idx].argmax()
                    confidence = chunk_scores[box_idx] * iou_scores[box_idx, best]
                    weighted_mask = masks[box_idx, best].astype(np.float32) * confidence
                    prob_map = np.maximum(prob_map, weighted_mask)

        return prob_map


class YOLOSegmentor:
    """YOLOv8 segmentation model fine-tuned for buildings."""

    def __init__(self, model_name: str):
        """Download ``best.pt`` from the given Hugging Face repo and load it onto DEVICE."""
        checkpoint = hf_hub_download(repo_id=model_name, filename="best.pt")
        self.model = YOLO(checkpoint)
        # Low confidence threshold to favour recall
        self.model.overrides.update(conf=0.04, iou=0.25, max_det=3000)
        self.model.to(DEVICE)
        print("Loaded: YOLO Building")

    def predict(self, image: np.ndarray) -> np.ndarray:
        """Return a float32 score map with the same height/width as ``image``.

        Every instance mask is resized to the image size, multiplied by its
        box confidence and combined into the map with a per-pixel maximum.
        """
        height, width = image.shape[:2]
        prob_map = np.zeros((height, width), dtype=np.float32)

        for result in self.model(image, verbose=False):
            if result.masks is None:
                continue
            instance_masks = result.masks.data.cpu().numpy()
            confidences = result.boxes.conf.cpu().numpy()
            for mask, conf in zip(instance_masks, confidences):
                resized = cv2.resize(mask, (width, height), interpolation=cv2.INTER_LINEAR)
                prob_map = np.maximum(prob_map, resized * conf)

        return prob_map

## Generator

In [4]:
class GroundTruthGenerator:
    """Produce binary building masks from a weighted model ensemble with TTA.

    Each image is predicted under every combination of rotation, scale and
    flip enabled in the config. Every pass tiles the image, runs all loaded
    models on each tile, blends the tiles back together and maps the result
    back to the original orientation. The per-pixel median over all passes is
    then thresholded and cleaned up morphologically.
    """

    def __init__(self, cfg: Config):
        """Store the configuration; models are loaded separately by ``load_models``."""
        self.cfg = cfg
        # (model, ensemble weight) pairs; each model must expose predict(tile)
        self.models: list[tuple[object, float]] = []

    def load_models(self) -> None:
        """Instantiate both segmentors and pair them with their ensemble weights."""
        print("Loading models...")
        self.models = [
            (GroundedSAMSegmentor(self.cfg.gdino_model, self.cfg.sam2_model), self.cfg.grounded_sam_weight),
            (YOLOSegmentor(self.cfg.yolo_model), self.cfg.yolo_weight),
        ]
        print(f"Loaded {len(self.models)} models")

    def _axis_starts(self, length: int) -> list[int]:
        """Return the tile start offsets along one axis of the given length.

        Offsets advance by ``tile_size - overlap``. The last tile is shifted
        back so that it ends exactly at the border, and iteration stops as
        soon as a tile reaches the border so no tile is emitted twice.

        Raises:
            ValueError: if ``overlap`` is not smaller than ``tile_size``.
        """
        tile = self.cfg.tile_size
        stride = tile - self.cfg.overlap
        if stride <= 0:
            raise ValueError("overlap must be smaller than tile_size")

        starts: list[int] = []
        for pos in range(0, length, stride):
            end = min(pos + tile, length)
            starts.append(max(0, end - tile))
            if end == length:
                # Every later offset would clamp to this very same tile
                break
        return starts

    def _tile_image(self, image: np.ndarray) -> tuple[list[np.ndarray], list[tuple[int, int]]]:
        """Split an image into overlapping tiles.

        Returns:
            ``(tiles, positions)`` where ``positions[i]`` is the ``(y, x)``
            top-left corner of ``tiles[i]``. Tiles are views into ``image``
            and are smaller than ``tile_size`` only when the image itself is.
        """
        h, w = image.shape[:2]
        size = self.cfg.tile_size
        tiles, positions = [], []

        for y in self._axis_starts(h):
            for x in self._axis_starts(w):
                tiles.append(image[y:min(y + size, h), x:min(x + size, w)])
                positions.append((y, x))

        return tiles, positions

    def _merge_tiles(self, tiles: list[np.ndarray], positions: list[tuple[int, int]], shape: tuple[int, int]) -> np.ndarray:
        """Blend per-tile maps into one ``shape``-sized map by weighted averaging.

        Tile edges that do not lie on the image border are faded with a
        linear ramp of ``overlap`` pixels, so overlapping tiles cross-fade
        instead of producing seams.
        """
        h, w = shape
        output = np.zeros((h, w), dtype=np.float32)
        weights = np.zeros((h, w), dtype=np.float32)
        overlap = self.cfg.overlap

        for tile, (y, x) in zip(tiles, positions):
            th, tw = tile.shape[:2]
            wy, wx = np.ones(th), np.ones(tw)

            if overlap > 0:
                ramp = np.linspace(0, 1, overlap)
                # Fade in on the top/left edge, fade out on the bottom/right
                # edge, but only for edges that are inside the image
                if y > 0 and th >= overlap:
                    wy[:overlap] = ramp
                if x > 0 and tw >= overlap:
                    wx[:overlap] = ramp
                if y + th < h and th >= overlap:
                    wy[-overlap:] = ramp[::-1]
                if x + tw < w and tw >= overlap:
                    wx[-overlap:] = ramp[::-1]

            tile_weight = np.outer(wy, wx)
            output[y:y+th, x:x+tw] += tile * tile_weight
            weights[y:y+th, x:x+tw] += tile_weight

        # Guard against division by zero where the accumulated weight is 0
        return output / np.maximum(weights, 1e-8)

    def _postprocess(self, prob_map: np.ndarray, image: np.ndarray | None = None) -> np.ndarray:
        """Turn a probability map into a cleaned binary mask.

        Steps: optional bilateral smoothing, thresholding, morphological
        closing then opening, removal of small objects, filling of small holes.

        Args:
            prob_map: 2-D map of building scores, expected in ``[0, 1]``.
            image: Only acts as a switch: smoothing runs when the config
                enables it and an image is supplied. The filter itself is
                applied to ``prob_map`` alone, not guided by ``image``.

        Returns:
            uint8 mask with value 255 for building pixels and 0 elsewhere.
        """
        if self.cfg.use_edge_refinement and image is not None:
            # Clip before the uint8 cast: scores above 1.0 would otherwise
            # wrap around instead of saturating at 255
            prob_uint8 = (np.clip(prob_map, 0.0, 1.0) * 255).astype(np.uint8)
            prob_filtered = cv2.bilateralFilter(
                prob_uint8,
                self.cfg.bilateral_d,
                self.cfg.bilateral_sigma_color,
                self.cfg.bilateral_sigma_space
            )
            prob_map = prob_filtered.astype(np.float32) / 255.0

        binary = (prob_map >= self.cfg.threshold).astype(np.uint8)

        # Closing fills gaps inside regions, opening removes speckle noise
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (self.cfg.morph_kernel, self.cfg.morph_kernel))
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)

        # Drop tiny components and fill small holes
        binary = remove_small_objects(binary.astype(bool), min_size=self.cfg.min_building_area)
        binary = remove_small_holes(binary, area_threshold=self.cfg.max_hole_area)

        return binary.astype(np.uint8) * 255

    def _process_tile(self, tile: np.ndarray) -> np.ndarray:
        """Return the weighted average of all model predictions for one tile.

        Raises:
            RuntimeError: if no models have been loaded.
            ValueError: if the ensemble weights do not sum to a positive value.
        """
        if not self.models:
            raise RuntimeError("No models loaded; call load_models() first")
        total_weight = sum(weight for _, weight in self.models)
        if total_weight <= 0:
            raise ValueError("Ensemble weights must sum to a positive value")

        th, tw = tile.shape[:2]
        ensemble = np.zeros((th, tw), dtype=np.float32)
        for model, model_weight in self.models:
            ensemble += model.predict(tile) * model_weight

        return ensemble / total_weight

    def _process_at_scale(self, image: np.ndarray, scale: float) -> np.ndarray:
        """Predict on a rescaled copy of ``image`` and return the map at the input size."""
        h, w = image.shape[:2]

        if scale != 1.0:
            # Never let a tiny image collapse to a zero-sized one
            new_h, new_w = max(1, int(h * scale)), max(1, int(w * scale))
            scaled = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        else:
            scaled = image

        tiles, positions = self._tile_image(scaled)
        tile_preds = [self._process_tile(t) for t in tiles]
        prob = self._merge_tiles(tile_preds, positions, scaled.shape[:2])

        if scale != 1.0:
            prob = cv2.resize(prob, (w, h), interpolation=cv2.INTER_LINEAR)

        return prob

    def _rotate_image(self, image: np.ndarray, angle: int) -> np.ndarray:
        """Rotate by 90, 180 or 270 degrees; any other angle returns the input unchanged."""
        if angle in (90, 180, 270):
            return np.rot90(image, k=angle // 90)
        return image

    def _unrotate_prob(self, prob: np.ndarray, angle: int) -> np.ndarray:
        """Undo ``_rotate_image`` for the same angle."""
        if angle in (90, 180, 270):
            return np.rot90(prob, k=-(angle // 90))
        return prob

    def _tta_rotations(self) -> list[int]:
        """Rotation angles used for TTA; only 0 when rotations are disabled."""
        return [0, 90, 180, 270] if self.cfg.use_rotations else [0]

    def _tta_flips(self) -> list:
        """Flip functions used for TTA; ``None`` stands for the unflipped pass."""
        return [None, np.fliplr, np.flipud] if self.cfg.use_flips else [None]

    def process_image(self, image_path: Path) -> np.ndarray:
        """Predict the final mask for one image file.

        Runs one pass per (rotation, scale, flip) combination, maps each
        result back to the original orientation, takes the per-pixel median
        over all passes and post-processes it.

        Raises:
            ValueError: if the configuration yields no TTA pass at all.
        """
        # Load fully inside the context manager so the file handle is released
        with Image.open(image_path) as img:
            image = np.array(img)
        h, w = image.shape[:2]

        rotations = self._tta_rotations()
        flips = self._tta_flips()
        n_passes = len(rotations) * len(self.cfg.scales) * len(flips)
        if n_passes == 0:
            raise ValueError("cfg.scales must contain at least one scale")

        # One preallocated stack instead of a list plus a stacked copy
        stack = np.empty((n_passes, h, w), dtype=np.float32)
        idx = 0

        for rotation in rotations:
            rotated = self._rotate_image(image, rotation).copy()

            for scale in self.cfg.scales:
                for flip in flips:
                    view = rotated if flip is None else flip(rotated).copy()
                    prob = self._process_at_scale(view, scale)
                    if flip is not None:
                        # A flip is its own inverse
                        prob = flip(prob)
                    prob = self._unrotate_prob(prob, rotation)
                    # Defensive: make sure the map matches the original size
                    if prob.shape != (h, w):
                        prob = cv2.resize(np.ascontiguousarray(prob), (w, h), interpolation=cv2.INTER_LINEAR)
                    stack[idx] = prob
                    idx += 1

        # Median rather than mean: more robust to outlier passes.
        # The stack is not needed afterwards, so it may be partitioned in place.
        median_prob = np.median(stack, axis=0, overwrite_input=True)
        return self._postprocess(median_prob, image)

    def generate_masks(self, image_paths: list[Path], output_dir: Path, skip_existing: bool = True) -> list[Path]:
        """Generate and save one TIFF mask per input image.

        Args:
            image_paths: Images to process.
            output_dir: Folder for the masks; each mask keeps the image's file name.
            skip_existing: When true, images whose mask file already exists are
                not recomputed.

        Returns:
            Paths of all masks, both newly written and skipped, in input order.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        n_rotations = len(self._tta_rotations())
        n_scales = len(self.cfg.scales)
        n_flips = len(self._tta_flips())  # hflip + vflip + original
        n_tta = n_rotations * n_scales * n_flips
        print(f"TTA: {n_tta} passes ({n_rotations} rots × {n_scales} scales × {n_flips} flips) | Models: {len(self.models)}")

        generated = []
        for img_path in tqdm(image_paths, desc="Processing"):
            out_path = output_dir / img_path.name
            if skip_existing and out_path.exists():
                generated.append(out_path)
                continue

            mask = self.process_image(img_path)
            # Write to a temporary name and rename afterwards, so an interrupted
            # run never leaves a truncated file that skip_existing would accept
            tmp_path = out_path.with_name(out_path.name + ".part")
            Image.fromarray(mask, mode="L").save(tmp_path, format="TIFF")
            tmp_path.replace(out_path)
            generated.append(out_path)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        return generated

## Metrics & Validation

In [5]:
def get_city(path: Path) -> str:
    """Return the file stem with all digit characters removed.

    For a name such as ``austin12.tif`` this leaves ``austin``.
    """
    non_digits = filter(lambda ch: not ch.isdigit(), path.stem)
    return "".join(non_digits)


def interleave_by_city(paths: list[Path]) -> list[Path]:
    """Order paths round-robin across cities.

    Paths are grouped by ``get_city`` and sorted within each group. The
    result takes the first path of every city (cities in alphabetical
    order), then the second of every city, and so on; cities that run out
    of paths are skipped. An empty input yields an empty list.
    """
    by_city: dict[str, list[Path]] = {}
    for p in paths:
        by_city.setdefault(get_city(p), []).append(p)

    cities = sorted(by_city)
    for city in cities:
        by_city[city].sort()

    # default=0 keeps an empty input from raising inside max()
    rounds = max((len(group) for group in by_city.values()), default=0)

    result: list[Path] = []
    for round_idx in range(rounds):
        for city in cities:
            group = by_city[city]
            if round_idx < len(group):
                result.append(group[round_idx])
    return result


def compute_metrics(pred: np.ndarray, gt: np.ndarray) -> dict[str, float]:
    """Compute binary segmentation metrics between a prediction and a reference.

    Any value greater than 0 counts as foreground in both inputs.

    Degenerate cases:
      * both masks empty (perfect agreement): IoU, Dice, F1, precision and
        recall are all 1.0;
      * precision with no predicted foreground, or recall with no reference
        foreground, is 0.0 unless both masks are empty.

    F1 is computed with the Dice formula (the two are the same quantity), so
    they can never disagree.

    Returns:
        Dict of plain Python floats with keys ``iou``, ``dice``,
        ``precision``, ``recall``, ``f1`` and ``accuracy``.

    Raises:
        ValueError: if the two arrays differ in shape.
    """
    p, g = np.asarray(pred) > 0, np.asarray(gt) > 0
    if p.shape != g.shape:
        raise ValueError(f"Shape mismatch: pred {p.shape} vs gt {g.shape}")

    tp = int(np.count_nonzero(p & g))
    fp = int(np.count_nonzero(p & ~g))
    fn = int(np.count_nonzero(~p & g))
    total = int(p.size)
    tn = total - tp - fp - fn

    union = tp + fp + fn
    iou = tp / union if union > 0 else 1.0
    dice = 2 * tp / (2 * tp + fp + fn) if union > 0 else 1.0

    if tp + fp > 0:
        precision = tp / (tp + fp)
    else:
        # Nothing predicted: perfect only if there was nothing to find
        precision = 1.0 if fn == 0 else 0.0

    if tp + fn > 0:
        recall = tp / (tp + fn)
    else:
        # Nothing to find: perfect only if nothing was predicted
        recall = 1.0 if fp == 0 else 0.0

    f1 = dice
    accuracy = (tp + tn) / total if total > 0 else 0.0

    return {"iou": iou, "dice": dice, "precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}


def run_validation(cfg: Config, generator: GroundTruthGenerator) -> pd.DataFrame:
    """Generate masks for a sample of train images and score them against the reference masks.

    The first ``cfg.debug_samples`` images of the city-interleaved train set
    are processed (existing outputs are regenerated), each result is compared
    with the reference mask of the same name in ``cfg.train_gt``, and a
    summary table is printed.

    Returns:
        DataFrame with one row per image: ``filename`` plus the metrics of
        ``compute_metrics``.

    Raises:
        FileNotFoundError: if ``cfg.train_images`` contains no ``*.tif`` file.
        ValueError: if the sample selection is empty (e.g. ``debug_samples`` is 0).
    """
    all_images = list(cfg.train_images.glob("*.tif"))
    if not all_images:
        raise FileNotFoundError(f"No *.tif images found in {cfg.train_images}")

    samples = interleave_by_city(all_images)[:cfg.debug_samples]
    if not samples:
        raise ValueError("No validation samples selected; check cfg.debug_samples")

    cities = sorted(set(get_city(p) for p in samples))
    print(f"Validating on {len(samples)} images from {len(cities)} cities")

    generator.generate_masks(samples, cfg.val_gt, skip_existing=False)

    results = []
    for p in samples:
        # Read both masks fully inside the context managers so the file
        # handles are released right away
        with Image.open(cfg.val_gt / p.name) as pred_img, Image.open(cfg.train_gt / p.name) as gt_img:
            pred = np.array(pred_img)
            gt = np.array(gt_img)
        results.append({"filename": p.name, **compute_metrics(pred, gt)})

    df = pd.DataFrame(results)

    print("\n" + "=" * 70)
    print("VALIDATION RESULTS")
    print("=" * 70)
    print(df[["filename", "iou", "dice", "f1", "precision", "recall", "accuracy"]].to_string(index=False, float_format="%.3f"))
    print("-" * 70)
    print(f"Mean IoU:       {df['iou'].mean():.3f} ± {df['iou'].std():.3f}")
    print(f"Mean Dice:      {df['dice'].mean():.3f} ± {df['dice'].std():.3f}")
    print(f"Mean F1:        {df['f1'].mean():.3f} ± {df['f1'].std():.3f}")
    print(f"Mean Precision: {df['precision'].mean():.3f} ± {df['precision'].std():.3f}")
    print(f"Mean Recall:    {df['recall'].mean():.3f} ± {df['recall'].std():.3f}")
    print(f"Mean Accuracy:  {df['accuracy'].mean():.3f} ± {df['accuracy'].std():.3f}")
    print("=" * 70)
    return df

## Visualization

In [6]:
def visualize_results(cfg: Config, results_df: pd.DataFrame | None = None, n_samples: int = 3) -> None:
    """Plot images next to their generated masks.

    With ``results_df`` (validation mode) the worst, median and best image by
    IoU are shown with four panels each: image, prediction, reference mask
    and an error overlay. When fewer than three rows exist, each row is shown
    only once. Without ``results_df``, ``n_samples`` randomly chosen masks
    from ``cfg.test_gt`` are shown with three panels: image, prediction and
    overlay. Nothing is plotted when there is nothing to show.
    """
    if results_df is not None:
        if results_df.empty:
            print("No validation results to visualize")
            return
        sorted_df = results_df.sort_values("iou")
        last = len(sorted_df) - 1
        # Keep the first label per row position so that short tables do not
        # plot the same image several times
        picks: dict[int, str] = {}
        for pos, tag in zip([0, len(sorted_df) // 2, last], ["WORST", "MEDIAN", "BEST"]):
            picks.setdefault(pos, tag)
        samples = [sorted_df.iloc[pos] for pos in picks]
        labels = list(picks.values())
        images_dir, pred_dir, gt_dir = cfg.train_images, cfg.val_gt, cfg.train_gt
    else:
        all_masks = sorted(cfg.test_gt.glob("*.tif"))
        samples = random.sample(all_masks, max(0, min(n_samples, len(all_masks))))
        labels = [m.name for m in samples]
        images_dir, pred_dir, gt_dir = cfg.test_images, cfg.test_gt, None

    if not samples:
        print("No masks to visualize")
        return

    def _load(path: Path) -> np.ndarray:
        # Read the pixels and release the file handle immediately
        with Image.open(path) as img:
            return np.array(img)

    has_gt = gt_dir is not None
    ncols = 4 if has_gt else 3
    # squeeze=False keeps axes 2-D even for a single row
    fig, axes = plt.subplots(len(samples), ncols, figsize=(4 * ncols, 4 * len(samples)), squeeze=False)

    for i, (sample, label) in enumerate(zip(samples, labels)):
        is_row = isinstance(sample, pd.Series)
        filename = sample["filename"] if is_row else sample.name
        iou = sample["iou"] if is_row else None

        image = _load(images_dir / filename)
        pred = _load(pred_dir / filename)

        axes[i, 0].imshow(image)
        # Compare against None: an IoU of exactly 0.0 must still be shown
        axes[i, 0].set_title(f"{label}: {filename}" + (f"\nIoU={iou:.3f}" if iou is not None else ""))
        axes[i, 0].axis("off")

        axes[i, 1].imshow(pred, cmap="gray")
        axes[i, 1].set_title("Predicted")
        axes[i, 1].axis("off")

        if has_gt:
            gt = _load(gt_dir / filename)
            axes[i, 2].imshow(gt, cmap="gray")
            axes[i, 2].set_title("Ground Truth")
            axes[i, 2].axis("off")

            # Error overlay on a darkened image: TP=green, FP=red, FN=blue
            p, g = pred > 0, gt > 0
            overlay = (image * 0.4).astype(np.uint8)
            overlay[np.logical_and(p, g)] = [0, 200, 0]
            overlay[np.logical_and(p, ~g)] = [200, 0, 0]
            overlay[np.logical_and(~p, g)] = [0, 0, 200]
            axes[i, 3].imshow(overlay)
            axes[i, 3].set_title("Errors (G=TP, R=FP, B=FN)")
            axes[i, 3].axis("off")
        else:
            overlay = image.copy()
            overlay[pred > 0] = [255, 100, 100]
            axes[i, 2].imshow(overlay)
            axes[i, 2].set_title("Overlay")
            axes[i, 2].axis("off")

    plt.tight_layout()
    plt.show()

## Run

In [7]:
# Initialization: build the generator and load both segmentation models
generator = GroundTruthGenerator(cfg)
generator.load_models()

Loading models...


Loading weights: 100%|█| 1206/1206 [00:02<00:00, 428.43it/s, Materializing param


Loaded: Grounded SAM
Loaded: YOLO Building
Loaded 2 models


In [8]:
# Main run: production generates masks for the test set, debug validates on train images
if not cfg.debug:
    # Production: generate masks for every test image, then preview a few
    test_images = interleave_by_city(list(cfg.test_images.glob("*.tif")))
    print(f"Found {len(test_images)} test images")
    generator.generate_masks(test_images, cfg.test_gt)
    visualize_results(cfg, n_samples=5)
else:
    # Debug: score generated masks against the train reference masks
    results_df = run_validation(cfg, generator)
    visualize_results(cfg, results_df)

Found 180 test images
TTA: 60 passes (4 rots × 5 scales × 3 flips) | Models: 2


Processing:  28%|█████▌              | 50/180 [102:51:17<267:25:22, 7405.56s/it]


: 