# Triagem batch - Colab

Notebook para gerar JSON de achados usando o mesmo lote de radiografias do projeto.


## 1) Upload do zip (imagens + modelos)
Envie um zip com as imagens e os modelos. Para gerá-lo localmente: `zip -r batch_test.zip doc/batch_test`.
Pode ser um único zip contendo, por exemplo:
- images/ (radiografias)
- models/ (pesos dos modelos)

Se preferir, pode enviar mais de um zip (ex: um com imagens e outro com modelos).


In [None]:
from google.colab import files
import io
import os
import shutil
import zipfile

# Ask for one or more archives through the Colab upload widget; the result
# maps each uploaded file name to its raw bytes.
uploaded = files.upload()
if not uploaded:
    raise SystemExit("Nenhum arquivo enviado.")

# Always start from an empty extraction directory so leftovers from a
# previous run cannot mix with the new payload.
extract_dir = "/content/triagem_payload"
if os.path.exists(extract_dir):
    shutil.rmtree(extract_dir)
os.makedirs(extract_dir, exist_ok=True)

for filename, data in uploaded.items():
    is_zip = filename.lower().endswith(".zip")
    if is_zip:
        # Wrap the uploaded bytes in a file-like object so ZipFile can read
        # them without writing the archive to disk first.
        with zipfile.ZipFile(io.BytesIO(data)) as archive:
            archive.extractall(extract_dir)
            print(f"Extraido: {filename}")
    else:
        print(f"Ignorando {filename} (nao e zip)")

print("Diretorio base:", extract_dir)


## 2) Preparar ambiente
Ajuste as instalações conforme o framework do seu modelo.
Defina o tipo de modelo e os caminhos para os pesos.


In [None]:
# Instalacao de dependencias (rode uma vez)
import subprocess
import sys
import os
import glob

# Toggles selecting which framework dependencies are pip-installed by this cell.
INSTALL_MASK_RCNN = True  # install detectron2 together with the pinned Pillow
INSTALL_YOLO = True  # install ultralytics

def pip_install(packages):
    """Install the given requirement specifiers quietly with pip.

    Runs ``<python> -m pip install -q ...`` using the interpreter that is
    executing this notebook.

    Args:
        packages: Iterable of pip requirement strings.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    command = [sys.executable, '-m', 'pip', 'install', '-q']
    command.extend(packages)
    subprocess.check_call(command)

# Packages installed alongside every framework.
base_packages = ['opencv-python-headless', 'pyyaml']

# Each enabled framework gets its own pip invocation, with the shared base
# packages appended to its specific requirements.
if INSTALL_MASK_RCNN:
    mask_rcnn_packages = [
        'git+https://github.com/facebookresearch/detectron2.git',
        'Pillow==10.3.0',
    ]
    pip_install(mask_rcnn_packages + base_packages)

if INSTALL_YOLO:
    yolo_packages = ['ultralytics>=8.3']
    pip_install(yolo_packages + base_packages)

# --- Run configuration ---------------------------------------------------
MODEL_TYPE = 'mask_rcnn'  # or 'yolo'
CONF_THRESHOLD = 0.4  # detections scoring below this value are dropped
INCLUDE_SEGMENTATION = False  # when True, findings may also carry a polygon
SAMPLE_INDEX = 0  # index of the case exported as the single-result JSON

# Prefer the conventional images/ and models/ sub-folders, falling back to
# the extraction root when the zip was packed without them.
IMAGE_ROOT = os.path.join(extract_dir, 'images')
MODEL_ROOT = os.path.join(extract_dir, 'models')

if not os.path.isdir(IMAGE_ROOT):
    IMAGE_ROOT = extract_dir

if not os.path.isdir(MODEL_ROOT):
    MODEL_ROOT = extract_dir

# Match extensions case-insensitively so files such as "scan.PNG" or
# "scan.JPG" are not silently left out of the batch.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
image_paths = sorted(
    path
    for path in glob.glob(os.path.join(IMAGE_ROOT, '**', '*'), recursive=True)
    if os.path.isfile(path) and path.lower().endswith(IMAGE_EXTENSIONS)
)

print('Total imagens:', len(image_paths))
print('Primeira imagem:', image_paths[0] if image_paths else '(nenhuma)')

MASK_MODEL_PATH = os.path.join(MODEL_ROOT, 'model.pth')
YOLO_MODEL_PATH = os.path.join(MODEL_ROOT, 'best.pt')

# When the default file names are absent, fall back to a weights file found
# anywhere under MODEL_ROOT. glob returns matches in arbitrary order, so the
# candidates are sorted to make the choice reproducible between runs.
if not os.path.exists(MASK_MODEL_PATH):
    mask_candidates = sorted(
        glob.glob(os.path.join(MODEL_ROOT, '**', '*.pth'), recursive=True)
    )
    if mask_candidates:
        MASK_MODEL_PATH = mask_candidates[0]

if not os.path.exists(YOLO_MODEL_PATH):
    yolo_candidates = sorted(
        glob.glob(os.path.join(MODEL_ROOT, '**', '*.pt'), recursive=True)
    )
    if yolo_candidates:
        YOLO_MODEL_PATH = yolo_candidates[0]

# Optional YOLO dataset description; looked up the same way as the weights
# (default location first, then anywhere under MODEL_ROOT).
DATA_YAML_PATH = os.path.join(MODEL_ROOT, 'data.yaml')
if not os.path.exists(DATA_YAML_PATH):
    yaml_candidates = sorted(
        glob.glob(os.path.join(MODEL_ROOT, '**', 'data.yaml'), recursive=True)
    )
    if yaml_candidates:
        DATA_YAML_PATH = yaml_candidates[0]

YOLO_CLASS_NAMES = None
if os.path.exists(DATA_YAML_PATH):
    import yaml
    with open(DATA_YAML_PATH, 'r', encoding='utf-8') as f:
        payload = yaml.safe_load(f) or {}
    # Only a mapping can carry a 'names' entry; anything else is ignored
    # instead of crashing on .get().
    if isinstance(payload, dict):
        YOLO_CLASS_NAMES = payload.get('names')

print('Mask R-CNN:', MASK_MODEL_PATH, os.path.exists(MASK_MODEL_PATH))
print('YOLO:', YOLO_MODEL_PATH, os.path.exists(YOLO_MODEL_PATH))
print('YOLO classes (data.yaml):', YOLO_CLASS_NAMES)


## 3) Gerar JSON (batch + único)
Substitua `run_model` pelo seu pipeline real.
Retorno esperado: lista de dicts com `label`, `confidence` (opcional) e `bbox`/`segmentation` (opcionais).


In [None]:
import json
import os

# Labels for the Mask R-CNN classes, indexed by predicted class id. The list
# length is also used as the ROI-head class count when the model is loaded.
MASK_CLASS_NAMES = ['dente', 'dentina', 'polpa', 'restauracao', 'carie']

def load_model():
    """Build the inference backend selected by ``MODEL_TYPE``.

    Returns:
        dict: ``{'type': 'mask_rcnn', 'predictor': <DefaultPredictor>}`` or
        ``{'type': 'yolo', 'model': <YOLO>}``.

    Raises:
        FileNotFoundError: If the weights file of the selected model does
            not exist.
        ValueError: If ``MODEL_TYPE`` is neither ``'mask_rcnn'`` nor
            ``'yolo'``.
    """
    if MODEL_TYPE == 'mask_rcnn':
        # Fail early with a clear message instead of an error raised deep
        # inside the framework's checkpoint loader.
        if not os.path.isfile(MASK_MODEL_PATH):
            raise FileNotFoundError(
                f'Pesos do Mask R-CNN nao encontrados: {MASK_MODEL_PATH}'
            )

        from detectron2.config import get_cfg
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        import torch

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml'))
        cfg.MODEL.WEIGHTS = MASK_MODEL_PATH
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(MASK_CLASS_NAMES)
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = CONF_THRESHOLD
        # NOTE(review): the two overrides below presumably mirror the
        # configuration the checkpoint was trained with -- confirm before
        # changing them.
        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 0.5, 1.0, 2.0, 3.0]]
        cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 28
        # Use the GPU when the runtime has one, otherwise fall back to CPU.
        cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

        predictor = DefaultPredictor(cfg)
        return {'type': 'mask_rcnn', 'predictor': predictor}

    if MODEL_TYPE == 'yolo':
        if not os.path.isfile(YOLO_MODEL_PATH):
            raise FileNotFoundError(
                f'Pesos do YOLO nao encontrados: {YOLO_MODEL_PATH}'
            )

        from ultralytics import YOLO
        model = YOLO(YOLO_MODEL_PATH)
        return {'type': 'yolo', 'model': model}

    raise ValueError('MODEL_TYPE invalido.')

def mask_to_polygon(mask):
    """Convert a mask into the outline polygon of its largest region.

    Args:
        mask: Array-like mask; every value greater than zero is treated as
            foreground. Presumably a 2-D array -- confirm against callers.

    Returns:
        list[list[float]] | None: ``[x, y]`` vertices of the external
        contour with the largest area, or ``None`` when no contour is found
        or the largest one has fewer than three points.
    """
    import cv2

    binary = (mask > 0).astype('uint8')
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    largest = max(contours, key=cv2.contourArea)
    # Fewer than three points cannot describe a polygon.
    if len(largest) < 3:
        return None

    # OpenCV contours come as (N, 1, 2); flatten to N rows of (x, y).
    return largest.reshape(-1, 2).astype(float).tolist()

def get_label_from_names(names_source, class_id):
    """Resolve a numeric class id to a label.

    Args:
        names_source: Mapping keyed by class id (as ``int`` or ``str``), a
            list/tuple indexed by class id, or any other value (e.g.
            ``None``) when no names are available.
        class_id: Integer class id to look up.

    Returns:
        The matching entry of ``names_source``, or ``str(class_id)`` when
        the id is unknown or out of range.
    """
    fallback = str(class_id)

    if isinstance(names_source, dict):
        # Keys may be ints or their string form, so try both.
        for key in (class_id, fallback):
            if key in names_source:
                return names_source[key]
        return fallback

    if isinstance(names_source, (list, tuple)):
        # The lower bound matters: a negative id must not wrap around and
        # pick a label from the end of the sequence.
        if 0 <= class_id < len(names_source):
            return names_source[class_id]
        return fallback

    return fallback

def run_model(image_path, model):
    """Run inference on one image and normalise the detections.

    Args:
        image_path: Path of the image file to analyse.
        model: Dict whose ``'type'`` key selects the backend
            (``'mask_rcnn'`` with a ``'predictor'`` entry, or ``'yolo'``
            with a ``'model'`` entry).

    Returns:
        list[dict]: One dict per detection scoring at least
        ``CONF_THRESHOLD``, with keys ``label``, ``confidence`` and ``bbox``
        plus ``segmentation`` when ``INCLUDE_SEGMENTATION`` is set and a
        polygon is available. Empty when nothing is detected or, for the
        Mask R-CNN backend, when the image cannot be read.

    Raises:
        ValueError: If ``model['type']`` is not a supported backend.
    """
    if model['type'] == 'mask_rcnn':
        import cv2
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None. Say so, since an
            # unreadable file would otherwise look like an image with no
            # findings in the exported JSON.
            print(f'Aviso: nao foi possivel ler a imagem: {image_path}')
            return []

        instances = model['predictor'](img)['instances'].to('cpu')
        if len(instances) == 0:
            return []

        boxes = instances.pred_boxes.tensor.numpy()
        scores = instances.scores.numpy()
        classes = instances.pred_classes.numpy()
        masks = instances.pred_masks.numpy() if INCLUDE_SEGMENTATION else None

        findings = []
        for i, (box, raw_score, raw_class) in enumerate(zip(boxes, scores, classes)):
            score = float(raw_score)
            if score < CONF_THRESHOLD:
                continue
            item = {
                'label': get_label_from_names(MASK_CLASS_NAMES, int(raw_class)),
                'confidence': score,
                'bbox': box.tolist(),
            }
            if masks is not None:
                polygon = mask_to_polygon(masks[i])
                if polygon:
                    item['segmentation'] = polygon
            findings.append(item)
        return findings

    if model['type'] == 'yolo':
        results = model['model'](image_path, verbose=False)
        if not results:
            return []

        result = results[0]
        boxes = result.boxes
        if boxes is None:
            return []

        # Names stored with the model take precedence; data.yaml is a fallback.
        names = result.names or YOLO_CLASS_NAMES

        # Read the polygons once instead of on every iteration.
        polygons = None
        if INCLUDE_SEGMENTATION and result.masks is not None:
            polygons = result.masks.xy

        findings = []
        for i, (raw_score, raw_class, xyxy) in enumerate(zip(boxes.conf, boxes.cls, boxes.xyxy)):
            score = float(raw_score)
            if score < CONF_THRESHOLD:
                continue
            item = {
                'label': get_label_from_names(names, int(raw_class)),
                'confidence': score,
                'bbox': xyxy.tolist(),
            }
            if polygons is not None and i < len(polygons):
                poly = polygons[i]
                if poly is not None and len(poly) >= 3:
                    item['segmentation'] = [[float(x), float(y)] for x, y in poly]
            findings.append(item)
        return findings

    raise ValueError('Modelo nao suportado.')

# Nothing to do without input images.
if len(image_paths) == 0:
    raise SystemExit('Nenhuma imagem encontrada.')

model = load_model()

total = len(image_paths)
cases = []
for idx, path in enumerate(image_paths, start=1):
    name = os.path.basename(path)
    # Report the first image and then every fifth one to keep output short.
    if idx == 1 or idx % 5 == 0:
        print(f'Processando {idx}/{total}: {name}')
    case = {
        'name': name,
        'model_type': MODEL_TYPE,
        'findings': run_model(path, model),
    }
    cases.append(case)

# Clamp SAMPLE_INDEX into the valid range of cases before picking the sample.
sample_index = max(0, min(SAMPLE_INDEX, len(cases) - 1))
single_case = cases[sample_index]

batch_path = '/content/triagem_batch_results.json'
single_path = '/content/triagem_single_result.json'

# Write the full batch first, then the single sampled case.
for out_path, content in ((batch_path, cases), (single_path, single_case)):
    with open(out_path, 'w') as f:
        json.dump(content, f, ensure_ascii=True, indent=2)

# Hand both files to the browser as downloads.
from google.colab import files
for out_path in (batch_path, single_path):
    files.download(out_path)


Depois, importe o JSON na página de triagem usando o botão "Importar JSON".
