In [1]:
!pip install effdet
import os
import numpy as np
from PIL import Image
import torch
from pycocotools.coco import COCO
from torch.utils.data import Dataset
import json



In [2]:
# ============================================================
# 1) MONTAR DRIVE E DESCOMPACTAR O DATASET
# ============================================================

from google.colab import drive
drive.mount('/content/drive')

zip_path = "/content/drive/MyDrive/dataset_final_coco.zip"

import zipfile
with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall('/content/dataset')

root_dir = "/content/dataset/dataset_final_coco"
print("Dataset extra√≠do em:", root_dir)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset extra√≠do em: /content/dataset/dataset_final_coco


In [3]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# ------------------------------
# TRAIN TRANSFORMS (fortes)
# ------------------------------
def get_train_transforms():
    return A.Compose(
        [
            A.HueSaturationValue(
                hue_shift_limit=0.015,
                sat_shift_limit=0.7,
                val_shift_limit=0.4,
                p=1.0
            ),

            A.Rotate(limit=30, border_mode=0, p=1.0),

            A.Affine(
                translate_percent=0.1,
                scale=(0.5, 1.5),
                shear=(-5, 5),
                p=1.0
            ),

            A.Perspective(scale=(0.0005, 0.0005), p=1.0),

            A.HorizontalFlip(p=0.5),

            A.RandomCrop(height=512, width=512, p=1.0, pad_if_needed=True),
            A.Resize(height=640, width=640, p=1.0),

            A.Normalize(
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0
            ),

            ToTensorV2()
        ],
        bbox_params=A.BboxParams(
            format="pascal_voc",
            label_fields=["labels"],
            min_visibility=0.2
        )
    )


# ------------------------------
# VAL/TEST TRANSFORMS (leves)
# ------------------------------
def get_eval_transforms():
    return A.Compose(
        [
            A.Resize(640, 640),
            A.Normalize(
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0
            ),
            ToTensorV2()
        ],
        bbox_params=A.BboxParams(
            format="pascal_voc",
            label_fields=["labels"],
            min_visibility=0.0
        )
    )


In [4]:
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import torch
import os
from pycocotools.coco import COCO

class CocoAlbumentationsDataset(torch.utils.data.Dataset):
    def __init__(self, img_dir, ann_file, transforms=None):
        import json
        self.img_dir = img_dir
        self.transforms = transforms

        with open(ann_file, "r") as f:
            data = json.load(f)

        self.images = {img["id"]: img for img in data["images"]}

        # agrupar anota√ß√µes por imagem
        self.annotations = {}
        for ann in data["annotations"]:
            img_id = ann["image_id"]
            if img_id not in self.annotations:
                self.annotations[img_id] = []
            self.annotations[img_id].append(ann)

        self.ids = list(self.images.keys())

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        info = self.images[img_id]

        img_path = os.path.join(self.img_dir, info["file_name"])
        image = np.array(Image.open(img_path).convert("RGB"))

        annots = self.annotations.get(img_id, [])

        # se n√£o tem boxes ‚Üí pula imagem
        if len(annots) == 0:
            return None

        boxes = []
        labels = []

        for ann in annots:
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])

        if self.transforms:
            transformed = self.transforms(
                image=image,
                bboxes=boxes,
                labels=labels
            )

            image = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["labels"]

        # üö® Prote√ß√£o contra augmentations removerem TODAS as caixas
        if len(boxes) == 0:
            return None

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        return image, target

    def __len__(self):
        return len(self.ids)



In [5]:
def collate_fn(batch):
    batch = [b for b in batch if b is not None]
    return tuple(zip(*batch))


In [6]:
train_dataset = CocoAlbumentationsDataset(
    f"{root_dir}/train/images",
    f"{root_dir}/coco_annotations_train.json",
    transforms=get_train_transforms()
)

val_dataset = CocoAlbumentationsDataset(
    f"{root_dir}/val/images",
    f"{root_dir}/coco_annotations_val.json",
    transforms=get_eval_transforms()
)

test_dataset = CocoAlbumentationsDataset(
    f"{root_dir}/test/images",
    f"{root_dir}/coco_annotations_test.json",
    transforms=get_eval_transforms()
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn
)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_fn
)


In [7]:
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet

num_classes = 1  # apenas GUN (EfficientDet n√£o usa "background")

# Config do modelo (D0: 512px, D1: 640px, D2: 768px)
config = get_efficientdet_config('tf_efficientdet_d1')

config.num_classes = num_classes
config.image_size = (640, 640)
config.norm_kwargs = dict(eps=1e-4)

# Carregar modelo pr√©-treinado
model = EfficientDet(config, pretrained_backbone=True)

# Substituir head
model.class_net = HeadNet(config, num_outputs=config.num_classes)

# Wrap para treinamento correto
model = DetBenchTrain(model, config)

model = model.cuda()

# Otimizador
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.937,
    weight_decay=0.0005
)

lr_scheduler = torch.optim.lr_scheduler.LinearLR(
    optimizer,
    start_factor=1.0,
    end_factor=0.01,
    total_iters=100  # 100 √©pocas
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [8]:
import os
import json

save_dir = "runs/fasterrcnn/train1"
os.makedirs(save_dir, exist_ok=True)

history = {
    "train_loss": [],
    "lr": [],
    "epoch": [],
    "map50": [],
    "map5095": []
}

with open(f"{save_dir}/history.json", "w") as f:
    json.dump(history, f, indent=4)


In [9]:
# ================================================================
# M√âTRICAS COCO COMPLETAS
# ================================================================

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch


def evaluate_coco(model, dataset, ann_path, device="cuda"):
    model.eval()

    coco_gt = COCO(ann_path)
    results = []

    for img, target in dataset:
        img_tensor = img.to(device).unsqueeze(0)

        with torch.no_grad():
            output = model(img_tensor)[0]

        boxes = output["boxes"].cpu().numpy()
        scores = output["scores"].cpu().numpy()
        labels = output["labels"].cpu().numpy()

        image_id = int(target["image_id"].item())

        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = box.tolist()
            results.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": [x1, y1, x2 - x1, y2 - y1],
                "score": float(score)
            })

    coco_dt = coco_gt.loadRes(results)
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    # mAP50 = COCO metric 1
    # mAP50-95 = COCO metric 0
    map50 = coco_eval.stats[1]
    map5095 = coco_eval.stats[0]

    return map50, map5095


# ================================================================
# FUNCTION ‚Äî CONFUSION MATRIX
# ================================================================
def plot_confusion_matrix(y_true, y_pred, save_path):
    labels = ["background", "Gun"]
    cm = np.zeros((2, 2), dtype=int)

    for t, p in zip(y_true, y_pred):
        cm[t][p] += 1

    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt="d", xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix")
    plt.savefig(save_path)
    plt.close()


# ================================================================
# FUNCTION ‚Äî PRECISION-RECALL CURVE
# ================================================================
def plot_pr_curve(precisions, recalls, save_path):
    plt.figure(figsize=(7,5))
    plt.plot(recalls, precisions)
    plt.title("Precision-Recall Curve")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.grid()
    plt.savefig(save_path)
    plt.close()


# ================================================================
# FUNCTION ‚Äî F1 CURVE
# ================================================================
def plot_f1_curve(precisions, recalls, save_path):
    f1 = 2 * (precisions * recalls) / (precisions + recalls + 1e-6)
    plt.figure(figsize=(7,5))
    plt.plot(recalls, f1)
    plt.title("F1 Curve")
    plt.xlabel("Recall")
    plt.ylabel("F1-score")
    plt.grid()
    plt.savefig(save_path)
    plt.close()


In [None]:
import matplotlib.pyplot as plt

best_loss = float("inf")
patience = 20
counter = 0

for epoch in range(100):
    model.train()
    total_loss = 0

    for batch_idx, (imgs, targets) in enumerate(train_loader):

        # üîç LOG DO BATCH
        print(f"[Epoch {epoch}] Batch {batch_idx}: {len(imgs)} imagens")

        # detectar erros comuns
        for i, t in enumerate(targets):
            if t["boxes"].shape[0] == 0:
                print(f"  ‚ö†Ô∏è  Aten√ß√£o: imagem {i} no batch {batch_idx} est√° com boxes vazios!")

        # GPU -> imgs
        imgs = torch.stack([img.cuda() for img in imgs], dim=0)

        # GPU -> targets individuais
        targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

        # =======================================================
        #   CONVERTER TARGETS PARA O FORMATO DO EFFICIENTDET
        # =======================================================

        batch_bboxes = [t["boxes"] for t in targets]
        batch_labels = [t["labels"] for t in targets]

        # n√∫mero m√°ximo de boxes no batch
        max_boxes = max(b.shape[0] for b in batch_bboxes)

        padded_boxes = []
        padded_labels = []

        for b, l in zip(batch_bboxes, batch_labels):
            num = b.shape[0]
            if num < max_boxes:
                pad_b = torch.zeros((max_boxes - num, 4), device=b.device)
                pad_l = torch.zeros((max_boxes - num,), device=l.device, dtype=l.dtype)
                b = torch.cat([b, pad_b], dim=0)
                l = torch.cat([l, pad_l], dim=0)
            padded_boxes.append(b)
            padded_labels.append(l)

        # agora EfficientDet aceita
        eff_target = {
            "bbox": torch.stack(padded_boxes, dim=0),   # [B, max_boxes, 4]
            "cls": torch.stack(padded_labels, dim=0),   # [B, max_boxes]
        }

        # =======================================================
        #   FORWARD
        # =======================================================
        # forward
        losses = model(imgs, eff_target)

        # EfficientDet retorna dict
        if isinstance(losses, dict):
            loss_value = losses["loss"]  # geralmente "loss", pode ser "loss_total" dependendo da vers√£o
        else:
            loss_value = losses          # (fallback)

        print(f"  Loss do batch: {loss_value.item():.4f}")

        # backward
        optimizer.zero_grad()
        loss_value.backward()
        optimizer.step()

        total_loss += loss_value.item()


    lr_scheduler.step()

    print(f"[Epoch {epoch}] Loss: {total_loss:.4f}")

    # =======================================================
    # AVALIA√á√ÉO COCO EM VAL
    # =======================================================
    map50, map5095 = evaluate_coco(
        model,
        val_dataset,
        f"{root_dir}/annotations/instances_val.json"
    )

    print(f"   mAP50={map50:.4f} | mAP50-95={map5095:.4f}")

    # =======================================================
    # Salvar hist√≥rico
    # =======================================================
    history["train_loss"].append(total_loss)
    history["map50"].append(map50)
    history["map5095"].append(map5095)
    history["lr"].append(optimizer.param_groups[0]["lr"])
    history["epoch"].append(epoch)

    with open(f"{save_dir}/history.json", "w") as f:
        json.dump(history, f, indent=4)

    # =======================================================
    # EARLY STOPPING
    # =======================================================
    if total_loss < best_loss:
        best_loss = total_loss
        counter = 0
        torch.save(model.state_dict(), f"{save_dir}/best_model.pth")
    else:
        counter += 1
        if counter >= patience:
            print("EARLY STOPPING")
            break


# =======================================================
# GERAR GR√ÅFICOS AP√ìS O TREINAMENTO
# =======================================================

# Loss plot
plt.figure()
plt.plot(history["epoch"], history["train_loss"])
plt.title("Training Loss")
plt.savefig(f"{save_dir}/losses.png")
plt.close()

# mAP plot
plt.figure()
plt.plot(history["epoch"], history["map50"], label="mAP50")
plt.plot(history["epoch"], history["map5095"], label="mAP50-95")
plt.legend()
plt.title("mAP Curve")
plt.savefig(f"{save_dir}/map_curve.png")
plt.close()

[Epoch 0] Batch 0: 3 imagens
  Loss do batch: 103.2849
[Epoch 0] Batch 1: 4 imagens
  Loss do batch: 289.4260
[Epoch 0] Batch 2: 4 imagens
  Loss do batch: 28.8074
[Epoch 0] Batch 3: 4 imagens
  Loss do batch: 4.4617
[Epoch 0] Batch 4: 3 imagens
  Loss do batch: 3.6976
[Epoch 0] Batch 5: 4 imagens
  Loss do batch: 4.0289
[Epoch 0] Batch 6: 3 imagens
  Loss do batch: 6.0310
[Epoch 0] Batch 7: 4 imagens
  Loss do batch: 6.2578
[Epoch 0] Batch 8: 4 imagens
  Loss do batch: 9.1418
[Epoch 0] Batch 9: 4 imagens
  Loss do batch: 5.6619
[Epoch 0] Batch 10: 4 imagens
  Loss do batch: 6.6357
[Epoch 0] Batch 11: 4 imagens
  Loss do batch: 5.2862
[Epoch 0] Batch 12: 3 imagens
  Loss do batch: 5.7345
[Epoch 0] Batch 13: 4 imagens
  Loss do batch: 11.7474
[Epoch 0] Batch 14: 4 imagens
  Loss do batch: 8.2129
[Epoch 0] Batch 15: 4 imagens
  Loss do batch: 8.8456
[Epoch 0] Batch 16: 3 imagens
  Loss do batch: 12.1127
[Epoch 0] Batch 17: 4 imagens
  Loss do batch: 18.6662
[Epoch 0] Batch 18: 4 imagens


In [None]:
# ================================================================
# FINAL EVALUATION FOR PR + F1 + CONFUSION MATRIX
# ================================================================

all_scores = []
all_labels = []
all_preds = []

model.eval()
for img, target in val_dataset:
    img_tensor = img.cuda().unsqueeze(0)

    with torch.no_grad():
        out = model(img_tensor)[0]

    scores = out["scores"].cpu().numpy()
    labels_pred = out["labels"].cpu().numpy()
    labels_true = target["labels"].numpy()

    # armazenar
    all_scores.extend(scores)
    all_preds.extend(labels_pred)
    all_labels.extend(labels_true)


# ORDENAR POR SCORE PARA PR CURVE
order = np.argsort(-np.array(all_scores))
preds = np.array(all_preds)[order]
labels = np.array(all_labels)[order]

tp = (preds == labels)
fp = (preds != labels)
fn = (labels != preds)

precision = np.cumsum(tp) / (np.cumsum(tp + fp) + 1e-6)
recall = np.cumsum(tp) / (len(labels) + 1e-6)

plot_pr_curve(precision, recall, f"{save_dir}/pr_curve.png")
plot_f1_curve(precision, recall, f"{save_dir}/f1_curve.png")
plot_confusion_matrix(all_labels, all_preds, f"{save_dir}/confusion_matrix.png")


In [None]:
from PIL import Image

img1 = Image.open(f"{save_dir}/losses.png")
img2 = Image.open(f"{save_dir}/map_curve.png")
img3 = Image.open(f"{save_dir}/pr_curve.png")
img4 = Image.open(f"{save_dir}/f1_curve.png")

width = max(img1.width, img2.width)
height = img1.height + img2.height + img3.height + img4.height

results = Image.new("RGB", (width, height), "white")

y = 0
for img in [img1, img2, img3, img4]:
    results.paste(img, (0, y))
    y += img.height

results.save(f"{save_dir}/results.png")


In [None]:
model.load_state_dict(torch.load("best_fasterrcnn.pth"))
model.eval()

results = []

for img, target in test_dataset:
    img_tensor = img.cuda().unsqueeze(0)

    with torch.no_grad():
        output = model(img_tensor)[0]

    results.append({
        "image_id": target["image_id"].item(),
        "boxes": output["boxes"].cpu().tolist(),
        "scores": output["scores"].cpu().tolist(),
        "labels": output["labels"].cpu().tolist()
    })

print("Infer√™ncia conclu√≠da!")
