<div style='background-color: #f0f8ff; border: 3px solid Blue;'>
    <font size="+1" color="Blue">
        <b>🚀 Notebook de Treinamento SSD (Single Shot MultiBox Detector) para Detecção de Armas</b>
    </font>
</div>
<div style='background-color: #fff7f7; border: 2px solid Green;'>
    <font size="+1" color="Green">
        <b>✅ Dataset configurado: dataset_final (formato YOLO)</b>
    </font>
</div>
<div style='background-color: #fffacd; border: 2px solid Orange;'>
    <font size="+1" color="Orange">
        <b>📦 Modelo: SSD Lite 320 com MobileNet V3 Large</b>
        <br>O modelo será baixado automaticamente na primeira execução (weights='DEFAULT' ou 'COCO_V1')
    </font>
</div>


<div style='background-color: #fff7f7; border: 2px solid '>
    <font size="+2" color="blue" ><b>1. Instalar bibliotecas e carregar modelo pré-treinado</b></font>
</div>


In [None]:
%pip install torch torchvision torchaudio --upgrade -q

import os
import torch
import torchvision
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.models.detection.ssd import SSDClassificationHead
from torchvision.models.detection import SSDLite320_MobileNet_V3_Large_Weights

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
print("üîç Verificando GPU...")
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ready else "cpu")
if not cuda_ready:
    print("‚ö†Ô∏è  GPU n√£o dispon√≠vel - usando CPU")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"‚úÖ GPU detectada: {gpu_name}")
    print(f"‚úÖ CUDA Version: {torch.version.cuda}")

# Build SSDLite320 / MobileNetV3-Large with the DEFAULT pretrained weights enum.
print(
    "\nüì¶ Carregando modelo SSD Lite 320 com MobileNet V3 Large...",
    "üí° Nome do modelo: ssdlite320_mobilenet_v3_large",
    "üí° Pesos pr√©-treinados: SSDLite320_MobileNet_V3_Large_Weights.DEFAULT (COCO)",
    sep="\n",
)
weights = SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
model = ssdlite320_mobilenet_v3_large(weights=weights)
print("‚úÖ Modelo SSD carregado com sucesso!")
print(f"üìä Modelo pr√©-treinado em: {weights}")


<div style='background-color: #fff7f7; border: 2px solid '>
    <font size="+2" color="blue" ><b>2. Configuração do Dataset</b></font>
</div>


In [None]:
import os
import yaml

# Resolve the dataset location relative to the current working directory.
BASE_DIR = os.getcwd()
DATASET_DIR = os.path.join(BASE_DIR, "dataset_final")
DATASET_YAML = os.path.join(DATASET_DIR, "dataset.yaml")

print(
    f"üìÅ Diret√≥rio base: {BASE_DIR}",
    f"üìÅ Dataset: {DATASET_DIR}",
    f"üìÑ Dataset YAML: {DATASET_YAML}",
    sep="\n",
)


def _count_images(folder):
    """Count .jpg/.jpeg/.png files (case-insensitive) directly inside ``folder``; 0 if it is missing."""
    if not os.path.exists(folder):
        return 0
    return sum(
        1 for entry in os.listdir(folder)
        if entry.lower().endswith(('.jpg', '.jpeg', '.png'))
    )


if not os.path.exists(DATASET_YAML):
    print("‚ùå Dataset n√£o encontrado! Verifique o caminho.")
else:
    print("‚úÖ Dataset encontrado!")

    # Class names come from the ``names`` entry of the dataset YAML.
    with open(DATASET_YAML, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    class_names = data['names']
    num_classes = len(class_names)
    print(f"‚úÖ Classes encontradas: {class_names}")
    print(f"‚úÖ N√∫mero de classes: {num_classes}")

    # Per-split image folders and how many images each one holds.
    train_path = os.path.join(DATASET_DIR, "train", "images")
    val_path = os.path.join(DATASET_DIR, "val", "images")
    test_path = os.path.join(DATASET_DIR, "test", "images")

    train_images = _count_images(train_path)
    val_images = _count_images(val_path)
    test_images = _count_images(test_path)

    print(f"üìä Imagens de treino: {train_images}")
    print(f"üìä Imagens de valida√ß√£o: {val_images}")
    print(f"üìä Imagens de teste: {test_images}")


<div style='background-color: #fff7f7; border: 2px solid '>
    <font size="+2" color="blue" ><b>3. Criar Dataset Customizado (YOLO → SSD)</b></font>
</div>


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from glob import glob
import numpy as np

class YOLODataset(Dataset):
    """Detection dataset that reads YOLO-format labels and yields torchvision-style targets.

    Layout read from ``dataset_dir``: a ``dataset.yaml`` with a ``names`` entry, plus
    ``<split>/images`` and ``<split>/labels`` folders where each image ``foo.jpg`` is
    paired with ``foo.txt``. Images without a label file are skipped.

    Each item is ``(image, target)``. ``image`` is the RGB picture resized to
    ``img_size x img_size`` (then passed through ``transform`` if one is set).
    ``target`` holds ``boxes`` (absolute ``[x_min, y_min, x_max, y_max]`` pixels in the
    resized image), ``labels`` (YOLO class id + 1, since 0 is reserved for background),
    ``image_id``, ``area`` and ``iscrowd``.

    Note: ``transform`` only receives the image, never the boxes, so it must not
    contain geometric augmentations (flips, crops, rotations).
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, dataset_dir, split='train', transform=None, img_size=320):
        """
        Args:
            dataset_dir: root folder containing ``dataset.yaml`` and the split folders.
            split: name of the split sub-folder (e.g. ``'train'``, ``'val'``, ``'test'``).
            transform: optional callable applied to the resized PIL image.
            img_size: side length, in pixels, of the square image returned.
        """
        self.dataset_dir = dataset_dir
        self.split = split
        self.img_size = img_size
        self.transform = transform

        # Class names come from the dataset YAML.
        yaml_path = os.path.join(dataset_dir, 'dataset.yaml')
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        self.class_names = data['names']
        self.num_classes = len(self.class_names)

        images_dir = os.path.join(dataset_dir, split, 'images')
        labels_dir = os.path.join(dataset_dir, split, 'labels')

        self.image_paths = []
        self.label_paths = []

        # Sorted so that sample indices are reproducible across runs and machines.
        for img_path in sorted(glob(os.path.join(images_dir, '*'))):
            if not img_path.lower().endswith(self.IMAGE_EXTENSIONS):
                continue
            label_path = self._label_path_for(img_path, labels_dir)
            if os.path.exists(label_path):
                self.image_paths.append(img_path)
                self.label_paths.append(label_path)

        print(f"‚úÖ Carregados {len(self.image_paths)} pares imagem/label para {split}")

    @staticmethod
    def _label_path_for(img_path, labels_dir):
        """Return ``<labels_dir>/<image stem>.txt`` for ``img_path``.

        Built from the file stem rather than by string replacement, so directory or
        file names that happen to contain ``images`` or an extension-like substring
        (and mixed-case extensions such as ``.Jpg``) are handled correctly.
        """
        stem = os.path.splitext(os.path.basename(img_path))[0]
        return os.path.join(labels_dir, stem + '.txt')

    @staticmethod
    def _read_yolo_labels(label_path, img_size):
        """Parse one YOLO label file into pixel boxes for an ``img_size`` square image.

        Each usable line is ``class_id x_center y_center width height`` with the four
        geometry values normalised to [0, 1]. Lines with fewer than five fields are
        ignored. Boxes are clamped to the image and boxes left with non-positive
        width or height are dropped, because torchvision's SSD rejects degenerate
        boxes during training.

        Returns:
            ``(boxes, labels)``: a list of ``[x_min, y_min, x_max, y_max]`` float lists
            and the matching list of integer labels (``class_id + 1``).
        """
        side = float(img_size)
        boxes, labels = [], []
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 5:
                    continue
                class_id = int(parts[0])
                x_center, y_center, width, height = (float(v) for v in parts[1:5])

                # Normalised centre/size -> absolute corners. The image is stretched to a
                # square, so both axes scale by the same ``img_size``.
                x_min = min(max((x_center - width / 2) * side, 0.0), side)
                y_min = min(max((y_center - height / 2) * side, 0.0), side)
                x_max = min(max((x_center + width / 2) * side, 0.0), side)
                y_max = min(max((y_center + height / 2) * side, 0.0), side)

                if x_max <= x_min or y_max <= y_min:
                    continue

                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(class_id + 1)  # 0 is reserved for background
        return boxes, labels

    def __len__(self):
        """Number of image/label pairs found for this split."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx`` (see the class docstring)."""
        # Context manager closes the underlying file once the pixels are decoded.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert('RGB').resize((self.img_size, self.img_size))

        box_list, label_list = self._read_yolo_labels(self.label_paths[idx], self.img_size)
        if box_list:
            boxes = torch.tensor(box_list, dtype=torch.float32)
            labels = torch.tensor(label_list, dtype=torch.int64)
        else:
            # Keep the (0, 4) / (0,) shapes so images without objects still collate.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        if self.transform:
            image = self.transform(image)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx]),
            'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            'iscrowd': torch.zeros((len(boxes),), dtype=torch.int64),
        }

        return image, target

# Build one dataset per split at the 320x320 input size; no transform is passed here.
print("\nüì¶ Criando datasets...")
train_dataset = YOLODataset(DATASET_DIR, split='train', img_size=320)
val_dataset = YOLODataset(DATASET_DIR, split='val', img_size=320)
test_dataset = YOLODataset(DATASET_DIR, split='test', img_size=320)

print(f"\n‚úÖ Datasets criados com sucesso!")


In [None]:
# The COCO-pretrained classification head must be swapped for one sized to this dataset.
# torchvision detection models reserve label 0 for background, hence num_classes + 1.
from functools import partial

import torch.nn as nn
from torchvision.models.detection import _utils as det_utils
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead

num_classes_custom = num_classes + 1  # +1 for background

print(f"üìä Ajustando modelo para {num_classes} classes (+1 background = {num_classes_custom} total)")

# SSDLite uses SSDLiteClassificationHead, which exposes neither `cls_logits` nor
# `num_anchors`; the number of classes it predicts is stored in `num_columns`.
num_classes_pretrained = model.head.classification_head.num_columns
print(f"üìä Modelo pr√©-treinado tem {num_classes_pretrained} classes")

# Per-feature-map channel counts are probed from the backbone with a dummy 320x320
# input; anchors per location come from the model's own anchor generator.
in_channels = det_utils.retrieve_out_channels(model.backbone, (320, 320))
num_anchors = model.anchor_generator.num_anchors_per_location()

# Build a fresh SSDLite head (same BatchNorm settings torchvision uses when it
# constructs ssdlite320_mobilenet_v3_large) so the head architecture still matches.
model.head.classification_head = SSDLiteClassificationHead(
    in_channels=in_channels,
    num_anchors=num_anchors,
    num_classes=num_classes_custom,
    norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.03),
)

print(f"‚úÖ Modelo ajustado para {num_classes} classes!")


<div style='background-color: #fff7f7; border: 2px solid '>
    <font size="+2" color="blue" ><b>5. Visualização dos Dados</b></font>
</div>


In [None]:
import cv2
import random
from matplotlib import pyplot as plt

class Visualizacao:
    """Exploratory plots for a YOLO-format dataset: class histograms and sample images with boxes.

    On construction the class names are read from ``DATASET_YAML`` and every label file
    under ``DATASET_DIR/<split>/labels`` is parsed for the requested splits.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_types, n_ims, rows, cmap=None):
        """
        Args:
            data_types: split folder names to analyse (e.g. ``["train", "val", "test"]``).
            n_ims: maximum number of images drawn per split by ``vis``.
            rows: number of subplot rows used by ``vis``.
            cmap: stored on the instance; no method of this class reads it.
        """
        self.n_ims, self.rows = n_ims, rows
        self.cmap, self.data_types = cmap, data_types
        self.colors = ["firebrick", "darkorange", "blueviolet"]

        self.get_cls_names()
        self.get_bboxes()

    def get_cls_names(self):
        """Load ``names`` from the dataset YAML into ``self.class_dict`` (class id -> name)."""
        with open(DATASET_YAML, 'r', encoding='utf-8') as file:
            data = yaml.safe_load(file)
        class_names = data['names']
        if isinstance(class_names, dict):
            # ``names`` written as an ``{id: name}`` mapping: use its own ids as keys.
            self.class_dict = {int(index): name for index, name in class_names.items()}
        else:
            self.class_dict = {index: name for index, name in enumerate(class_names)}
        print(f"‚úÖ Classes encontradas: {list(self.class_dict.values())}")

    def get_bboxes(self):
        """Parse all label files of every split.

        Fills three dicts keyed by split name:
            ``self.im_paths``: image paths that have a label file.
            ``self.vis_datas``: per image, a list of ``[class_name, xc, yc, w, h]``.
            ``self.analysis_datas``: ``{class_name: number of boxes}``.

        ``im_paths`` and ``vis_datas`` are index-aligned: images without a label file
        are left out of both, so position ``i`` always refers to the same image.
        """
        self.vis_datas, self.analysis_datas, self.im_paths = {}, {}, {}

        for data_type in self.data_types:
            labels_dir = os.path.join(DATASET_DIR, data_type, "labels")
            candidates = sorted(glob(os.path.join(DATASET_DIR, data_type, "images", "*")))

            kept_paths, all_bboxes, all_analysis_datas = [], [], {}
            for im_path in candidates:
                if not im_path.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue

                # Label path from the file stem; string replacement would also rewrite
                # matching substrings elsewhere in the path.
                stem = os.path.splitext(os.path.basename(im_path))[0]
                lbl_path = os.path.join(labels_dir, stem + ".txt")
                if not os.path.isfile(lbl_path):
                    continue

                bboxes = []
                with open(lbl_path, 'r') as label_file:
                    for line in label_file:
                        parts = line.strip().split()[:5]
                        if len(parts) < 5:
                            continue
                        cls_name = self.class_dict[int(parts[0])]
                        bboxes.append([cls_name] + [float(x) for x in parts[1:]])
                        all_analysis_datas[cls_name] = all_analysis_datas.get(cls_name, 0) + 1

                kept_paths.append(im_path)
                all_bboxes.append(bboxes)

            self.vis_datas[data_type] = all_bboxes
            self.analysis_datas[data_type] = all_analysis_datas
            self.im_paths[data_type] = kept_paths

    def plot(self, rows, cols, count, im_path, bboxes):
        """Draw one image with its boxes in subplot ``count`` of a ``rows x cols`` grid.

        ``bboxes`` entries are ``[class_name, x_center, y_center, w, h]`` with normalised
        geometry; each box gets a random colour. Returns ``count + 1``.
        """
        plt.subplot(rows, cols, count)
        or_im = np.array(Image.open(im_path).convert("RGB"))
        height, width, _ = or_im.shape

        for _cls_name, x_center, y_center, w, h in bboxes:
            x_min = int((x_center - w / 2) * width)
            y_min = int((y_center - h / 2) * height)
            x_max = int((x_center + w / 2) * width)
            y_max = int((y_center + h / 2) * height)

            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            cv2.rectangle(img=or_im, pt1=(x_min, y_min), pt2=(x_max, y_max), color=color, thickness=3)

        plt.imshow(or_im)
        plt.axis("off")
        plt.title(f"H√° {len(bboxes)} objeto(s) na imagem.")

        return count + 1

    def vis(self, save_name):
        """Show up to ``n_ims`` distinct, randomly chosen labelled images of split ``save_name``."""
        print(f"üìä Visualiza√ß√£o de dados {save_name.upper()} em processo...\n")

        total = len(self.vis_datas[save_name])
        if total == 0:
            # Checked before creating the figure so no empty figure is left open.
            print(f"‚ö†Ô∏è  Nenhuma imagem encontrada para {save_name}")
            return

        # Ceiling division: the grid always has room for n_ims subplots.
        cols = max(1, (self.n_ims + self.rows - 1) // self.rows)
        # Sample without replacement so the same image is not shown twice.
        indices = random.sample(range(total), min(self.n_ims, total))

        plt.figure(figsize=(25, 20))
        count = 1
        for index in indices:
            im_path = self.im_paths[save_name][index]
            bboxes = self.vis_datas[save_name][index]
            count = self.plot(self.rows, cols, count, im_path=im_path, bboxes=bboxes)

        plt.show()

    def data_analysis(self, save_name, color):
        """Bar chart of boxes per class for split ``save_name``, drawn in ``color``."""
        print(f"üìà An√°lise de dados {save_name} em processo...\n")

        if save_name not in self.analysis_datas or len(self.analysis_datas[save_name]) == 0:
            print(f"‚ö†Ô∏è  Nenhum dado encontrado para {save_name}")
            return

        # Bar width and the x/y offsets used to place the count label above each bar.
        width, text_width, text_height = 0.7, 0.05, 2
        cls_names = list(self.analysis_datas[save_name].keys())
        counts = list(self.analysis_datas[save_name].values())

        _, ax = plt.subplots(figsize=(30, 10))
        indices = np.arange(len(counts))

        ax.bar(indices, counts, width, color=color)
        ax.set_xlabel("Nomes das Classes", color="black")
        ax.set(xticks=indices, xticklabels=cls_names)
        ax.set_ylabel("Quantidade de Dados", color="black")
        ax.set_title(f"An√°lise de Desbalanceamento de Classes - Dataset {save_name.upper()}")

        for i, v in enumerate(counts):
            ax.text(i - text_width, v + text_height, str(v), color="royalblue")
        plt.show()

    def visualization(self):
        """Run ``vis`` for every configured split."""
        for save_name in self.data_types:
            self.vis(save_name)

    def analysis(self):
        """Run ``data_analysis`` for every configured split, cycling through ``self.colors``."""
        for position, save_name in enumerate(self.data_types):
            # Modulo so splits beyond the third are still plotted instead of dropped.
            self.data_analysis(save_name, self.colors[position % len(self.colors)])
        
# Build the visualiser for the three splits and plot the per-class box counts.
vis = Visualizacao(data_types=["train", "val", "test"], n_ims=20, rows=5, cmap="rgb")
vis.analysis()


In [None]:
# Show sample images with their bounding boxes for every split.
vis.visualization()


In [None]:
import time
from datetime import datetime
from torch.utils.data import DataLoader
import torch.optim as optim

# Collate function for the detection DataLoaders
def collate_fn(batch):
    """Turn a batch of ``(image, target)`` pairs into ``(list_of_images, list_of_targets)``.

    Samples are kept in lists rather than stacked into one tensor.
    """
    unzipped = tuple(zip(*batch))
    image_seq, target_seq = unzipped
    return [*image_seq], [*target_seq]

# Image transforms for the detection datasets
def get_transform(train):
    """Return the image-only transform used for both training and evaluation.

    Only ``ToTensor`` is applied (PIL image -> float tensor in [0, 1]):

    * torchvision detection models expect inputs in the 0-1 range and apply their
      own mean/std normalisation internally, so an extra ``Normalize`` here would
      normalise the image twice.
    * This transform is applied to the image alone, so a geometric augmentation
      such as ``RandomHorizontalFlip`` would mirror the picture while leaving the
      bounding boxes untouched, corrupting the training targets.

    Args:
        train: kept for backward compatibility with existing callers; the same
            transform is returned for training and evaluation.

    Returns:
        A ``transforms.Compose`` instance.
    """
    return transforms.Compose([transforms.ToTensor()])

# Attach the per-split image transforms to the datasets created earlier.
train_dataset.transform = get_transform(train=True)
val_dataset.transform = get_transform(train=False)

# Build the DataLoaders; only the training loader shuffles.
# NOTE(review): with num_workers > 0 the dataset is handed to worker processes. A class
# defined inside a notebook may fail to load there on platforms that spawn workers
# (e.g. Windows/macOS) - confirm, or use num_workers=0.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=4, collate_fn=collate_fn)

# Move the model to the selected device.
model = model.to(device)

# Optimizer: SGD over the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# Learning-rate scheduler: multiply the LR by 0.1 every 3 scheduler steps.
# NOTE(review): stepped once per epoch this shrinks the LR tenfold every 3 epochs, so
# over a long run it becomes vanishingly small after the first few decays - confirm
# this schedule is intended for the configured number of epochs.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

print("‚úÖ DataLoaders e otimizador configurados!")


In [None]:
# Training routine
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader``.

    For every batch the images and target tensors are moved to ``device``, the model
    is called as ``model(images, targets)`` and the values of the returned loss dict
    are summed, back-propagated and applied with ``optimizer``.

    Args:
        model: module called in training mode that returns a dict of loss tensors.
        optimizer: optimizer updating the model parameters.
        data_loader: iterable of ``(images, targets)`` batches.
        device: device the batch tensors are moved to.
        epoch: accepted for the caller's convenience; not used here.

    Returns:
        The mean of the per-batch summed losses.
    """
    model.train()
    running_loss = 0

    for batch_images, batch_targets in data_loader:
        batch_images = [image.to(device) for image in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        batch_loss = sum(model(batch_images, batch_targets).values())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(data_loader)

# Validation routine
def evaluate(model, data_loader, device):
    """Compute the mean validation loss of a torchvision detection model.

    torchvision detection models return the loss dict only in training mode; in eval
    mode ``model(images, targets)`` returns a list of detections, which has no
    ``.values()``. The model is therefore put in training mode for the forward
    passes, while gradients are disabled and normalisation/dropout layers are
    switched to eval so validation data neither updates BatchNorm running
    statistics nor triggers dropout.

    Args:
        model: detection model that returns a dict of losses in training mode.
        data_loader: iterable of ``(images, targets)`` batches.
        device: device the batch tensors are moved to.

    Returns:
        The mean of the per-batch summed losses. The model is left in eval mode.
    """
    model.train()
    frozen_types = (
        torch.nn.modules.batchnorm._BatchNorm,
        torch.nn.modules.dropout._DropoutNd,
    )
    for module in model.modules():
        if isinstance(module, frozen_types):
            module.eval()

    total_loss = 0
    try:
        with torch.no_grad():
            for images, targets in data_loader:
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                total_loss += losses.item()
    finally:
        # Leave the model in eval mode whether or not a batch failed.
        model.eval()

    return total_loss / len(data_loader)

# Training run: fixed number of epochs, keeping the checkpoint with the lowest validation loss.
num_epochs = 50
best_loss = float('inf')
os.makedirs('runs/ssd', exist_ok=True)

print(f"üöÄ Iniciando treinamento √†s {datetime.now().strftime('%H:%M:%S')}")
if device.type == 'cuda':
    print("üöÄ Usando GPU - treinamento ser√° muito mais r√°pido!")
else:
    print("‚ö†Ô∏è  Usando CPU - treinamento ser√° MUITO lento!")

start_time = time.time()

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, train_loader, device, epoch)
    val_loss = evaluate(model, val_loader, device)
    # The scheduler is stepped once per epoch, after the optimizer updates.
    lr_scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Save the weights whenever the validation loss improves.
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), 'runs/ssd/best_model.pth')
        print(f"  ‚úÖ Novo melhor modelo salvo! (Loss: {best_loss:.4f})")

# Report the wall-clock duration as hours / minutes / seconds.
end_time = time.time()
total_time = end_time - start_time
hours = int(total_time // 3600)
minutes = int((total_time % 3600) // 60)
seconds = int(total_time % 60)
print(f"\n‚úÖ Treinamento conclu√≠do em {hours}h {minutes}m {seconds}s")


<div style='background-color: #fff7f7; border: 2px solid '>
    <font size="+2" color="blue" ><b>7. Inferência com Dataset de Teste</b></font>
</div>


In [None]:
# Load the best checkpoint saved during training, if there is one.
best_model_path = 'runs/ssd/best_model.pth'
if os.path.exists(best_model_path):
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    print("‚úÖ Melhor modelo carregado!")
else:
    print("‚ö†Ô∏è  Modelo n√£o encontrado. Execute o treinamento primeiro.")

# Make sure the model sits on the same device as the inputs even if the training
# cell was skipped, then switch to inference mode (the model returns detections).
model.to(device)
model.eval()

# Run inference on at most 50 test images (demo limit). The listing is sorted so the
# same files are selected on every run.
test_images_path = os.path.join(DATASET_DIR, "test", "images")
test_image_files = sorted(
    f for f in os.listdir(test_images_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:50]

print(f"üîç Executando infer√™ncia em {len(test_image_files)} imagens de teste...")

results = []
transform = get_transform(train=False)

with torch.no_grad():
    for img_file in test_image_files:
        img_path = os.path.join(test_images_path, img_file)
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        img_tensor = transform(image).unsqueeze(0).to(device)

        predictions = model(img_tensor)

        results.append({
            'path': img_path,
            'image': image,
            # Keep the outputs on the CPU so accumulated results do not hold GPU memory.
            'predictions': {key: value.cpu() for key, value in predictions[0].items()}
        })

print(f"‚úÖ Infer√™ncia conclu√≠da em {len(results)} imagens")


In [None]:
# Plot inference results
def visualizar_inferencia_ssd(results, n_ims=15, rows=3, conf_threshold=0.5):
    """Draw the detections of the first ``n_ims`` results on a grid of subplots.

    Args:
        results: list of dicts with an ``'image'`` (PIL image) and a ``'predictions'``
            dict holding ``'boxes'``, ``'scores'`` and ``'labels'`` tensors.
        n_ims: maximum number of images to show.
        rows: number of subplot rows.
        conf_threshold: detections scoring below this value are not drawn.

    Class names are looked up in the notebook-level ``class_names`` (label ``k`` maps
    to ``class_names[k - 1]``; label 0 is background).
    """
    # Ceiling division so the grid always has at least n_ims cells.
    cols = max(1, (n_ims + rows - 1) // rows)
    plt.figure(figsize=(20, 10))

    for idx, result in enumerate(results[:n_ims]):
        plt.subplot(rows, cols, idx + 1)
        or_im_rgb = np.array(result['image'].convert("RGB"))

        boxes = result['predictions']['boxes'].cpu().numpy()
        scores = result['predictions']['scores'].cpu().numpy()
        labels = result['predictions']['labels'].cpu().numpy()

        # Keep only detections at or above the confidence threshold.
        mask = scores >= conf_threshold
        boxes = boxes[mask]
        scores = scores[mask]
        labels = labels[mask]

        for box, score, label in zip(boxes, scores, labels):
            # Plain Python ints: OpenCV point arguments do not reliably accept NumPy scalars.
            x1, y1, x2, y2 = (int(v) for v in box)
            label = int(label)

            cv2.rectangle(or_im_rgb, (x1, y1), (x2, y2), color=(255, 0, 0), thickness=2)

            # Resolve the class name; labels outside the known range are shown as their number.
            if label == 0:
                class_name = 'background'
            elif 0 < label <= len(class_names):
                class_name = class_names[label - 1]
            else:
                class_name = str(label)
            label_text = f"{class_name}: {score:.2f}"
            # Keep the caption inside the image when the box touches the top edge.
            text_y = max(y1 - 10, 15)
            cv2.putText(or_im_rgb, label_text, (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        plt.imshow(or_im_rgb)
        plt.title(f"Imagem #{idx + 1} - {len(boxes)} detec√ß√µes")
        plt.axis("off")

    plt.tight_layout()
    plt.show()

# Show the first 15 inference results on a 3-row grid.
visualizar_inferencia_ssd(results, n_ims=15, rows=3)
