# Definimos el bloque convolucional

In [40]:
from sched import scheduler

import torch
import torch.nn as nn


def conv_block(in_channels, out_channels, kernel_size, stride=1, padding=0):
    return nn.Sequential(
       nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.LeakyReLU(0.1)
    )

# Creámos nuestra red YoloV1

In [50]:
class Yolo(nn.Module):
    def __init__(self, s=7,b=2, c=1):
        super(Yolo, self).__init__()
        self.s = s
        self.b = b
        self.c = c
        self.layers=[self.layer1(),self.layer2(),self.layer3(),self.layer4(), self.layer5(),self.layer6()]
        self.features = nn.Sequential(*self.layers)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024*self.s*self.s,4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096,self.s*self.s*(self.c+self.b*5)),

        )
    def layer1(self):
        return nn.Sequential(
            conv_block(3,64,7, stride=2, padding=3),
            nn.MaxPool2d(2,2),
        )
    def layer2(self):
        return nn.Sequential(
            conv_block(64,192,3,padding=1),
            nn.MaxPool2d(2,2),
        )
    def layer3(self):
        return nn.Sequential(
            conv_block(192,128,1),
            conv_block(128,256,3, padding=1),
            conv_block(256,256,1),
            conv_block(256,512,3, padding=1),
            nn.MaxPool2d(2,2),
        )
    def layer4(self):
        blocks=[
            nn.Sequential(
                conv_block(512,256,1),
                conv_block(256,512,3, padding=1),
            ) for _ in range(4)
        ]
        return nn.Sequential(
            *blocks,
            conv_block(512,512,1),
            conv_block(512,1024,3, padding=1),
            nn.MaxPool2d(2,2)
        )
    def layer5(self):
        blocks=[
            nn.Sequential(
                conv_block(1024,512,1),
                conv_block(512,1024,3, padding=1),
            ) for _ in range(2)
        ]
        return nn.Sequential(
            *blocks,
            conv_block(1024,1024,3, padding=1),
            conv_block(1024,1024,3, stride=2, padding=1)
        )
    def layer6(self):
        return nn.Sequential(
            conv_block(1024,1024,3, padding=1),
            conv_block(1024,1024,3, padding=1)
        )
    def forward(self, x):
        x=self.features(x)
        x=self.classifier(x)
        return x.view(-1, self.s, self.s, self.c+self.b*5)

# DATA LOADER

In [51]:
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from PIL import Image
import os

class TumorDataset(Dataset):
    def __init__(self, img_dir,label_dir, S=7, B=2, C=1 ):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.S = S
        self.B = B
        self.C = C
        self.transform = transforms.Compose([transforms.Resize((448, 448)),transforms.ToTensor()])
        #["tumor1.jpg","tumor2.png".....]
        self.image_filenames=[f for f in os.listdir(self.img_dir) if f.endswith('.jpg') or f.endswith('.png') ]

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        image_filename = self.image_filenames[idx]
        image_path=os.path.join(self.img_dir, image_filename)
        label_path=os.path.join(self.label_dir,image_filename.replace('.jpg','.txt').replace('.png','.txt'))
        image=Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label_tensor=torch.zeros((self.S,self.S,self.C+self.B*5))
        if os.path.exists(label_path):
            with open(label_path) as f:
                for line in f.readlines():
                    class_id, x,y,w,h=map(float,line.strip().split())
                    i=int(x*self.S)
                    j=int(y*self.S)
                    x_cell=x*self.S-1
                    y_cell=y*self.S-1
                    w_cell=w
                    h_cell=h
                    if label_tensor[j,i,self.C]==0:
                        label_tensor[j,i,self.C]=1
                        label_tensor[j,i,self.C+1:self.C+5]=torch.tensor([x_cell,y_cell,w_cell,h_cell])
                        label_tensor[j,i,0]=class_id
        return  image, label_tensor

# Calculo del IoU

In [52]:
def intersection_over_union(boxes_preds, boxes_labels):
    box1_x1=boxes_preds[0]-boxes_preds[2]/2
    box1_y1=boxes_preds[1]-boxes_preds[3]/2
    box1_x2=boxes_preds[0]+boxes_preds[2]/2
    box1_y2=boxes_preds[1]+boxes_preds[3]/2

    box2_x1=boxes_labels[0]-boxes_labels[2]/2
    box2_y1=boxes_labels[1]-boxes_labels[3]/2
    box2_x2=boxes_labels[0]+boxes_labels[2]/2
    box2_y2=boxes_labels[1]+boxes_labels[3]/2

    x1=max(box1_x1,box2_x1)
    y1=max(box1_y1,box2_y1)
    x2=min(box1_x2,box2_x2)
    y2=min(box1_y2,box2_y2)

    intersection=max(0,x2-x1)*max(0,y2-y1)

    box1_area=(box1_x2-box1_x1)*(box1_y2-box1_y1)
    box2_area=(box2_x2-box2_x1)*(box2_y2-box2_y1)

    union=box1_area+box2_area-intersection+1e-6
    return intersection/union

# YOLOLOSS

In [53]:
import torch
class YOLOLoss(nn.Module):
    def __init__(self, S=7, B=2, C=1, lambda_coords=5,lambda_noobj=0.5 ):
        super(YOLOLoss, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coords=lambda_coords
        self.lambda_noobj=lambda_noobj
        self.mse=nn.MSELoss(reduction='sum')
    def forward(self, predictions, target):
        N=predictions.size(0)
        loss=0
        for n in range(N):
            for i in range(self.S):
                for j in range(self.S):
                    pred=predictions[n,i,j]
                    truth=target[n,i,j]
                    has_obj=truth[self.C]
                    if has_obj==1:
                        ious=[]
                        for b in range(self.B):
                            start=self.C+b*5+1
                            box_pred=pred[start:start+4]
                            box_true=truth[self.C+1:self.C+5]
                            iou=intersection_over_union(box_pred,box_true)
                            ious.append(iou)
                        best_box=torch.argmax(torch.tensor(ious))
                        #Coordednadas y confianza del mejor box
                        start=self.C+best_box*5
                        pred_box=pred[start+1:start+5]
                        true_box=truth[self.C+1:self.C+5]

                        pred_conf=pred[start]
                        true_conf=truth[self.C]
                        #perdida de la clase
                        loss += self.mse(pred[0:self.C], truth[0:self.C])
                        loss+=self.lambda_coords*(self.mse(pred_box,true_box))
                        loss+=self.mse(pred_conf,true_conf)
                        #Penalizar confianza de los otros boxes
                        for b in range(self.B):
                            if b !=best_box:
                                conf=pred[self.C+b*5]
                                loss+=self.lambda_noobj*self.mse(conf,torch.tensor(0.))
                    else:
                        for b in range(self.B):
                            conf=pred[self.C+b*5]
                            loss+=self.lambda_noobj*self.mse(conf,torch.tensor(0.))
        return loss/N

# Entrenamiento

In [54]:
from torch.utils.data import DataLoader
from torch import optim
from tqdm import tqdm

def train_yolov1(model,dataset,loss_fn, device="cuda" if torch.cuda.is_available() else "cpu", epochs=50, batch_size=16, lr=1e-4, checkpoint_path="yolov1_final.path", save_best=True):
    dataloader=DataLoader(dataset,batch_size=batch_size,shuffle=True, drop_last=True)
    model=model.to(device)
    optimizer=optim.Adam(model.parameters(),lr=lr)
    scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.5)
    best_loss=float('inf')

    for epoch in range(epochs):
        model.train()
        epoch_loss=0
        loop=tqdm(dataloader,leave=False)
        for imgs, labels in loop:
            imgs, labels = imgs.to(device), labels.to(device)
            preds=model(imgs)
            loss=loss_fn(preds,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss+=loss.item()
            loop.set_description(f"Epoch [{epoch+1}/{epochs}]")
            loop.set_postfix(loss=loss.item(), avg_loss=epoch_loss/(loop.n+1))

    avg_loss=epoch_loss/len(dataloader)
    scheduler.step()
    print(f"Epoch [{epoch+1}/{epochs}]- Loss: {avg_loss:.4f}")
    if save_best and avg_loss < best_loss:
        best_loss=avg_loss
        torch.save(model.state_dict(), "best_model.pth")
        print(f"Nuevo mejor modelo guardado (Loss: {best_loss:.4f})")
    if (epoch+1)%5==0:
        ckpt_name=f"checkpoint_{epoch+1}.pth"
        torch.save(model.state_dict(), ckpt_name)
        print(f"Guardado de checkpoint: {ckpt_name}")

    torch.save(model.state_dict(), checkpoint_path)
    print(f"Modelo final guardado en: {checkpoint_path}")


# Preparativos finales

In [None]:
model=Yolo(s=7,b=2,c=1)
loss_fn=YOLOLoss(S=7,B=2,C=1)

image_dir=r"C:\Users\Intel\Desktop\yolo\brain-tumor\train\images"
label_dir=r"C:\Users\Intel\Desktop\yolo\brain-tumor\train\labels"

train_dataset=TumorDataset(img_dir=image_dir,label_dir=label_dir,S=7,B=2,C=1)

train_yolov1(model=model,dataset=train_dataset,loss_fn=loss_fn, epochs=50, batch_size=16)

Epoch [2/50]:  89%|████████▉ | 49/55 [08:22<00:59,  9.89s/it, avg_loss=9.26, loss=6.07]

# Creamos una función de predicción

In [None]:
import torchvision.transforms as T
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

def show_prediction_on_image(model, image_path, S=7, B=2, C=1, conf_threshold=0.4):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    model.eval()

    transform = T.Compose([T.Resize((448, 448)), T.ToTensor()])

    image = Image.open(image_path).convert("RGB")
    original_w, original_h = image.size
    input_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor).squeeze(0).cpu()

    best_conf = 0
    best_box = None
    for i in range(S):
        for j in range(S):
            cell = output[i, j]
            for b in range(B):
                conf = cell[C + b * 5]
                if conf > best_conf and conf > conf_threshold:
                    x_rel, y_rel, w_rel, h_rel = cell[C + b * 5 + 1:C + b * 5 + 5]
                    best_conf = conf
                    best_box = (j, i, x_rel, y_rel, w_rel, h_rel)

    draw = ImageDraw.Draw(image)
    if best_box:
        j, i, x_cell, y_cell, w, h = best_box
        x_center = (j + x_cell) / S * original_w
        y_center = (i + y_cell) / S * original_h
        width = w * original_w
        height = h * original_h

        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2

        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
        draw.text((x1, y1 - 10), f"conf: {best_conf:.2f}", fill="red")
    else:
        print("⚠️ No se detectó ningún objeto con suficiente confianza.")

    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.axis("off")
    plt.title("Predicción YOLOv1 - Tumor cerebral")
    plt.show()


# Modelo cargado

# Implementamos

In [None]:

path_test=r"C:\Users\Intel\Desktop\yolo\brain-tumor\valid\images\val_1 (1).jpg"
show_prediction_on_image(model,path_test)