## Preparación del Dataset

In [8]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler

# Definir las transformaciones
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Cargar el dataset
dataset_path = 'dataset'
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Crear índices para el split: train, validation y test
total_size = len(dataset)
train_size = int(0.8 * total_size)
validation_size = int(0.1 * total_size)
test_size = total_size - train_size - validation_size

indices = torch.randperm(total_size).tolist()
train_indices = indices[:train_size]
val_indices = indices[train_size:train_size+validation_size]
test_indices = indices[train_size+validation_size:]

# Crear DataLoaders
batch_size = 32
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)


## Data Augmentation

In [9]:
transform_augmentation = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


## Configuración del Modelo e Hiperparámetros

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Cargar EfficientNet preentrenado y modificarlo para tu tarea específica
model = models.efficientnet_b0(pretrained=True)

# Congelar los pesos de las capas anteriores
for param in model.parameters():
    param.requires_grad = False

# Reemplazar la última capa de clasificación
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)  # 2 clases: Ford Bronco y otros vehículos

# Definir hiperparámetros
lr = 0.001
epochs = 10

# Definir dispositivo
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Definir la función de pérdida y el optimizador
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.classifier[1].parameters(), lr=lr)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /home/tecotl/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:01<00:00, 13.3MB/s]


## Función de Entrenamiento y Validación

In [11]:
def train_model(model, criterion, optimizer, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        
        # Validación
        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item() * inputs.size(0)
        
        val_epoch_loss = val_running_loss / len(val_loader.dataset)
        
        print(f'Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}')


## Entrenamiento y Validación

In [12]:
train_model(model, criterion, optimizer, train_loader, val_loader, epochs=epochs)

Epoch 1/10 - Loss: 0.0492, Val Loss: 0.0029
Epoch 2/10 - Loss: 0.0137, Val Loss: 0.0014
Epoch 3/10 - Loss: 0.0073, Val Loss: 0.0009
Epoch 4/10 - Loss: 0.0054, Val Loss: 0.0008
Epoch 5/10 - Loss: 0.0038, Val Loss: 0.0007
Epoch 6/10 - Loss: 0.0034, Val Loss: 0.0007
Epoch 7/10 - Loss: 0.0020, Val Loss: 0.0007
Epoch 8/10 - Loss: 0.0034, Val Loss: 0.0006
Epoch 9/10 - Loss: 0.0020, Val Loss: 0.0006
Epoch 10/10 - Loss: 0.0020, Val Loss: 0.0005


## Guardar el modelo

In [13]:
# Guardar el modelo entrenado
model_path = 'model_efficientnet_b0.pth'
torch.save(model.state_dict(), model_path)

In [14]:
# Cargar el modelo
model_clf = models.efficientnet_b0(pretrained=False)  # Inicializa el modelo sin pesos preentrenados

# Reemplazar la última capa de clasificación (igual que antes)
num_ftrs = model_clf.classifier[1].in_features
model_clf.classifier[1] = nn.Linear(num_ftrs, 2)  # Asegúrate de que esto coincida con el modelo guardado

# Cargar los pesos guardados
model_clf.load_state_dict(torch.load(model_path))

# Mover el modelo al dispositivo adecuado
model_clf.to(device)

# Asegurarse de que el modelo esté en modo de evaluación
model_clf.eval()




EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [22]:
import torch
from torchvision import models, transforms
import torch.nn as nn
import cv2
import numpy as np

# Asumiendo que ya has guardado el modelo en 'model_efficientnet_b0.pth'
model_path = 'model_efficientnet_b0.pth'

# Cargar el modelo preentrenado y modificar la última capa para tu caso específico
def load_model(path):
    model_clf = models.efficientnet_b0(pretrained=False)
    num_ftrs = model_clf.classifier[1].in_features
    model_clf.classifier[1] = nn.Linear(num_ftrs, 2)  # Asumiendo 2 clases
    model_clf.load_state_dict(torch.load(path))
    model_clf.eval()  # Importante para preparar el modelo para evaluación y no entrenamiento
    return model_clf

model_clf = load_model(model_path)


In [24]:
model_yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

Using cache found in /home/tecotl/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-3-18 Python-3.9.18 torch-2.2.1+cu121 CUDA:0 (NVIDIA GeForce GTX 1050, 4039MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [28]:
import cv2
import torch
import numpy as np
from torchvision import models, transforms
from torch.nn import functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_clf.to(device)

# Configuración para la transformación de la imagen antes de clasificar
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

cap = cv2.VideoCapture('broncoGTBC.mp4')
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
out = cv2.VideoWriter('test_2.mp4', fourcc, fps, (frame_width, frame_height))

interested_classes = [0, 2, 7]  # Solo interesa 'car' y 'truck' para clasificación

window_name = 'Detección YOLOv5 con Clasificación'
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Detectar objetos con YOLOv5
    results = model_yolo(frame)

    for det in results.xyxy[0]:
        classId = int(det[5])
        score = det[4].item()
        if score > 0.75:  # Filtro de confianza
            x1, y1, x2, y2 = map(int, det[:4])
            
            # Para personas, dibuja el bounding box directamente
            if classId == 0:  
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                cv2.putText(frame, f'Persona ({score:.2f})', (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            
            # Para vehículos, clasifica si es un Bronco o no
            elif classId in [2, 7]:
                crop_img = frame[y1:y2, x1:x2]
                crop_img_tensor = transform(crop_img).unsqueeze(0).to(device)
                with torch.no_grad():
                    output = model_clf(crop_img_tensor)
                    probabilities = F.softmax(output, dim=1)
                    pred = probabilities.argmax(dim=1)
                    pred_label = 'Bronco' if pred.item() == 0 else 'Otro vehículo'
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f'{pred_label} ({score:.2f})', (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    out.write(frame)
    cv2.imshow(window_name, frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
