In [None]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

import cv2
import numpy as np
import glob
import os

In [None]:
# Paths to the Darknet configuration, the trained weights and the class-name list.
config_path = 'conf.cfg'
weights_path = 'pesos.weights'
names_path = 'obj.names'

# Load the class names: one name per line, the line position is the class id.
with open(names_path, 'r') as f:
    class_names = [line.strip() for line in f]

# Load the YOLOv4 model from the Darknet files.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

# Resolve the names of the output layers.
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV release it is either a flat array or an (N, 1) array, so it is
# flattened first to make the lookup work with both layouts.
layer_names = net.getLayerNames()
output_layers = [
    layer_names[int(layer_index) - 1]
    for layer_index in np.array(net.getUnconnectedOutLayers()).flatten()
]

def collect_image_files(base_dir, pattern='*.jpg'):
    """Return the paths of all files matching ``pattern`` under ``base_dir``.

    Every directory below ``base_dir`` (including ``base_dir`` itself) is
    visited exactly once and globbed exactly once, so a directory without
    files can no longer re-add the matches of the previously visited one.

    Args:
        base_dir: Folder whose tree is walked.
        pattern: Glob pattern applied inside each visited directory.

    Returns:
        A list of matching paths; matches of one directory are sorted so the
        order within a directory is reproducible between runs.
    """
    collected = []
    for root, _dirs, _files in os.walk(base_dir):
        # glob.escape keeps directory names containing [, ] or * literal.
        per_directory = glob.glob(os.path.join(glob.escape(root), pattern))
        collected.extend(sorted(per_directory))
    return collected


# Base folder that contains several sub-directories with images.
path = './ComputerVision2'
image_files = collect_image_files(path)

img_index = 0  # Running index that gives every saved crop a unique file name

CONFIDENCE_THRESHOLD = 0.5  # Minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4         # Overlap threshold handed to non-maximum suppression
PERSON_CLASS_ID = 0         # Class 0 is the "person" class

# Process every collected image and save the crop of its largest person box.
for image_path in image_files:
    # Paths containing 'ROI' are skipped so crops written by this cell are
    # not processed again on a later run.
    if 'ROI' in image_path:
        continue

    # Load the input image; cv2.imread returns None (it does not raise) when
    # the file is missing or cannot be decoded.
    image = cv2.imread(image_path)
    if image is None:
        print(f'No se pudo leer la imagen: {image_path}')
        continue
    height, width = image.shape[:2]

    # Pre-process the image to the model input format and run the detector.
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)

    # Collect the person detections.
    boxes = []
    confidences = []
    class_ids = []

    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])

            if confidence <= CONFIDENCE_THRESHOLD or class_id != PERSON_CLASS_ID:
                continue

            # Scale the normalised box to the size of the original image.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert the centre-based box to its top-left corner.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)

    # Non-maximum suppression removes overlapping detections.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

    # Keep the surviving box with the largest area.
    max_area = 0
    max_box = None

    # Flattening accepts every shape NMSBoxes may return: an empty tuple,
    # a flat array, or an (N, 1) array in older OpenCV releases.
    for i in np.array(indices).flatten():
        i = int(i)
        x, y, w, h = boxes[i]
        area = w * h

        if area > max_area:
            max_area = area
            max_box = (x, y, w, h)
            max_label = class_names[class_ids[i]]
            max_confidence = confidences[i]

    # No person found: move on to the next image without consuming an index.
    if max_box is None:
        continue

    x, y, w, h = max_box
    label = max_label
    confidence = max_confidence

    # Clip the box to the image. The far edges are computed from the
    # unclipped corner so that clipping never shifts or enlarges the crop,
    # and they are kept non-negative so a slice end is never interpreted as
    # a "count from the end" index.
    x_start = max(x, 0)
    y_start = max(y, 0)
    x_end = min(max(x + w, 0), width)
    y_end = min(max(y + h, 0), height)

    # Extract the region of interest.
    roi = image[y_start:y_end, x_start:x_end]

    if roi.size > 0:  # Only write crops with a non-zero height and width
        # Save the crop next to its source image; os.path.dirname works with
        # both '/' and '\\' separators and with any directory depth.
        output_path = os.path.dirname(image_path)
        roi_name = f'ROI_{label}_{confidence:.2f}_{img_index}.jpg'
        cv2.imwrite(os.path.join(output_path, roi_name), roi)

    # Advance the index used in the next crop's file name.
    img_index += 1

In [None]:
import os
import uuid


def rename_sequentially(folder_path, extension='.jpg', start=1):
    """Rename the files with ``extension`` in ``folder_path`` to 0<N><extension>.

    N counts up from ``start`` following the sorted order of the original
    file names. The rename is done in two passes (original -> unique
    temporary name -> final name) so that a file which already carries one
    of the target names is never overwritten, which makes the operation safe
    to repeat on an already-renamed folder.

    Args:
        folder_path: Folder whose files are renamed.
        extension: Suffix a file name must end with to be renamed.
        start: Number given to the first file.

    Returns:
        The number of files that were renamed.

    Raises:
        OSError: If the folder cannot be listed or a rename fails; files
            already moved to a temporary name keep that name in that case.
    """
    originals = sorted(
        name for name in os.listdir(folder_path)
        if name.endswith(extension)
        and os.path.isfile(os.path.join(folder_path, name))
    )

    # Pass 1: move every file out of the way under a collision-free name.
    token = uuid.uuid4().hex
    staged = []
    for position, name in enumerate(originals):
        temp_name = f'.renaming-{token}-{position}.tmp'
        os.rename(os.path.join(folder_path, name),
                  os.path.join(folder_path, temp_name))
        staged.append(temp_name)

    # Pass 2: assign the final sequential names.
    for offset, temp_name in enumerate(staged):
        new_filename = '0' + str(start + offset) + extension
        os.rename(os.path.join(folder_path, temp_name),
                  os.path.join(folder_path, new_filename))

    return len(staged)


folder_path = r'./fotos'  # Folder that holds the photos
extension = '.jpg'  # Extension of the image files

counter = 1  # Number assigned to the first renamed file

if os.path.isdir(folder_path):
    # counter ends up one past the last number used, as in a manual count.
    counter += rename_sequentially(folder_path, extension, start=counter)
    print('Renombrado completado.')
else:
    print(f'No se encontró la carpeta: {folder_path}')

## Entrenamiento 

In [None]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
import glob

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Training transform: resize, random horizontal flip as augmentation, then
# convert to a tensor in [0, 1] and normalise each channel to [-1, 1]
# with (x - mean) / std.
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5])
])

# Evaluation transform: identical pre-processing but without the random
# flip, so the test accuracy of a given model is deterministic.
test_transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5])
])


# Training and testing directories. Forward slashes are used because they
# are accepted on Windows as well as on POSIX systems, whereas a backslash
# is part of the file name on POSIX.
train_path = './TRAIN-RECORTAR'
test_path = './TEST-RECORTAR'
torch.manual_seed(42)

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=32, shuffle=True
)
# Shuffling has no benefit for evaluation, so the test order is kept fixed.
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=test_transformer),
    batch_size=32, shuffle=False
)

# Category names: the sub-directories of the training folder. Plain files in
# that folder are ignored so they are not mistaken for classes.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

In [None]:
class ConvNet(nn.Module):
    """Small three-convolution image classifier.

    Layer sequence: conv1 -> bn1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU ->
    conv3 -> bn3 -> ReLU -> flatten -> fully connected layer.

    All convolutions use kernel 3, stride 1 and padding 1, so only the
    max-pool changes the spatial size (it halves height and width, rounding
    down). The fully connected layer is sized from ``input_size`` instead of
    being fixed to 150x150 inputs.

    Args:
        num_classes: Number of output scores produced per image.
        input_size: ``(height, width)`` of the images fed to the network.
            The default reproduces the original 75 * 75 * 32 feature size.
    """

    def __init__(self, num_classes=5, input_size=(150, 150)):
        super(ConvNet, self).__init__()

        height, width = input_size

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()

        # Halves the spatial dimensions (floor division for odd sizes).
        self.pool = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()

        # 32 channels after conv3, each of size (height // 2, width // 2).
        self.flat_features = 32 * (height // 2) * (width // 2)
        self.fc = nn.Linear(in_features=self.flat_features, out_features=num_classes)

    def forward(self, input):
        """Return the class scores for a batch of images.

        Args:
            input: Tensor of shape (batch, 3, height, width) where
                (height, width) matches the ``input_size`` given at
                construction.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.
        """
        output = self.relu1(self.bn1(self.conv1(input)))
        output = self.pool(output)
        output = self.relu2(self.conv2(output))
        output = self.relu3(self.bn3(self.conv3(output)))

        # Flatten per sample. Keeping the batch dimension fixed means a
        # wrongly sized input fails in the linear layer instead of being
        # silently reshaped into a different number of samples.
        output = output.view(output.size(0), -1)

        return self.fc(output)

In [None]:
# Build the classifier and move it to the selected device before the
# optimizer captures its parameters.
model = ConvNet(num_classes=5)
model = model.to(device)

# Cross-entropy loss for classification, optimised with Adam and a small
# weight decay.
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), weight_decay=0.0001, lr=0.001)

# Number of .jpg files found for each split.
train_count, test_count = [
    len(glob.glob(split_dir + '/**/*.jpg'))
    for split_dir in (train_path, test_path)
]

In [None]:
import matplotlib.pyplot as plt

num_epochs = 10
best_accuracy = 0.0

# Per-epoch accuracy history for both splits.
train_accuracies = []
test_accuracies = []

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_count = 0

    for images, labels in train_loader:
        # Move the batch to the same device the model was placed on.
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss is the batch mean, so it is weighted by the batch size;
        # .item() stores a plain float instead of keeping tensors alive.
        train_loss += loss.item() * images.size(0)
        _, prediction = torch.max(outputs, 1)
        train_correct += int(torch.sum(prediction == labels))
        train_count += labels.size(0)

    train_loss = train_loss / train_count
    train_accuracy = train_correct / train_count
    train_accuracies.append(train_accuracy)

    # ---- Evaluation pass ----
    model.eval()
    test_correct = 0
    test_count = 0
    test_loss = 0.0

    # No gradients are needed for evaluation; disabling them avoids building
    # the autograd graph for every test batch.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, prediction = torch.max(outputs, 1)
            test_correct += int(torch.sum(prediction == labels))
            test_count += labels.size(0)
            loss = loss_function(outputs, labels)
            test_loss += loss.item() * images.size(0)

    test_loss = test_loss / test_count
    test_accuracy = test_correct / test_count
    test_accuracies.append(test_accuracy)

    # Uncomment to log the metrics of every epoch:
    # print(f'Epoch: {epoch} Train Loss: {train_loss} Train Accuracy: {train_accuracy} Test Accuracy: {test_accuracy}')

    # Keep the weights of the epoch with the best test accuracy so far.
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy