<html>
<body>
<h1>Resumen de Implementación</h1>
<h2>Objetivo</h2>
    <p>El código entrena un modelo de deep learning para detectar puntos característicos en rostros.</p>

<h2>Descripción</h2>
    <ul>
        <li>Implementa un modelo de regresión basado en ResNet-18 para detectar puntos clave en rostros.</li>
        <li>El modelo estima 136 valores (68 puntos, cada uno con coordenadas x e y).</li>
    </ul>

<h2>Implementación Base</h2>
    <ul>
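        <li>El modelo parte de la arquitectura ResNet-18 de torchvision con pesos preentrenados en ImageNet; <code>resnet.py</code> aporta un backbone alternativo.</li>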
        <li>Los datos de entrenamiento y validación se cargan desde archivos CSV.</li>
        <li>Se utiliza MSE como función de pérdida.</li>
    </ul>

<h2>Datos de Entrenamiento y Validación</h2>
    <p>Se asume que los datos de entrenamiento y validación están en archivos CSV proporcionados.</p>

<h2>Función de Pérdida</h2>
    <p>Se utiliza MSE (Mean Squared Error).</p>

<h2>Métricas</h2>
    <ul>
        <li>El código evalúa el modelo usando RMSE (Root Mean Square Error).</li>
        <li>También muestra visualmente las predicciones comparando los puntos reales y predichos.</li>
    </ul>
</body>
</html>


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torchvision.models import ResNet18_Weights
from torchsummary import summary
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
import os
from resnet import ResNetBackbone
from retinaface import RetinaFace  # Asegúrate de instalar retinaface

In [None]:
# Punto 2: Definir el dataset de puntos característicos de rostros
class FaceKeypointsDataset(Dataset):
    """Face images paired with their 2-D keypoints, read from a CSV file.

    The first CSV column is the image file name (relative to ``root_dir``);
    the remaining columns are the flattened ``x, y`` keypoint coordinates.

    Each sample is a dict ``{'image': ..., 'keypoints': ...}``. The keypoints
    are kept in the coordinate frame of the returned image: they are shifted
    when the image is cropped to the detected face and rescaled when the
    transform changes the image size.

    Args:
        csv_file: Path to the CSV file with file names and keypoints.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the *image* only (for example
            a ``torchvision.transforms.Compose``). Keypoints are rescaled by
            the ratio between the image size before and after the transform,
            so only pure resizes are tracked; transforms that crop, flip or
            rotate would need their own keypoint handling.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.keypoints_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.keypoints_frame)

    @staticmethod
    def _clip_box(box, width, height):
        """Clamp an ``(x1, y1, x2, y2)`` box to the image; ``None`` if empty."""
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return x1, y1, x2, y2

    @staticmethod
    def _image_size(image):
        """Return ``(width, height)`` of a tensor, ndarray or PIL image."""
        # Tensor must be tested first: ``Tensor.size`` is a method, not a tuple.
        if isinstance(image, torch.Tensor):
            return image.shape[-1], image.shape[-2]
        if isinstance(image, np.ndarray):
            return image.shape[1], image.shape[0]
        return tuple(image.size)  # PIL images expose (width, height)

    @staticmethod
    def _rescale_keypoints(keypoints, old_size, new_size):
        """Map keypoints from an ``old_size`` image onto a ``new_size`` one."""
        scale = np.array(
            [new_size[0] / old_size[0], new_size[1] / old_size[1]],
            dtype=np.float32,
        )
        return (keypoints * scale).astype(np.float32)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.keypoints_frame.iloc[idx, 0])
        image = cv2.imread(img_name)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {img_name}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # float32 matches the dtype of the network outputs the loss compares to.
        keypoints = self.keypoints_frame.iloc[idx, 1:].values
        keypoints = keypoints.astype(np.float32).reshape(-1, 2)

        # Crop to the first face found by RetinaFace, if any.
        # NOTE(review): RetinaFace presumably expects BGR arrays; this passes
        # RGB as the original code did - confirm against the library.
        faces = RetinaFace.detect_faces(image)
        if isinstance(faces, dict) and faces:
            face = next(iter(faces.values()))
            height, width = image.shape[:2]
            box = self._clip_box(face['facial_area'], width, height)
            if box is not None:
                x1, y1, x2, y2 = box
                image = image[y1:y2, x1:x2]
                # Keep the keypoints aligned with the cropped image.
                keypoints = keypoints - np.array([x1, y1], dtype=np.float32)

        if self.transform is not None:
            size_before = self._image_size(image)
            image = self.transform(image)
            keypoints = self._rescale_keypoints(
                keypoints, size_before, self._image_size(image)
            )

        return {'image': image, 'keypoints': keypoints}

In [None]:
# Punto 2: Definir las transformaciones
# Image pipeline: array -> PIL image -> 256x256 resize -> tensor.
data_transform = transforms.Compose(
    transforms=[
        # The dataset reads images with cv2, i.e. as numpy arrays.
        transforms.ToPILImage(),
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Punto 2.2: Cargar los datasets
# Training and validation splits share the same image transform.
train_dataset = FaceKeypointsDataset(
    csv_file='FaceKPoints/training_frames_keypoints.csv',
    root_dir='FaceKPoints/training',
    transform=data_transform,
)
valid_dataset = FaceKeypointsDataset(
    csv_file='FaceKPoints/valid_frames_keypoints.csv',
    root_dir='FaceKPoints/valid',
    transform=data_transform,
)

# Only the training loader shuffles; validation keeps the CSV order.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Punto 2: Definir el modelo basado en ResNet de resnet.py
class KeypointModel(nn.Module):
    """Keypoint regressor: ``ResNetBackbone`` features fed to an MLP head.

    The head maps 512 input features to 136 outputs (68 keypoints with an
    x and a y coordinate each).
    NOTE(review): assumes the backbone returns a flat 512-feature vector per
    sample - confirm against resnet.py.
    """

    def __init__(self):
        super().__init__()
        # ResNet-18 layout: four stages of two blocks each.
        stage_depths = [2, 2, 2, 2]
        stage_widths = [64, 128, 256, 512]
        self.resnet_backbone = ResNetBackbone(
            block_sizes=stage_depths,
            filters=stage_widths,
        )
        head_layers = [
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 136),  # 68 keypoints * 2 (x, y)
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the backbone and then the regression head on ``x``."""
        features = self.resnet_backbone(x)
        return self.fc(features)


In [None]:
# Punto 2.3: Inicializar el modelo, la función de pérdida y el optimizador
# Alternative: the custom-backbone model defined in this file.
#model = KeypointModel()
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# The pretrained head emits 1000 ImageNet logits; replace it with a fresh
# layer producing the 136 regression targets (68 keypoints * 2 coordinates).
model.fc = nn.Linear(model.fc.in_features, 136)
# The training loop moves every batch to `device`. Taking it from the model
# guarantees that batches and weights always end up on the same device.
device = next(model.parameters()).device
criterion = nn.MSELoss()
# Created after the head swap so the new layer's parameters are optimized.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# torchsummary builds its dummy input on CUDA by default; pass the model's
# own device type so the input and the weights agree. The input size matches
# the 256x256 images produced by the data transform.
summary(
    model,
    input_size=(3, 256, 256),
    device=next(model.parameters()).device.type,
)

In [None]:
# Punto 2.3: Compilar y entrenar el modelo
def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and report the validation loss after every epoch.

    Batches are dicts with an ``'image'`` tensor and a ``'keypoints'`` tensor;
    keypoints are flattened to ``(batch, -1)`` before the loss is computed.
    Batches are moved to the device the model's parameters live on, and the
    targets are cast to the dtype of the model outputs.

    Args:
        model: Network to optimize (updated in place).
        train_loader: Iterable of training batches.
        valid_loader: Iterable of validation batches.
        criterion: Loss called as ``criterion(outputs, targets)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
    """
    # Follow the model instead of relying on a global `device` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        print(f'Época {epoch+1}/{num_epochs}')
        print('-' * 10)

        for i, batch in enumerate(train_loader):
            inputs = batch['image'].to(device)
            targets = batch['keypoints']
            targets = targets.reshape(targets.size(0), -1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets.to(device=device, dtype=outputs.dtype))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # report every 10 mini-batches
                print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}')
                running_loss = 0.0

        # Validation pass: no gradients, dropout/batch-norm in eval mode.
        model.eval()
        valid_loss = 0.0
        n_valid_batches = 0
        with torch.no_grad():
            for batch in valid_loader:
                inputs = batch['image'].to(device)
                targets = batch['keypoints']
                targets = targets.reshape(targets.size(0), -1)
                outputs = model(inputs)
                loss = criterion(outputs, targets.to(device=device, dtype=outputs.dtype))
                valid_loss += loss.item()
                n_valid_batches += 1

        if n_valid_batches:
            print(f'Epoch {epoch + 1}, Validation Loss: {valid_loss / n_valid_batches:.3f}')
        else:
            # Avoid a division by zero when there is nothing to validate on.
            print(f'Epoch {epoch + 1}, Validation Loss: n/a (empty validation loader)')
        # Leave the model in training mode, as callers of the original got.
        model.train()

    print('Finished Training')

In [None]:
# Ejecutar el proceso de entrenamiento
# Three full passes over the training data.
train_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=3,
)

In [None]:
# Punto 5: Guardar el modelo
# Only the parameters (state dict) are stored, not the module object itself.
torch.save(obj=model.state_dict(), f='model-res.pth')
print('Model saved as model-res.pth')

In [None]:
# Punto 2.4: Evaluación del modelo usando RMSE
def evaluate_model(model, valid_loader):
    """Compute the RMSE between predicted and true keypoints over a loader.

    The squared error is accumulated over every predicted value and the root
    is taken once at the end, so the result is the RMSE of the whole set
    regardless of how it is split into batches. Batches are moved to the
    device the model's parameters live on.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        valid_loader: Iterable of dict batches with ``'image'`` and
            ``'keypoints'`` tensors.

    Returns:
        The RMSE as a Python float.

    Raises:
        ValueError: If ``valid_loader`` yields no values to evaluate.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    squared_error_sum = 0.0
    n_values = 0

    with torch.no_grad():
        for batch in valid_loader:
            images = batch['image'].to(device)
            targets = batch['keypoints']
            targets = targets.reshape(targets.size(0), -1)
            outputs = model(images)
            targets = targets.to(device=device, dtype=outputs.dtype)
            squared_error_sum += torch.sum((outputs - targets) ** 2).item()
            n_values += targets.numel()

    if n_values == 0:
        raise ValueError('valid_loader produced no samples to evaluate')

    avg_rmse = (squared_error_sum / n_values) ** 0.5
    print(f'RMSE en el conjunto de validación: {avg_rmse:.4f}')
    return avg_rmse

In [None]:
# Evaluar el modelo
# Keep the validation RMSE around for later cells.
rmse = evaluate_model(model=model, valid_loader=valid_loader)

In [None]:
# Punto 2.4: Mostrar algunos resultados cualitativos
def visualize_predictions(model, valid_loader, num_images=5):
    """Plot true versus predicted keypoints on top of validation images.

    One matplotlib figure is shown per image, with the true keypoints drawn
    as red circles and the predictions as blue crosses. Batches are moved to
    the device the model's parameters live on for the forward pass, and the
    results are brought back to the CPU for plotting.

    Args:
        model: Network used for prediction; it is switched to eval mode.
        valid_loader: Iterable of dict batches with ``'image'`` and
            ``'keypoints'`` tensors.
        num_images: Maximum number of figures to show.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    images_so_far = 0

    with torch.no_grad():
        for batch in valid_loader:
            # Stop before running another forward pass nobody will look at.
            if images_so_far >= num_images:
                return

            images = batch['image']
            keypoints = batch['keypoints'].reshape(batch['keypoints'].size(0), -1)
            outputs = model(images.to(device)).cpu()

            for image, true_flat, predicted_flat in zip(images, keypoints, outputs):
                if images_so_far >= num_images:
                    return
                # imshow expects channels last (H, W, C).
                image = image.permute(1, 2, 0).cpu().numpy()
                true_keypoints = true_flat.reshape(-1, 2).cpu().numpy()
                predicted_keypoints = predicted_flat.reshape(-1, 2).numpy()

                plt.figure()
                plt.imshow(image)
                plt.scatter(true_keypoints[:, 0], true_keypoints[:, 1], c='r', marker='o')
                plt.scatter(predicted_keypoints[:, 0], predicted_keypoints[:, 1], c='b', marker='x')
                plt.title('Puntos Reales (rojo) vs Predichos (azul)')
                plt.show()

                images_so_far += 1

In [None]:
# Visualizar las predicciones
# Show the first five validation samples with their predictions.
visualize_predictions(
    model=model,
    valid_loader=valid_loader,
    num_images=5,
)