In [1]:

import numpy as np
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

# Cargar el CSV que subiste
df = pd.read_csv("/content/drive/MyDrive/tfm/TFM/csv/csv_fotos_etiquetadas.csv", names=["property_id", "etiqueta"])




In [9]:
df

Unnamed: 0,property_id,etiqueta
0,foto_100243804_1.webp,[reformado]
1,foto_100243804_10.webp,[reformado]
2,foto_100243804_2.webp,[nulo]
3,foto_100243804_3.webp,[reformado]
4,foto_100243804_4.webp,[reformado]
...,...,...
639,foto_107702266_1.webp,[reformado]
640,foto_107702266_5.webp,[reformado]
641,foto_107702266_7.webp,[reformado]
642,foto_107702266_8.webp,[reformado]


In [10]:

# Ajuste para incluir también 'nulo'
df['etiqueta'] = df['etiqueta'].str.strip("[]").str.strip().str.lower()

# Dividir solo las válidas (reformado / no reformado)
df_validas = df[df['etiqueta'].isin(['reformado', 'no reformado'])]
train_df, val_df = train_test_split(df_validas, test_size=0.2, random_state=42)

# Extraer los nulos
df_nulo = df[df['etiqueta'] == 'nulo']

# Directorios
image_dir = "/content/drive/MyDrive/tfm/TFM/fotos"
output_dir = "/content/drive/MyDrive/tfm/TFM/dataset_clasificacion"

def copiar_imagenes(df, subset):
    for _, row in df.iterrows():
        img_file = row['property_id']
        label = row['etiqueta'].replace(" ", "_")  # e.g., no_reformado, reformado, nulo

        src = os.path.join(image_dir, img_file)
        label_dir = os.path.join(output_dir, subset, label)
        os.makedirs(label_dir, exist_ok=True)

        dst = os.path.join(label_dir, img_file)

        if os.path.exists(src):
            shutil.copy(src, dst)
        else:
            print(f"⚠️ Imagen no encontrada: {src}")

# Ejecutar
copiar_imagenes(train_df, "train")
copiar_imagenes(val_df, "val")
copiar_imagenes(df_nulo, "nulos")  # se crea un subdirectorio llamado 'nulos/nulo'



In [3]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import os

# ----------- 📦 Data Augmentation y Normalización -----------
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ----------- 📂 Dataset -----------
train_dataset = ImageFolder("/content/drive/MyDrive/tfm/TFM/dataset_clasificacion/train", transform=transform_train)
val_dataset = ImageFolder("/content/drive/MyDrive/tfm/TFM/dataset_clasificacion/val", transform=transform_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# ----------- 🧠 Modelo -----------
model = models.efficientnet_b0(pretrained=True)

# Ajustar la capa final (solo 2 clases: reformado / no_reformado)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

# GPU si está disponible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# ----------- ⚙️ Pérdida y optimizador -----------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# ----------- 🚀 Entrenamiento -----------
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"\n🔁 Epoch {epoch+1}/{num_epochs} - Train Loss: {total_loss:.4f}")

    # ----------- 📊 Validación -----------
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = 100 * correct / total
    print(f"✅ Validation Accuracy: {acc:.2f}%")

# ----------- 💾 Guardar modelo -----------
save_path = "/content/drive/MyDrive/tfm/TFM/modelos/efficientnet_reformado.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(model.state_dict(), save_path)
print(f"\n✅ Modelo guardado en: {save_path}")


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 193MB/s]



🔁 Epoch 1/10 - Train Loss: 9.1226
✅ Validation Accuracy: 74.07%

🔁 Epoch 2/10 - Train Loss: 7.2459
✅ Validation Accuracy: 84.26%

🔁 Epoch 3/10 - Train Loss: 5.6657
✅ Validation Accuracy: 85.19%

🔁 Epoch 4/10 - Train Loss: 4.1860
✅ Validation Accuracy: 87.96%

🔁 Epoch 5/10 - Train Loss: 3.0906
✅ Validation Accuracy: 89.81%

🔁 Epoch 6/10 - Train Loss: 2.1615
✅ Validation Accuracy: 91.67%

🔁 Epoch 7/10 - Train Loss: 1.5205
✅ Validation Accuracy: 91.67%

🔁 Epoch 8/10 - Train Loss: 1.0625
✅ Validation Accuracy: 93.52%

🔁 Epoch 9/10 - Train Loss: 0.9151
✅ Validation Accuracy: 94.44%

🔁 Epoch 10/10 - Train Loss: 1.1051
✅ Validation Accuracy: 94.44%

✅ Modelo guardado en: /content/drive/MyDrive/tfm/TFM/modelos/efficientnet_reformado.pth


In [9]:
import os
import torch
from torchvision import models, transforms
from PIL import Image
import pandas as pd
from tqdm import tqdm
from collections import defaultdict, Counter

# ------ Parámetros ------
image_folder = "/content/drive/MyDrive/tfm/TFM/fotos"
model_path = "/content/drive/MyDrive/tfm/TFM/modelos/efficientnet_reformado.pth"
output_csv_path = "/content/drive/MyDrive/tfm/TFM/csv/predicciones_resto.csv"

# Obtener imágenes ya usadas
used_images = set([os.path.basename(p) for p, _ in train_dataset.samples + val_dataset.samples])

# ------ Transformaciones ------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ------ Cargar modelo ------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.efficientnet_b0(pretrained=False)
model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, 2)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# ------ Función de predicción ------
def predecir_imagen(path_img):
    image = Image.open(path_img).convert("RGB")
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(image)
        _, pred = torch.max(output, 1)
    return train_dataset.classes[pred.item()]

# ------ Agrupar imágenes por casa (property_id) ------
imagenes_por_casa = defaultdict(list)

for file in os.listdir(image_folder):
    if not file.endswith((".jpg", ".jpeg", ".png", ".webp")):
        continue
    if file in used_images:
        continue
    # Extraer el property_id (ej: foto_104515791_3.webp → 104515791)
    try:
        parts = file.split("_")
        property_id = parts[1]  # "104515791"
        imagenes_por_casa[property_id].append(file)
    except IndexError:
        print(f"❌ Nombre inesperado: {file}")

# ------ Predecir por casa ------
resultados = []

for property_id, archivos in tqdm(imagenes_por_casa.items(), desc="Evaluando casas"):
    predicciones = []

    # Ordenar imágenes por número final (_1, _2, ...) para asegurar consistencia
    archivos_ordenados = sorted(archivos, key=lambda x: int(x.split("_")[-1].split(".")[0]))

    for idx, file in enumerate(archivos_ordenados):
        full_path = os.path.join(image_folder, file)
        try:
            pred = predecir_imagen(full_path)
            predicciones.append(pred)

            # Si las dos primeras coinciden → salimos
            if idx == 1 and predicciones[0] == predicciones[1]:
                resultados.append({"property_id": property_id, "prediccion": pred})
                break

        except Exception as e:
            print(f"❌ Error con {file}: {e}")

    # Si no se hizo break (es decir, no hubo consenso temprano), tomar la clase más frecuente
    if len(predicciones) >= 1:
        clase_final = Counter(predicciones).most_common(1)[0][0]
        resultados.append({"property_id": property_id, "prediccion": clase_final})


# ------ Guardar resultados ------
df_resultados = pd.DataFrame(resultados)
df_resultados.to_csv(output_csv_path, index=False)
print(f"\n✅ CSV con predicciones guardado en: {output_csv_path}")



Evaluando casas: 100%|██████████| 1150/1150 [05:21<00:00,  3.58it/s]



✅ CSV con predicciones guardado en: /content/drive/MyDrive/tfm/TFM/csv/predicciones_resto.csv


In [5]:
used_images

{'foto_100243804_1.webp',
 'foto_100243804_10.webp',
 'foto_100243804_3.webp',
 'foto_100243804_4.webp',
 'foto_100243804_5.webp',
 'foto_100243804_6.webp',
 'foto_100243804_7.webp',
 'foto_100243804_8.webp',
 'foto_100243804_9.webp',
 'foto_100458501_1.webp',
 'foto_100458501_10.webp',
 'foto_100458501_2.webp',
 'foto_100458501_3.webp',
 'foto_100458501_4.webp',
 'foto_100458501_5.webp',
 'foto_100458501_6.webp',
 'foto_100458501_7.webp',
 'foto_100458501_8.webp',
 'foto_100458501_9.webp',
 'foto_100519346_1.webp',
 'foto_100519346_10.webp',
 'foto_100519346_2.webp',
 'foto_100519346_3.webp',
 'foto_100519346_4.webp',
 'foto_100519346_5.webp',
 'foto_100519346_7.webp',
 'foto_100519346_8.webp',
 'foto_100519346_9.webp',
 'foto_100531606_10.webp',
 'foto_100531606_2.webp',
 'foto_100531606_3.webp',
 'foto_100531606_4.webp',
 'foto_100531606_5.webp',
 'foto_100531606_6.webp',
 'foto_100531606_7.webp',
 'foto_100531606_8.webp',
 'foto_100531606_9.webp',
 'foto_100932241_1.webp',
 'foto_1