# 1. Crear subdataset etiquetado

In [1]:
import os
import shutil
import pandas as pd
from glob import glob

def crear_submuestra(dataset_path, output_folder, num_muestras=100):
    """Copy up to ``num_muestras`` files per style into one flat folder and index them in a CSV.

    Every immediate subdirectory of ``dataset_path`` is treated as one style
    (label). Inside each style folder the files matching ``*.*`` are sorted by
    path (plain string order, so ``x_10.jpg`` comes before ``x_2.jpg``) and the
    first ``num_muestras`` of them are copied into ``output_folder``.

    Args:
        dataset_path: Folder (train or test) containing one subfolder per style.
        output_folder: Destination folder for the copied files; created if missing.
        num_muestras: Maximum number of files to take from each style.

    Returns:
        Path of the ``labels.csv`` written inside ``output_folder``. The CSV has
        the columns ``id`` (file name inside ``output_folder``) and ``label``
        (name of the style folder the file came from).
    """
    os.makedirs(output_folder, exist_ok=True)

    data = []
    used_names = set()  # destination names already taken in the flat output folder
    estilos = sorted(
        d for d in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, d))
    )

    for estilo in estilos:
        estilo_path = os.path.join(dataset_path, estilo)
        # '*.*' also matches directories whose name contains a dot; shutil.copy
        # cannot copy those, so keep regular files only.
        imagenes = sorted(
            p for p in glob(os.path.join(estilo_path, '*.*')) if os.path.isfile(p)
        )

        for img_path in imagenes[:num_muestras]:
            filename = os.path.basename(img_path)

            # All styles share one destination folder. If another style already
            # used this file name, prefix the style so the earlier copy is not
            # overwritten and the CSV ids stay unique.
            dst_name = filename
            attempt = 0
            while dst_name in used_names:
                attempt += 1
                if attempt == 1:
                    dst_name = f"{estilo}_{filename}"
                else:
                    dst_name = f"{estilo}_{attempt}_{filename}"
            used_names.add(dst_name)

            shutil.copy(img_path, os.path.join(output_folder, dst_name))

            # One record per copied file: id = name in output folder, label = style
            data.append({'id': dst_name, 'label': estilo})

    # Explicit columns keep the CSV readable (header only) when nothing was copied.
    df = pd.DataFrame(data, columns=['id', 'label'])
    csv_path = os.path.join(output_folder, 'labels.csv')
    df.to_csv(csv_path, index=False)

    print(f"Copiadas {len(data)} imágenes y creado CSV en {csv_path}")
    return csv_path


In [None]:
def renombrar_imagenes_y_actualizar_csv(folder_path, csv_path):
    """Rename the files listed in a label CSV to ``1.jpg``, ``2.jpg``, ... and update the CSV.

    The n-th row of the CSV (1-based) gets the new name ``<n>.jpg``; the
    ``.jpg`` suffix is applied regardless of the original file extension.
    The ``id`` column of the CSV is then overwritten with the new names.

    The rename is done in two phases (original -> temporary -> final) because
    renaming directly could overwrite a file that is still waiting to be
    renamed, e.g. when the folder already contains ``1.jpg`` and ``2.jpg`` in
    a different order than the CSV.

    Args:
        folder_path: Folder that contains the files named in the ``id`` column.
        csv_path: CSV with at least an ``id`` column; rewritten in place.

    Raises:
        ValueError: If the ``id`` column contains duplicates.
        FileNotFoundError: If a listed file does not exist in ``folder_path``.
        FileExistsError: If a target name is already used by a file that is
            not listed in the CSV (it would be overwritten).
    """
    df = pd.read_csv(csv_path)

    old_names = [str(name) for name in df['id']]
    new_names = [f"{pos}.jpg" for pos in range(1, len(old_names) + 1)]

    # Validate everything before touching the disk so that a bad CSV cannot
    # leave the folder half-renamed.
    if len(set(old_names)) != len(old_names):
        raise ValueError(f"Duplicate ids in {csv_path}; cannot rename unambiguously")

    missing = [
        name for name in old_names
        if not os.path.isfile(os.path.join(folder_path, name))
    ]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} file(s) listed in {csv_path} not found in {folder_path}, "
            f"e.g. {missing[:5]}"
        )

    sources = set(old_names)
    clobbered = [
        name for name in new_names
        if name not in sources and os.path.exists(os.path.join(folder_path, name))
    ]
    if clobbered:
        raise FileExistsError(
            f"Target name(s) already used by files not listed in {csv_path}, "
            f"e.g. {clobbered[:5]}"
        )

    # Phase 1: move every file to a unique temporary name.
    staged = []
    for pos, old_name in enumerate(old_names, start=1):
        tmp_name = f".tmp_renombrar_{pos}_{os.path.basename(old_name)}"
        os.rename(os.path.join(folder_path, old_name),
                  os.path.join(folder_path, tmp_name))
        staged.append(tmp_name)

    # Phase 2: move the temporary files to their final sequential names.
    for tmp_name, new_name in zip(staged, new_names):
        os.rename(os.path.join(folder_path, tmp_name),
                  os.path.join(folder_path, new_name))

    df['id'] = new_names
    df.to_csv(csv_path, index=False)

    print(f"Imágenes renombradas y CSV actualizado en {csv_path}")


In [None]:
# Sampling configuration for the training split: source folder with one
# subfolder per style, flat destination folder, and images to take per style.
dataset_train_path = 'dataset_train/dataset_train'
output_folder = 'submuestra'
num_muestras = 100

# Build the sample; the returned CSV path is reused by later cells.
csv_path = crear_submuestra(
    dataset_path=dataset_train_path,
    output_folder=output_folder,
    num_muestras=num_muestras,
)


Copiadas 1900 imágenes y creado CSV en submuestra/labels.csv


In [None]:
# Load the label CSV written by the sampling step and display it for inspection.
# The bare trailing expression is what renders the table as the cell output.
csv = pd.read_csv('submuestra/labels.csv')
csv

Unnamed: 0,id,label
0,asian_0.jpg,asian
1,asian_1.jpg,asian
2,asian_10.jpg,asian
3,asian_100.jpg,asian
4,asian_101.jpg,asian
...,...,...
1895,victorian_204.jpg,victorian
1896,victorian_205.jpg,victorian
1897,victorian_206.jpg,victorian
1898,victorian_207.jpg,victorian


In [None]:
# Rename the sampled training images to sequential names and rewrite the CSV ids.
renombrar_imagenes_y_actualizar_csv(folder_path=output_folder, csv_path=csv_path)

Imágenes renombradas y CSV actualizado en submuestra/labels.csv


In [None]:
# Reload the training label CSV from disk and display it to check the ids it now contains.
csv = pd.read_csv('submuestra/labels.csv')
csv

Unnamed: 0,id,label
0,1.jpg,asian
1,2.jpg,asian
2,3.jpg,asian
3,4.jpg,asian
4,5.jpg,asian
...,...,...
1895,1896.jpg,victorian
1896,1897.jpg,victorian
1897,1898.jpg,victorian
1898,1899.jpg,victorian


## Datos de test

In [None]:
# Same sampling step for the test split, written to a subfolder of the
# training sample folder.
# NOTE(review): `dataset_train_path` is reused here although it points at the
# test data; the name is kept unchanged in case other cells read it.
dataset_train_path = 'dataset_test/dataset_test'
output_folder = 'submuestra/test'
num_muestras = 8

csv_path = crear_submuestra(
    dataset_path=dataset_train_path,
    output_folder=output_folder,
    num_muestras=num_muestras,
)


Copiadas 152 imágenes y creado CSV en submuestra/test/labels.csv


In [None]:
# Rename the sampled test images to sequential names and rewrite the CSV ids.
renombrar_imagenes_y_actualizar_csv(folder_path=output_folder, csv_path=csv_path)

Imágenes renombradas y CSV actualizado en submuestra/test/labels.csv


In [None]:
# Reload the test label CSV from disk and display it to check the ids it now contains.
csv = pd.read_csv('submuestra/test/labels.csv')
csv

Unnamed: 0,id,label
0,1.jpg,asian
1,2.jpg,asian
2,3.jpg,asian
3,4.jpg,asian
4,5.jpg,asian
...,...,...
147,148.jpg,victorian
148,149.jpg,victorian
149,150.jpg,victorian
150,151.jpg,victorian


# 2. Creamos clase para preparar los datos

In [1]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms

In [2]:
# Colab-specific: mount Google Drive at /content/drive so the dataset paths
# used below (under /content/drive/MyDrive) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:

class EstilosDataset(Dataset):
    """Image-classification dataset backed by a label CSV and an image folder.

    The CSV must have an ``id`` column (file name relative to ``root_dir``)
    and a ``label`` column (class name). Items are returned as
    ``(image, class_index)`` where the image is loaded as RGB and passed
    through ``transform`` when one is given.

    Args:
        csv_file: Path to the label CSV.
        root_dir: Folder that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
        label2idx: Optional mapping ``class name -> integer index``. When
            omitted, the mapping is built from the sorted unique labels of
            this CSV. Pass the training dataset's ``label2idx`` when building
            a validation/test dataset so both splits use the same indices
            even if one CSV lacks some classes.

    Raises:
        ValueError: If ``label2idx`` is given and the CSV contains a label
            that is not one of its keys.
    """

    def __init__(self, csv_file, root_dir, transform=None, label2idx=None):
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

        if label2idx is None:
            self.labels = sorted(self.df['label'].unique())
            self.label2idx = {label: idx for idx, label in enumerate(self.labels)}
        else:
            # Copy so later changes to the caller's dict do not affect this dataset.
            self.label2idx = dict(label2idx)
            # Keep `labels` ordered by class index, as in the default case.
            self.labels = sorted(self.label2idx, key=self.label2idx.get)
            unknown = sorted(set(self.df['label'].unique()) - set(self.label2idx))
            if unknown:
                raise ValueError(
                    f"Labels in {csv_file} missing from label2idx: {unknown}"
                )

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        row = self.df.iloc[idx]
        img_path = os.path.join(self.root_dir, row['id'])
        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.label2idx[row['label']]
        if self.transform:
            image = self.transform(image)
        return image, label

# Per-channel mean/std used for input normalization. These are the standard
# ImageNet statistics documented for torchvision's pretrained models; they are
# defined once so the train and test pipelines cannot drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation followed by tensor conversion and
# normalization.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),  # random crop, then resize to 224x224
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline: deterministic resize only, same normalization as training.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])


In [4]:
# Root folder of the dataset on the mounted Drive.
base_path = '/content/drive/MyDrive/style'

# Label CSVs for each split.
train_csv, test_csv = (
    os.path.join(base_path, csv_name) for csv_name in ('train.csv', 'test.csv')
)

# Image folders for each split.
train_dir, test_dir = (
    os.path.join(base_path, split_name) for split_name in ('train', 'test')
)


In [5]:

train_dataset = EstilosDataset(train_csv, train_dir, transform=transform_train)
test_dataset = EstilosDataset(test_csv, test_dir, transform=transform_test)

# Each dataset derives its label -> index mapping from its own CSV. If the two
# CSVs do not contain exactly the same set of labels, the same class gets a
# different integer in train and test and every evaluation metric is silently
# wrong, so fail loudly instead.
if train_dataset.label2idx != test_dataset.label2idx:
    raise ValueError(
        "Train and test label mappings differ: "
        f"only in train={sorted(set(train_dataset.label2idx) - set(test_dataset.label2idx))}, "
        f"only in test={sorted(set(test_dataset.label2idx) - set(train_dataset.label2idx))}"
    )

# Shuffle only the training data; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)


In [15]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Size the classifier head from the training labels instead of a hard-coded
# count, so it cannot drift from the data the model is trained on.
num_classes = len(train_dataset.labels)

# Load a pretrained ResNet-18 and replace its final layer with a new head.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases
# in favor of the `weights=` argument; kept as-is because the installed
# version is not visible here. TODO confirm and migrate.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Freeze everything except the last residual stage (layer4) and the new head (fc).
trainable_prefixes = ('layer4', 'fc')
for name, param in model.named_parameters():
    if not name.startswith(trainable_prefixes):
        param.requires_grad = False

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Optimize only the parameters that still require gradients (layer4 + fc).
optimizer = optim.Adam(
    [p for p in model.parameters() if p.requires_grad],
    lr=5e-5,
    weight_decay=1e-4,
)


In [16]:

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to train; switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the model's trainable parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the loss averaged per sample and the
        fraction of correctly classified samples over the whole epoch.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    # With the default 'mean' reduction the criterion returns a per-batch mean;
    # it is weighted by batch size below so a shorter final batch does not get
    # the same weight as a full one.
    mean_reduced = getattr(criterion, 'reduction', 'mean') == 'mean'
    loss_sum = 0.0
    correct = 0
    total = 0
    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        loss_sum += (loss.item() * batch_size) if mean_reduced else loss.item()
        predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
        total += batch_size
        correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples")

    epoch_loss = loss_sum / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


In [17]:

def eval_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the loss averaged per sample and the
        fraction of correctly classified samples over the whole loader.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    # With the default 'mean' reduction the criterion returns a per-batch mean;
    # it is weighted by batch size below so a shorter final batch does not get
    # the same weight as a full one.
    mean_reduced = getattr(criterion, 'reduction', 'mean') == 'mean'
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            loss_sum += (loss.item() * batch_size) if mean_reduced else loss.item()
            predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
            total += batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples")

    epoch_loss = loss_sum / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [None]:

num_epochs = 30

# One training pass plus one evaluation pass per epoch.
# Note: the "Val" figures are computed on `test_loader`.
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = eval_model(model, test_loader, criterion, device)

    # Single print call; the three parts are emitted on separate lines.
    print(
        f"Epoch {epoch+1}/{num_epochs}:",
        f"  Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f}",
        f"  Val loss:   {val_loss:.4f}, Val acc:   {val_acc:.4f}",
        sep="\n",
    )

print("Entrenamiento completado")


Epoch 1/30:
  Train loss: 2.9581, Train acc: 0.0726
  Val loss:   2.8562, Val acc:   0.1018


In [None]:
# Persist only the model's state_dict (not the whole module object) to Drive.
torch.save(model.state_dict(), '/content/drive/MyDrive/style/resnet_finetuned.pth')


In [10]:
import pandas as pd

# Path to the training label CSV
train_csv_path = '/content/drive/MyDrive/style/train.csv'

# Load the CSV and collect its unique labels in sorted order
df_train = pd.read_csv(train_csv_path)
unique_labels = sorted(df_train['label'].unique())

# Map each label to its position in the sorted list (label -> index)
label_map = dict(zip(unique_labels, range(len(unique_labels))))

print("label_map =", label_map)


label_map = {'asian': 0, 'coastal': 1, 'contemporary': 2, 'craftsman': 3, 'eclectic': 4, 'farmhouse': 5, 'french-country': 6, 'industrial': 7, 'mediterranean': 8, 'mid-century-modern': 9, 'modern': 10, 'rustic': 11, 'scandinavian': 12, 'shabby-chic-style': 13, 'southwestern': 14, 'traditional': 15, 'transitional': 16, 'tropical': 17, 'victorian': 18}
