In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from PIL import Image
import pandas as pd
import ast
import json
from pathlib import Path
import numpy as np

In [None]:

# Project locations (presumably a Google Drive mount inside Colab — verify).
BASE_DIR = Path("/content/drive/MyDrive/NLP_Fashionpedia")
IMG_DIR = BASE_DIR.joinpath("train_sample")               # folder holding the <image_id>.jpg files
ATTR_CSV = BASE_DIR.joinpath("train_attribute_data.csv")  # CSV with the per-image attribute lists
CHECKPOINT_DIR = BASE_DIR.joinpath("checkpoints")
# Only the leaf folder is created here: BASE_DIR itself must already exist,
# otherwise mkdir raises FileNotFoundError.
CHECKPOINT_DIR.mkdir(exist_ok=True)

# Run on the GPU whenever CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cuda


In [None]:
class FashionAttrDataset(Dataset):
    """Multi-label image dataset backed by a CSV of per-image attribute lists.

    The CSV must provide an ``image_id`` column and an ``attributes`` column
    holding the string form of a Python list (parsed with
    ``ast.literal_eval``). The image of each row is read from
    ``<img_dir>/<image_id>.jpg``.

    Args:
        csv_file: Path of the attribute CSV.
        img_dir: Directory containing the ``.jpg`` images.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        # The attribute lists are stored as text; parse them once up front so
        # __getitem__ does not repeat the work for every sample.
        self.df["attributes"] = self.df["attributes"].apply(ast.literal_eval)
        self.img_dir = Path(img_dir)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        ``image`` is the RGB image, passed through ``transform`` when one was
        given; ``label`` is a ``float32`` tensor built from the row's
        attribute list.
        """
        row = self.df.iloc[idx]
        # Explicit column lookup instead of attribute access, so a column name
        # can never be shadowed by a pandas Series attribute.
        img_path = self.img_dir / f"{row['image_id']}.jpg"
        # Image.open is lazy and keeps the underlying file open. convert()
        # returns an independent, fully loaded copy, so the handle can be
        # released right away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        # Compare against None: a callable transform may still be falsy.
        if self.transform is not None:
            image = self.transform(image)
        label = torch.tensor(row["attributes"], dtype=torch.float32)
        return image, label


In [None]:
# Preprocessing pipeline: resize every image to 224x224, convert it to a
# tensor, then normalize each channel with the ImageNet mean/std statistics
# (the input format ResNet-50 expects).
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

# Build the full dataset from the attribute CSV and the image directory.
dataset = FashionAttrDataset(csv_file=ATTR_CSV, img_dir=IMG_DIR, transform=transform)
print("Total samples in dataset:", len(dataset))

Total samples in dataset: 2000


In [None]:
# Split into training (80%) and validation (20%) subsets.
# The split uses its own seeded generator so it is identical on every
# Restart & Run All: validation losses stay comparable between runs and a
# sample can never move between train and validation across sessions.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size  # remainder, so both sizes always sum to len(dataset)
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")


Training samples: 1600
Validation samples: 400


In [None]:
# Batch loaders for both subsets. Worker processes are disabled
# (num_workers=0) to avoid multiprocessing pickling issues in Colab.
shared_loader_args = {"batch_size": 16, "num_workers": 0}
# Only the training loader reshuffles its samples; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)


In [None]:
# Determine the number of attributes from one sample (the first CSV row).
num_attributes = len(dataset.df.iloc[0]["attributes"])
print("Number of attributes:", num_attributes)

# Load ResNet-50 pre-trained on ImageNet.
# torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights`
# enum; IMAGENET1K_V1 is the checkpoint that `pretrained=True` resolves to, so
# both branches load the same weights. Older torchvision releases have no
# ResNet50_Weights attribute and keep using the legacy flag.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# Replace the final FC layer to output 'num_attributes' logits (multi-label
# classification: one independent logit per attribute).
model.fc = nn.Linear(model.fc.in_features, num_attributes)
model = model.to(device)


Number of attributes: 294




In [None]:
import time
from tqdm import tqdm

def train_epoch(model, dataloader, criterion, optimizer, device=None):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Network to train; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The returned average is per-sample only if the criterion reduces
            each batch to its mean (assumed — verify for custom losses).
        optimizer: Optimizer that updates the model parameters.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU for a parameter-free model), so
            the function no longer relies on a notebook-level global.

    Returns:
        float: Loss averaged over the samples actually processed.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    total_loss = 0.0
    num_samples = 0
    for images, labels in tqdm(dataloader, desc="Training", leave=False):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_size = images.size(0)
        # Weight each batch loss by its size so a smaller final batch does
        # not skew the average.
        total_loss += loss.item() * batch_size
        num_samples += batch_size

    # Divide by the samples actually seen: len(dataloader.dataset) is wrong
    # when drop_last=True or a sampler visits only part of the dataset.
    if num_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute the mean loss")
    return total_loss / num_samples

def validate(model, dataloader, criterion, device=None):
    """Evaluate ``model`` on ``dataloader`` and return the mean loss.

    The model is switched to evaluation mode and gradient tracking is
    disabled for the whole pass; no parameters are updated.

    Args:
        model: Network to evaluate.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The returned average is per-sample only if the criterion reduces
            each batch to its mean (assumed — verify for custom losses).
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU for a parameter-free model), so
            the function no longer relies on a notebook-level global.

    Returns:
        float: Loss averaged over the samples actually processed.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total_loss = 0.0
    num_samples = 0
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Validation", leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            batch_size = images.size(0)
            # Weight each batch loss by its size so a smaller final batch
            # does not skew the average.
            total_loss += loss.item() * batch_size
            num_samples += batch_size

    # Divide by the samples actually seen: len(dataloader.dataset) is wrong
    # when drop_last=True or a sampler visits only part of the dataset.
    if num_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute the mean loss")
    return total_loss / num_samples

# Multi-label objective: an independent sigmoid + binary cross-entropy per attribute.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 30
patience = 5  # Early stopping patience (number of epochs to wait for improvement)
best_val_loss = float('inf')
epochs_no_improve = 0
# NOTE(review): the checkpoint goes to the current working directory;
# CHECKPOINT_DIR is created earlier in the notebook but is not used here.
# Consider saving there if the file should outlive the runtime.
best_model_path = "best_model.pth"
# Set once a checkpoint is written in THIS run, so a stale file left by a
# previous session is never loaded by mistake.
best_checkpoint_saved = False

start_time = time.time()
for epoch in range(num_epochs):
    epoch_start = time.time()

    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)

    epoch_time = time.time() - epoch_start
    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Time: {epoch_time:.2f}s")

    # Early stopping: if validation loss improves, save the model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_model_path)
        best_checkpoint_saved = True
        epochs_no_improve = 0
        print("Validation loss improved, model saved.")
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= patience:
        print("Early stopping triggered.")
        break

total_training_time = time.time() - start_time
print(f"Total training time: {total_training_time/60:.2f} minutes")

# When the loop ends, the in-memory model holds the weights of the LAST
# epoch, which has trained up to `patience` epochs past the best validation
# loss. Reload the best checkpoint so everything that uses `model` afterwards
# (evaluation, final save) works with the best weights, not the overfit ones.
if best_checkpoint_saved:
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    print(f"Restored best model weights (val loss: {best_val_loss:.4f}).")




Epoch 1/30 | Train Loss: 0.1644 | Val Loss: 0.0824 | Time: 1664.58s
Validation loss improved, model saved.




Epoch 2/30 | Train Loss: 0.0799 | Val Loss: 0.0762 | Time: 182.34s
Validation loss improved, model saved.




Epoch 3/30 | Train Loss: 0.0725 | Val Loss: 0.0743 | Time: 182.83s
Validation loss improved, model saved.




Epoch 4/30 | Train Loss: 0.0672 | Val Loss: 0.0737 | Time: 184.77s
Validation loss improved, model saved.




Epoch 5/30 | Train Loss: 0.0618 | Val Loss: 0.0732 | Time: 183.52s
Validation loss improved, model saved.




Epoch 6/30 | Train Loss: 0.0565 | Val Loss: 0.0739 | Time: 182.85s
No improvement for 1 epoch(s).




Epoch 7/30 | Train Loss: 0.0516 | Val Loss: 0.0742 | Time: 182.03s
No improvement for 2 epoch(s).




Epoch 8/30 | Train Loss: 0.0470 | Val Loss: 0.0748 | Time: 183.96s
No improvement for 3 epoch(s).




Epoch 9/30 | Train Loss: 0.0421 | Val Loss: 0.0754 | Time: 183.21s
No improvement for 4 epoch(s).


                                                           

Epoch 10/30 | Train Loss: 0.0380 | Val Loss: 0.0756 | Time: 184.62s
No improvement for 5 epoch(s).
Early stopping triggered.
Total training time: 55.26 minutes




In [None]:
# Persist the model's current weights (state dict only) under BASE_DIR.
final_model_path = BASE_DIR.joinpath("resnet50_NLP_attributes_final.pth")
final_state = model.state_dict()
torch.save(final_state, final_model_path)
print(f"Model saved to {final_model_path}")


Model saved to /content/drive/MyDrive/NLP_Fashionpedia/resnet50_NLP_attributes_final.pth
