In [1]:
####################### IMPORTING ALL LIBRARIES #############################
# Core
import os
import json
import random
import itertools

# Math & Analysis
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Image Processing
from PIL import Image

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Vision
import torchvision.models as models
import torchvision.transforms as transforms

# Machine Learning
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_distances

# Similarity Search
import faiss

##########################################################################
# Setting working directory
# Every later path in this notebook ("metadata.json", "Parsa/...") is relative,
# so the kernel must run from the project root. The default is the SageMaker
# checkout; set the CLOTHING_PROJECT_DIR environment variable to run the
# notebook from a different location without editing this cell.
PROJECT_DIR = os.environ.get(
    "CLOTHING_PROJECT_DIR",
    "/home/ec2-user/SageMaker/spring-2025-final-project-project-group-4",
)
os.chdir(PROJECT_DIR)
print("Current working directory:", os.getcwd())

Current working directory: /home/ec2-user/SageMaker/spring-2025-final-project-project-group-4


## 1. File Renaming for Clean Image Format
### Standardize all image filenames by category to include a suffix like _shorts, _pants, etc., so that they match the format expected in metadata and model training.

In [12]:
%%bash
# Append the category name to every numbered image, e.g. 01.jpg -> 01_shorts.jpg.
# Abort on the first failing command so a failed `cd` can never cause files in
# some other directory to be renamed.
set -euo pipefail

for category in shorts pants shirts t-shirts; do
    (
        # Subshell: the `cd` only affects this category's loop.
        cd "Parsa/clothes/${category}"
        for i in {1..40}; do
            src="$(printf "%02d" "$i").jpg"
            dst="$(printf "%02d" "$i")_${category}.jpg"
            if [ -f "$src" ]; then
                mv "$src" "$dst"
            elif [ ! -f "$dst" ]; then
                # Neither the original nor the renamed file exists.
                echo "warning: ${category}/${src} not found" >&2
            fi
            # If only "$dst" exists the file was renamed on an earlier run; nothing to do.
        done
    )
done

## 2. Image Integrity Check
### Verify that every image referenced in the metadata file exists in the corresponding category directory. This catches missing or misnamed images before training begins.

In [2]:
# === Check if all images in metadata exist ===
metadata_path = "metadata.json"
data_dir = "Parsa/clothes"

# The metadata maps each category name to a list of item records.
with open(metadata_path, "r") as f:
    metadata = json.load(f)

total_images = 0    # records that carry a filename
missing_images = 0  # of those, how many are not present on disk

for category, entries in metadata.items():
    category_dir = os.path.join(data_dir, category)
    if os.path.isdir(category_dir):
        # Records without a (non-empty) "filename" are neither counted nor checked.
        for filename in filter(None, (entry.get("filename") for entry in entries)):
            full_path = os.path.join(category_dir, filename)
            total_images += 1
            if not os.path.isfile(full_path):
                print(f"❌ Missing file: {full_path}")
                missing_images += 1
    else:
        # A whole category folder is absent: report it once and move on.
        print(f"🚨 Folder missing: {category_dir}")

print(f"\n✅ Total images referenced in metadata: {total_images}")
print(f"❌ Missing images: {missing_images}")

❌ Missing file: Parsa/clothes/t-shirts/05_t-shirts.jpg 

✅ Total images referenced in metadata: 160
❌ Missing images: 1


## 3. Configuration & Transforms
### Set up project-level constants including batch size, epochs, device (CPU/GPU), and normalization transforms. These will be used consistently throughout the dataset, model, and training pipeline.

In [2]:
# === Configurations ===
metadata_path = "metadata.json"  # category -> list of item records
data_dir = "Parsa/clothes"       # one sub-folder per category
num_classes = 4  # pants, shorts, shirts, t-shirts (one per ClothingDataset.label_map entry)
batch_size = 32
epochs = 10
learning_rate = 0.0005
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === Reproducibility ===
# Training shuffles the batches and initialises the new classification head
# randomly, so seed the Python, NumPy and PyTorch RNGs up front. Without this
# every Restart & Run All produces different weights, embeddings and index.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# === Image Transforms ===
# Resize to the 224x224 input used throughout the notebook, convert to a tensor,
# then normalise each channel with the mean/std conventionally paired with
# ImageNet-pretrained torchvision models (the backbone loads IMAGENET1K_V1 weights).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

## 4. Dataset Loader
### Create a custom PyTorch Dataset that reads metadata, loads images from disk, applies transforms, and labels them based on category. This feeds into the DataLoader for efficient batching and shuffling during training.

In [3]:
# === Custom Clothing Dataset ===
class ClothingDataset(Dataset):
    """Labelled clothing images resolved from the category metadata.

    Args:
        metadata: Mapping of category name -> iterable of item records (dicts).
            Only categories present in ``label_map`` are used.
        root_dir: Directory containing one sub-folder per category.
        transform: Optional callable applied to each loaded RGB PIL image.

    Attributes:
        samples: ``(image_path, label)`` tuples for every referenced image that
            exists on disk, in metadata order.
        skipped: Paths referenced by the metadata but not found on disk, so the
            caller can see what was left out instead of it vanishing silently.
        label_map: Category name -> integer class label.
    """

    def __init__(self, metadata, root_dir, transform=None):
        self.samples = []
        self.skipped = []
        self.label_map = {"pants": 0, "shorts": 1, "shirts": 2, "t-shirts": 3}
        self.transform = transform

        for category, items in metadata.items():
            if category not in self.label_map:
                continue
            label = self.label_map[category]
            for item in items:
                # A record without a filename cannot be resolved to a file;
                # skip it rather than raising KeyError.
                filename = item.get("filename")
                if not filename:
                    continue
                image_path = os.path.join(root_dir, category, filename)
                if os.path.exists(image_path):
                    self.samples.append((image_path, label))
                else:
                    self.skipped.append(image_path)

    def __len__(self):
        """Return the number of images that were found on disk."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        img_path, label = self.samples[idx]
        # convert() returns a fully loaded copy, so the underlying file can be
        # closed as soon as the with-block exits.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        # Compare against None: a transform object that happens to be falsy
        # (e.g. an empty container) must still be applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# === Load Metadata + DataLoader ===
# Read the category -> records mapping from disk.
with open(metadata_path, "r") as metadata_file:
    metadata = json.load(metadata_file)

# Wrap the images in the custom Dataset and serve them in shuffled mini-batches.
dataset = ClothingDataset(metadata, root_dir=data_dir, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

## 5. Define and Train Model

### Build a lightweight ResNet18-based classification model, replace its final layer for 4-class clothing prediction, and train it using cross-entropy loss. Track loss and accuracy over multiple epochs.

In [5]:
# === Define CNN Feature Model ===
class FeatureModel(nn.Module):
    """ResNet18 whose final fully connected layer is replaced by a `num_classes`-way linear head."""

    def __init__(self):
        super().__init__()
        # Load the pretrained backbone, then swap its `fc` layer for a fresh
        # linear layer that keeps the same input width.
        backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.base = backbone

    def forward(self, x):
        """Run the batch `x` through the backbone and return the new head's output."""
        logits = self.base(x)
        return logits

model = FeatureModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# === Train Model ===
model.train()
for epoch in range(epochs):
    total_loss = 0  # sum of the per-batch losses (not divided by the batch count)
    correct = 0     # correctly classified training images this epoch
    total = 0       # training images seen this epoch
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Accuracy is measured on the training batches themselves, while the model
    # is still being updated; it is not a held-out validation score.
    acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {acc:.2f}%")

# === Save Trained Model ===
# Create the directory the checkpoint is actually written to; torch.save does
# not create missing parent directories.
checkpoint_path = "Parsa/checkpoint/feature_model.pth"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)
print(f"\n✅ Model saved at {checkpoint_path}")


Epoch [1/10], Loss: 2.6899, Accuracy: 78.62%
Epoch [2/10], Loss: 0.2335, Accuracy: 98.74%
Epoch [3/10], Loss: 0.0381, Accuracy: 100.00%
Epoch [4/10], Loss: 0.1027, Accuracy: 99.37%
Epoch [5/10], Loss: 0.1076, Accuracy: 98.74%
Epoch [6/10], Loss: 0.0636, Accuracy: 98.74%
Epoch [7/10], Loss: 0.0301, Accuracy: 100.00%
Epoch [8/10], Loss: 0.0284, Accuracy: 100.00%
Epoch [9/10], Loss: 0.0243, Accuracy: 100.00%
Epoch [10/10], Loss: 0.0804, Accuracy: 99.37%

✅ Model saved at Parsa/checkpoint/feature_model.pth


## 6. Generate Embeddings
### Use the trained model (minus its classification head) as a feature extractor to generate 512D vector embeddings for each image. These embeddings are saved as a dictionary to use later in similarity matching or pairing.

In [7]:
# === Extract Feature Embeddings ===
model.eval()
# Reuse every child module of the trained ResNet except the last one (the
# classification head), so the output is the feature vector that fed the head.
feature_extractor = nn.Sequential(*list(model.base.children())[:-1])

# === Prepare storage ===
embedding_dict = {}   # image path -> embedding as a plain list (JSON-serialisable)
embedding_list = []   # embeddings as numpy arrays, index-aligned with image_list
image_list = []       # image paths in the order they were embedded

# === Extract embeddings from dataset ===
with torch.no_grad():
    for img_path, _ in dataset.samples:
        # convert() returns a loaded copy, so the file can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        image = transform(image).unsqueeze(0).to(device)  # [1, 3, 224, 224]

        features = feature_extractor(image)  # Output shape: [1, 512, 1, 1]
        # flatten() always yields a 1-D vector, independent of which singleton
        # dimensions the extractor output happens to have.
        embedding = features.flatten().cpu().numpy()  # [512]

        embedding_dict[img_path] = embedding.tolist()  # Save for JSON
        embedding_list.append(embedding)
        image_list.append(img_path)

# === Save embeddings to JSON ===
# Make sure the output directory exists; open(..., "w") does not create it.
embeddings_path = "Parsa/checkpoint/clothing_embeddings.json"
os.makedirs(os.path.dirname(embeddings_path), exist_ok=True)
with open(embeddings_path, "w") as f:
    json.dump(embedding_dict, f)
print(f"✅ Embeddings saved to {embeddings_path}")

✅ Embeddings saved to Parsa/checkpoint/clothing_embeddings.json


## 7. Build and Save FAISS Index

### Convert all embeddings into a float32 NumPy array and store them in a FAISS index, enabling fast nearest-neighbor queries. Save the index to disk for future retrieval; the raw embeddings were already saved as JSON in the previous step.

In [8]:
# === Save to FAISS index ===
# One float32 row per image, in the same order as embedding_list.
embedding_array = np.asarray(embedding_list, dtype="float32")

# Flat L2 index sized to the embedding width.
embedding_dim = embedding_array.shape[1]
faiss_index = faiss.IndexFlatL2(embedding_dim)
faiss_index.add(embedding_array)

index_path = "Parsa/checkpoint/clothing_faiss.index"
faiss.write_index(faiss_index, index_path)
print(f"✅ FAISS index saved to {index_path}")

✅ FAISS index saved to Parsa/checkpoint/clothing_faiss.index
