In [6]:
####################### IMPORTING ALL LIBRARIES #############################
# Core
import os
import json
import random
import itertools

# Math & Analysis
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Image Processing
from PIL import Image

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Vision
import torchvision.models as models
import torchvision.transforms as transforms

# Machine Learning
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_distances

# Similarity Search
import faiss

##########################################################################
# Setting working directory
# The project root defaults to the original SageMaker checkout but can be
# overridden with the OUTFIT_PROJECT_ROOT environment variable, so the notebook
# also runs where the repository lives elsewhere. The relative paths used by
# later cells (e.g. "Parsa/checkpoint/...") resolve against this directory.
PROJECT_ROOT = os.environ.get(
    "OUTFIT_PROJECT_ROOT",
    "/home/ec2-user/SageMaker/spring-2025-final-project-project-group-4",
)
os.chdir(PROJECT_ROOT)
print("Current working directory:", os.getcwd())

Current working directory: /home/ec2-user/SageMaker/spring-2025-final-project-project-group-4


## 1. Dataset Preparation – Outfit Pair Loader
### Loads the top-bottom paired embeddings from a JSON file and prepares them as training data for the Siamese Network.

In [7]:
# === Siamese Dataset ===
class OutfitPairDataset(Dataset):
    """Dataset of top/bottom embedding pairs read from a JSON file.

    The whole file is parsed once in the constructor and kept in
    ``self.pairs``; each record is converted to tensors on access.

    Each record is indexed with the keys ``"top_embedding"``,
    ``"bottom_embedding"`` and ``"label"``.

    Args:
        pairs_file: Path of the JSON file to load.
    """

    def __init__(self, pairs_file):
        with open(pairs_file, "r") as handle:
            self.pairs = json.load(handle)

    def __len__(self):
        """Return the number of loaded records."""
        return len(self.pairs)

    def __getitem__(self, idx):
        """Return ``(top, bottom, label)`` for record ``idx`` as float32 tensors."""
        record = self.pairs[idx]
        # Same key order as the returned tuple; every field becomes float32.
        top, bottom, label = (
            torch.tensor(record[field], dtype=torch.float32)
            for field in ("top_embedding", "bottom_embedding", "label")
        )
        return top, bottom, label

## 2. Model Definition – Siamese Network for Style Compatibility
### Defines a two-branch Siamese neural network for learning a joint embedding space for outfit items. Both branches share one set of weights: the same small MLP embeds each item of a pair.

In [8]:
# === Siamese Network ===
class SiameseNetwork(nn.Module):
    """Siamese encoder: one shared MLP applied to both items of a pair.

    The encoder is ``Linear(input_dim, 256) -> ReLU -> Linear(256, embedding_dim)``.

    Args:
        input_dim: Size of each input feature vector (default 512).
        embedding_dim: Size of the produced embedding (default 128).
    """

    def __init__(self, input_dim=512, embedding_dim=128):
        super().__init__()
        # The attribute name ``fc`` and the layer positions define the
        # state_dict keys, so both stay fixed for saved checkpoints.
        layers = [
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, embedding_dim),
        ]
        self.fc = nn.Sequential(*layers)

    def forward_once(self, x):
        """Embed one input batch with the shared layers."""
        embedding = self.fc(x)
        return embedding

    def forward(self, input1, input2):
        """Embed both inputs with the same weights and return them as a pair."""
        return self.forward_once(input1), self.forward_once(input2)

## 3. Loss Function – Contrastive Loss for Similarity Learning
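### Uses contrastive loss to minimize the distance between compatible pairs and push incompatible pairs at least `margin` apart. As implemented, `label = 0` selects the pull-together term and `label = 1` selects the push-apart term.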

In [9]:
# === Contrastive Loss ===
class ContrastiveLoss(nn.Module):
    """Contrastive loss on the Euclidean distance between two embeddings.

    For each pair with distance ``d`` the loss term is::

        (1 - label) * d**2 + label * max(margin - d, 0)**2

    so pairs with label 0 are pulled together and pairs with label 1 are
    pushed until they are at least ``margin`` apart. The per-pair terms are
    averaged into a single scalar.

    NOTE(review): with this formula label 0 has to denote the pairs that
    should end up close together. Confirm that the labels stored with the
    training pairs follow that convention.

    Args:
        margin: Distance beyond which label-1 pairs contribute no loss
            (default 1.0).
    """

    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss for a batch of embedding pairs.

        Args:
            output1: Embeddings of the first items.
            output2: Embeddings of the second items, same shape as ``output1``.
            label: One value per pair (0 = pull together, 1 = push apart).
                May be shaped ``(batch,)`` or ``(batch, 1)``.

        Returns:
            Scalar tensor holding the mean loss over the batch.
        """
        euclidean_distance = F.pairwise_distance(output1, output2)
        # Flatten the labels so a (batch, 1) tensor lines up element-wise with
        # the (batch,) distances; without this it would silently broadcast to
        # a (batch, batch) matrix and mix every label with every distance.
        label = label.reshape(-1)
        pull_term = (1 - label) * torch.pow(euclidean_distance, 2)
        push_term = label * torch.pow(
            torch.clamp(self.margin - euclidean_distance, min=0.0), 2
        )
        loss = torch.mean(pull_term + push_term)
        return loss

## 4. Data Loading, Model Initialization & Training
### Loads the training pairs and prepares them for batching. 
### Initializes the Siamese model, loss function, and optimizer.
### Trains the model for a fixed number of epochs using contrastive loss, then saves the trained weights to `Parsa/checkpoint/siamese_model.pth`.

In [10]:
# === Reproducibility ===
# Seed PyTorch's global RNG before the model is built and the shuffled loader
# is iterated, so weight initialisation and batch order repeat after a kernel
# restart. (GPU runs may still differ slightly; see PyTorch's reproducibility
# notes.)
SEED = 42
torch.manual_seed(SEED)

# === Load Data ===
dataset = OutfitPairDataset("Parsa/checkpoint/training_pairs.json")
if len(dataset) == 0:
    # Fail early with a clear message instead of a ZeroDivisionError when the
    # epoch loss is averaged over zero batches below.
    raise ValueError("Parsa/checkpoint/training_pairs.json contains no training pairs")
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# === Model, Loss, Optimizer ===
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseNetwork().to(device)
criterion = ContrastiveLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# === Training Loop ===
epochs = 50
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for top, bottom, label in dataloader:
        # Move the batch to the same device as the model.
        top, bottom, label = top.to(device), bottom.to(device), label.to(device)
        optimizer.zero_grad()
        output1, output2 = model(top, bottom)
        loss = criterion(output1, output2, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch mean losses (the last batch may be smaller, so this
    # is not exactly the per-sample mean).
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

# === Save model ===
# Only the learned parameters (state_dict) are written, not the module object.
torch.save(model.state_dict(), "Parsa/checkpoint/siamese_model.pth")
print("✅ Siamese model saved as Parsa/checkpoint/siamese_model.pth")


Epoch [1/50], Loss: 0.5461
Epoch [2/50], Loss: 0.2133
Epoch [3/50], Loss: 0.1991
Epoch [4/50], Loss: 0.1983
Epoch [5/50], Loss: 0.1910
Epoch [6/50], Loss: 0.1834
Epoch [7/50], Loss: 0.1875
Epoch [8/50], Loss: 0.1825
Epoch [9/50], Loss: 0.1786
Epoch [10/50], Loss: 0.1766
Epoch [11/50], Loss: 0.1726
Epoch [12/50], Loss: 0.1764
Epoch [13/50], Loss: 0.1681
Epoch [14/50], Loss: 0.1698
Epoch [15/50], Loss: 0.1695
Epoch [16/50], Loss: 0.1728
Epoch [17/50], Loss: 0.1654
Epoch [18/50], Loss: 0.1667
Epoch [19/50], Loss: 0.1603
Epoch [20/50], Loss: 0.1652
Epoch [21/50], Loss: 0.1711
Epoch [22/50], Loss: 0.1631
Epoch [23/50], Loss: 0.1596
Epoch [24/50], Loss: 0.1654
Epoch [25/50], Loss: 0.1599
Epoch [26/50], Loss: 0.1614
Epoch [27/50], Loss: 0.1578
Epoch [28/50], Loss: 0.1681
Epoch [29/50], Loss: 0.1567
Epoch [30/50], Loss: 0.1591
Epoch [31/50], Loss: 0.1583
Epoch [32/50], Loss: 0.1568
Epoch [33/50], Loss: 0.1549
Epoch [34/50], Loss: 0.1562
Epoch [35/50], Loss: 0.1570
Epoch [36/50], Loss: 0.1579
E