In [10]:
####################### IMPORTING ALL LIBRARIES #############################
# Core
import os
import json
import random
import itertools

# Math & Analysis
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Image Processing
from PIL import Image

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Vision
import torchvision.models as models
import torchvision.transforms as transforms

# Machine Learning
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_distances

# Similarity Search
import faiss

##########################################################################
# Setting working directory
# The project root defaults to the original SageMaker checkout, but can be
# overridden with the PROJECT_ROOT environment variable so the notebook is not
# tied to one machine. The relative paths used by later cells (e.g.
# "metadata.json") are resolved against this directory.
PROJECT_ROOT = os.environ.get(
    "PROJECT_ROOT",
    "/home/ec2-user/SageMaker/spring-2025-final-project-project-group-4",
)
os.chdir(PROJECT_ROOT)
print("Current working directory:", os.getcwd())

Current working directory: /home/ec2-user/SageMaker/spring-2025-final-project-project-group-4


## 1. Load Metadata and Embeddings
### Loads our pre-labeled metadata and the pre-computed image embeddings.

In [11]:
# === Load metadata and embeddings ===
def _read_json(json_path):
    """Parse the JSON file at ``json_path`` and return the decoded object."""
    with open(json_path, "r") as handle:
        return json.load(handle)


# Both files are read relative to the current working directory.
metadata = _read_json("metadata.json")
embeddings = _read_json("Parsa/checkpoint/clothing_embeddings.json")

## 2. Define Outfit Categories and Color Harmony Rules
### Categorizes clothing types into tops and bottoms, and defines acceptable color pairings for harmony.

In [12]:
# === Garment categories treated as tops and as bottoms ===
top_categories = set(("shirts", "t-shirts"))
bottom_categories = set(("pants", "shorts"))

# === Color harmony table ===
# Each row is (base color, space-separated partner colors). The rows are
# expanded into a dict mapping the base color to its list of partners, keeping
# the row order and the partner order.
_COLOR_PAIRING_ROWS = (
    ("black", "white gray red beige camel denim gold silver"),
    ("white", "black navy beige gray pastel denim brown"),
    ("gray", "black white red blue pink burgundy camel"),
    ("beige", "white black brown green burgundy navy gray"),
    ("navy", "white gray camel red beige pink brown"),
    ("brown", "beige white blue orange green mustard tan"),
    ("camel", "black white navy burgundy olive gray"),
    ("tan", "white black navy green burgundy brown"),
    ("red", "black white gray denim tan camel gold"),
    ("burgundy", "white beige black camel gray navy"),
    ("pink", "white gray beige denim navy brown"),
    ("blue", "white gray tan brown camel beige"),
    ("denim", "white black gray red camel beige"),
    ("green", "white black beige brown tan mustard denim"),
    ("olive", "white black tan camel denim beige"),
    ("mustard", "white brown olive denim camel"),
    ("orange", "white blue brown camel olive"),
    ("purple", "gray white navy black"),
    ("pastel", "white gray denim beige camel"),
    ("gold", "black white burgundy navy"),
    ("silver", "black white gray navy"),
)
fashion_color_pairs = {
    base_color: partners.split() for base_color, partners in _COLOR_PAIRING_ROWS
}


## 3. Compatibility Check Functions
### Defines functions to check style and color compatibility for outfit pairing.

In [5]:
# === Utility functions ===
def is_style_compatible(styles1, styles2):
    """Return True when the two garments share at least one style tag.

    Args:
        styles1: Style tags of the first item: an iterable of strings, a single
            string, or None.
        styles2: Style tags of the second item, in the same accepted forms.

    Returns:
        bool: True if at least one tag appears in both inputs, otherwise False.
    """
    def _as_tag_set(styles):
        # A bare string is ONE tag. set("casual") would split it into letters
        # and make unrelated styles "match" on shared characters.
        if styles is None:
            return set()
        if isinstance(styles, str):
            return {styles} if styles else set()
        return set(styles)

    return not _as_tag_set(styles1).isdisjoint(_as_tag_set(styles2))

def is_color_harmonious(color1, color2, pairs=None):
    """Return True when the two colors are listed as a harmonious pairing.

    The check is symmetric: the pairing counts if either color lists the other
    as a partner, so the verdict does not depend on argument order.

    Args:
        color1: Color name of the first item.
        color2: Color name of the second item.
        pairs: Optional mapping of color name -> collection of partner colors.
            Defaults to the module-level ``fashion_color_pairs``.

    Returns:
        bool: True if the pairing is listed in either direction, else False.
            Colors missing from the table are treated as having no partners.
    """
    table = fashion_color_pairs if pairs is None else pairs
    # The table is hand-written and not symmetric (e.g. "purple" lists "black"
    # but "black" does not list "purple"), so a one-directional lookup makes
    # the result depend on which garment is passed first.
    return color2 in table.get(color1, ()) or color1 in table.get(color2, ())

## 4. Organize Tops and Bottoms
### Splits the clothing items into top and bottom groups based on metadata.

In [6]:
# Split the catalogue into tops and bottoms. Each kept entry records the image
# path, its embedding, its style tags and a single color.
tops, bottoms = [], []

for category, items in metadata.items():
    # Choose the destination list once per category; categories that are
    # neither tops nor bottoms are skipped instead of being built and discarded.
    if category in top_categories:
        bucket = tops
    elif category in bottom_categories:
        bucket = bottoms
    else:
        continue

    for item in items:
        filename = item["filename"]
        path = f"Parsa/clothes/{category}/{filename}"
        if path not in embeddings:
            # No embedding stored for this image, so it cannot be paired.
            continue

        # Color may be stored as a list or as a single value. When it is a
        # list, only the first entry is used; an empty list yields "" instead
        # of raising IndexError.
        color = item.get("color", "")
        if isinstance(color, list):
            color = color[0] if color else ""

        bucket.append({
            "path": path,
            "embedding": embeddings[path],
            "style": item.get("style", []),
            "color": color
        })

## 5. Generate Training Pairs
### Pairs compatible (positive) and incompatible (negative) outfits using the embedding, style, and color info, then downsamples the negatives to balance the two classes.

In [7]:
# Fixed seed on a dedicated generator so the sampled negatives and the final
# pair order are the same on every run, without touching the global `random`
# state.
PAIR_SEED = 42
pair_rng = random.Random(PAIR_SEED)

positive_pairs, negative_pairs = [], []

# Label every top/bottom combination: 1 only when the styles overlap AND the
# colors are harmonious, otherwise 0.
for top in tops:
    for bottom in bottoms:
        same_style = is_style_compatible(top["style"], bottom["style"])
        harmonious_color = is_color_harmonious(top["color"], bottom["color"])
        label = 1 if same_style and harmonious_color else 0

        pair = {
            "top_embedding": top["embedding"],
            "bottom_embedding": bottom["embedding"],
            "label": label
        }

        if label == 1:
            positive_pairs.append(pair)
        else:
            negative_pairs.append(pair)

# Keep a random subset of negatives no larger than the positive set.
# NOTE: this only trims negatives; if there are fewer negatives than positives
# the classes stay unbalanced.
pair_rng.shuffle(negative_pairs)
negative_pairs = negative_pairs[:len(positive_pairs)]  # balance

pairs = positive_pairs + negative_pairs
pair_rng.shuffle(pairs)

with open("Parsa/checkpoint/training_pairs.json", "w") as f:
    json.dump(pairs, f)

print(f"✅ Saved {len(positive_pairs)} positive and {len(negative_pairs)} negative pairs to Parsa/checkpoint/training_pairs.json")

✅ Saved 1193 positive and 1193 negative pairs to Parsa/checkpoint/training_pairs.json


## 6. Sanity Check – Sample Pair Preview
### Shows a few generated pairs with partial embedding previews to confirm format is correct.

In [8]:
# Read the pairs back from the saved file and preview the first few.
with open("Parsa/checkpoint/training_pairs.json", "r") as f:
    pairs = json.load(f)

for pair in pairs[:5]:
    print(f"\nLabel: {pair['label']}")
    # The printed values are the first five embedding components, not file
    # paths, so the labels say "embedding".
    print("Top embedding (partial):", pair['top_embedding'][:5])
    print("Bottom embedding (partial):", pair['bottom_embedding'][:5])


Label: 1
Top path (partial): [1.5378458499908447, 1.5747472047805786, 0.3615378439426422, 0.18449342250823975, 0.018867067992687225]
Bottom path (partial): [0.3086162805557251, 0.007389220874756575, 0.0694318488240242, 0.7044539451599121, 0.13697466254234314]

Label: 1
Top path (partial): [2.0682568550109863, 1.937971830368042, 0.32155701518058777, 0.28209683299064636, 0.011589804664254189]
Bottom path (partial): [0.4600033760070801, 0.032074104994535446, 0.06877424567937851, 1.002170205116272, 0.040428489446640015]

Label: 1
Top path (partial): [2.03474760055542, 2.2361249923706055, 0.10728473961353302, 0.20168417692184448, 0.011874226853251457]
Bottom path (partial): [0.6128318905830383, 0.04115550220012665, 0.23760464787483215, 1.2158700227737427, 0.06490008533000946]

Label: 1
Top path (partial): [1.9874252080917358, 1.3292760848999023, 0.6204907298088074, 0.4153148829936981, 0.0886683240532875]
Bottom path (partial): [0.07905063033103943, 0.018887965008616447, 0.14997614920139313

## 7. Sanity Check with Metadata Matching
### Matches embeddings back to file paths and shows, for each pair, whether the style and color checks agree with the stored label.

In [9]:
# Build path -> metadata lookup
metadata_lookup = {}
for category, items in metadata.items():
    for item in items:
        filename = item["filename"]
        path = f"Parsa/clothes/{category}/{filename}"

        # Color may be a list or a single value. When it is a list, only the
        # first entry is used; an empty list yields "" instead of raising
        # IndexError.
        color = item.get("color", "")
        if isinstance(color, list):
            color = color[0] if color else ""

        metadata_lookup[path] = {
            "style": item.get("style", []),
            "color": color
        }

# Pairs store raw vectors only, so file paths are recovered by matching the
# first few embedding values. The index is built once instead of scanning the
# whole embeddings dict for every lookup; setdefault keeps the FIRST path for a
# given prefix, which matches what a front-to-back scan would return.
EMBEDDING_PREFIX_LEN = 5
path_by_prefix = {}
for emb_path, vector in embeddings.items():
    path_by_prefix.setdefault(tuple(vector[:EMBEDDING_PREFIX_LEN]), emb_path)

print("🔍 Sanity Check on Training Pairs\n")
for i, pair in enumerate(pairs[:10]):
    top_emb = pair["top_embedding"]
    bottom_emb = pair["bottom_embedding"]
    label = pair["label"]

    top_path = path_by_prefix.get(tuple(top_emb[:EMBEDDING_PREFIX_LEN]), "Unknown")
    bottom_path = path_by_prefix.get(tuple(bottom_emb[:EMBEDDING_PREFIX_LEN]), "Unknown")

    # An "Unknown" path has no metadata, so both checks fall back to empty
    # inputs.
    top_meta = metadata_lookup.get(top_path, {})
    bottom_meta = metadata_lookup.get(bottom_path, {})

    style_match = is_style_compatible(top_meta.get("style", []), bottom_meta.get("style", []))
    color_match = is_color_harmonious(top_meta.get("color", ""), bottom_meta.get("color", ""))

    print(f"Pair {i+1}")
    print(f"  👕 Top:    {top_path}")
    print(f"  👖 Bottom: {bottom_path}")
    print(f"  🎨 Color Match? {color_match}")
    print(f"  🧵 Style Match? {style_match}")
    print(f"  ✅ Label: {label}")
    print("-" * 50)

🔍 Sanity Check on Training Pairs

Pair 1
  👕 Top:    Parsa/clothes/t-shirts/16_t-shirts.jpg
  👖 Bottom: Parsa/clothes/pants/02_pants.jpg
  🎨 Color Match? True
  🧵 Style Match? True
  ✅ Label: 1
--------------------------------------------------
Pair 2
  👕 Top:    Parsa/clothes/t-shirts/30_t-shirts.jpg
  👖 Bottom: Parsa/clothes/pants/12_pants.jpg
  🎨 Color Match? True
  🧵 Style Match? True
  ✅ Label: 1
--------------------------------------------------
Pair 3
  👕 Top:    Parsa/clothes/t-shirts/14_t-shirts.jpg
  👖 Bottom: Parsa/clothes/pants/13_pants.jpg
  🎨 Color Match? True
  🧵 Style Match? True
  ✅ Label: 1
--------------------------------------------------
Pair 4
  👕 Top:    Parsa/clothes/t-shirts/08_t-shirts.jpg
  👖 Bottom: Parsa/clothes/pants/22_pants.jpg
  🎨 Color Match? True
  🧵 Style Match? True
  ✅ Label: 1
--------------------------------------------------
Pair 5
  👕 Top:    Parsa/clothes/t-shirts/24_t-shirts.jpg
  👖 Bottom: Parsa/clothes/shorts/34_shorts.jpg
  🎨 Color Match? 