In [6]:
import os

import numpy as np
import open_clip
import torch
from PIL import Image
from torch.ao.nn.quantized.functional import threshold
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

from src import config

In [7]:
print("Using device:", config.DEVICE)

Using device: mps


In [8]:
def load_image_embeddings_from_dir(dir_path):
    embeddings = []
    for file in os.listdir(dir_path):
        if file.endswith('.npy'):
            emb = np.load(os.path.join(dir_path, file))
            embeddings.append(torch.tensor(emb))
    return torch.stack(embeddings)

def load_metadata_embeddings_from_dir(dir_path):
    embeddings = []
    for file in os.listdir(dir_path):
        if file.endswith('_caption.npy'):
            emb = np.load(os.path.join(dir_path, file))
            embeddings.append(torch.tensor(emb))
    return torch.stack(embeddings)

In [9]:
# Load user likes and dislikes (images)
liked_image_embeddings = load_image_embeddings_from_dir(config.EMBEDDINGS_SEGMENTED_POS_OUTFITS_DIR)
disliked_image_embeddings = load_image_embeddings_from_dir(config.EMBEDDINGS_SEGMENTED_NEG_OUTFITS_DIR)

In [10]:
print(liked_image_embeddings.shape)

torch.Size([816, 1, 512])


In [11]:
print(disliked_image_embeddings.shape)

torch.Size([4531, 1, 512])


In [12]:
print(liked_image_embeddings[0].shape)

torch.Size([1, 512])


In [13]:
print(liked_image_embeddings[0])

tensor([[ 6.5039e-01, -7.1167e-02,  3.8184e-01,  1.6982e+00,  4.9048e-01,
          3.8159e-01, -4.5357e-03, -1.7847e-01,  8.6670e-01,  1.4526e-01,
         -1.3843e-01,  2.0996e-01,  1.6504e-01, -2.8442e-01,  6.2256e-01,
         -1.6968e-02,  2.0435e-01,  7.1045e-02, -2.6538e-01,  2.0776e-01,
         -5.0244e-01, -4.4653e-01, -1.0187e-01, -1.8809e+00,  5.3857e-01,
         -2.6880e-01,  1.8110e-03,  6.9531e-01,  6.5625e-01,  5.6299e-01,
          2.1118e-01,  1.0841e-02, -8.1299e-01, -3.9844e-01, -1.9727e-01,
          3.6206e-01,  1.3403e-01,  5.6689e-01,  4.2017e-01, -2.5562e-01,
         -5.3711e-01,  2.3254e-01, -5.6494e-01, -5.3271e-01,  4.1333e-01,
          2.9053e-01, -3.6035e-01,  3.5425e-01, -7.6465e-01, -1.5225e+00,
         -1.0469e+00, -4.7144e-01, -2.6904e-01, -6.0840e-01,  1.8811e-01,
         -5.4443e-01, -4.3506e-01, -3.4009e-01,  1.5918e-01, -5.6689e-01,
          4.1809e-02, -2.9572e-02,  7.7588e-01, -1.1359e-01, -4.2542e-02,
         -1.5491e-01,  3.1714e-01, -5.

In [14]:
print("min", torch.min(liked_image_embeddings[0]))
print("max", torch.max(liked_image_embeddings[0]))

min tensor(-6.0703)
max tensor(2.2461)


In [15]:
similarity = F.cosine_similarity(F.normalize(liked_image_embeddings[0], dim=-1), F.normalize(disliked_image_embeddings[80], dim=-1), dim=-1)
print("Cosine similarity score:", similarity.item())

similarity = F.cosine_similarity(F.normalize(liked_image_embeddings[0], dim=-1), F.normalize(liked_image_embeddings[8], dim=-1), dim=-1)
print("Cosine similarity score:", similarity.item())

# no difference between normalized and not normalized

Cosine similarity score: 0.48054736852645874
Cosine similarity score: 0.46512293815612793


In [16]:
# dot product
# new_liked_image_embeddings = liked_image_embeddings/np.linalg.norm(liked_image_embeddings, ord=2, axis=-1, keepdims=True)
# new_disliked_image_embeddings = disliked_image_embeddings/np.linalg.norm(disliked_image_embeddings, ord=2, axis=-1, keepdims=True)

In [17]:
# Create personal style anchor
style_anchor = torch.mean(liked_image_embeddings, dim=0, keepdim=True)

# Create negative style anchor
negative_anchor = torch.mean(disliked_image_embeddings, dim=0, keepdim=True)

In [18]:
print("style_anchor shape:", style_anchor.shape)
print("Style Anchor Embedding:", style_anchor)

style_anchor shape: torch.Size([1, 1, 512])
Style Anchor Embedding: tensor([[[ 3.1263e-01,  2.1046e-02,  9.8415e-01,  7.0405e-01,  1.2590e-01,
           9.4716e-03,  7.0533e-03, -2.0593e-01,  3.0972e-01,  3.3591e-01,
           2.4057e-01, -3.7011e-02,  9.2569e-02, -1.1175e-01,  6.4867e-02,
          -5.5690e-01,  2.2929e-02, -1.1523e-01,  6.4453e-03,  1.8714e-01,
          -4.2866e-01, -7.6405e-02,  2.2310e-01, -1.7353e+00,  2.1232e-02,
           2.9180e-01, -7.4424e-03,  1.4595e-01,  1.1066e-01, -1.1848e-01,
          -5.2210e-01,  2.8804e-01, -2.7563e-01, -5.9863e-02, -3.4566e-01,
           3.0929e-01,  3.5462e-01, -1.5565e-01,  1.0833e-01, -2.7737e-01,
          -1.5877e-01,  3.7248e-02,  2.1774e-01, -4.9635e-02,  4.7212e-01,
          -1.1349e-01, -3.8900e-01,  1.0901e-01, -2.3257e-01, -2.7758e-01,
          -4.0346e-01, -2.2742e-01,  1.6481e-01,  2.2738e-01, -1.6640e-01,
          -2.3008e-01,  1.7722e-02, -5.0922e-02, -2.8895e-02, -2.3505e-01,
          -4.0952e-02, -4.0721e-

In [19]:
print("negative_anchor shape:", negative_anchor.shape)
print("Negative Anchor Embedding:", negative_anchor)

negative_anchor shape: torch.Size([1, 1, 512])
Negative Anchor Embedding: tensor([[[ 4.6389e-01, -3.7068e-03,  1.1371e+00,  6.5363e-01,  2.7299e-01,
           1.2313e-01, -6.6666e-02, -2.1502e-02,  1.6423e-01,  2.0017e-01,
           1.4286e-01,  3.7322e-02,  1.6859e-01, -1.2475e-01,  1.2703e-01,
          -1.1470e-01, -8.4155e-02, -1.8580e-01, -8.1583e-02,  1.8141e-01,
          -5.0163e-01, -2.2163e-01, -7.4979e-02, -1.5879e+00,  1.6355e-01,
           2.0963e-01,  1.1418e-01,  5.9576e-03,  2.6664e-01,  1.1309e-01,
          -4.4484e-01,  1.5087e-01, -3.3179e-01,  9.0865e-02, -2.3829e-01,
           3.6133e-01,  3.4359e-01, -2.0637e-02,  1.0944e-01, -8.8985e-02,
          -1.8615e-01,  1.4062e-02, -6.8003e-02, -9.8007e-02,  4.9222e-01,
          -1.5266e-01, -3.7637e-01,  1.6831e-01, -1.5246e-01, -1.7899e-01,
          -3.1363e-01, -3.0271e-01,  1.0807e-01,  1.5037e-01, -9.7010e-02,
           6.0834e-04,  3.0621e-02,  4.2494e-03,  6.1245e-02, -2.7367e-01,
           3.5483e-02, -3.

In [20]:
print("min style_anchor", torch.min(style_anchor[0]))
print("max style_anchor", torch.max(style_anchor[0]))

min style_anchor tensor(-6.1928)
max style_anchor tensor(1.7304)


In [21]:
print("min negative_anchor", torch.min(negative_anchor[0]))
print("max negative_anchor", torch.max(negative_anchor[0]))

min negative_anchor tensor(-5.5279)
max negative_anchor tensor(1.7129)


In [22]:
try:
    model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:Marqo/marqo-fashionCLIP')
    tokenizer = open_clip.get_tokenizer('hf-hub:Marqo/marqo-fashionCLIP')
    model = model.to(config.DEVICE)
except Exception as e:
    raise RuntimeError(f"‚ùå Failed to load FashionCLIP model: {e}")

In [23]:
def build_image_embedding_database(image_files):
    embeddings = {}
    for file in image_files:
        path = os.path.join(config.TEST_DIR, file)
        img = Image.open(path).convert("RGB")
        img_tensor = preprocess_val(img).unsqueeze(0).to(config.DEVICE)
        with torch.no_grad(), torch.amp.autocast(device_type='mps'):
            embedding = model.encode_image(img_tensor).to(config.DEVICE)
        embeddings[path] = embedding
    return embeddings

def build_metadata_embedding_database(image_files):
    embeddings = {}
    for file in image_files:
        path = os.path.join(config.TEST_DIR, file)
        img = Image.open(path).convert("RGB")
        img_tensor = preprocess_val(img).unsqueeze(0).to(config.DEVICE)
        with torch.no_grad(), torch.amp.autocast(device_type='mps'):
            embedding = model.encode_image(img_tensor).to(config.DEVICE)
        embeddings[path] = embedding
    return embeddings

In [24]:
def embed_text(text_content):
    tokens = tokenizer(text_content).to(config.DEVICE)
    with torch.no_grad(), torch.amp.autocast(device_type="mps"):
        embedding = model.encode_text(tokens).to(config.DEVICE)
    return embedding

In [25]:
def update_user_anchor(current_anchor, new_like_path, alpha=0.3):
    img = Image.open(new_like_path).convert("RGB")
    img_tensor = preprocess_val(img).unsqueeze(0).to(config.DEVICE)
    with torch.no_grad(), torch.amp.autocast(device_type='mps'):
        embedding = model.encode_image(img_tensor).to(config.DEVICE)

    new_embed = F.normalize(embedding, dim=1)
    updated_anchor = F.normalize((1 - alpha) * current_anchor + alpha * new_embed, dim=1)
    return updated_anchor

In [26]:
# Optional: map score from [-1, 1] to [0, 1]
def rescale(x):
    return (x + 1) / 2

In [27]:
def score_image(image_embedding, anchor, neg_anchor=None, context_text=None):
    image_embedding = F.normalize(image_embedding, dim=-1)
    anchor = F.normalize(anchor, dim=-1)
    pos_score = F.cosine_similarity(image_embedding, anchor, dim=-1).item() # calculates the cosine similarity along the last dimension
    final_score = pos_score # calculates how similar the image is to the anchor

    if neg_anchor is not None:
        neg_anchor = F.normalize(neg_anchor, dim=-1)
        neg_score = F.cosine_similarity(image_embedding, neg_anchor, dim=-1).item()
        final_score = pos_score - neg_score

    if context_text is not None:
        text_embedding = embed_text(context_text)
        text_embedding = F.normalize(text_embedding, dim=-1)
        text_score = F.cosine_similarity(image_embedding, text_embedding, dim=-1).item()
        # Soft gate for text impact
        beta = torch.sigmoid(torch.tensor(pos_score - text_score))
        final_score = beta * final_score + (1 - beta) * text_score


    return rescale(final_score)

In [28]:
test_image_files = [
    f for f in os.listdir(config.TEST_DIR)
    if f.lower().endswith(config.IMAGE_FILE_EXTENSIONS)
]

test_image_files.sort()

embeddings = build_image_embedding_database(test_image_files)
scores = {}
score_threshold = 0.5

In [29]:
print(type(embeddings))
print(type(embeddings[list(embeddings.keys())[0]]))

<class 'dict'>
<class 'torch.Tensor'>


In [30]:
correct_count = 0
faulty_count = 0

for image_path, embedding in embeddings.items():
    embedding = embedding.to(config.DEVICE)
    style_anchor = style_anchor.to(config.DEVICE)
    score = score_image(embedding, style_anchor)
    scores[image_path] = score

    print(f"Image: {image_path}")
    print(f"Personalized score: {score:.4f}")

    # Condition checks
    if (score > score_threshold and 'good' in image_path) or \
       (score < score_threshold and 'bad' in image_path):
        print("classification correct\n")
        correct_count += 1
    else:
        print("classification faulty\n")
        faulty_count += 1

not_rated = 0

# Final summary
print(f"Total correct classifications: {correct_count} out of {correct_count + faulty_count - not_rated} ({correct_count / (correct_count + faulty_count - not_rated) * 100:.2f}%)")
print(f"Total faulty classifications: {faulty_count - not_rated} out of {correct_count + faulty_count - not_rated} ({(faulty_count - not_rated) / (correct_count + faulty_count - not_rated) * 100:.2f}%)")

Image: ../../data/outfits/outfits_to_test/10_good_o.jpg
Personalized score: 0.7930
classification correct

Image: ../../data/outfits/outfits_to_test/11_good_o.jpg
Personalized score: 0.7405
classification correct

Image: ../../data/outfits/outfits_to_test/12_good_o.jpg
Personalized score: 0.7377
classification correct

Image: ../../data/outfits/outfits_to_test/13_bad_o.jpg
Personalized score: 0.6924
classification faulty

Image: ../../data/outfits/outfits_to_test/14_bad_o.jpg
Personalized score: 0.6558
classification faulty

Image: ../../data/outfits/outfits_to_test/15_good_o.jpg
Personalized score: 0.7951
classification correct

Image: ../../data/outfits/outfits_to_test/16_good_o.jpg
Personalized score: 0.7755
classification correct

Image: ../../data/outfits/outfits_to_test/17_bad_o.jpg
Personalized score: 0.7908
classification faulty

Image: ../../data/outfits/outfits_to_test/18_bad_o.jpg
Personalized score: 0.7928
classification faulty

Image: ../../data/outfits/outfits_to_test/19_

In [31]:
correct_count = 0
faulty_count = 0

for image_path, embedding in embeddings.items():
    embedding = embedding.to(config.DEVICE)
    style_anchor = style_anchor.to(config.DEVICE)
    negative_anchor = negative_anchor.to(config.DEVICE)
    score = score_image(embedding, style_anchor, negative_anchor)
    scores[image_path] = score

    print(f"Image: {image_path}")
    print(f"Personalized score: {score:.4f}")

    # Condition checks
    if (score > score_threshold and 'good' in image_path) or \
       (score < score_threshold and 'bad' in image_path):
        print("classification correct\n")
        correct_count += 1
    else:
        print("classification faulty\n")
        faulty_count += 1

not_rated = 0

# Final summary
print(f"Total correct classifications: {correct_count} out of {correct_count + faulty_count - not_rated} ({correct_count / (correct_count + faulty_count - not_rated) * 100:.2f}%)")
print(f"Total faulty classifications: {faulty_count - not_rated} out of {correct_count + faulty_count - not_rated} ({(faulty_count - not_rated) / (correct_count + faulty_count - not_rated) * 100:.2f}%)")

Image: ../../data/outfits/outfits_to_test/10_good_o.jpg
Personalized score: 0.4949
classification faulty

Image: ../../data/outfits/outfits_to_test/11_good_o.jpg
Personalized score: 0.5185
classification correct

Image: ../../data/outfits/outfits_to_test/12_good_o.jpg
Personalized score: 0.5211
classification correct

Image: ../../data/outfits/outfits_to_test/13_bad_o.jpg
Personalized score: 0.4942
classification correct

Image: ../../data/outfits/outfits_to_test/14_bad_o.jpg
Personalized score: 0.4950
classification correct

Image: ../../data/outfits/outfits_to_test/15_good_o.jpg
Personalized score: 0.4882
classification faulty

Image: ../../data/outfits/outfits_to_test/16_good_o.jpg
Personalized score: 0.4641
classification faulty

Image: ../../data/outfits/outfits_to_test/17_bad_o.jpg
Personalized score: 0.4919
classification correct

Image: ../../data/outfits/outfits_to_test/18_bad_o.jpg
Personalized score: 0.5010
classification faulty

Image: ../../data/outfits/outfits_to_test/19_

In [32]:
correct_count = 0
faulty_count = 0

for image_path, embedding in embeddings.items():
    embedding = embedding.to(config.DEVICE)
    style_anchor = style_anchor.to(config.DEVICE)
    negative_anchor = negative_anchor.to(config.DEVICE)
    score = score_image(embedding, style_anchor, negative_anchor, "A chic, elegant outfit")
    scores[image_path] = score

    class_names = ['bad', 'good']  # 0=negative, 1=positive
    print(f"Image: {image_path}")
    print(f"Personalized score: {score:.4f}")

    # Condition checks
    if (score > score_threshold and 'good' in image_path) or \
       (score < score_threshold and 'bad' in image_path):
        print("classification correct\n")
        correct_count += 1
    else:
        print("classification faulty\n")
        faulty_count += 1

not_rated = 0

# Final summary
print(f"Total correct classifications: {correct_count} out of {correct_count + faulty_count - not_rated} ({correct_count / (correct_count + faulty_count - not_rated) * 100:.2f}%)")
print(f"Total faulty classifications: {faulty_count - not_rated} out of {correct_count + faulty_count - not_rated} ({(faulty_count - not_rated) / (correct_count + faulty_count - not_rated) * 100:.2f}%)")

Image: ../../data/outfits/outfits_to_test/10_good_o.jpg
Personalized score: 0.5306
classification correct

Image: ../../data/outfits/outfits_to_test/11_good_o.jpg
Personalized score: 0.5343
classification correct

Image: ../../data/outfits/outfits_to_test/12_good_o.jpg
Personalized score: 0.5375
classification correct

Image: ../../data/outfits/outfits_to_test/13_bad_o.jpg
Personalized score: 0.5336
classification faulty

Image: ../../data/outfits/outfits_to_test/14_bad_o.jpg
Personalized score: 0.5242
classification faulty

Image: ../../data/outfits/outfits_to_test/15_good_o.jpg
Personalized score: 0.5350
classification correct

Image: ../../data/outfits/outfits_to_test/16_good_o.jpg
Personalized score: 0.5174
classification correct

Image: ../../data/outfits/outfits_to_test/17_bad_o.jpg
Personalized score: 0.5374
classification faulty

Image: ../../data/outfits/outfits_to_test/18_bad_o.jpg
Personalized score: 0.5370
classification faulty

Image: ../../data/outfits/outfits_to_test/19_

In [33]:
# Load user likes and dislikes (metadata)
liked_metadata_embeddings = load_metadata_embeddings_from_dir(config.EMBEDDINGS_METADATA_POS_OUTFITS_DIR)
disliked_metadata_embeddings = load_metadata_embeddings_from_dir(config.EMBEDDINGS_METADATA_NEG_OUTFITS_DIR)

In [34]:
print(liked_metadata_embeddings.shape)
print(disliked_image_embeddings.shape)

torch.Size([816, 1, 512])
torch.Size([4531, 1, 512])


In [35]:
print("Sample image embedding:", liked_image_embeddings[0][:10])
print("Sample metadata embedding:", liked_metadata_embeddings[0][:10])
print("Unique metadata values:", torch.unique(liked_metadata_embeddings))

Sample image embedding: tensor([[ 6.5039e-01, -7.1167e-02,  3.8184e-01,  1.6982e+00,  4.9048e-01,
          3.8159e-01, -4.5357e-03, -1.7847e-01,  8.6670e-01,  1.4526e-01,
         -1.3843e-01,  2.0996e-01,  1.6504e-01, -2.8442e-01,  6.2256e-01,
         -1.6968e-02,  2.0435e-01,  7.1045e-02, -2.6538e-01,  2.0776e-01,
         -5.0244e-01, -4.4653e-01, -1.0187e-01, -1.8809e+00,  5.3857e-01,
         -2.6880e-01,  1.8110e-03,  6.9531e-01,  6.5625e-01,  5.6299e-01,
          2.1118e-01,  1.0841e-02, -8.1299e-01, -3.9844e-01, -1.9727e-01,
          3.6206e-01,  1.3403e-01,  5.6689e-01,  4.2017e-01, -2.5562e-01,
         -5.3711e-01,  2.3254e-01, -5.6494e-01, -5.3271e-01,  4.1333e-01,
          2.9053e-01, -3.6035e-01,  3.5425e-01, -7.6465e-01, -1.5225e+00,
         -1.0469e+00, -4.7144e-01, -2.6904e-01, -6.0840e-01,  1.8811e-01,
         -5.4443e-01, -4.3506e-01, -3.4009e-01,  1.5918e-01, -5.6689e-01,
          4.1809e-02, -2.9572e-02,  7.7588e-01, -1.1359e-01, -4.2542e-02,
         -1.54

In [36]:
liked_joint = torch.cat([liked_image_embeddings, liked_metadata_embeddings], dim=0)
disliked_joint = torch.cat([disliked_image_embeddings, disliked_metadata_embeddings], dim=0)

style_anchor = torch.mean(liked_joint, dim=0, keepdim=True)
negative_anchor = torch.mean(disliked_joint, dim=0, keepdim=True)

In [37]:
print("style_anchor shape:", style_anchor.shape)
print("negative_anchor shape:", negative_anchor.shape)

style_anchor shape: torch.Size([1, 1, 512])
negative_anchor shape: torch.Size([1, 1, 512])


In [38]:
print("Style Anchor Embedding:", style_anchor)
print("Negative Anchor Embedding:", negative_anchor)

Style Anchor Embedding: tensor([[[ 2.2366e-01, -1.5877e-01,  9.0211e-01,  8.2685e-01,  1.6340e-01,
           1.5664e-01, -1.3843e-01, -7.2630e-03,  4.2761e-01,  5.7784e-02,
           3.1500e-01, -1.8306e-01,  1.2633e-01, -1.4995e-01,  1.7120e-01,
          -3.7351e-01,  6.9910e-02, -1.3889e-01,  2.8279e-01,  2.4198e-01,
          -5.1334e-01, -7.8462e-02,  1.7118e-01, -9.8692e-01,  1.8611e-01,
           2.3277e-01,  1.1330e-01,  9.1591e-02,  5.0786e-02, -6.8069e-02,
          -1.6148e-01,  5.9783e-01, -2.3785e-01,  6.1609e-03, -2.9434e-01,
           2.4997e-01,  4.2124e-02, -3.3424e-02,  3.5561e-01, -2.2458e-01,
          -3.6564e-01, -9.3298e-02,  3.7153e-02, -9.3394e-02,  4.3627e-01,
           4.1131e-02, -5.1049e-02,  1.6026e-01, -3.2340e-01, -2.1054e-01,
          -5.1668e-01,  1.1531e-02,  1.3245e-01,  3.9304e-01, -1.1020e-01,
          -2.2519e-01, -3.0141e-02,  9.0973e-02, -1.3104e-01, -1.0718e-01,
          -1.2697e-01, -4.3541e-01,  3.5983e-01, -2.4906e-01, -3.3708e-01,
 

In [39]:
correct_count = 0
faulty_count = 0

for image_path, embedding in embeddings.items():
    embedding = embedding.to(config.DEVICE)
    style_anchor = style_anchor.to(config.DEVICE)
    negative_anchor = negative_anchor.to(config.DEVICE)
    score = score_image(embedding, style_anchor, negative_anchor)
    scores[image_path] = score

    print(f"Image: {image_path}")
    print(f"Personalized score: {score:.4f}")

    # Condition checks
    if (score > score_threshold and 'good' in image_path) or \
       (score < score_threshold and 'bad' in image_path):
        print("classification correct\n")
        correct_count += 1
    else:
        print("classification faulty\n")
        faulty_count += 1

not_rated = 0

# Final summary
print(f"Total correct classifications: {correct_count} out of {correct_count + faulty_count - not_rated} ({correct_count / (correct_count + faulty_count - not_rated) * 100:.2f}%)")
print(f"Total faulty classifications: {faulty_count - not_rated} out of {correct_count + faulty_count - not_rated} ({(faulty_count - not_rated) / (correct_count + faulty_count - not_rated) * 100:.2f}%)")

Image: ../../data/outfits/outfits_to_test/10_good_o.jpg
Personalized score: 0.5007
classification correct

Image: ../../data/outfits/outfits_to_test/11_good_o.jpg
Personalized score: 0.5159
classification correct

Image: ../../data/outfits/outfits_to_test/12_good_o.jpg
Personalized score: 0.5139
classification correct

Image: ../../data/outfits/outfits_to_test/13_bad_o.jpg
Personalized score: 0.4968
classification correct

Image: ../../data/outfits/outfits_to_test/14_bad_o.jpg
Personalized score: 0.4956
classification correct

Image: ../../data/outfits/outfits_to_test/15_good_o.jpg
Personalized score: 0.4966
classification faulty

Image: ../../data/outfits/outfits_to_test/16_good_o.jpg
Personalized score: 0.4778
classification faulty

Image: ../../data/outfits/outfits_to_test/17_bad_o.jpg
Personalized score: 0.4980
classification correct

Image: ../../data/outfits/outfits_to_test/18_bad_o.jpg
Personalized score: 0.5016
classification faulty

Image: ../../data/outfits/outfits_to_test/19

In [40]:
correct_count = 0
faulty_count = 0

for image_path, embedding in embeddings.items():
    embedding = embedding.to(config.DEVICE)
    style_anchor = style_anchor.to(config.DEVICE)
    negative_anchor = negative_anchor.to(config.DEVICE)
    score = score_image(embedding, style_anchor, negative_anchor, "A chic, elegant outfit")
    scores[image_path] = score

    print(f"Image: {image_path}")
    print(f"Personalized score: {score:.4f}")

    # Condition checks
    if (score > score_threshold and 'good' in image_path) or \
       (score < score_threshold and 'bad' in image_path):
        print("classification correct\n")
        correct_count += 1
    else:
        print("classification faulty\n")
        faulty_count += 1

not_rated = 0

# Final summary
print(f"Total correct classifications: {correct_count} out of {correct_count + faulty_count - not_rated} ({correct_count / (correct_count + faulty_count - not_rated) * 100:.2f}%)")
print(f"Total faulty classifications: {faulty_count - not_rated} out of {correct_count + faulty_count - not_rated} ({(faulty_count - not_rated) / (correct_count + faulty_count - not_rated) * 100:.2f}%)")

Image: ../../data/outfits/outfits_to_test/10_good_o.jpg
Personalized score: 0.5364
classification correct

Image: ../../data/outfits/outfits_to_test/11_good_o.jpg
Personalized score: 0.5344
classification correct

Image: ../../data/outfits/outfits_to_test/12_good_o.jpg
Personalized score: 0.5346
classification correct

Image: ../../data/outfits/outfits_to_test/13_bad_o.jpg
Personalized score: 0.5363
classification faulty

Image: ../../data/outfits/outfits_to_test/14_bad_o.jpg
Personalized score: 0.5246
classification faulty

Image: ../../data/outfits/outfits_to_test/15_good_o.jpg
Personalized score: 0.5425
classification correct

Image: ../../data/outfits/outfits_to_test/16_good_o.jpg
Personalized score: 0.5281
classification correct

Image: ../../data/outfits/outfits_to_test/17_bad_o.jpg
Personalized score: 0.5431
classification faulty

Image: ../../data/outfits/outfits_to_test/18_bad_o.jpg
Personalized score: 0.5395
classification faulty

Image: ../../data/outfits/outfits_to_test/19_

In [41]:
import torch
import numpy as np
import umap
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from mpl_toolkits.mplot3d import Axes3D

# Squeeze out the singleton dimension to get (N, 512)
liked_emb = liked_image_embeddings.squeeze(1).cpu().numpy()    # (816, 512)
disliked_emb = disliked_image_embeddings.squeeze(1).cpu().numpy()  # (4651, 512)

# Combine embeddings and create labels
embeddings = np.concatenate([liked_emb, disliked_emb], axis=0)
labels = np.array([1] * liked_emb.shape[0] + [0] * disliked_emb.shape[0])  # 1=liked, 0=disliked

# Color map: 0=blue (disliked), 1=orange (liked)
color_map = np.array(['blue', 'orange'])

# 2D UMAP Visualization
umap_2d = umap.UMAP(n_components=2, random_state=42).fit_transform(embeddings)
plt.figure(figsize=(8, 8))
plt.scatter(umap_2d[:, 0], umap_2d[:, 1], c=color_map[labels], alpha=0.7)
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Disliked (blue)', markerfacecolor='blue', markersize=10),
    Line2D([0], [0], marker='o', color='w', label='Liked (orange)', markerfacecolor='orange', markersize=10)
]
plt.legend(handles=legend_elements, title='Outfit Quality')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.tight_layout()
plt.savefig('fashion-clip-embeddings-2d-visualization.png')
plt.close()

# 3D UMAP Visualization
umap_3d = umap.UMAP(n_components=3, random_state=42).fit_transform(embeddings)
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
for label, color, name in zip([0, 1], ['blue', 'orange'], ['Disliked', 'Liked']):
    idxs = np.where(labels == label)
    ax.scatter(umap_3d[idxs, 0], umap_3d[idxs, 1], umap_3d[idxs, 2], c=color, label=f'{name} ({color})', alpha=0.7)
ax.set_xlabel('UMAP Dimension 1')
ax.set_ylabel('UMAP Dimension 2')
ax.set_zlabel('UMAP Dimension 3')
ax.legend(title='Outfit Quality')
plt.tight_layout()
plt.savefig('fashion-clip-embeddings-3d-visualization.png')
plt.close()

  warn(
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
  warn(


In [42]:
# Assume style_anchor and negative_anchor are (1, 1, 512) tensors
# Remove extra dimensions to get (2, 512)
anchors = torch.stack([
    style_anchor.squeeze(0).squeeze(0),   # (512,)
    negative_anchor.squeeze(0).squeeze(0) # (512,)
], dim=0)  # shape: (2, 512)
print(anchors.shape)

torch.Size([2, 512])


In [43]:
anchors_np = anchors.cpu().numpy()
print(anchors_np.shape)  # should be (2, 512)
print(anchors_np)

(2, 512)
[[ 0.2236568  -0.1587712   0.90210813 ... -0.11545078 -0.1967367
   0.18821849]
 [ 0.3852181  -0.16047184  1.0301242  ... -0.13445272 -0.1907
   0.1134533 ]]


In [44]:
from sklearn.decomposition import PCA

# anchors_np: shape (2, 512)
pca = PCA(n_components=2)
anchors_2d = pca.fit_transform(anchors_np)
colors = ['orange', 'blue']  # orange = liked, blue = disliked
labels = ['Liked (Style Anchor)', 'Disliked (Negative Anchor)']

plt.figure(figsize=(8, 8))
for i in range(2):
    plt.scatter(anchors_2d[i, 0], anchors_2d[i, 1], color=colors[i], s=100, label=labels[i])
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=labels[i], markerfacecolor=colors[i], markersize=10)
    for i in range(2)
]
plt.legend(handles=legend_elements, title='Anchor Type')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.tight_layout()
plt.savefig('fashion-clip-anchors-embeddings-2d-visualization.png')
plt.close()