# This is v1 of the recommender system. 

It uses a pretrained ResNet50 model to encode images into a latent space, and performs ANNOY on the latent space representations of images to generate recommendations. The dataset used for recommendations is a custom dataset scraped from nordstrom.com and the input to the NN algorithm is the mean latent space representation of all items in a user's wardrobe. 

In [None]:
!pip install tqdm
!pip install sentence-transformers
!pip install annoy

In [None]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
from tqdm import tqdm
from annoy import AnnoyIndex

In [None]:
# Build the encoder for images using ResNet50
resnet50 = models.resnet50(pretrained=True)
feature_extractor = nn.Sequential(*(list(resnet50.children())[:-1])) # remove fc layer used for classification
feature_extractor.eval()

In [33]:
# Define a custom dataset
class ImageDataset(Dataset):
    def __init__(self, image_folder, transform):
        self.image_folder = image_folder
        self.image_paths = [os.path.join(image_folder, f) for f in os.listdir(image_folder)]
        self.transform = transform
    def __len__(self):
        return len(self.image_paths)
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path)
        return self.transform(image), img_path # return (image, path)

In [34]:
# Define constants
def convert_to_rgb(image):
    # Convert RGBA or grayscale to RGB
    if image.mode != "RGB":
        image = image.convert("RGB")
    return image

transform = transforms.Compose([
    transforms.Lambda(convert_to_rgb),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
batch_size=32
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
feature_extractor.to(device)
print(device)

mps


In [35]:
def embed_image_dataset(image_folder, save_to_file=False, filename=""):
    dataset = ImageDataset(image_folder, transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    latent_representations = {}
    with torch.no_grad():
        for images, paths in tqdm(dataloader, desc="Processing Images", unit='batch'):
            images = images.to(device) # Output: [batch_size, 3, 224, 224]
            features = feature_extractor(images).squeeze() # Output: [batch_size, 2048]
            for path, feature in zip(paths, features.cpu()):
                latent_representations[path] = feature.numpy()
    if save_to_file:
        np.save(filename, latent_representations)

    return latent_representations

In [None]:
# load the fashion dataset and compute embeddings
# NOTE: without a GPU, this cell could take hours to finish
fd_image_folder = "./fashion-dataset/images"
fd_lat_rep = embed_image_dataset(fd_iamge_folder, True, "lat_rep_fd_nft.npy") # save embeddings to a file

In [36]:
# load the wardrobe dataset and compute embeddings
wardrobe_folder = "./sample-wardrobe/images"
wardrobe_lat_rep = embed_image_dataset(wardrobe_folder)

Processing Images: 100%|██████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.77batch/s]


In [37]:
# load the inventory (nordstrom) dataset and compute embeddings
inventory_folder = "./nordstrom-data/images"
inventory_lat_rep = embed_image_dataset(inventory_folder, True, "lat_rep_inventory_nft.npy")

Processing Images: 100%|██████████████████████████████████████████████████| 219/219 [10:44<00:00,  2.94s/batch]


In [46]:
# create list of fashion dataset embeddings and paths
inventory_lat_rep = np.load("lat_rep_inventory_nft.npy", allow_pickle=True).item()
inventory_img_paths = list(inventory_lat_rep.keys())
inventory_features = np.array(list(inventory_lat_rep.values()))

# create list of wardrobe embeddings and paths
wardrobe_paths = list(wardrobe_lat_rep.keys())
wardrobe_features = np.array(list(wardrobe_lat_rep.values()))

# get the mean embedding of all items in wardrobe
mean_embedding = np.mean(wardrobe_features, axis=0)

# mean_embedding = wardrobe_features[0]

In [47]:
# Perform Annoy
embedding_dim = 2048  # Original dimensionality
annoy_index = AnnoyIndex(embedding_dim, metric='euclidean')

# Add all items to Annoy index
for i, embedding in enumerate(inventory_features):
    annoy_index.add_item(i, embedding)

# Build the index
n_trees = 50
annoy_index.build(n_trees)  # Number of trees

# Query the index
n_neighbors = 10
indices = annoy_index.get_nns_by_vector(mean_embedding, n_neighbors, include_distances=True)

print("Recommended indices:", indices[0])
for idx in indices[0]:
    im = Image.open(inventory_img_paths[idx])
    im.show()

Recommended indices: [430, 2302, 1373, 5419, 6865, 1326, 2682, 1513, 3454, 6030]
