In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import cv2
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import colors
import colorsys
import json
import re
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D , Flatten, UpSampling2D , Dense, concatenate
import pickle
import keras
from sklearn.model_selection import train_test_split
from itertools import cycle

In [2]:
#open training data file
with open('polyvore/train_no_dup.json') as json_file:
    train_data = json.load(json_file)

#open testing data file
with open('polyvore/test_no_dup.json') as json_file:
    test_data = json.load(json_file)

#open validation data file
with open('polyvore/valid_no_dup.json') as json_file:
    validation_data = json.load(json_file)

# Combine all the Test , Train and Validation data

In [3]:
combined_data = train_data + test_data + validation_data

In [4]:
df_combined = pd.DataFrame(combined_data)
df_combined.head()

Unnamed: 0,name,views,items,image,likes,date,set_url,set_id,desc
0,Casual,8743,"[{'index': 1, 'name': 'mock neck embroidery su...",http://ak1.polyvoreimg.com/cgi/img-set/cid/214...,394,One month,http://www.polyvore.com/casual/set?id=214181831,214181831,A fashion look from January 2017 by beebeely-l...
1,Being a Vans shoe model with Luke. Idk about t...,188,"[{'index': 1, 'name': 'nirvana distressed t-sh...",http://ak1.polyvoreimg.com/cgi/img-set/cid/120...,9,Two years,http://www.polyvore.com/being_vans_shoe_model_...,120161271,A fashion look from April 2014 featuring destr...
2,These Chanel bags is a bad habit .x,562,"[{'index': 1, 'name': 'monki singlet', 'price'...",http://ak1.polyvoreimg.com/cgi/img-set/cid/143...,32,Two years,http://www.polyvore.com/these_chanel_bags_is_b...,143656996,12.19.14
3,Avenger/Supernatural prp,2613,"[{'index': 1, 'name': 'tops', 'price': 18.0, '...",http://ak2.polyvoreimg.com/cgi/img-set/cid/186...,88,One year,http://www.polyvore.com/avenger_supernatural_p...,186627934,A fashion look from January 2016 by alyssaclai...
4,Boho (39),62,"[{'index': 1, 'name': 'yoins leather sexy v-ne...",http://ak2.polyvoreimg.com/cgi/img-set/cid/206...,3,5 months,http://www.polyvore.com/boho_39/set?id=206969379,206969379,A fashion look from August 2016 by kate-goida ...


In [5]:
df_combined.columns

Index(['name', 'views', 'items', 'image', 'likes', 'date', 'set_url', 'set_id',
       'desc'],
      dtype='object')

In [6]:
df_combined.shape

(21889, 9)

# Use the outfit to extract only the images that match articles of clothing in our definition of an "outfit."

In [7]:
# define all the clothes super category - FEATURES
dress = [3,4,5,6,15, 243, 244]
bottom = [7,8,9,10, 27,28,29, 237, 238, 239, 240, 241, 332]
top = [11,17,21, 104]
pullover = [19, 4495, 4496]
outerwear = [23,24,25,236, 18]
bag = [35, 36,37, 38]
shoe = [41, 42, 43,46,47,48,49,50, 261, 262, 263, 264, 265]

In [8]:
# Dress, Outerwear, Shoes, Bag
type_1_category_ids = {3,4,5,6,15, 243, 244, 23,24,25, 236,35, 36,37, 38, 41, 42, 43,46,47,48,49,50, 261, 262, 263, 264, 265}   
# Top, Bottom, Pullover, Outerwear, Shoes, Bag
type_2_category_ids = {11,17,21, 104,7,8,9,10, 27,28,29, 237, 238, 239, 240, 241, 332, 18,19, 4495, 4496, 23,24,25, 236,35, 36,37, 38, 41, 42, 43,46,47,48,49,50, 261, 262, 263, 264, 265}

In [9]:
# Define the optional category IDs for each outfit type
type_1_optional_category_ids = {23,24,25, 236, 18}  # Outerwear (optional)
type_2_optional_category_ids = {23,24,25, 236,18,19, 4495, 4496 }  # Pullover, Outerwear (optional)

#Filter and keep only the outfits that match the defined types
df_filtered = df_combined[
    (
        (df_combined['items'].apply(lambda x: any(item['categoryid'] in type_1_category_ids for item in x))) &
        (
            (df_combined['items'].apply(lambda x: set([item['categoryid'] for item in x]) & type_1_optional_category_ids == type_1_optional_category_ids)) |
            (~df_combined['items'].apply(lambda x: any(item['categoryid'] in type_1_optional_category_ids for item in x)))
        )
    ) |
    (
        (df_combined['items'].apply(lambda x: any(item['categoryid'] in type_2_category_ids for item in x))) &
        (
            (df_combined['items'].apply(lambda x: set([item['categoryid'] for item in x]) & type_2_optional_category_ids == type_2_optional_category_ids)) |
            (~df_combined['items'].apply(lambda x: any(item['categoryid'] in type_2_optional_category_ids for item in x)))
        )
    )
]

In [10]:
df_filtered = pd.DataFrame()
for index, row in df_combined.iterrows():
    outfit_first = [0,0,0,0]
    outfit_sec = [0,0,0,0,0,0]
    for item in row['items']:
        if item['categoryid'] in dress:
            outfit_first[0] = 1
        if item['categoryid'] in outerwear:
            outfit_first[1] = 1
            outfit_sec[3] = 1
        if item['categoryid'] in shoe:
            outfit_first[2] = 1
            outfit_sec[4] = 1
        if item['categoryid'] in bag:
            outfit_first[3] = 1
            outfit_sec[5] = 1
        if item['categoryid'] in top:
            outfit_sec[0] = 1
        if item['categoryid'] in bottom:
            outfit_sec[1] = 1
        if item['categoryid'] in pullover:
            outfit_sec[2] = 1
    if outfit_first[0] == 1 and  outfit_first[2] == 1 and outfit_first[3] == 1:
        df_filtered = df_filtered.append(row)
    if outfit_sec[0] == 1 and  outfit_sec[1] == 1 and outfit_sec[4] == 1 and outfit_sec[5] == 1:
        df_filtered = df_filtered.append(row)   

In [11]:
# Reset the index of the filtered DataFrame
df_filtered.reset_index(drop=True, inplace=True)
df_filtered.shape

(7186, 9)

In [12]:
def get_category_id(categoryid):
    if categoryid in dress:
        return "Dress"
    if categoryid in top:
        return "Top"
    if categoryid in bottom:
        return "Bottom"
    if categoryid in pullover:
        return "Pullover"
    if categoryid in outerwear:
        return "Outerwear"
    if categoryid in shoe:
        return "Shoe"
    if categoryid in bag:
        return "Bag"

In [13]:
# The base URL or directory path where the images are stored
base_image_path = "polyvore-images/images/"

# Function to generate the image path based on set_id and index
def generate_image_path(row):
    set_id = row['set_id']
    image_paths = []
    for item in row['items']:
        index = item['index']
        # enter the clothing type
        item['category_name'] = get_category_id(item['categoryid'])
        image_path = f"{base_image_path}{set_id}/{index}.jpg"  # Adjust the file extension if necessary
        item['image'] = image_path
        image_paths.append(image_path)
    return image_paths


# Create a new column 'image_path' containing the generated image paths
df_filtered['image_path'] = df_filtered.apply(generate_image_path, axis=1)

In [14]:
df_filtered.head()

Unnamed: 0,date,desc,image,items,likes,name,set_id,set_url,views,image_path
0,Two years,12.19.14,http://ak1.polyvoreimg.com/cgi/img-set/cid/143...,"[{'index': 1, 'name': 'monki singlet', 'price'...",32.0,These Chanel bags is a bad habit .x,143656996,http://www.polyvore.com/these_chanel_bags_is_b...,562.0,"[polyvore-images/images/143656996/1.jpg, polyv..."
1,7 months,A fashion look from June 2016 by sara-86 featu...,http://ak1.polyvoreimg.com/cgi/img-set/cid/201...,"[{'index': 1, 'name': 'new look light blue den...",83.0,Denim on Denim,201969694,http://www.polyvore.com/denim_on/set?id=201969694,276.0,"[polyvore-images/images/201969694/1.jpg, polyv..."
2,One month,A fashion look from December 2016 by beebeely-...,http://ak2.polyvoreimg.com/cgi/img-set/cid/213...,"[{'index': 1, 'name': 'yoins plus size blue st...",492.0,Yoins Casual: Faux Fur Vest,213824660,http://www.polyvore.com/yoins_casual_faux_fur_...,24915.0,"[polyvore-images/images/213824660/1.jpg, polyv..."
3,Two years,A fashion look from April 2014 featuring loose...,http://ak1.polyvoreimg.com/cgi/img-set/cid/118...,"[{'index': 1, 'name': 'official runway web sto...",3979.0,9. Lovebug,118117317,http://www.polyvore.com/lovebug/set?id=118117317,55501.0,"[polyvore-images/images/118117317/1.jpg, polyv..."
4,9 months,A fashion look from April 2016 by zabead featu...,http://ak2.polyvoreimg.com/cgi/img-set/cid/196...,"[{'index': 1, 'name': 'burberry floral jacquar...",287.0,Edgy urbanology,196794889,http://www.polyvore.com/edgy_urbanology/set?id...,1005.0,"[polyvore-images/images/196794889/1.jpg, polyv..."


# Train an autoencoder model for each type of clothing in our defined “outfit.”

In [15]:
#Create dataset for each clothing item
datasets = {}

for category, categoryid in {'Bag': bag, 'Shoe': shoe, 'Dress': dress, 'Bottom': bottom, 'Top': top, 'Pullover': pullover, 'Outerwear': outerwear}.items():
    category_dataset = []
    for index, row in df_filtered.iterrows():
        items = [item for item in row['items'] if item['categoryid'] in categoryid]
        if items:
            row_data = row.copy()
            row_data['items'] = items
            #row_data['image_path'] = [path for path in row_data['image_path'] if category in path]
            selected_paths = []
            for item in items:
                selected_paths.extend([path for path in row['image_path'] if path.endswith(f"{item['index']}.jpg")])
            row_data['image_path'] = selected_paths
            category_dataset.append(row_data)
    datasets[category] = pd.DataFrame(category_dataset)

In [16]:
# Accessing the dataset for a specific category
pullover_data = datasets['Pullover']
outerwear_data = datasets['Outerwear']
dress_data = datasets['Dress']
bottom_data = datasets['Bottom']
top_data = datasets['Top']
bag_data = datasets['Bag']
shoe_data = datasets['Shoe']

In [17]:
print(pullover_data.shape)
print(outerwear_data.shape)
print(dress_data.shape)
print(bottom_data.shape)
print(top_data.shape)
print(bag_data.shape)
print(shoe_data.shape)

(140, 10)
(2680, 10)
(3239, 10)
(4105, 10)
(4057, 10)
(7186, 10)
(7186, 10)


In [18]:
dress_data.head()

Unnamed: 0,date,desc,image,items,likes,name,set_id,set_url,views,image_path
3,Two years,A fashion look from April 2014 featuring loose...,http://ak1.polyvoreimg.com/cgi/img-set/cid/118...,"[{'index': 1, 'name': 'official runway web sto...",3979.0,9. Lovebug,118117317,http://www.polyvore.com/lovebug/set?id=118117317,55501.0,[polyvore-images/images/118117317/1.jpg]
4,9 months,A fashion look from April 2016 by zabead featu...,http://ak2.polyvoreimg.com/cgi/img-set/cid/196...,"[{'index': 1, 'name': 'burberry floral jacquar...",287.0,Edgy urbanology,196794889,http://www.polyvore.com/edgy_urbanology/set?id...,1005.0,[polyvore-images/images/196794889/1.jpg]
5,10 months,A fashion look from March 2016 by mari-yasuda-...,http://ak2.polyvoreimg.com/cgi/img-set/cid/194...,"[{'index': 1, 'name': 'michael kors sleeveless...",11.0,It's a Lacey kind of day,194797482,http://www.polyvore.com/its_lacey_kind_day/set...,139.0,[polyvore-images/images/194797482/1.jpg]
7,29 days,A fashion look from January 2017 by pokadoll f...,http://ak1.polyvoreimg.com/cgi/img-set/cid/215...,"[{'index': 1, 'name': 'sexy off-the-shoulder h...",328.0,Edgy Style,215293689,http://www.polyvore.com/edgy_style/set?id=2152...,1218.0,[polyvore-images/images/215293689/1.jpg]
9,Two months,A fashion look from December 2016 by stranjaki...,http://ak2.polyvoreimg.com/cgi/img-set/cid/212...,"[{'index': 1, 'name': 'balmain lace-up suede m...",977.0,dress,212866780,http://www.polyvore.com/dress/set?id=212866780,2939.0,[polyvore-images/images/212866780/1.jpg]


In [19]:
# reading .jpg images to bytes - OpenCV allows you to read images as bytes in Python
def prepare_images(fpath):
    image = cv2.imread(fpath, cv2.IMREAD_COLOR)
    #plt.imshow(image)
    #plt.show() 
    image = cv2.resize(image, (32,32))
    image = image / 255.0
    return image

In [20]:
def get_data(data_path, dataframe):
    cleanedList = [x for x in data_path if str(x) != 'nan']
    output = []
    for path in cleanedList:
        if path == 'nan':
            continue 
        else:
            file_path = os.path.normpath(path[0])
            output.append(prepare_images(file_path))
    return np.array(output)

In [21]:
clothing_list = ['Top', 'Bottom', 'Dress', 'Pullover', 'Outerwear', 'Shoe', 'Bag']

In [22]:
autoencoder_models = {}
encoder_file_path = "model/encoder/"
model_weights_file_path = "model/weights/"

In [25]:
#Train an autoencoder model for each type of clothing in our defined “outfit.”


for clothing_type, dataset in datasets.items():
    features = get_data(list(dataset['image_path']), dataset)
    
    
    #encoded
    input_layer = Input(shape=(32,32,3))
    conv1 = Conv2D(496, (3,3), activation = 'relu', padding = 'same')(input_layer)
    pool1 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv1)
    conv2 = Conv2D(248, (3,3), activation = 'relu', padding = 'same')(pool1)
    pool2 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv2)
    conv3 = Conv2D(124, (3,3), activation = 'relu', padding = 'same')(pool2)
    pool3 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv3)
    conv4 = Conv2D(64, (3,3), activation = 'relu', padding = 'same')(pool3)
    pool4 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv4)
    conv5 = Conv2D(32, (3,3), activation = 'relu', padding = 'same')(pool4)
    pool5 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv5)
    #conv6 = Conv2D(16, (3,3), activation = 'relu', padding = 'same')(pool5)
    #pool6 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv6)
    #conv7 = Conv2D(16, (3,3), activation = 'relu', padding = 'same')(pool6)
    #pool7 = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(conv7)
    
    encoded = Flatten(name="encoded")(pool5)
    
    #decoding
    #conv8 = Conv2D(16, (3,3), activation = 'relu', padding = 'same')(pool7)
    #pool8 = UpSampling2D((2,2))(conv8)
    #conv9 = Conv2D(16, (3,3), activation = 'relu', padding = 'same')(pool6)
    #pool9 = UpSampling2D((2,2))(conv9)
    conv10 = Conv2D(32, (3,3), activation = 'relu', padding = 'same')(pool5)
    pool10 = UpSampling2D((2,2))(conv10)
    conv11 = Conv2D(64, (3,3), activation = 'relu', padding = 'same')(pool10)
    pool11 = UpSampling2D((2,2))(conv11)
    conv12 = Conv2D(124, (3,3), activation = 'relu', padding = 'same')(pool11)
    pool12 = UpSampling2D((2,2))(conv12)
    conv13 = Conv2D(248, (3,3), activation = 'relu', padding = 'same')(pool12)
    pool13 = UpSampling2D((2,2))(conv13)
    conv14 = Conv2D(496, (3,3), activation = 'relu', padding = 'same')(pool13)
    pool14 = UpSampling2D((2,2))(conv14)
    
    decoded = Conv2D(3,1,1,activation='sigmoid',padding='same')(pool14)
    
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer="adadelta",loss='binary_crossentropy')
    encoder = Model(input_layer, encoded)
    
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience = 10, verbose = 1, mode = 'auto')
    print("Fitting " + clothing_type)
    autoencoder.fit(features, features, epochs=5, batch_size=100, validation_split=.15,callbacks=(early_stopping))
    
    encoder.save(encoder_file_path + clothing_type + "_encoder_model.h5")
    autoencoder.save(encoder_file_path + clothing_type + "_autoencoder_model.h5")
    
    autoencoder_models[clothing_type] = autoencoder
    
    weights = encoder.get_weights()
    with open(model_weights_file_path + clothing_type + "_model_weights.pkl" , 'wb') as w:
        pickle.dump(weights, w)
        
    del(autoencoder)
    del(encoder)
    tf.keras.backend.clear_session()

Fitting Bag
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Shoe
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Dress
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Bottom
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Top
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Pullover
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fitting Outerwear
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
for clothing_type in clothing_list:
    autoencoder_models[clothing_type] = tf.keras.models.load_model(encoder_file_path + clothing_type + "_autoencoder_model.h5")

In [24]:
autoencoder_models

{'Top': <keras.engine.functional.Functional at 0x2391ca9b460>,
 'Bottom': <keras.engine.functional.Functional at 0x2394ec026d0>,
 'Dress': <keras.engine.functional.Functional at 0x23944cd17c0>,
 'Pullover': <keras.engine.functional.Functional at 0x2394eac5790>,
 'Outerwear': <keras.engine.functional.Functional at 0x2394eda0dc0>,
 'Shoe': <keras.engine.functional.Functional at 0x2394a2196a0>,
 'Bag': <keras.engine.functional.Functional at 0x2394f449250>}

# Predict embeddings for each input image.

In [25]:
#Predict embeddings for each input image.
def predict_embeddings(image_url, autoencoder):
    # Preprocess image
    preprocessed_image = prepare_images(image_url)
    preprocessed_image = np.expand_dims(preprocessed_image, axis=0)
    encoder = tf.keras.Model(inputs=autoencoder.input, outputs=autoencoder.get_layer('conv2d_10').output)
    embeddings = encoder.predict(preprocessed_image)
    flatten_embeddings = embeddings.flatten()
    return embeddings

#Combined data data
com_data = []
com_likes = []

df_filtered['processed_image'] = None
df_filtered['embedding_image'] = None

for index, row in df_filtered.iterrows():
    processed_images = []
    embedding_images = []
    for item in row['image_path']:
        for key, value in item.items():
            #print(key, value)
            if key is not None:
                processed_image = prepare_images(value)
                processed_images.append(processed_image)
                predicted_embeddings = predict_embeddings(value, autoencoder_models[key])
                embedding_images.append(predicted_embeddings)
    df_filtered.at[index, 'processed_image'] = processed_images
    df_filtered.at[index, 'embedding_image'] = embedding_images
    com_data.append(embedding_images)
    com_likes.append(row['likes'])


print(len(com_data))
print(len(com_likes))

In [26]:
for clothing_type, dataset in datasets.items():
    print(dataset.shape)

(7186, 10)
(7186, 10)
(3239, 10)
(4105, 10)
(4057, 10)
(140, 10)
(2680, 10)


In [27]:
embedding_file_path = "model/embeddings/"

In [28]:
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import PCA


# Perform Incremental PCA on the training set
n_components = 50 
batch_size = 1000  

for clothing_type, dataset in datasets.items():
    embedding_images = []
    target_likes = []
    
    for index, row in dataset.iterrows():
        for item in row['image_path']:
            predicted_embeddings = predict_embeddings(item, autoencoder_models[clothing_type])
            embedding_images.append(predicted_embeddings)
            target_likes.append(row['likes'])

    # Reshape the embedding_images array to have 2 dimensions
    embedding_images = np.array(embedding_images, dtype=np.float32)  # Convert the list to a NumPy array
    embedding_images = np.reshape(embedding_images, (len(embedding_images), -1))  # Reshape the array
    
    print("Embedding images of " + clothing_type + " splitted into train and val data")
    embedding_images_train, embedding_images_val, likes_train, likes_val = train_test_split(embedding_images, target_likes, test_size=0.2, random_state=42)
    
    # Perform PCA on the training set
    n_components = min(embedding_images_train.shape[0], embedding_images_train.shape[1])  # Use the minimum of rows and columns as n_components

    pca = PCA(n_components=n_components)
    reduced_embeddings_train = pca.fit_transform(embedding_images_train)
    
    print("Transforming...")
    # Transform the validation set 
    reduced_embeddings_val = pca.transform(embedding_images_val)
    
    print("Saving " + clothing_type + " as a npy")
    # Save the reduced embeddings
    np.save(embedding_file_path + clothing_type + '_train_embeddings.npy', reduced_embeddings_train)
    np.save(embedding_file_path + clothing_type + '_val_embeddings.npy', reduced_embeddings_val)
    
    # Save the corresponding target likes
    np.save(embedding_file_path + clothing_type + '_train_likes.npy', likes_train)
    np.save(embedding_file_path + clothing_type + '_val_likes.npy', likes_val)
























KeyboardInterrupt: 

#Accessing the dataset for a specific category
dress_com_data = np.load(embedding_file_path + 'dress_embeddings.npy')
top_com_data = np.load(embedding_file_path + 'top_embeddings.npy')
bottom_com_data = np.load(embedding_file_path + 'bottom_embeddings.npy')
pullover_com_data = np.load(embedding_file_path + 'pullover_embeddings.npy')
outerwear_com_data = np.load(embedding_file_path + 'outerwear_embeddings.npy')
shoe_com_data = np.load(embedding_file_path + 'shoe_embeddings.npy')
bag_com_data = np.load(embedding_file_path + 'bag_embeddings.npy')
    

dress_data_train, top_data_train, bottom_data_train, pullover_data_train, outerwear_data_train, shoe_data_train, bag_data_train = inputs_train
dress_data_val, top_data_val, bottom_data_val, pullover_data_val, outerwear_data_val, shoe_data_val, bag_data_val = inputs_val

In [29]:
dress_data_train = np.load(embedding_file_path + 'Dress_train_embeddings.npy')
top_data_train = np.load(embedding_file_path + 'Top_train_embeddings.npy')
bottom_data_train = np.load(embedding_file_path + 'Bottom_train_embeddings.npy')
pullover_data_train = np.load(embedding_file_path + 'Pullover_train_embeddings.npy')
outerwear_data_train = np.load(embedding_file_path + 'Outerwear_train_embeddings.npy')
shoe_data_train = np.load(embedding_file_path + 'Shoe_train_embeddings.npy')
bag_data_train = np.load(embedding_file_path + 'Bag_train_embeddings.npy')

print(dress_data_train.shape)
print(top_data_train.shape)
print(bottom_data_train.shape)
print(outerwear_data_train.shape)
print(pullover_data_train.shape)
print(shoe_data_train.shape)
print(bag_data_train.shape)

(2733, 2733)
(3364, 3072)
(3338, 3072)
(2184, 2184)
(115, 115)
(6067, 3072)
(5842, 128)


In [30]:
from skimage.transform import resize

datasets = [dress_data_train, top_data_train, bottom_data_train, outerwear_data_train, pullover_data_train, shoe_data_train, bag_data_train]

# Normalize each dataset
for i in range(len(clothing_list)):
    # Resize each dataset to (32, 32, 3)
    resized_data = np.zeros((datasets[i].shape[0], 32, 32, 3))
    for j in range(datasets[i].shape[0]):
        resized_data[j] = resize(datasets[i][j], (32, 32, 3), anti_aliasing=True)
    
    # Normalize the pixel values to [0, 1]
    normalized_data = (resized_data - np.min(resized_data)) / (np.max(resized_data) - np.min(resized_data))
    
    # Convert the data to float32
    normalized_data = normalized_data.astype(np.float32)
    
    # Update the dataset with the normalized data
    datasets[i] = normalized_data

In [31]:
dress_data_train = datasets[0]
top_data_train = datasets[1]
bottom_data_train = datasets[2]
pullover_data_train = datasets[3]
outerwear_data_train = datasets[4]
shoe_data_train = datasets[5]
bag_data_train = datasets[6]

print(dress_data_train.shape)
print(top_data_train.shape)
print(bottom_data_train.shape)
print(outerwear_data_train.shape)
print(pullover_data_train.shape)
print(shoe_data_train.shape)
print(bag_data_train.shape)

(2733, 32, 32, 3)
(3364, 32, 32, 3)
(3338, 32, 32, 3)
(115, 32, 32, 3)
(2184, 32, 32, 3)
(6067, 32, 32, 3)
(5842, 32, 32, 3)


In [32]:
dress_likes_train = np.load(embedding_file_path + 'Dress_train_likes.npy')
top_likes_train = np.load(embedding_file_path + 'Top_train_likes.npy')
bottom_likes_train = np.load(embedding_file_path + 'Bottom_train_likes.npy')
pullover_likes_train = np.load(embedding_file_path + 'Pullover_train_likes.npy')
outerwear_likes_train = np.load(embedding_file_path + 'Outerwear_train_likes.npy')
shoe_likes_train = np.load(embedding_file_path + 'Shoe_train_likes.npy')
bag_likes_train = np.load(embedding_file_path + 'Bag_train_likes.npy')

print(dress_likes_train.shape)
print(top_likes_train.shape)
print(bottom_likes_train.shape)
print(outerwear_likes_train.shape)
print(pullover_likes_train.shape)
print(shoe_likes_train.shape)
print(bag_likes_train.shape)

(2733,)
(3364,)
(3338,)
(2184,)
(115,)
(6067,)
(5842,)


In [33]:
all_likes_train = np.concatenate([dress_likes_train, top_likes_train, bottom_likes_train, pullover_likes_train, outerwear_likes_train, shoe_likes_train, bag_likes_train], axis=0)

In [34]:
dress_data_val = np.load(embedding_file_path + 'Dress_val_embeddings.npy')
top_data_val = np.load(embedding_file_path + 'Top_val_embeddings.npy')
bottom_data_val = np.load(embedding_file_path + 'Bottom_val_embeddings.npy')
pullover_data_val = np.load(embedding_file_path + 'Pullover_val_embeddings.npy')
outerwear_data_val = np.load(embedding_file_path + 'Outerwear_val_embeddings.npy')
shoe_data_val = np.load(embedding_file_path + 'Shoe_val_embeddings.npy')
bag_data_val = np.load(embedding_file_path + 'Bag_val_embeddings.npy')

print(dress_data_val.shape)
print(top_data_val.shape)
print(bottom_data_val.shape)
print(outerwear_data_val.shape)
print(pullover_data_val.shape)
print(shoe_data_val.shape)
print(bag_data_val.shape)

(684, 2733)
(842, 3072)
(835, 3072)
(547, 2184)
(29, 115)
(1517, 3072)
(1461, 128)


In [35]:
dataset_val = [dress_data_val, top_data_val, bottom_data_val, outerwear_data_val, pullover_data_val, shoe_data_val, bag_data_val]

# Normalize each dataset
for i in range(len(clothing_list)):
    # Resize each dataset to (32, 32, 3)
    resized_data = np.zeros((dataset_val[i].shape[0], 32, 32, 3))
    for j in range(dataset_val[i].shape[0]):
        resized_data[j] = resize(dataset_val[i][j], (32, 32, 3), anti_aliasing=True)
    
    # Normalize the pixel values to [0, 1]
    normalized_data = (resized_data - np.min(resized_data)) / (np.max(resized_data) - np.min(resized_data))
    
    # Convert the data to float32
    normalized_data = normalized_data.astype(np.float32)
    
    # Update the dataset with the normalized data
    dataset_val[i] = normalized_data

In [36]:
dress_data_val = dataset_val[0]
top_data_val = dataset_val[1]
bottom_data_val = dataset_val[2]
pullover_data_val = dataset_val[3]
outerwear_data_val = dataset_val[4]
shoe_data_val = dataset_val[5]
bag_data_val = dataset_val[6]

print(dress_data_val.shape)
print(top_data_val.shape)
print(bottom_data_val.shape)
print(outerwear_data_val.shape)
print(pullover_data_val.shape)
print(shoe_data_val.shape)
print(bag_data_val.shape)

(684, 32, 32, 3)
(842, 32, 32, 3)
(835, 32, 32, 3)
(29, 32, 32, 3)
(547, 32, 32, 3)
(1517, 32, 32, 3)
(1461, 32, 32, 3)


In [37]:
dress_likes_val = np.load(embedding_file_path + 'Dress_val_likes.npy')
top_likes_val = np.load(embedding_file_path + 'Top_val_likes.npy')
bottom_likes_val = np.load(embedding_file_path + 'Bottom_val_likes.npy')
pullover_likes_val = np.load(embedding_file_path + 'Pullover_val_likes.npy')
outerwear_likes_val = np.load(embedding_file_path + 'Outerwear_val_likes.npy')
shoe_likes_val = np.load(embedding_file_path + 'Shoe_val_likes.npy')
bag_likes_val = np.load(embedding_file_path + 'Bag_val_likes.npy')

print(dress_likes_val.shape)
print(top_likes_val.shape)
print(bottom_likes_val.shape)
print(outerwear_likes_val.shape)
print(pullover_likes_val.shape)
print(shoe_likes_val.shape)
print(bag_likes_val.shape)

(684,)
(842,)
(835,)
(547,)
(29,)
(1517,)
(1461,)


In [38]:
all_likes_val = np.concatenate([dress_likes_val, top_likes_val, bottom_likes_val, pullover_likes_val, outerwear_likes_val, shoe_likes_val, bag_likes_val], axis=0)

In [39]:
# for each outfit get the encodings and create the input and output

dress_index = 0
top_index = 0
bottom_index = 0
pullover_index = 0
outerwear_index = 0
shoe_index = 0
bag_index = 0

outfit_embeddings_list = []
outfit_likes_list = []

for index, row in df_filtered.iterrows():
    outfit_embeddings = []
    for items in row['items']:
        # get the embeddings of all the items from each clothing type
        filename, clothing_type = item['image'], item['category_name']
        
        # for each clothing item maintain different index
        
        # get embeddings according to that index
        
        if clothing_type == 'Dress':
            embedding = dress_data_train[dress_index] 
            dress_index += 1
        elif clothing_type == 'Top':
            embedding = top_data_train[top_index] 
            top_index += 1
        elif clothing_type == 'Bottom':
            embedding = bottom_data_train[bottom_index] 
            bottom_index += 1
        elif clothing_type == 'Pullover':
            
            embedding = pullover_data_train[pullover_index] 
            pullover_index += 1
        elif clothing_type == 'Outerwear':
            embedding = outerwear_data_train[outerwear_index]
            outerwear_index += 1
        elif clothing_type == 'Shoe':
            embedding = shoe_data_train[shoe_index]
            shoe_index += 1
        elif clothing_type == 'Bag':
            embedding = bag_data_train[bag_index]
            bag_index += 1
        else:
            np.zeros((embedding_dimension,))
        
        # if an article is missing put embedding as 0
        # combine all the embeddings in outfit_embeddings
        outfit_embeddings.append(embedding)
        
    # Combine the embeddings for the current outfit and append it to the outfit_embeddings list
    #print(outfit_embeddings)
    combined_embedding = np.concatenate(outfit_embeddings)
    outfit_embeddings_list.append(combined_embedding)

    # Get the likes for the current outfit and append it to the outfit_likes list
    outfit_likes_list.append(row['likes'])
        

TypeError: string indices must be integers

# Train a multi-input CNN to predict the likes for each outfit.

In [None]:
model = Sequential()


# Define the input shapes for each clothing type
dress_input = Input(shape=(32, 32, 3), name='dress_input')
top_input = Input(shape=(32, 32, 3), name='top_input')
bottom_input = Input(shape=(32, 32, 3), name='bottom_input')
pullover_input = Input(shape=(32, 32, 3), name='pullover_input')
outerwear_input = Input(shape=(32, 32, 3), name='outerwear_input')
shoe_input = Input(shape=(32, 32, 3), name='shoe_input')
bag_input = Input(shape=(32, 32, 3), name='bag_input')


# Define the convolutional layers for each clothing type
dress_conv = Conv2D(32, (3, 3), activation='relu')(dress_input)
top_conv = Conv2D(32, (3, 3), activation='relu')(top_input)
bottom_conv = Conv2D(32, (3, 3), activation='relu')(bottom_input)
pullover_conv = Conv2D(32, (3, 3), activation='relu')(pullover_input)
outerwear_conv = Conv2D(32, (3, 3), activation='relu')(outerwear_input)
shoe_conv = Conv2D(32, (3, 3), activation='relu')(shoe_input)
bag_conv = Conv2D(32, (3, 3), activation='relu')(bag_input)

# Flatten the convolutional outputs
dress_flat = Flatten()(dress_conv)
top_flat = Flatten()(top_conv)
bottom_flat = Flatten()(bottom_conv)
pullover_flat = Flatten()(pullover_conv)
outerwear_flat = Flatten()(outerwear_conv)
shoe_flat = Flatten()(shoe_conv)
bag_flat = Flatten()(bag_conv)

# Concatenate the flattened outputs
merged_features = concatenate([dress_flat, top_flat, bottom_flat, pullover_flat, outerwear_flat, shoe_flat, bag_flat])

# Fully connected layers
fc1 = Dense(128, activation='relu')(merged_features)
fc2 = Dense(64, activation='relu')(fc1)
output = Dense(1, activation='linear')(fc2)  # Output layer for likes prediction

# Define the model with multiple inputs and single output
model = Model(inputs=[dress_input, top_input, bottom_input, pullover_input, outerwear_input, shoe_input, bag_input], outputs=output)

In [51]:
model.compile(optimizer="adam", loss="mean_squared_error")

In [56]:
model.fit([dress_data_train, top_data_train, bottom_data_train, pullover_data_train, outerwear_data_train, shoe_data_train, bag_data_train], 
          all_likes_train,
          epochs=10, 
          batch_size=32, 
          validation_data=(
              [dress_data_val, top_data_val, bottom_data_val, pullover_data_val, outerwear_data_val, shoe_data_val, bag_data_val],
              all_likes_val,
              )
         )

ValueError: Data cardinality is ambiguous:
  x sizes: 2733, 3364, 3338, 2184, 115, 6067, 5842
  y sizes: 23643
Make sure all arrays contain the same number of samples.

In [39]:
# Create a dictionary to map clothing types to their corresponding data and target likes
clothing_data = {
    clothing_type: {
        'data': np.load(embedding_file_path + clothing_type + '_train_embeddings.npy'),
        'likes': np.load(embedding_file_path + clothing_type + '_train_likes.npy')
    } for clothing_type in clothing_list
}
clothing_data

{'Top': {'data': array([[-8.20594258e-04, -7.82323186e-04,  8.73650497e-05, ...,
           2.56170946e-10, -3.07404519e-11, -6.64314020e-11],
         [ 5.90826105e-03,  2.70249584e-04, -1.15899194e-03, ...,
          -3.94650319e-11, -6.58210361e-11, -2.34807052e-11],
         [-3.20785027e-03,  7.83498283e-04, -8.31825426e-04, ...,
          -1.10233177e-10, -1.93058611e-10, -1.35758115e-11],
         ...,
         [ 1.67688262e-03,  4.49825398e-04, -4.38671821e-04, ...,
           1.25505245e-10, -3.46517398e-11,  5.40763788e-12],
         [ 1.85598875e-03, -7.84277159e-04, -4.27327730e-04, ...,
           1.35156036e-11,  2.15330080e-11, -1.97920881e-11],
         [ 2.51192716e-04,  6.62153354e-04, -3.39364778e-04, ...,
           3.01979058e-12,  6.57547697e-11, -1.55233222e-11]], dtype=float32),
  'likes': array([1071.,   57.,  216., ...,    5.,   93.,   61.])},
 'Bottom': {'data': array([[-1.12439471e-03,  1.73573862e-04,  1.27199834e-04, ...,
           2.17156682e-10, -8.5852

In [52]:
def custom_data_generator(batch_size=32):
    while True:
        data_batch, likes_batch = [], []
        for clothing_type in cycle(clothing_list):
            current_data = clothing_data[clothing_type]['data']
            current_likes = clothing_data[clothing_type]['likes']
            available_samples = len(current_data)
            
            if available_samples < batch_size // len(clothing_list):
                # Sample all available samples in the category if they are not enough for the batch size
                indices = np.arange(available_samples)
            else:
                # Randomly sample batch_size // len(clothing_types) samples from the current category with replacement
                indices = np.random.choice(available_samples, batch_size // len(clothing_list), replace=False)
                
            data_batch.extend(current_data[indices])
            likes_batch.extend(current_likes[indices])
            
            if len(data_batch) >= batch_size:
                yield np.array(data_batch[:batch_size]), np.array(likes_batch[:batch_size])
                data_batch, likes_batch = data_batch[batch_size:], likes_batch[batch_size:]


In [53]:
# Define the number of steps per epoch based on the number of samples in the largest category
batch_size = 1000
max_samples = max(len(clothing_data[clothing_type]['data']) for clothing_type in clothing_list)
steps_per_epoch = max_samples // batch_size

# Fit the model using the custom data generator
model.fit(custom_data_generator(batch_size=batch_size), epochs=10, steps_per_epoch=steps_per_epoch,
          validation_data=(np.array([val_data for _, val_data in clothing_data.values()]),
                           np.array([val_likes for _, val_likes in clothing_data.values()])))

  yield np.array(data_batch[:batch_size]), np.array(likes_batch[:batch_size])


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [None]:
model.predict(
    [dress_data_val, top_data_val, bottom_data_val, pullover_data_val, outerwear_data_val, shoe_data_val, bag_data_val]
)

In [None]:
#Input end user wardrobe and predict the embedding for each image.
example_url = "user/example"
wardrobe_images = []  # List of wardrobe images

for images in os.listdir(example_url):
    wardrobe_images.add(os.path.join(example_url, images))

In [None]:
# Predict embedding for each user input wardrobe
preprocessed_images = [prepare_images(image_path) for image_path in wardrobe_images]

dress_embeddings = autoencoders['Dress'].predict(preprocessed_images)
top_embeddings = autoencoders['Top'].predict(preprocessed_images)
bottom_embeddings = autoencoders['Bottom'].predict(preprocessed_images)
pullover_embeddings = autoencoders['Pullover'].predict(preprocessed_images)
outerwear_embeddings = autoencoders['Outerwear'].predict(preprocessed_images)
shoe_embeddings = autoencoders['Shoe'].predict(preprocessed_images)
bag_embeddings = autoencoders['Bag'].predict(preprocessed_images)

# Store the embeddings for further use
embedding_dict = {
    'Dress': dress_embeddings,
    'Top': top_embeddings,
    'Bottom': bottom_embeddings,
    'Pullover': pullover_embeddings,
    'Outerwear': outerwear_embeddings,
    'Shoe': shoe_embeddings,
    'Bag': bag_embeddings
}

In [None]:
dress_images = ['user/wardrobe/dress1.jpg', 'user/wardrobe/dress2.jpg']
top_images = ['user/wardrobe/top.jpg', 'user/wardrobe/shirt.jpg']  # List of top images
bottom_images = ['user/wardrobe/bottom.jpg', 'user/wardrobe/skirt2.jpg', 'user/wardrobe/skirt1.jpg']  # List of bottom images
shoe_images = ['user/wardrobe/shoee1.jpg']  # List of shoe images
bag_images = ['user/wardrobe/bag1.jpg']  # List of bag images
pullover_images = []  # List of pullover images
outerwear_images = ['user/wardrobe/jacket.jpg'] 

In [None]:
#Generate all possible combinations of outfits using the images.
# Define the different types of outfits
outfit_types = [
    ['Top', 'Bottom', 'Shoe', 'Bag'],
    ['Top', 'Bottom', 'Pullover', 'Shoe', 'Bag'],
    ['Top', 'Bottom', 'Outerwear', 'Shoe', 'Bag'],
    ['Top', 'Bottom', 'Pullover', 'Outerwear', 'Shoe', 'Bag'],
    ['Dress', 'Bag', 'Shoe'],
    ['Dress', 'Outerwear', 'Bag', 'Shoe']
]

# Generate all possible combinations of outfits
outfit_combinations = []
for outfit_type in outfit_types:
    items = [top_images, bottom_images, shoe_images, bag_images]
    if 'Pullover' in outfit_type:
        items.append(pullover_images)
    if 'Outerwear' in outfit_type:
        items.append(outerwear_images)
    outfit_combinations.extend(list(itertools.product(*items)))


In [None]:
# Create a list to store the scores for each outfit
outfit_scores = []

# Iterate through each outfit combination
for combination in outfit_combinations:
    # Create a list to store the images of the outfit
    outfit_images = []

    # Append the images of the outfit to the list
    for image_path in combination:
        # Load or preprocess the image as needed
        prepocessed_image = prepare_image(image_path)

        # Append the image to outfit_images
        outfit_images.append(preprocessed_image)

    # Convert the outfit_images list to a numpy array
    outfit_images = np.array(outfit_images)

    # Make predictions using the CNN model
    outfit_scores.append(model.predict(outfit_images))

In [None]:
# Print the scores for each outfit
for i, score in enumerate(outfit_scores):
    print(f"Outfit {i+1} score: {score}")