In [1]:
import warnings
warnings.filterwarnings('ignore')

import random
import functools
import numpy as np
import pandas as pd
import ipywidgets as widgets
from numpy.linalg import norm
from IPython.display import display, clear_output

In [36]:
# Load data

# Read the style metadata (first 4000 rows). Malformed rows are skipped:
# `on_bad_lines='skip'` replaces `error_bad_lines=False`, which was deprecated
# in pandas 1.3 and removed in pandas 2.0 (where it raises a TypeError).
# Rows with any missing value are dropped and the index is renumbered from 0.
styles_df = (
    pd.read_csv('styles.csv', nrows=4000, on_bad_lines='skip')
    .dropna()
    .reset_index(drop=True)
)

# Read the baseline VGG embeddings csv.
combo_baseline_df = pd.read_csv('combo_embeddings_vgg16_subsample.csv')

# Attach the category labels. pandas aligns these assignments on the index,
# so row i of the embeddings receives the labels of row i of styles_df.
# NOTE(review): this presumes the embeddings file was produced from the same
# cleaned styles frame, in the same row order - confirm against its generator.
combo_baseline_df['subCategory'] = styles_df['subCategory']
combo_baseline_df['articleType'] = styles_df['articleType']

In [11]:
# styles_df

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt
...,...,...,...,...,...,...,...,...,...,...
3955,12191,Men,Apparel,Topwear,Shirts,Brown,Fall,2011,Casual,Basics Men Brown Slim Fit Shirt
3956,55656,Women,Footwear,Shoes,Heels,Brown,Winter,2015,Casual,Catwalk Women Brown Wedges
3957,31313,Women,Apparel,Topwear,Kurtas,Blue,Summer,2012,Ethnic,W Women Blue Kurta
3958,50324,Women,Apparel,Topwear,Kurtas,Peach,Summer,2012,Ethnic,Aurelia Women Peach Kurta


In [12]:
# combo_baseline_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,506,507,508,509,510,511,image,id,subCategory,articleType
0,0,0,0,1,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.048971,15970.jpg,15970,Topwear,Shirts
1,0,0,0,1,0,0,0,0,0,0,...,0.0,0.259653,0.081962,0.033451,0.129113,0.000000,39386.jpg,39386,Bottomwear,Jeans
2,1,0,0,0,0,0,0,0,0,0,...,0.0,0.286985,0.000000,0.000000,0.228443,0.000000,59263.jpg,59263,Watches,Watches
3,0,0,0,1,0,0,0,0,0,0,...,0.0,0.068164,0.000000,0.000000,0.000000,0.000000,21379.jpg,21379,Bottomwear,Track Pants
4,0,0,0,1,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.170445,53759.jpg,53759,Topwear,Tshirts
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3955,0,0,0,1,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.455157,12191.jpg,12191,Topwear,Shirts
3956,0,0,0,0,0,0,0,0,0,0,...,0.0,0.358805,0.000000,0.000000,0.370474,0.000000,55656.jpg,55656,Shoes,Heels
3957,0,0,0,1,0,0,0,0,0,0,...,0.0,0.000000,0.001912,0.000000,0.000000,0.000000,31313.jpg,31313,Topwear,Kurtas
3958,0,0,0,1,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,50324.jpg,50324,Topwear,Kurtas


In [73]:
# Interface layout

# Page title shown at the top of the interface.
title = widgets.HTML(value="<h1>Product Recommendation System</h1>")

# Heading shown above the recommended products. `text_1` is bound exactly once:
# a second heading widget ("<h2>Similar Products</h2>") used to be created under
# the same name and was immediately overwritten, so it was never displayed.
text_1 = widgets.HTML(value="<h2>Inspired By Your Choices</h2>")

# Centre the children of each container horizontally.
box_layout = widgets.Layout(align_items='center')
title_text = widgets.VBox([title], layout=box_layout)
body_text = widgets.VBox([text_1], layout=box_layout)

In [90]:
# Image button clicked

# Output area that receives everything rendered by a button click.
image_output = widgets.Output()

def image_on_click(image_id, _):
    """Handle a click on one of the product image buttons.

    Clears the shared output area, shows the clicked product's id and display
    name, then renders its recommendations via ``prediction``.

    Parameters
    ----------
    image_id :
        Value matched against the ``id`` column of ``styles_df``.
    _ :
        The button instance passed by ``Button.on_click``; unused.

    Raises
    ------
    IndexError
        If no row of ``styles_df`` has the given ``image_id``.
    """
    import html  # local import so this cell does not depend on an edited import cell

    with image_output:
        clear_output()

        label_info = styles_df.loc[styles_df['id'] == image_id, 'productDisplayName']
        image_label = label_info.values[0]

        # The product name comes from the CSV and is injected into HTML markup,
        # so escape it: characters such as '<' or '&' must render as text.
        content = html.escape(f"Image {image_id}: {image_label}")
        display(widgets.HTML(f"<div style='text-align:center'>{content}</div>"))

        prediction(image_id)

box_layout = widgets.Layout(align_items='center')
image_info_box = widgets.VBox([image_output], layout=box_layout)

In [91]:
# Read images

path = 'images/'
list_images = styles_df['id'].tolist()

# Randomly pick six product ids to offer as clickable choices
k_images = random.sample(list_images, 6)

# Build one "image above button" box per picked product
k_image_list = []
for image_id in k_images:

    # Read the raw JPEG bytes; the context manager closes the file handle
    # right away instead of leaving it open for the lifetime of the kernel.
    with open(path + str(image_id) + '.jpg', "rb") as file:
        image = file.read()
    image_headline = widgets.Image(value=image, format='jpg', height='160', width='120')

    button = widgets.Button(description='Image %d' % image_id)
    # bind the click event to the image_on_click function, with our image_id as argument
    button.on_click(functools.partial(image_on_click, image_id))

    # Create a vertical layout box, image above the button
    box = widgets.VBox([image_headline, button])
    k_image_list.append(box)


# Lay the per-product boxes out side by side (despite the name, this is an HBox)
vbox_k_images = widgets.HBox(k_image_list)

In [92]:
# Find the k most similar items

def rec_k(v_in, v_in_idx, k, df):
    """Return the ``k`` rows of ``df`` most cosine-similar to a query vector.

    Parameters
    ----------
    v_in : array-like
        Query embedding. Must have one entry per feature column of ``df``
        (every column except the last four).
    v_in_idx : index label
        Label of the query row in ``df.index``; used to look up the query's
        image name so it can be excluded from the results.
    k : int
        Maximum number of similar items to return.
    df : pandas.DataFrame
        Candidate rows. All columns except the last four are treated as numeric
        features; an ``'image'`` column holding the image file names is required.
        Index labels are assumed to be unique.

    Returns
    -------
    tuple
        ``(query_image_name, similar_image_names)`` where the second element is
        a list of at most ``k`` image names ordered from most to least similar.
        The query image itself is never part of that list.
    """
    # Image name of the query row
    v_image_name = df.loc[v_in_idx, 'image']

    # Feature matrix (one row per candidate) and query vector as float arrays
    features = df.iloc[:, :-4].to_numpy(dtype=float)
    query = np.asarray(v_in, dtype=float)

    # Cosine similarity of every candidate with the query in one shot.
    # A zero-norm vector has no direction; give it similarity 0 instead of the
    # NaN a 0/0 division would produce (NaN makes the sort order undefined).
    denominators = norm(features, axis=1) * norm(query)
    similarities = np.zeros(len(df), dtype=float)
    np.divide(features @ query, denominators, out=similarities, where=denominators != 0)

    # Map image name -> similarity, in row order
    imageName_sim_dic = dict(zip(df['image'].tolist(), similarities.tolist()))

    # Drop the query *before* truncating. Taking the top k+1 and removing the
    # query afterwards fails whenever the query is not among those k+1 entries
    # (e.g. several candidates tie with it at similarity 1.0).
    imageName_sim_dic.pop(v_image_name, None)

    # Sort names by descending similarity (stable, so ties keep row order)
    imageNames_sorted = sorted(
        imageName_sim_dic, key=imageName_sim_dic.__getitem__, reverse=True
    )[:max(k, 0)]

    return v_image_name, imageNames_sorted

In [93]:
# Predicted results

def prediction(input_image_id, k=5):
    """Display the input product next to its most similar products.

    Candidates are restricted to rows of ``combo_baseline_df`` that share the
    input's ``subCategory`` and ``articleType``; they are ranked with ``rec_k``
    and rendered as a row of labelled images under ``body_text``.

    Parameters
    ----------
    input_image_id :
        Value matched against the ``id`` column of ``combo_baseline_df``.
    k : int, optional
        Number of similar products requested from ``rec_k`` (default 5).

    Raises
    ------
    IndexError
        If no row of ``combo_baseline_df`` has the given id.
    """
    # Index *label* of the input image's row. It is used with .loc (label
    # based) below, which matches the label lookup done inside rec_k; .iloc
    # would only be equivalent for a default 0..n-1 index.
    samp_idx = combo_baseline_df[combo_baseline_df.id == input_image_id].index.values[0]
    input_row = combo_baseline_df.loc[samp_idx]

    # Keep only candidates with the same subcategory and article type
    subcategory = input_row['subCategory']
    articletype = input_row['articleType']
    compare_vectors_df = combo_baseline_df[
        (combo_baseline_df['subCategory'] == subcategory)
        & (combo_baseline_df['articleType'] == articletype)
    ]

    # The embedding is everything except the last four columns of the row
    v1 = input_row[:-4]
    input_image, predicted_images = rec_k(v1, samp_idx, k, compare_vectors_df)

    # Input image first, followed by the recommendations in ranked order
    result_images_list = [input_image] + list(predicted_images)

    pred_image_list = []
    for i, result_image in enumerate(result_images_list):

        # Read the image bytes and close the file handle immediately
        with open(path + result_image, "rb") as file:
            image = file.read()

        if i == 0:
            image_label = widgets.Label(value='Input Image')
        else:
            image_label = widgets.Label(value=f'Similar Product #{i}')

        image_headline = widgets.Image(value=image, format='jpg', height='160', width='120')

        # Create a vertical layout box, label above the image
        box = widgets.VBox([image_label, image_headline])
        pred_image_list.append(box)

    pred_images = widgets.HBox(pred_image_list)
    pred_box = widgets.VBox([body_text, pred_images])
    display(pred_box)

In [94]:
# Assemble the page: title on top, the row of clickable product images below
# it, and the click-output area at the bottom, all centred horizontally.
box_layout = widgets.Layout(align_items='center')
web_page = widgets.VBox(
    children=[title_text, vbox_k_images, image_info_box],
    layout=box_layout,
)
display(web_page)

VBox(children=(VBox(children=(HTML(value='<h1>Product Recommendation System</h1>'),), layout=Layout(align_item…