In [None]:
import pandas as pd
import numpy as np
import annoy
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Show up to 100 columns when a DataFrame is rendered in a cell output.
pd.options.display.max_columns = 100

In [None]:
# Read the three competition tables, in the same order as before:
# customers, transactions_train, articles.
customers, transactions_train, articles = (
    pd.read_csv(f'../input/h-and-m-personalized-fashion-recommendations/{table_name}.csv')
    for table_name in ('customers', 'transactions_train', 'articles')
)

In [None]:
# Peek at the first two article rows to see which columns are available.
articles.head(n=2)

In [None]:
# Article columns that make up the item vector, in column order.
# Written one name per line and split on whitespace into a list of strings.
dims = """
    prod_name
    product_type_no
    product_group_name
    graphical_appearance_no
    colour_group_code
    perceived_colour_value_id
    perceived_colour_master_id
    department_no
    index_name
    index_group_no
    section_no
    garment_group_no
""".split()

In [None]:
# Use article_id as the row index so it can later serve as the item label.
# Guarded and non-inplace: once article_id is the index the column no longer
# exists, so re-running an unguarded set_index would raise KeyError.
if articles.index.name != 'article_id':
    articles = articles.set_index('article_id')

In [None]:
# One encoder per text column; each keeps its own fitted classes so the
# integer codes can be mapped back to the original strings if needed.
prod_name_enc = LabelEncoder()
group_name_enc = LabelEncoder()
index_enc = LabelEncoder()

# LabelEncoder works on a 1-D array of labels. Passing the column directly
# (instead of a reshaped (n, 1) column vector) yields the same codes without
# scikit-learn's column-vector DataConversionWarning.
articles['prod_name'] = prod_name_enc.fit_transform(articles['prod_name'])
articles['product_group_name'] = group_name_enc.fit_transform(articles['product_group_name'])
articles['index_name'] = index_enc.fit_transform(articles['index_name'])

In [None]:
# Feature matrix: one contiguous float32 row per article, columns in `dims` order.
feature_frame = articles[dims]
vector = np.ascontiguousarray(feature_frame.to_numpy(), dtype=np.float32)

# Row i of `vector` corresponds to the index label items[i].
items = np.array(articles.index.values)

In [None]:
# Bundle the labels and their vectors; both arrays share the same row order.
item_data = dict(id=items, vector=vector)

In [None]:
class AnnoyIndex():
    """Approximate nearest-neighbour index that returns labels instead of row positions.

    Wraps ``annoy.AnnoyIndex``: vectors are added under their row position and
    query results are translated back through ``labels``.
    """

    def __init__(self, vectors, labels):
        """Store the data to index.

        Args:
            vectors: 2-D NumPy array, one row per item; converted to float32.
            labels: sequence with one label per row of ``vectors``.
        """
        self.dimension = vectors.shape[1]
        self.vectors = vectors.astype('float32')
        self.labels = labels
        # Search effort per requested neighbour; query() multiplies it by k.
        self.search_in_x_trees = 8

    def build(self, number_of_trees=100):
        """Create the Annoy index, add every stored vector and build the forest.

        Args:
            number_of_trees: number of trees passed to Annoy's ``build``.
        """
        # The metric is passed explicitly: omitting it falls back to 'angular'
        # with a FutureWarning, so this keeps the same metric without the warning.
        self.index = annoy.AnnoyIndex(self.dimension, metric='angular')
        for i, vec in enumerate(self.vectors):
            self.index.add_item(i, vec.tolist())
        self.index.build(number_of_trees)

    def query(self, vector, k=10):
        """Return the labels of (up to) the ``k`` approximate nearest neighbours.

        Args:
            vector: 1-D NumPy array with ``self.dimension`` entries.
            k: number of neighbours requested.

        Returns:
            List of labels in the order Annoy returns the neighbours.
        """
        # search_k is Annoy's budget of nodes to inspect, not a number of trees.
        # A fixed budget of 8 is below the default k=10 and can yield fewer than
        # k results, so the budget scales with k (Annoy's own default is
        # n_trees * k).
        indices = self.index.get_nns_by_vector(
            vector.tolist(),
            k,
            search_k=self.search_in_x_trees * k)
        return [self.labels[i] for i in indices]

In [None]:
# Build the approximate nearest-neighbour index over all item vectors,
# labelled by their ids, using a forest of 100 trees.
index = AnnoyIndex(vectors=item_data['vector'], labels=item_data['id'])
index.build(number_of_trees=100)

In [None]:
# Query the index once per item and collect (item, neighbours) column-wise.
dd = {"item": [], "similar_items": []}
item_pairs = zip(item_data['id'], item_data['vector'])
for item_id, item_vector in tqdm(item_pairs, total=len(item_data['vector'])):
    # Query first so a failing lookup never leaves the two lists out of step.
    neighbours = index.query(item_vector)
    dd['item'].append(item_id)
    dd['similar_items'].append(neighbours)

In [None]:
# One row per item: its id and the list of neighbour ids returned by the index.
item_similarities = pd.DataFrame.from_dict(dd)

### Plot sample items

In [None]:
# Pick ten items to visualise. The fixed random_state makes Restart & Run All
# show the same articles every time; change the seed to look at other examples.
sample = list(item_similarities.sample(10, random_state=42)['item'])

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[0])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[1])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[2])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[3])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[4])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[5])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[6])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[7])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[8])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()

In [None]:
# Show the sampled query article on its own figure.
article_id = str(sample[9])  # not `id`, which would shadow the builtin
path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{article_id[0:2]}/0{article_id}.jpg"
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Look up the stored neighbour list for this article.
is_query = item_similarities['item'] == int(article_id)
similar_items = item_similarities.loc[is_query, 'similar_items'].iloc[0]

# squeeze=False keeps `axes` two-dimensional, so the loop also works when only
# one neighbour was returned (plt.subplots would otherwise return a bare Axes).
_, axes = plt.subplots(1, len(similar_items), figsize=(15, 10), squeeze=False)
for axis, neighbour in zip(axes[0], similar_items):
    neighbour_id = str(neighbour)
    path = f"../input/h-and-m-personalized-fashion-recommendations/images/0{neighbour_id[0:2]}/0{neighbour_id}.jpg"
    axis.imshow(mpimg.imread(path))
    axis.axis('off')
plt.show()