In [None]:
import os
# Pass `--tf_xla_enable_xla_devices` to TensorFlow through the TF_XLA_FLAGS
# environment variable.
# NOTE(review): presumably this has to run before the cell that imports
# tensorflow for the flag to be picked up -- confirm.
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

In [None]:
import tensorflow as tf
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm, tqdm_notebook
import time
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import InceptionV3,preprocess_input
import pandas as pd
import math

In [None]:
# Feature extractor: InceptionV3 with ImageNet weights, built without its top
# layers (`include_top=False`) and with `pooling="max"`, for 299x299 RGB input.
model = InceptionV3(
    include_top=False,
    weights='imagenet',
    pooling="max",
    input_shape=(299, 299, 3),
)
def extract_features(img_path, model):
    """Return a unit-length feature vector for the image at `img_path`.

    The image is loaded at 299x299, turned into a batch of one, run through
    `preprocess_input` and `model.predict`, flattened to 1-D and divided by
    its own norm (`numpy.linalg.norm`, i.e. the L2 norm for a 1-D vector).

    Args:
        img_path: Path of the image file to load.
        model: Object exposing `predict(batch)`; the notebook passes the
            module-level InceptionV3 instance.

    Returns:
        1-D NumPy array whose L2 norm is 1.
    """
    height, width = 299, 299
    pil_img = image.load_img(img_path, target_size=(height, width))
    # Add the leading batch axis expected by the model.
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    batch = preprocess_input(batch)
    embedding = model.predict(batch).flatten()
    return embedding / norm(embedding)

In [None]:
# --- Dataset locations ------------------------------------------------------
train_images = '../input/shopee-product-matching/train_images'
test_images = '../input/shopee-product-matching/test_images'

# --- Tunables ---------------------------------------------------------------
BATCH_SIZE = 32
IMG_SIZE = 299

# --- Metadata tables --------------------------------------------------------
train_df = pd.read_csv('../input/shopee-product-matching/train.csv')
test_df = pd.read_csv('../input/shopee-product-matching/test.csv')

# Row counts and the number of BATCH_SIZE-sized batches needed to cover them
# (the last batch may be partial, hence the ceiling).
len_data_train = len(train_df)
len_data_test = len(test_df)
TRAIN_BATCHES = math.ceil(len_data_train / BATCH_SIZE)
TEST_BATCHES = math.ceil(len_data_test / BATCH_SIZE)

In [None]:
def myzip(s, t):
    """Pair up the elements of `s` and `t` position by position.

    The result has exactly `len(s)` tuples. Unlike the built-in `zip`, a `t`
    shorter than `s` raises IndexError instead of silently truncating; extra
    elements of a longer `t` are ignored.

    Args:
        s: Indexable with a length; determines how many pairs are produced.
        t: Indexable with at least `len(s)` elements.

    Returns:
        List of `(s[i], t[i])` tuples.
    """
    pairs = []
    for position in range(len(s)):
        pairs.append((s[position], t[position]))
    return pairs

In [None]:
# Re-encode `label_group` as dense integer ids 0..no_classes-1, assigned in
# ascending order of the original label values.
labels = sorted(set(train_df.label_group.tolist()))
no_classes = len(labels)

mapped = list(range(no_classes))
zipper = myzip(labels, mapped)
reverse = myzip(mapped, labels)
label_dict = dict(zipper)      # original label_group value -> dense id
reverse_dict = dict(reverse)   # dense id -> original label_group value

# Vectorised replacement for the previous row-by-row `.at[index, ...]` loop.
# That loop used a positional counter as the index label, which is only
# correct when train_df has a default 0..n-1 index; `map` has no such
# assumption and avoids a Python-level loop over every row.
train_df['label_group'] = train_df.label_group.map(label_dict)

In [None]:
def _prefix_once(directory):
    """Return a mapper that prepends `directory + '/'` to a file name.

    Names that already start with that prefix are returned unchanged, so
    re-running this cell does not prefix the paths a second time.
    """
    prefix = directory + '/'
    return lambda name: name if name.startswith(prefix) else prefix + name


# Turn the bare file names in the `image` column into paths under the image
# directories defined in the configuration cell (`train_images`/`test_images`).
train_df['image'] = train_df.image.map(_prefix_once(train_images))
test_df['image'] = test_df.image.map(_prefix_once(test_images))

In [None]:
# Pull the columns used downstream out of the DataFrames as NumPy arrays:
# image paths for both splits and the label_group values of the training rows.
xs_train_image = train_df['image'].to_numpy()
xs_test_image = test_df['image'].to_numpy()
class_ids = train_df['label_group'].to_numpy()

In [None]:
# The cells below work on the training image paths under the name `filenames`
# (an alias of the same array, not a copy).
filenames = xs_train_image

In [None]:
# Extract one feature vector per training image, in the same order as
# `filenames`, with a notebook progress bar. Each image is a separate
# `extract_features` call.
feature_list = []
for img_path in tqdm_notebook(filenames):
    feature_list.append(extract_features(img_path, model))

In [None]:
from sklearn.neighbors import NearestNeighbors
# Brute-force 5-nearest-neighbour index over the extracted features using
# Euclidean distance, followed by a query with the first image's features.
neighbors = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='euclidean')
neighbors.fit(feature_list)
distances, indices = neighbors.kneighbors([feature_list[0]])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline  
# Show the first training image, i.e. the one whose features were used for the
# `kneighbors` query (feature_list[0] corresponds to filenames[0]).
plt.imshow(mpimg.imread(filenames[0]))

In [None]:
def plot_images(similar_image_paths, distances):
    """Plot up to 25 images on a 5x5 grid, captioned with their distances.

    The first image is captioned as the original (distance shown with one
    decimal); every other image is captioned as a near match (five decimals).
    Each tile is labelled on the y axis with the last two components of its
    path.

    Args:
        similar_image_paths: Sequence of image paths split on "/"; only the
            first 25 are drawn.
        distances: Sequence indexed in parallel with `similar_image_paths`.
    """
    plt.figure(figsize=(20, 20))
    for pos, imagepath in enumerate(similar_image_paths[:25]):
        plt.subplot(5, 5, pos + 1)
        # Bare tiles: no ticks, no grid.
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(mpimg.imread(imagepath))
        ipath = "/".join(imagepath.split("/")[-2:])
        caption = (
            f'Original: self d {distances[pos]:.1f}'
            if pos == 0
            else f'Near match: {distances[pos]:.5f}'
        )
        plt.xlabel(caption)
        plt.ylabel(f'{ipath}')

In [None]:
# Total number of image paths in `filenames`; the random neighbour demo uses it
# as the bound when drawing a random image index.
num_images = len(filenames)

In [None]:
import random

In [None]:
# Show the nearest neighbours of six randomly chosen training images.
for _trial in range(6):
    # randrange excludes the upper bound. The previous randint(0, num_images)
    # could return num_images itself and index past the end of the lists.
    random_image_index = random.randrange(num_images)
    distances, indices = neighbors.kneighbors([feature_list[random_image_index]])
    # The query image goes first; the first returned neighbour is dropped.
    # NOTE(review): this assumes the closest hit is the query image itself --
    # with exact duplicate images in the set that may not hold.
    similar_image_paths = [filenames[random_image_index]] + [
        filenames[neighbor_index] for neighbor_index in indices[0][1:]
    ]
    plot_images(similar_image_paths, distances[0])

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [None]:
# Perform PCA over the features
num_feature_dimensions = 100      # number of principal components to keep
# random_state is fixed so the projection is reproducible across runs.
# NOTE(review): it only matters when scikit-learn picks a randomized/arpack
# solver for this input size -- confirm against the installed version.
pca = PCA(n_components=num_feature_dimensions, random_state=42)
pca.fit(feature_list)
feature_list_compressed = pca.transform(feature_list)

In [None]:
# Partial clusters: embed only the first `n_selected` images with t-SNE.
n_selected = 1000
selected_features = feature_list_compressed[:n_selected]
selected_class_ids = class_ids[:n_selected]
selected_filenames = filenames[:n_selected]

# t-SNE is stochastic; random_state is fixed so that the layout (and the
# thumbnail plot built from `tsne_results`) is reproducible across runs.
tsne_results = TSNE(
    n_components=2,
    verbose=1,
    metric='euclidean',
    random_state=42,
).fit_transform(selected_features)

# Plot a scatter plot from the generated t-SNE results, coloured by class id.
# plt.get_cmap is used instead of plt.cm.get_cmap, which was deprecated in
# Matplotlib 3.7 and removed in 3.9.
colormap = plt.get_cmap('coolwarm')
scatter_plot = plt.scatter(
    tsne_results[:, 0],
    tsne_results[:, 1],
    c=selected_class_ids,
    cmap=colormap,
)
plt.colorbar(scatter_plot)
plt.show()

In [None]:
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.cbook import get_sample_data

In [None]:
def imscatter(x, y, images, ax=None, zoom=1):
    """Draw each image in `images` at its (x, y) data position on `ax`.

    Args:
        x, y: Scalars or 1-D sequences of coordinates (normalised with
            `np.atleast_1d`); iterated in parallel.
        images: Indexable of image sources passed to `plt.imread`;
            `images[i]` is drawn at `(x[i], y[i])`.
        ax: Axes to draw on. Defaults to the current axes (`plt.gca()`).
        zoom: Scale factor forwarded to `OffsetImage`.

    Returns:
        List of the artists added to `ax`, one per image that could be read.
        Positions whose image raised TypeError on load are skipped with a
        warning.
    """
    import warnings

    if ax is None:
        ax = plt.gca()

    x, y = np.atleast_1d(x, y)
    artists = []
    for i, (x0, y0) in enumerate(zip(x, y)):
        try:
            img = plt.imread(images[i])
        except TypeError as exc:
            # Previously this fell through and used `img` anyway, which is
            # unbound on the first failure and otherwise holds the previous
            # iteration's image. Skip the point instead.
            warnings.warn(f"imscatter: could not read image at position {i}: {exc}")
            continue
        im = OffsetImage(img, zoom=zoom)

        ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
        artists.append(ax.add_artist(ab))
    # Data limits are updated from every (x, y) pair, including skipped ones,
    # so the axes extent does not depend on which images loaded.
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
    return artists

In [None]:
# Same t-SNE layout as the scatter plot, but with each point drawn as a
# thumbnail of its image (scaled to 10% via zoom).
fig, ax = plt.subplots(dpi=100, figsize=(12, 12))
imscatter(
    x=tsne_results[:, 0],
    y=tsne_results[:, 1],
    images=selected_filenames,
    ax=ax,
    zoom=0.1,
)
plt.show()