In [19]:
import os
#Source Image Folder
image_location = os.path.abspath(os.path.join("C:\\workshop\\ImageSimilarity\\", "kids_girls_shoes"))

#Load all the TIFF images
images = []
for file in [img for img in os.listdir(image_location)
if img.endswith(".tif")]:
   images.append(image_location + "\\" + file)

import pandas
image_df = pandas.DataFrame(data=dict(image=images))
print(image_df)

# Get the feature vectors of the images
# This requires a 4 step process:
# 1. Load the image(s) via the loadImage() transform
# 2. Resize the image(s) to the size required by the image model 
#    (224x224 for resnet models, 227x227 for the alexnet model)
# 3. Extract the pixels from the resized image(s) using the extractPixel() transform
# 4. Finally, featurize the image(s) via the featurizeImage() transform
from microsoftml import rx_featurize, load_image, resize_image, extract_pixels, featurize_image
image_vector = rx_featurize(data=image_df, ml_transforms=[
    load_image(cols=dict(Features="image")),
    resize_image(cols="Features", width=224, height=224), #Alexnet uses 227x227, resnet50 uses 224x224
    extract_pixels(cols="Features"),
    featurize_image(cols="Features", dnn_model="resnet50")])

print(image_vector.head())

                                                 image
0    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
1    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
2    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
3    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
4    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
5    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
6    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
7    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
8    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
9    C:\workshop\ImageSimilarity\kids_girls_shoes\1...
10   C:\workshop\ImageSimilarity\kids_girls_shoes\1...
11   C:\workshop\ImageSimilarity\kids_girls_shoes\1...
12   C:\workshop\ImageSimilarity\kids_girls_shoes\1...
13   C:\workshop\ImageSimilarity\kids_girls_shoes\2...
14   C:\workshop\ImageSimilarity\kids_girls_shoes\2...
15   C:\workshop\ImageSimilarity\kids_girls_shoes\2...
16   C:\workshop\ImageSimilarity\kids_girls_shoes\2...
17   C:\wo

In [2]:
image_location_match = os.path.abspath(os.path.join("C:\\workshop\\ImageSimilarity\\", "kids_girls_shoes_match"))
images_match = []
for file in [img for img in os.listdir(image_location_match)
if img.endswith(".tif")]:
   images_match.append(image_location_match + "\\" + file)

image_match_df = pandas.DataFrame(data=dict(image=images_match))
image_match_vectors = rx_featurize(data=image_match_df, ml_transforms=[
    load_image(cols=dict(Features="image")),
    resize_image(cols="Features", width=227, height=227),
    extract_pixels(cols="Features"),
    featurize_image(cols="Features", dnn_model="Alexnet")])
print(image_match_vectors.head())

matimg = image_vector.drop("image", axis=1).as_matrix()
matmat = image_match_vectors.drop("image", axis=1).as_matrix()

#Using Cosine Distance to find distances between feature vectors
from scipy.spatial.distance import cdist
distance = cdist(matimg, matmat)

Beginning processing data.
Rows Read: 19, Read Time: 0, Transform Time: 0
Beginning processing data.
Elapsed time: 00:00:07.6546857
Finished writing 19 rows.
Writing completed.
                                               image  Features.0  Features.1  \
0  C:\workshop\ImageSimilarity\kids_girls_shoes_m...         0.0         0.0   
1  C:\workshop\ImageSimilarity\kids_girls_shoes_m...         0.0         0.0   
2  C:\workshop\ImageSimilarity\kids_girls_shoes_m...         0.0         0.0   
3  C:\workshop\ImageSimilarity\kids_girls_shoes_m...         0.0         0.0   
4  C:\workshop\ImageSimilarity\kids_girls_shoes_m...         0.0         0.0   

   Features.2  Features.3  Features.4  Features.5  Features.6  Features.7  \
0    1.632986    0.000000         0.0    0.873705         0.0         0.0   
1    1.154633    0.000000         0.0    2.121917         0.0         0.0   
2    2.033399    0.000000         0.0    0.000000         0.0         0.0   
3    0.000000    0.093332         

In [15]:
import numpy as np
idx=1
sorted = np.argsort(distance[:,idx])
print(images_match[idx])
print(images[sorted[0]])
print(images[sorted[1]])
print(images[sorted[2]])

C:\workshop\ImageSimilarity\kids_girls_shoes_match\DP0203201717175691M.tif
C:\workshop\ImageSimilarity\kids_girls_shoes\8305749_fpx.tif
C:\workshop\ImageSimilarity\kids_girls_shoes\2023066_fpx.tif
C:\workshop\ImageSimilarity\kids_girls_shoes\2037925_fpx.tif
