In [None]:
import os

import cv2
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from tqdm.notebook import tqdm

# import pickle


# fix for pycharm jupyter: when the working directory is not the "sneakers-ml"
# folder, step two levels up so the relative "data/..." paths below resolve
_cwd = os.path.abspath(os.curdir)
if not _cwd.endswith("sneakers-ml"):
    os.chdir("../../")

os.path.abspath(os.curdir)

In [None]:
# test: detect SIFT keypoints on a single sample image and draw them

sample_path = "data/merged/images/by-brands/asics/0.jpeg"
img = cv2.imread(sample_path)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {sample_path}")
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(img, None)
# Pass None as outImage so OpenCV allocates a fresh canvas; handing it `img`
# lets the drawing overwrite the original image buffer.
sift_image = cv2.drawKeypoints(imgGray, keypoints, None)

plt.figure()
plt.imshow(sift_image, cmap="gray")

In [None]:
# https://www.codespeedy.com/find-similarity-between-two-images-in-python/
# https://stackoverflow.com/questions/50217364/sift-comparison-calculate-similarity-score-python
# https://stackoverflow.com/questions/43220408/measure-of-image-similarity-for-feature-matching

# 1. SIFT

In [None]:
# Load the brand metadata and discard the image-count column straight away
df = pd.read_csv("data/merged/metadata/brands_dataset.csv").drop(columns="unique_images_count")
df

In [None]:
# flat map dataset into brand -> images:
# list every file in each brand's image directory, then emit one row per file
image_lists = df["images"].apply(lambda folder: [os.path.join(folder, name) for name in os.listdir(folder)])
df = df.assign(image_path=image_lists).drop(columns="images").explode("image_path")
df

In [None]:
# Register tqdm with pandas so that `progress_apply` (used for the feature extraction below) is available
tqdm.pandas()


def calc_sift_similarity(image_path):
    """Extract SIFT keypoints and descriptors from the image at ``image_path``.

    Despite its name, this function does not compare images; it only computes
    the features that the matching cells consume later.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The ``(keypoints, descriptors)`` pair produced by
        ``detectAndCompute``. When the image cannot be read the result is
        ``(None, None)`` so that callers can still unpack it or transpose a
        list of results with ``zip(*results)``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # Keep the 2-tuple shape: a bare None here breaks `zip(*results)` in the caller
        return None, None
    # Rescale intensities to the full 0..255 range and force 8-bit before running SIFT
    image8bit = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype("uint8")
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image8bit, None)
    return keypoints, descriptors


# Run the extractor over every image path (with a progress bar), then split
# the resulting (keypoints, descriptors) pairs into two columns
sift_features = df["image_path"].progress_apply(calc_sift_similarity)
df["keypoints"], df["descriptors"] = zip(*sift_features)

# pickle.dump(df, open("data/features/brands_dataset_sift.pkl", "wb"))

# 2. Image similarity

In [None]:
# Demo pair: the first two rows of the frame. Pixels are re-read from disk,
# keypoints are taken from the values computed earlier.
first_row, second_row = df.iloc[0], df.iloc[1]
img1, keypoints1 = cv2.imread(first_row["image_path"]), first_row["keypoints"]
img2, keypoints2 = cv2.imread(second_row["image_path"]), second_row["keypoints"]

In [None]:
# Brute-force matching of the two descriptor sets; matches are then ordered by distance
bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)
matches = bf.match(df.iloc[0]["descriptors"], df.iloc[1]["descriptors"])
matches = sorted(matches, key=lambda x: x.distance)

# Draw only the 10 closest matches
bf_matches_image = cv2.drawMatches(
    img1, keypoints1, img2, keypoints2, matches[:10], None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
)
# OpenCV stores colour images as BGR while matplotlib expects RGB; convert so
# the sneakers are shown in their real colours instead of with red/blue swapped
plt.imshow(cv2.cvtColor(bf_matches_image, cv2.COLOR_BGR2RGB))
plt.title("BFMatcher comparation of two sneakers")
plt.show()

In [None]:
# NOTE(review): in FLANN's index enum 0 is the linear (exhaustive) index and 1 is
# the KD-tree; "trees" is a KD-tree parameter — confirm which index was intended.
flann = cv2.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {})
# Two nearest neighbours per descriptor of the first image
matches = flann.knnMatch(df.iloc[0]["descriptors"], df.iloc[1]["descriptors"], k=2)

flann_matches_image = cv2.drawMatchesKnn(img1, keypoints1, img2, keypoints2, matches, None)
# OpenCV stores colour images as BGR while matplotlib expects RGB; convert so
# the sneakers are shown in their real colours instead of with red/blue swapped
plt.imshow(cv2.cvtColor(flann_matches_image, cv2.COLOR_BGR2RGB))
plt.title("FlannBasedMatcher comparation of two sneakers")
plt.show()

In [None]:
def _has_descriptors(descriptors, minimum: int) -> bool:
    """Return True when ``descriptors`` is a sized object holding at least ``minimum`` entries."""
    # None (unreadable image / no keypoints) and NaN placeholders have no length
    return hasattr(descriptors, "__len__") and len(descriptors) >= minimum


def flann_match(
    df: pd.DataFrame, example: pd.Series, distance_threshold: float = 0.7, good_count: int = 10, k: int = 2
):
    """Return the first row of ``df`` that shares enough good matches with ``example``.

    Rows are visited in the order of ``df``. For each candidate the global
    ``flann`` matcher returns the ``k`` nearest neighbours of every example
    descriptor; a neighbour pair passes the ratio test when the best distance
    is below ``distance_threshold`` times the second-best distance.

    Args:
        df: Frame with ``image_path`` and ``descriptors`` columns.
        example: Row (``image_path``, ``descriptors``) to find a match for.
        distance_threshold: Ratio applied to the second-best distance.
        good_count: A candidate is accepted when it has MORE than this many
            matches passing the ratio test.
        k: Number of neighbours requested from the matcher. Only the two
            nearest are used by the ratio test, so ``k`` must be at least 2.

    Returns:
        The first accepted row of ``df``, or ``None`` when no row qualifies or
        when ``example`` has no descriptors.

    Raises:
        ValueError: If ``k`` is smaller than 2.
    """
    if k < 2:
        raise ValueError("k must be at least 2: the ratio test compares the two nearest neighbours")

    query_descriptors = example["descriptors"]
    if not _has_descriptors(query_descriptors, 1):
        return None

    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        # Never report the example image as its own match
        if example["image_path"] == row["image_path"]:
            continue

        candidate_descriptors = row["descriptors"]
        # Fewer than two descriptors cannot provide the two neighbours the ratio test compares
        if not _has_descriptors(candidate_descriptors, 2):
            continue

        matches = flann.knnMatch(query_descriptors, candidate_descriptors, k=k)
        # Index the two nearest neighbours explicitly rather than unpacking, so
        # shorter neighbour lists are skipped and k > 2 does not raise
        good = [
            neighbours[0]
            for neighbours in matches
            if len(neighbours) >= 2 and neighbours[0].distance < distance_threshold * neighbours[1].distance
        ]

        if len(good) > good_count:
            return row
    return None

In [None]:
# take 1 image and find the first other image that is similar enough
# (candidates are visited in shuffled order, so this is not necessarily the most similar one)


example_index = 1
example = df.iloc[example_index]

print("Trying to find match for:", example["image_path"])
match = flann_match(shuffle(df), example, distance_threshold=0.7, k=2, good_count=50)

if match is not None:
    print(f"Found match: {match['image_path']}")
    f, (example_ax, match_ax) = plt.subplots(1, 2)
    # cv2.imread yields BGR while matplotlib expects RGB; convert so colours are displayed correctly
    example_ax.imshow(cv2.cvtColor(cv2.imread(example["image_path"]), cv2.COLOR_BGR2RGB))
    match_ax.imshow(cv2.cvtColor(cv2.imread(match["image_path"]), cv2.COLOR_BGR2RGB))
    plt.show()
else:
    print("No match found")