In [131]:
from transformers import DetrFeatureExtractor, DetrForObjectDetection
import torch
from PIL import Image
import requests
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ast

In [50]:
_DETR_CHECKPOINT = "facebook/detr-resnet-50"
# checkpoint name -> (feature_extractor, model); filled lazily by _load_detr().
_DETR_CACHE = {}


def _load_detr(checkpoint=_DETR_CHECKPOINT):
    """Return the (feature_extractor, model) pair for `checkpoint`, loading it only once."""
    if checkpoint not in _DETR_CACHE:
        _DETR_CACHE[checkpoint] = (
            DetrFeatureExtractor.from_pretrained(checkpoint),
            DetrForObjectDetection.from_pretrained(checkpoint),
        )
    return _DETR_CACHE[checkpoint]


def object_detection(image_dir, threshold=0.5):
    """Detect objects in an image with DETR and return one crop per detection.

    Parameters
    ----------
    image_dir : str or path-like
        Path of the image file to analyse.
    threshold : float, default 0.5
        Only detections whose confidence is strictly greater than this value
        are kept.

    Returns
    -------
    objects : list of PIL.Image.Image
        The image region inside each retained bounding box.
    labels : list of str
        Class name of each retained detection (from ``model.config.id2label``).
    scores : list of float
        Confidence of each retained detection, rounded to 3 decimals.

    The three lists are index-aligned.
    """
    # Image.open is lazy and keeps the file open; convert() forces the pixel
    # data to be read so the file can be closed right away. Converting to RGB
    # also gives the model the three channels it expects for any input mode.
    with Image.open(image_dir) as source:
        image = source.convert("RGB")

    # Loading the checkpoint is by far the most expensive step, so it is done
    # once and reused across calls instead of on every image.
    feature_extractor, model = _load_detr()

    inputs = feature_extractor(images=image, return_tensors="pt")
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # post_process expects (height, width); PIL's `size` is (width, height).
    target_sizes = torch.tensor([image.size[::-1]])
    results = feature_extractor.post_process(outputs, target_sizes=target_sizes)[0]

    objects, labels, scores = [], [], []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if score <= threshold:
            continue

        # Boxes are corner coordinates (x_min, y_min, x_max, y_max), which is
        # exactly the (left, upper, right, lower) tuple that crop() takes.
        # Values are rounded to 2 decimals before truncation, as before.
        x_min, y_min, x_max, y_max = (int(round(value, 2)) for value in box.tolist())

        objects.append(image.crop((x_min, y_min, x_max, y_max)))
        labels.append(model.config.id2label[label.item()])
        scores.append(round(score.item(), 3))
    return objects, labels, scores

In [228]:
import cv2 
import matplotlib.pyplot as plt

def sift_score(image1, image2, ratio = 0.8) :
    """Score the visual similarity of two images with SIFT feature matching.

    Both images are resized to a common size (the larger height and the larger
    width of the two), SIFT descriptors are matched with a brute-force 2-nearest
    neighbour search, and matches are filtered with the ratio test.

    Parameters
    ----------
    image1, image2 : array-like or PIL.Image.Image
        Images to compare; anything ``np.array`` can convert to a pixel array.
    ratio : float, default 0.8
        A match is kept when its distance is below ``ratio`` times the distance
        of the second-best match.

    Returns
    -------
    float
        ``100 * good_matches / min(#keypoints1, #keypoints2)``, or ``0.0`` when
        either image is empty, has no detectable keypoints, or cannot be matched.
    """
    pixels1 = np.array(image1)
    pixels2 = np.array(image2)

    # A zero-area image (e.g. a degenerate crop) has no features, and
    # cv2.resize would raise on it, so answer before touching OpenCV.
    if pixels1.ndim < 2 or pixels2.ndim < 2 or pixels1.size == 0 or pixels2.size == 0:
        return 0.0

    height = max(pixels1.shape[0], pixels2.shape[0])
    width = max(pixels1.shape[1], pixels2.shape[1])

    # cv2.resize takes the target size as (width, height).
    pixels1 = cv2.resize(pixels1, (width, height))
    pixels2 = cv2.resize(pixels2, (width, height))

    sift = cv2.SIFT_create()
    keypoints_1, descriptors_1 = sift.detectAndCompute(pixels1, None)
    keypoints_2, descriptors_2 = sift.detectAndCompute(pixels2, None)

    # detectAndCompute returns None descriptors when no keypoint is found.
    if descriptors_1 is None or descriptors_2 is None:
        return 0.0

    number_keypoints = min(len(keypoints_1), len(keypoints_2))
    if number_keypoints == 0:
        return 0.0

    try:
        # BFMatcher with default params
        matches = cv2.BFMatcher().knnMatch(descriptors_1, descriptors_2, k=2)
    except cv2.error:
        # Matching is best-effort: an OpenCV failure means "no similarity",
        # but unrelated errors are no longer silently swallowed.
        return 0.0

    # Ratio test. A query can come back with fewer than two neighbours when the
    # other image has a single descriptor; such entries cannot be tested.
    good = sum(
        1
        for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
    )

    return good / number_keypoints * 100
    

In [229]:
def match_images(image_dir1, image_dir2):
    """Return the summed SIFT score over all same-label object pairs of two images.

    Objects are detected in both image files; every detection from the first
    image is compared with every detection from the second that carries the
    same label, and the individual `sift_score` values are added up.
    Returns 0 when either image yields no detection.
    """
    crops_a, names_a, _ = object_detection(image_dir1)
    crops_b, names_b, _ = object_detection(image_dir2)

    total = 0
    # Nothing to compare unless both images produced at least one object.
    if not (crops_a and crops_b):
        return total

    for crop_a, name_a in zip(crops_a, names_a):
        for crop_b, name_b in zip(crops_b, names_b):
            if name_a == name_b:
                total += sift_score(crop_a, crop_b)
    return total

In [219]:
# Score one example pair of pictures with match_images().
score = match_images("data/pictures/41529__7.jpg", "data/pictures/44432__3.jpg")

In [220]:
# Display the score computed for the example pair.
score

182.7283985364481

In [221]:
# Load the listing pairs; the [:5] slice keeps only the first five rows.
data = pd.read_csv("data/pairs.csv")[:5]

In [222]:
# Load the listings table; the scoring loop looks rows up by `listing_id`.
listing = pd.read_csv("data/listing.csv")

In [None]:
# Score each candidate pair of listings: sum match_images() over every
# combination of a picture from the first listing and one from the second.

# When True, pairs whose price, zip code, transaction type or item type differ
# are scored 0 without comparing any pictures. False reproduces the previous
# behaviour, where this comparison was computed but never applied.
REQUIRE_SAME_METADATA = False
METADATA_COLUMNS = ["current_price", "city_zip", "transaction_type", "item_type"]

matches = []
for idx, id1, id2 in zip(data.index, data["listing_id_1"], data["listing_id_2"]):
    print(idx)

    # Look each listing up once instead of re-filtering the table per column;
    # .iloc[0] raises IndexError if an id is missing from `listing`.
    listing_1 = listing[listing["listing_id"] == id1].iloc[0]
    listing_2 = listing[listing["listing_id"] == id2].iloc[0]

    check = (not REQUIRE_SAME_METADATA) or all(
        listing_1[column] == listing_2[column] for column in METADATA_COLUMNS
    )
    if not check:
        matches.append(0)
        continue

    # `pictures` is stored as text and parsed with ast.literal_eval into the
    # sequence of image paths handed to match_images().
    directories_1 = ast.literal_eval(listing_1["pictures"])
    directories_2 = ast.literal_eval(listing_2["pictures"])

    score = sum(
        match_images(path_1, path_2)
        for path_1 in directories_1
        for path_2 in directories_2
    )
    matches.append(score)


0
1
2
3


In [None]:
# Number of scores collected; compare with len(data) before adding them as a column.
len(matches)

In [211]:
# Work on a copy so that adding result columns does not also modify `data`.
results = data.copy()

In [212]:
# Attach the scores as a new column; len(matches) must equal the number of rows.
results["score"] = matches

In [213]:
# Preview the scored pairs (at most the first 60 rows).
results.head(60)

Unnamed: 0.1,Unnamed: 0,listing_id_1,listing_id_2,labels,score
0,0,120777696,116630376,1,0
1,1,120793420,121050028,1,0
2,2,118823311,111300261,1,0
3,3,112597318,102266138,0,0
4,4,72795989,68729895,1,0
