# Script to find similar homes

In [None]:
import sys  
sys.path.insert(0, '../visual_home_finder')

import config, paths, utilities
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from random import randint

from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model, load_model

In [None]:
# Notebook settings.
# When True the query is the image file named by `favorite_image`;
# when False a row of the listings table is used as the query instead.
use_favorite_image = True
# Image file used as the query (other files tried: '98117_81.jpg', 98105_27).
favorite_image = '98117_83.jpg'
# Number of listings displayed by each of the result cells.
num_listings_to_show = 5

Read in embeddings for all home listings

In [None]:
# Load the stored feature table for every home listing; the first CSV column
# becomes the dataframe index.
features_csv = os.path.sep.join([config.FEATURE_PATH, 'home_features4.csv'])
home_listings_df = pd.read_csv(features_csv, index_col=0)

# Convert both feature columns with utilities.str_to_array
# (presumably the vectors are serialized as text in the CSV -- confirm in utilities).
for feature_col in ('home_feature', 'resnet_feature'):
    home_listings_df[feature_col] = home_listings_df[feature_col].apply(utilities.str_to_array)

Generate embeddings for the favorite image

In [None]:
# The models are only needed when an image file has to be embedded.
if not use_favorite_image:
    pass
else:
    # Full home-style model, loaded from the configured path
    model_file = os.path.sep.join([config.MODEL_PATH, config.MODEL_NAME])
    home_model = load_model(model_file)

    # Sub-model that stops at the 'dense_4' layer; its output is used as the
    # home-style feature vector
    embedding_layer = home_model.get_layer('dense_4')
    home_feature_model = Model(inputs=home_model.input,
                               outputs=embedding_layer.output)

In [None]:
# Embed the favorite image with the home-style feature model and display it.
if not use_favorite_image:
    pass
else:
    # Read the file resized to 224x224 and keep the raw pixel array for plotting
    fav_img_orig = image.img_to_array(
        image.load_img(favorite_image, target_size=(224, 224))
    )
    # Subtract the configured mean and add a leading batch axis for predict()
    fav_img = np.expand_dims(fav_img_orig - config.IMG_MEAN, axis=0)

    # Flatten the model output into a plain list of floats
    fav_prediction = home_feature_model.predict(fav_img)
    fav_feature = np.ravel(fav_prediction).tolist()

    # Show the un-normalized image, scaled by 1/255 for imshow
    plt.imshow(fav_img_orig / 255)
    plt.show()

In [None]:
def show_home_images(home_index, home_listings_df_local, similarity_score=None):
    """
    Print a listing's name and display its photo, titled with its predicted style.

    home_index is the index label of the listing to show; its photo is read from
        config.LISTINGS_PATH as '<home_index>.jpg'
    home_listings_df_local is the dataframe that contains home listing information;
        it must have a 'predicted_style' column and contain home_index in its index
    similarity_score is the similarity score for the home; when it is given
        (including a score of 0) it is printed next to the listing name
    """
    home_img_name = os.path.sep.join([config.LISTINGS_PATH, home_index + '.jpg'])
    # Remove the directory and the extension properly: str.strip(".jpg") strips a
    # *set of characters* from both ends and would mangle names such as 'garage_1'.
    home_name_only = os.path.splitext(os.path.basename(home_img_name))[0]
    # Compare against None so that a legitimate similarity of 0.0 is still printed.
    if similarity_score is not None:
        print(home_name_only + " Similarity: %.2f" % (similarity_score))
    else:
        print(home_name_only)
    img = image.img_to_array(image.load_img(home_img_name))
    plt.imshow(img / 255)
    # Look the style up by this listing's own label instead of depending on a
    # loop variable that happens to exist in the caller's global scope.
    plt.title(home_listings_df_local.loc[home_index, 'predicted_style'])
    plt.show()

In [None]:
# Using home-style features
# Query vector: the favorite image's embedding, or the stored embedding of a
# fixed row of the listings table.
if not use_favorite_image:
    selected_ind = 76
    selected_home_feature = home_listings_df["home_feature"].iloc[selected_ind]
else:
    selected_home_feature = fav_feature

# Cosine similarity between the query (as a single row) and every listing,
# then listing positions ordered from most to least similar
query_row = np.reshape(selected_home_feature, [1, -1])
listing_matrix = np.vstack(home_listings_df.home_feature)
home_similarities = np.ravel(cosine_similarity(query_row, listing_matrix))
similar_homes_arg = np.ravel(np.flip(np.argsort(home_similarities)))

# Plot the most similar homes. With a favorite image the top-ranked result is
# skipped (presumably it is the favorite itself -- confirm). At least one
# listing is always shown, even if num_listings_to_show is below 1.
first_rank = 1 if use_favorite_image else 0
shown_limit = max(num_listings_to_show, 1)
for ii in similar_homes_arg[first_rank:first_rank + shown_limit]:
    home_index = home_listings_df.index[ii]
    show_home_images(home_index, home_listings_df, home_similarities[ii])

In [None]:
# Show randomly selected homes
# Each position is drawn independently, so the same listing can appear twice.
num_listings = len(similar_homes_arg)
for draw_number in range(num_listings_to_show):
    ii = randint(0, num_listings - 1)
    home_index = home_listings_df.index[ii]
    show_home_images(home_index, home_listings_df)

In [None]:
# Using Resnet Features
# Pick the row of the listings table that acts as the query. `selected_ind` is
# only assigned when use_favorite_image is False, so in favorite-image mode the
# row is resolved from the image's file name instead of raising a NameError.
# NOTE(review): this assumes the favorite image is itself one of the listings
# (index label == file name without extension) -- confirm.
if use_favorite_image:
    favorite_label = os.path.splitext(os.path.basename(favorite_image))[0]
    label_matches = np.flatnonzero(home_listings_df.index == favorite_label)
    if label_matches.size == 0:
        raise KeyError(
            "No listing named %r in home_listings_df; the Resnet comparison "
            "needs a query home that is part of the listings" % favorite_label
        )
    query_ind = int(label_matches[0])
else:
    query_ind = selected_ind
selected_home_feature = home_listings_df["resnet_feature"].iloc[query_ind]

# Find the cosine similarity of the selected home with all homes, then order
# listing positions from most to least similar
home_similarities = np.ravel(cosine_similarity(np.reshape(selected_home_feature, [1, -1]),
                    np.vstack(home_listings_df.resnet_feature)))
similar_homes_arg = np.ravel(np.flip(np.argsort(home_similarities)))

# Plot the images of the most similar homes. As in the original cell, the
# top-ranked result is skipped only in favorite-image mode, and at least one
# listing is shown even if num_listings_to_show is below 1.
first_rank = 1 if use_favorite_image else 0
shown_limit = max(num_listings_to_show, 1)
for ii in similar_homes_arg[first_rank:first_rank + shown_limit]:
    home_index = home_listings_df.index[ii]
    show_home_images(home_index, home_listings_df, home_similarities[ii])