In [1]:
import os

import pandas as pd
import numpy as np
from PIL import Image
from PIL.Image import Image as PillowImage
from ultralytics import YOLO
import torch
import clip
import cv2
from skimage.feature import hog, local_binary_pattern
from skimage import exposure
from sklearn.decomposition import PCA
from sklearn.feature_extraction.text import TfidfVectorizer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style='white')

from home_finder.text_analysis import TfIdfMaker, TfIdfCosineRanker, NeighbourRanker
from home_finder.image_analysis import ObjectsMaker, ObjectDetectionRanker, AI_SimRanker
from home_finder.skeleton import ImageData, RankingOutput, prep_input
from home_finder.ensemble import get_most_relevant, get_top_images

In [2]:
# Load the sample table of property images, report its shape and column
# dtypes, then preview the first rows as the cell's result.
property_df = pd.read_csv('sample_property_images.csv')
print(property_df.shape, property_df.dtypes, sep='\n')
property_df.head()

(484, 4)
id          int64
url        object
image      object
queries    object
dtype: object


Unnamed: 0,id,url,image,queries
0,118290854,https://media.rightmove.co.uk/12k/11130/118290...,<PIL.PngImagePlugin.PngImageFile image mode=RG...,This is a single-story detached house with a p...
1,118290854,https://media.rightmove.co.uk/12k/11130/118290...,<PIL.PngImagePlugin.PngImageFile image mode=RG...,The room features carpeted flooring and an L-s...
2,118290854,https://media.rightmove.co.uk/12k/11130/118290...,<PIL.PngImagePlugin.PngImageFile image mode=RG...,This is an interior space featuring a dining a...
3,118290854,https://media.rightmove.co.uk/12k/11130/118290...,<PIL.PngImagePlugin.PngImageFile image mode=RG...,The room features built-in storage above the b...
4,118290854,https://media.rightmove.co.uk/12k/11130/118290...,<PIL.PngImagePlugin.PngImageFile image mode=RG...,The image shows a spacious lawn with a well-ma...


In [3]:
# Count the non-null image URLs recorded for each property id and list the
# properties with the most images first.
imgs_per_prop = (
    property_df
    .groupby('id')['url']
    .count()
    .reset_index()
    .sort_values(by='url', ascending=False)
)
imgs_per_prop

Unnamed: 0,id,url
5,121777088,34
1,117898208,25
31,130826972,25
10,124780949,24
29,130327244,21
27,130197212,19
25,129995945,18
18,128321342,17
4,121327586,16
2,118290854,16


In [4]:
# Turn the property table into the records that the rankers below take as
# `source_data`. In the recorded run the 484-row table produced 472 records.
img_input = prep_input(df=property_df)
print(len(img_input))
# Preview only the first ten records: echoing the whole collection floods the
# notebook with hundreds of long reprs.
# Assumes prep_input returns a sliceable sequence (the recorded repr is a list).
img_input[:10]

472


[ImageData(url='https://media.rightmove.co.uk/12k/11130/118290854/11130_STA190734_IMG_00_0000.jpeg', description='This is a single-story detached house with a pitched roof and a garage. There is a paved driveway and a landscaped front garden with raised flowerbeds. The house features a bay window and a white front door. A vehicle is parked on the driveway.', path_name='tmp/images/11130_STA190734_IMG_00_0000.jpeg', objects={}, scores={'tf_idf_cosine': 0.0, 'neighbours': 0.0, 'object_detection': 0.0, 'ai_similarity': 0.0}),
 ImageData(url='https://media.rightmove.co.uk/12k/11130/118290854/11130_STA190734_IMG_01_0000.jpeg', description='The room features carpeted flooring and an L-shaped sofa facing a glass-top coffee table. There is wood paneling around the doorway, and decorative plates and artwork adorn the walls. The space includes a dining area with a red table and chairs, and has natural light coming in from windows covered with patterned drapes.', path_name='tmp/images/11130_STA190

In [5]:
# Rank the prepared records against the query with the TF-IDF cosine ranker.
# NOTE(review): the recorded top 'tf_idf_cosine' score is ~1.106, above the
# usual [0, 1] range of a cosine similarity — confirm how the ranker scales it.
text_ranking = TfIdfCosineRanker().rank(query='spacious living area', source_data=img_input)
# Show only the first ten entries instead of echoing every record.
# Assumes the result is a sliceable sequence (the recorded repr is a list).
text_ranking[:10]

[ImageData(url='https://media.rightmove.co.uk/175k/174341/121777088/174341_31388259_IMG_01_0000.jpeg', description='This is an architectural elevation drawing showing a comparison between existing and proposed structures at 2A Royal Road. The left side of the drawing shows adjoining properties for context, while the right side provides a front view of the proposed building development highlighting the additions to the existing structure, depicted with a dashed red line. The design suggests a multi-level residential building with', path_name='tmp/images/174341_31388259_IMG_01_0000.jpeg', objects={}, scores={'tf_idf_cosine': 1.1056009915318377, 'neighbours': 0.0, 'object_detection': 0.0, 'ai_similarity': 0.0}),
 ImageData(url='https://media.rightmove.co.uk/115k/114019/131138348/114019_OLIVE_003267_IMG_06_0000.jpeg', description='This is an image of a bathroom featuring a built-in bathtub with gray tile surround, a pedestal sink, and a toilet. The room has a frosted glass window that allo

In [6]:
# Rank the prepared records against the same query with the neighbour ranker.
# NOTE(review): the recorded output already carries non-zero 'tf_idf_cosine'
# scores, so the rankers presumably write their scores onto the shared
# ImageData objects in img_input — confirm before relying on cell order.
neighbour_ranking = NeighbourRanker().rank(query='spacious living area', source_data=img_input)
# Show only the first ten entries instead of echoing every record.
# Assumes the result is a sliceable sequence (the recorded repr is a list).
neighbour_ranking[:10]

[ImageData(url='https://media.rightmove.co.uk/12k/11130/118290854/11130_STA190734_IMG_00_0000.jpeg', description='This is a single-story detached house with a pitched roof and a garage. There is a paved driveway and a landscaped front garden with raised flowerbeds. The house features a bay window and a white front door. A vehicle is parked on the driveway.', path_name='tmp/images/11130_STA190734_IMG_00_0000.jpeg', objects={}, scores={'tf_idf_cosine': 0.25396660412200867, 'neighbours': 1.0, 'object_detection': 0.0, 'ai_similarity': 0.0}),
 ImageData(url='https://media.rightmove.co.uk/45k/44307/130015976/44307_25628446_IMG_17_0000.jpeg', description='The image shows a garden with a wooden shed on the left, a lawn area, and a fence surrounding the property. There is a brick house in the background, indicating this is a residential area. The garden appears to be well-defined with potential for personalization by the new occupant. A compost bin is visible in the foreground on the right.', p

In [7]:
# Run the object-detection ranker over the prepared records for the same
# query; the result is kept for inspection in the following cell.
objectranking = ObjectDetectionRanker().rank(
    query='spacious living area',
    source_data=img_input,
)

In [9]:
# Display the first ten entries of the object-detection result rather than
# the whole collection.
objectranking[:10]

[ImageData(url='https://media.rightmove.co.uk/12k/11130/118290854/11130_STA190734_IMG_00_0000.jpeg', description='This is a single-story detached house with a pitched roof and a garage. There is a paved driveway and a landscaped front garden with raised flowerbeds. The house features a bay window and a white front door. A vehicle is parked on the driveway.', path_name='tmp/images/11130_STA190734_IMG_00_0000.jpeg', objects={'car': 0.4105965793132782}, scores={'tf_idf_cosine': 0.25396660412200867, 'neighbours': 1.0, 'object_detection': 0.0, 'ai_similarity': 0.0}),
 ImageData(url='https://media.rightmove.co.uk/12k/11130/118290854/11130_STA190734_IMG_01_0000.jpeg', description='The room features carpeted flooring and an L-shaped sofa facing a glass-top coffee table. There is wood paneling around the doorway, and decorative plates and artwork adorn the walls. The space includes a dining area with a red table and chairs, and has natural light coming in from windows covered with patterned dra

In [12]:
# Rank the prepared records against the query with AI_SimRanker.
# FIXME(review): the output recorded for this cell is
# "AttributeError: 'numpy.ndarray' object has no attribute 'index'".
# Presumably something inside AI_SimRanker.rank calls .index() on a NumPy
# array — confirm in home_finder.image_analysis. Until that is fixed this
# cell does not leave a usable ai_sim_ranking behind.
ai_sim_ranking = AI_SimRanker().rank(query='spacious living area', source_data=img_input)
ai_sim_ranking

AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [11]:
# Print the sizes of the first two elements of ai_sim_ranking.
# NOTE(review): the recorded output is "NameError: name 'ai_sim_ranking' is
# not defined", and the execution counts around this cell are out of order —
# re-check this cell after a Restart & Run All once the ranking cell succeeds.
print(len(ai_sim_ranking[0]))
print(len(ai_sim_ranking[1]))

NameError: name 'ai_sim_ranking' is not defined

In [10]:
# Stringify each of the first two elements of ai_sim_ranking and print the
# first five strings of each.
print(list(map(str, ai_sim_ranking[0]))[:5])
print(list(map(str, ai_sim_ranking[1]))[:5])

['im398.png', 'im263.png', 'im74.png', 'im343.png', 'im421.png']
['im215.png', 'im201.png', 'im229.png', 'im407.png', 'im361.png']


In [None]:
# Collect the image files on disk and the text descriptions whose row has a
# matching "im<row position>.png" file, then bundle both for the rankers.
IMAGE_DIR = 'tmp/images'

# List the directory once and reuse the result for both collections.
image_names = os.listdir(IMAGE_DIR)
img_files = [os.path.join(IMAGE_DIR, name) for name in image_names]

# Pair each description with its own row position via enumerate. Looking the
# position up with list.index() returned the first matching row, so duplicate
# descriptions were all checked against the same file, and it rescanned the
# whole column (plus the directory) for every row.
available_names = set(image_names)
text_descs = [
    desc
    for row_pos, desc in enumerate(property_df['queries'])
    if f"im{row_pos}.png" in available_names
]

# NOTE(review): img_files holds every file in the directory, while text_descs
# only keeps rows with a matching im<n>.png, so the two lists are not
# guaranteed to share length or order — confirm what downstream code expects.
print(len(text_descs))
print(len(img_files))
example_query = 'cottage style kitchen'
input_data = {'images': img_files, 'text': text_descs}

In [None]:
# Preview the first ten image paths; the full list has one entry per file in
# the image directory and is too long to echo in full.
input_data['images'][:10]

In [None]:
# Rank the image file paths against the example query with a fresh
# AI_SimRanker; the ranking is the cell's displayed result.
# NOTE(review): an earlier cell passes the prep_input records as source_data,
# whereas this one passes file paths — confirm which form rank() expects.
ai = AI_SimRanker()
ai.rank(
    query=example_query,
    source_data=input_data['images'],
)

In [None]:
# Open a single image from the image directory; as the last expression of the
# cell, the opened image is the cell's displayed result.
Image.open('tmp/images/im199.png')

In [None]:
# Build one ranker of each kind (constructed in the same order as before).
tf, ne, ob, ai = (
    TfIdfCosineRanker(),
    NeighbourRanker(),
    ObjectDetectionRanker(),
    AI_SimRanker(),
)
# NOTE(review): im_rankers (and ob) are created here but the call below only
# passes [ai] — confirm whether the full ranker list was intended.
im_rankers = [tf, ne, ai]

# Ask the ensemble for the five most relevant results for the example query.
get_most_relevant(
    query=example_query,
    input_data=input_data,
    rankers=[ai],
    method='democratic',
    n_to_return=5,
)

In [None]:
# Fit a TF-IDF vocabulary on the image descriptions.
queries = property_df['queries'].tolist()
# max_df=0.75 ignores terms that occur in more than 75% of the queries,
# min_df=5 ignores terms that occur in fewer than 5 queries, and at most
# 1000 features are kept after English stop words are removed.
tf_vectoriser = TfidfVectorizer(
    max_features=1000,
    stop_words='english',
    max_df=0.75,
    min_df=5,
)
X_tfidf = tf_vectoriser.fit_transform(queries)
word_features = list(tf_vectoriser.get_feature_names_out())

# Vocabulary size, number of distinct terms, then the first and last twenty
# feature names.
print(len(word_features))
print(len(set(word_features)))
print(word_features[:20])
print(word_features[-20:])