# Extract image vectors for listings

In [None]:
import cv2
import numpy as np
import pandas as pd
import ollama
import requests

from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
def compare_cosine(feature_vec1, feature_vec2):
    """Return the cosine-similarity matrix between two sets of vectors.

    Thin wrapper around scikit-learn's ``cosine_similarity``; both arguments
    are forwarded unchanged. Callers in this notebook pass arrays reshaped
    with ``reshape(1, -1)`` and read the single score at ``[0][0]``.
    """
    similarity_matrix = cosine_similarity(X=feature_vec1, Y=feature_vec2)
    return similarity_matrix

In [12]:
def get_hsv_histogram(image_url, h_bins=8, s_bins=8, v_bins=8):
    """Build a concatenated, normalized H/S/V color histogram for one image.

    Parameters
    ----------
    image_url : str
        Either an ``http://`` / ``https://`` URL to download, or a path to an
        image file on disk (anything that does not start with an HTTP scheme
        is treated as a local path).
    h_bins, s_bins, v_bins : int
        Number of histogram bins for the hue, saturation and value channels.

    Returns
    -------
    numpy.ndarray or None
        1-D vector of length ``h_bins + s_bins + v_bins`` (hue bins first,
        then saturation, then value). ``None`` is returned when the input is
        missing or the image cannot be fetched or decoded; the error is
        printed rather than raised so a bulk ``apply`` keeps going.
    """
    try:
        # Missing values (None, NaN from pandas, blank strings) are reported
        # explicitly instead of being handed to requests/OpenCV.
        if not isinstance(image_url, str) or not image_url.strip():
            raise ValueError("Missing image URL or path")

        if image_url.lower().startswith(("http://", "https://")):
            # The whole body is needed for decoding, so there is no benefit
            # in a streamed response.
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image_bytes = np.frombuffer(response.content, dtype=np.uint8)
            image = cv2.imdecode(image_bytes, cv2.IMREAD_COLOR)
        else:
            # Local file path, as used by the example cells of this notebook.
            image = cv2.imread(image_url, cv2.IMREAD_COLOR)

        # Both OpenCV readers return None (rather than raising) when the
        # data is not a decodable image.
        if image is None:
            raise ValueError("Failed to decode image")

        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # One independent 1-D histogram per channel. The hue range is
        # [0, 180) and the other two channels use [0, 256).
        h_hist = cv2.calcHist([hsv_image], [0], None, [h_bins], [0, 180])
        s_hist = cv2.calcHist([hsv_image], [1], None, [s_bins], [0, 256])
        v_hist = cv2.calcHist([hsv_image], [2], None, [v_bins], [0, 256])

        # Normalize each channel separately (cv2.normalize defaults) so the
        # vector does not depend on the image resolution.
        h_hist = cv2.normalize(h_hist, h_hist).flatten()
        s_hist = cv2.normalize(s_hist, s_hist).flatten()
        v_hist = cv2.normalize(v_hist, v_hist).flatten()

        return np.concatenate([h_hist, s_hist, v_hist])
    except Exception as e:
        # Deliberately best-effort: one bad image must not abort a run over
        # thousands of listings.
        print(f"Error processing image URL {image_url}: {e}")
        return None

In [14]:
# Compute one HSV histogram per listing photo and persist the result.
# NOTE: this downloads every picture_url; the recorded run below took
# about 5h18m for 21,495 listings.
merged_listings = pd.read_csv('./NewYork/merged_listings_reviews.csv')

# Registers `progress_apply` on pandas objects so the apply shows a tqdm bar.
tqdm.pandas()
merged_listings['photo_vector'] = merged_listings['picture_url'].progress_apply(get_hsv_histogram)

# index=False keeps the row index out of the file; otherwise it comes back as
# a stray "Unnamed: 0" column when the CSV is re-read.
merged_listings.to_csv('./NewYork/merged_img_vectors.csv', index=False)

  1%|          | 257/21495 [03:29<4:08:35,  1.42it/s]

Error processing image URL https://a0.muscache.com/pictures/bb6f9106-4547-47c2-8343-3c17a44e2811.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/bb6f9106-4547-47c2-8343-3c17a44e2811.jpg


  7%|▋         | 1431/21495 [20:04<4:16:57,  1.30it/s] 

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-42405152/original/c9a2653c-727b-4630-ae55-34e1ae3abada.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-42405152/original/c9a2653c-727b-4630-ae55-34e1ae3abada.jpeg


  9%|▉         | 1976/21495 [27:21<4:27:26,  1.22it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-740514885087653391/original/c81f9781-e363-46a5-9c34-59b12b113199.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-740514885087653391/original/c81f9781-e363-46a5-9c34-59b12b113199.jpeg


 11%|█         | 2413/21495 [33:51<3:56:51,  1.34it/s] 

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-41178612/original/4c192096-8f5d-406c-8f4c-bab7b524a0f1.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-41178612/original/4c192096-8f5d-406c-8f4c-bab7b524a0f1.jpeg


 12%|█▏        | 2568/21495 [36:06<4:09:22,  1.26it/s]

Error processing image URL https://a0.muscache.com/pictures/856e4945-cbee-4a1f-b6ec-2f0fbf8ced56.jpg: Failed to decode image


 13%|█▎        | 2706/21495 [38:01<2:57:33,  1.76it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-14440473/original/f7fe7799-aa24-4eca-aadc-450d61f1b909.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-14440473/original/f7fe7799-aa24-4eca-aadc-450d61f1b909.jpeg


 16%|█▌        | 3362/21495 [47:41<2:56:59,  1.71it/s] 

Error processing image URL https://a0.muscache.com/pictures/8fbe4470-c7f4-4114-9870-9dbf18c29baa.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/8fbe4470-c7f4-4114-9870-9dbf18c29baa.jpg


 18%|█▊        | 3833/21495 [54:33<4:31:00,  1.09it/s] 

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-1155943556816365963/original/e37b9220-5cfa-43ed-91e4-be76f3f01552.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-1155943556816365963/original/e37b9220-5cfa-43ed-91e4-be76f3f01552.jpeg


 19%|█▊        | 3979/21495 [56:31<3:32:09,  1.38it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-664475916141315459/original/b0122e9d-fdc4-4053-a6cd-c5f33c89ff1e.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-664475916141315459/original/b0122e9d-fdc4-4053-a6cd-c5f33c89ff1e.jpeg


 21%|██        | 4419/21495 [1:02:55<3:04:58,  1.54it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-1117935917476687376/original/b66109cc-0035-4891-a797-dc72437815f2.png: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-1117935917476687376/original/b66109cc-0035-4891-a797-dc72437815f2.png


 21%|██▏       | 4617/21495 [1:06:04<3:41:40,  1.27it/s] Invalid SOS parameters for sequential JPEG
 22%|██▏       | 4692/21495 [1:07:10<3:05:32,  1.51it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-898131131475931632/original/cc0a950f-35af-4894-a4c1-cd76fc614f23.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-898131131475931632/original/cc0a950f-35af-4894-a4c1-cd76fc614f23.jpeg


 23%|██▎       | 4933/21495 [1:10:47<3:26:15,  1.34it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-882473433648692369/original/e1357fe9-5e6d-4455-9f68-9641c4064bb9.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-882473433648692369/original/e1357fe9-5e6d-4455-9f68-9641c4064bb9.jpeg


 24%|██▍       | 5154/21495 [1:13:40<2:57:39,  1.53it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-859119320795456114/original/152bb949-3062-451d-b998-be422d15248d.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-859119320795456114/original/152bb949-3062-451d-b998-be422d15248d.jpeg


 25%|██▍       | 5368/21495 [1:16:37<2:37:05,  1.71it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-4714854/original/f561c634-1ecd-48d1-9ce7-b734a746660e.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-4714854/original/f561c634-1ecd-48d1-9ce7-b734a746660e.jpeg


 25%|██▌       | 5374/21495 [1:16:42<2:59:22,  1.50it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6NTc1MTIzOTczOTIwMDM2NzAx/original/b903e809-3dbc-4909-91e0-55d6ab5c485f.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6NTc1MTIzOTczOTIwMDM2NzAx/original/b903e809-3dbc-4909-91e0-55d6ab5c485f.jpeg


 25%|██▌       | 5444/21495 [1:17:40<2:51:18,  1.56it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-673874846826602871/original/1426b327-3d54-4010-b105-a99441d3c5b9.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-673874846826602871/original/1426b327-3d54-4010-b105-a99441d3c5b9.jpeg


 27%|██▋       | 5751/21495 [1:22:21<3:16:30,  1.34it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-762241527257255051/original/9b689710-7b2f-4d54-8c78-cbdc1d53c156.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-762241527257255051/original/9b689710-7b2f-4d54-8c78-cbdc1d53c156.jpeg


 29%|██▊       | 6163/21495 [1:28:31<3:59:10,  1.07it/s]Invalid SOS parameters for sequential JPEG
 29%|██▉       | 6341/21495 [1:31:10<3:53:29,  1.08it/s]

Error processing image URL https://a0.muscache.com/pictures/39266333/e2969d2f_original.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/39266333/e2969d2f_original.jpg


 30%|███       | 6495/21495 [1:33:30<2:48:00,  1.49it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-16134738/original/a01f1b41-2152-481d-a03a-4c100167de26.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-16134738/original/a01f1b41-2152-481d-a03a-4c100167de26.jpeg


 32%|███▏      | 6834/21495 [1:38:32<3:21:43,  1.21it/s]Invalid SOS parameters for sequential JPEG
 32%|███▏      | 6852/21495 [1:38:46<2:14:53,  1.81it/s]

Error processing image URL https://a0.muscache.com/pictures/9cf13602-fa27-4622-85d3-b58d907e769a.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/9cf13602-fa27-4622-85d3-b58d907e769a.jpg


 33%|███▎      | 7015/21495 [1:41:04<2:29:22,  1.62it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-24477757/original/0de8460d-2985-402e-a2ac-d0daf278f9d0.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-24477757/original/0de8460d-2985-402e-a2ac-d0daf278f9d0.jpeg


 33%|███▎      | 7151/21495 [1:42:59<2:28:34,  1.61it/s]

Error processing image URL https://a0.muscache.com/pictures/b68d1a44-bd36-4e77-952d-5c440c7ad92c.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/b68d1a44-bd36-4e77-952d-5c440c7ad92c.jpg


 39%|███▉      | 8476/21495 [2:02:11<2:23:38,  1.51it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-45914463/original/1bd9f23b-c656-4468-bfb8-d2eca44600f2.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-45914463/original/1bd9f23b-c656-4468-bfb8-d2eca44600f2.jpeg


 44%|████▍     | 9547/21495 [2:18:05<2:49:05,  1.18it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-18459547/original/a4374e79-b5e6-44c5-b067-03eed9b4947e.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-18459547/original/a4374e79-b5e6-44c5-b067-03eed9b4947e.jpeg


 46%|████▌     | 9864/21495 [2:22:41<1:54:28,  1.69it/s]Invalid SOS parameters for sequential JPEG
 46%|████▌     | 9895/21495 [2:23:09<2:00:33,  1.60it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-1079890729260575946/original/d3189c24-81a3-4bc2-8371-927e0a520f3e.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-1079890729260575946/original/d3189c24-81a3-4bc2-8371-927e0a520f3e.jpeg


 48%|████▊     | 10304/21495 [2:29:09<2:10:26,  1.43it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-50802956/original/09f60f48-c7d3-471e-b83a-7ee23e8fe43f.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-50802956/original/09f60f48-c7d3-471e-b83a-7ee23e8fe43f.jpeg


 48%|████▊     | 10370/21495 [2:30:13<3:29:19,  1.13s/it]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-696616459846609727/original/f881ad80-438b-4a8d-a4d7-ce7b6115f0a0.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-696616459846609727/original/f881ad80-438b-4a8d-a4d7-ce7b6115f0a0.jpeg


 56%|█████▌    | 12048/21495 [2:55:49<2:17:45,  1.14it/s] 

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-51840664/original/54e8cc2b-a8a4-4198-aa96-f03087f99c05.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-51840664/original/54e8cc2b-a8a4-4198-aa96-f03087f99c05.jpeg


 56%|█████▋    | 12134/21495 [2:57:11<2:01:39,  1.28it/s]Invalid SOS parameters for sequential JPEG
 57%|█████▋    | 12266/21495 [2:59:16<1:41:51,  1.51it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-748057680426693490/original/6912e670-df3c-457a-8f5d-0b3ee66bd0a0.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-748057680426693490/original/6912e670-df3c-457a-8f5d-0b3ee66bd0a0.jpeg


 58%|█████▊    | 12470/21495 [3:02:32<1:29:36,  1.68it/s]

Error processing image URL https://a0.muscache.com/pictures/d9be4f5f-94c6-4ebb-bdda-521bc61c9a8e.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/d9be4f5f-94c6-4ebb-bdda-521bc61c9a8e.jpg


 59%|█████▊    | 12626/21495 [3:05:05<4:22:58,  1.78s/it]

Error processing image URL https://a0.muscache.com/pictures/f43dd693-6fbb-456b-8049-e586d2803887.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/f43dd693-6fbb-456b-8049-e586d2803887.jpg


 59%|█████▉    | 12656/21495 [3:05:36<2:05:35,  1.17it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-49754187/original/57a3e980-e231-47a0-92e0-526c6c300d13.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-49754187/original/57a3e980-e231-47a0-92e0-526c6c300d13.jpeg


 59%|█████▉    | 12764/21495 [3:07:25<1:55:37,  1.26it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-905154913221917061/original/6d4344a1-577b-4063-befb-30410f462ac8.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-905154913221917061/original/6d4344a1-577b-4063-befb-30410f462ac8.jpeg


 60%|█████▉    | 12888/21495 [3:09:22<1:18:14,  1.83it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-1136788372396039808/original/5163f3ce-eeb2-42f2-b924-15ae1843aa97.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-1136788372396039808/original/5163f3ce-eeb2-42f2-b924-15ae1843aa97.jpeg


 62%|██████▏   | 13406/21495 [3:17:05<1:35:15,  1.42it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-691684916095150541/original/936e58d4-8ffe-43de-9791-e84ef50baede.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-691684916095150541/original/936e58d4-8ffe-43de-9791-e84ef50baede.jpeg


 62%|██████▏   | 13408/21495 [3:17:07<1:33:04,  1.45it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-763814274826602639/original/9399214b-97ee-4b42-a7f5-bfb3b3dda69a.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-763814274826602639/original/9399214b-97ee-4b42-a7f5-bfb3b3dda69a.jpeg


 64%|██████▎   | 13663/21495 [3:20:49<1:43:44,  1.26it/s]Invalid SOS parameters for sequential JPEG
 64%|██████▎   | 13689/21495 [3:21:10<1:11:59,  1.81it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-1135404412261240153/original/37d4ad25-0815-4b0e-a85d-1192ac6992a6.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-1135404412261240153/original/37d4ad25-0815-4b0e-a85d-1192ac6992a6.jpeg


 64%|██████▎   | 13691/21495 [3:21:12<1:56:36,  1.12it/s]Invalid SOS parameters for sequential JPEG
 64%|██████▎   | 13703/21495 [3:21:22<1:26:37,  1.50it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-45813469/original/6426b34f-0304-4ef4-8836-f630928356e8.png: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-45813469/original/6426b34f-0304-4ef4-8836-f630928356e8.png


 64%|██████▍   | 13762/21495 [3:22:20<1:58:03,  1.09it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-708883217066615089/original/c79f0c8b-9ad5-4c08-ba3c-5a355fc47eff.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-708883217066615089/original/c79f0c8b-9ad5-4c08-ba3c-5a355fc47eff.jpeg


 64%|██████▍   | 13812/21495 [3:23:18<3:06:37,  1.46s/it]Invalid SOS parameters for sequential JPEG
 64%|██████▍   | 13842/21495 [3:23:50<1:36:23,  1.32it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-43103808/original/18024093-903c-4cfb-8b8d-05ef6ca2adac.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-43103808/original/18024093-903c-4cfb-8b8d-05ef6ca2adac.jpeg


 65%|██████▌   | 14070/21495 [3:27:07<2:02:18,  1.01it/s]

Error processing image URL https://a0.muscache.com/pictures/ac35da94-7fc8-408c-aa4b-25e5df9dbac8.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/ac35da94-7fc8-408c-aa4b-25e5df9dbac8.jpg


 66%|██████▋   | 14282/21495 [3:30:27<1:45:25,  1.14it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-969046191401522947/original/17a9f498-2fee-41e0-9a68-53e32fa835a5.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-969046191401522947/original/17a9f498-2fee-41e0-9a68-53e32fa835a5.jpeg


 67%|██████▋   | 14302/21495 [3:30:43<1:26:52,  1.38it/s]Invalid SOS parameters for sequential JPEG
 68%|██████▊   | 14528/21495 [3:34:15<1:53:25,  1.02it/s]Invalid SOS parameters for sequential JPEG
 69%|██████▊   | 14725/21495 [3:37:04<1:17:36,  1.45it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-34136999/original/3064606d-8f3f-4057-914d-f6b57ddcbbaf.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-34136999/original/3064606d-8f3f-4057-914d-f6b57ddcbbaf.jpeg


 69%|██████▊   | 14761/21495 [3:37:36<1:19:04,  1.42it/s]

Error processing image URL https://a0.muscache.com/pictures/a3421aaa-05c7-4761-b76c-e68eae68da84.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/a3421aaa-05c7-4761-b76c-e68eae68da84.jpg


 70%|██████▉   | 14981/21495 [3:41:15<1:08:12,  1.59it/s]

Error processing image URL https://a0.muscache.com/pictures/53908ab7-67d9-4920-a684-05f8b4aee9b2.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/53908ab7-67d9-4920-a684-05f8b4aee9b2.jpg


 72%|███████▏  | 15500/21495 [3:49:51<1:36:50,  1.03it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-23538486/original/1177354f-4b8e-43e0-adb8-778fa88f723a.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-23538486/original/1177354f-4b8e-43e0-adb8-778fa88f723a.jpeg


 74%|███████▎  | 15836/21495 [3:54:54<1:02:37,  1.51it/s]

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-941212639695756797/original/a5c028ac-03f5-4d1d-9e0f-0610586b2fc2.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-941212639695756797/original/a5c028ac-03f5-4d1d-9e0f-0610586b2fc2.jpeg


 76%|███████▌  | 16317/21495 [4:02:03<56:51,  1.52it/s]  

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-42817699/original/bb78bb1f-b4bc-4c43-977c-43ca7c5aeb6b.jpeg: 503 Server Error: Service Unavailable for url: https://a0.muscache.com/pictures/miso/Hosting-42817699/original/bb78bb1f-b4bc-4c43-977c-43ca7c5aeb6b.jpeg


 79%|███████▊  | 16921/21495 [4:10:40<1:18:19,  1.03s/it]Corrupt JPEG data: 1 extraneous bytes before marker 0xd7
 79%|███████▉  | 17062/21495 [4:12:32<48:15,  1.53it/s]  

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-1208247500729460051/original/f4c91b80-fb42-4029-ae96-8cc7aece843c.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-1208247500729460051/original/f4c91b80-fb42-4029-ae96-8cc7aece843c.jpeg


 79%|███████▉  | 17069/21495 [4:12:46<4:16:06,  3.47s/it]

Error processing image URL https://a0.muscache.com/pictures/airflow/Hosting-52417197/original/dbc61a4a-1645-4aa6-8ee5-7e0222fba0fe.jpg: HTTPSConnectionPool(host='a0.muscache.com', port=443): Read timed out. (read timeout=10)


 81%|████████  | 17337/21495 [4:16:55<55:29,  1.25it/s]  

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6OTA0NTg1MzgwMDk5NDAyMzk0/original/828ad34d-5234-4ee8-a7f2-7f26913bc8b5.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6OTA0NTg1MzgwMDk5NDAyMzk0/original/828ad34d-5234-4ee8-a7f2-7f26913bc8b5.jpeg


 83%|████████▎ | 17865/21495 [4:25:18<40:10,  1.51it/s]  

Error processing image URL https://a0.muscache.com/pictures/6f2af625-5895-48a9-9bc7-2d8b3df1d076.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/6f2af625-5895-48a9-9bc7-2d8b3df1d076.jpg


 88%|████████▊ | 18916/21495 [4:40:38<35:22,  1.21it/s]  

Error processing image URL https://a0.muscache.com/pictures/miso/Hosting-642767846378335345/original/7068dfc6-eeb9-461a-84b7-7b367b960009.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/miso/Hosting-642767846378335345/original/7068dfc6-eeb9-461a-84b7-7b367b960009.jpeg


 90%|████████▉ | 19285/21495 [4:46:08<37:47,  1.03s/it]  

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-1005207590507549831/original/8a17a6c8-daef-4b0d-b958-af4e6ee6511d.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-1005207590507549831/original/8a17a6c8-daef-4b0d-b958-af4e6ee6511d.jpeg


 92%|█████████▏| 19725/21495 [4:52:58<21:41,  1.36it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-929862356918731434/original/78ade1b2-7dd6-4590-899d-cf2c2efb3448.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-929862356918731434/original/78ade1b2-7dd6-4590-899d-cf2c2efb3448.jpeg


 92%|█████████▏| 19833/21495 [4:54:41<18:50,  1.47it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-1072089744850191566/original/3993a1c2-01a9-4163-bd93-f9488be1aee0.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-1072089744850191566/original/3993a1c2-01a9-4163-bd93-f9488be1aee0.jpeg


 93%|█████████▎| 19936/21495 [4:56:13<22:17,  1.17it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-30201476/original/cc637142-6675-43b1-9ae0-549b066bb018.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-30201476/original/cc637142-6675-43b1-9ae0-549b066bb018.jpeg


 93%|█████████▎| 19996/21495 [4:57:07<23:37,  1.06it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-920069373723972214/original/639cb3a7-cd55-4e42-bdc4-969b3f49579c.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-920069373723972214/original/639cb3a7-cd55-4e42-bdc4-969b3f49579c.jpeg


 94%|█████████▍| 20152/21495 [4:59:15<14:00,  1.60it/s]

Error processing image URL https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6ODQ5MTQxMjA3OTU3NjEwNzM3/original/8e95c57a-7d14-4eee-99d2-d2b3cc731cbe.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6ODQ5MTQxMjA3OTU3NjEwNzM3/original/8e95c57a-7d14-4eee-99d2-d2b3cc731cbe.jpeg


 94%|█████████▍| 20271/21495 [5:00:54<10:36,  1.92it/s]

Error processing image URL https://a0.muscache.com/pictures/6cf8c11f-1458-42ff-9e27-9d47a336c15f.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/6cf8c11f-1458-42ff-9e27-9d47a336c15f.jpg


 94%|█████████▍| 20276/21495 [5:00:58<15:27,  1.31it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-851923089381776992/original/8ca3c848-c82c-4f1b-a2d5-264dcec5708a.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-851923089381776992/original/8ca3c848-c82c-4f1b-a2d5-264dcec5708a.jpeg


 97%|█████████▋| 20842/21495 [5:09:11<09:31,  1.14it/s]  Invalid SOS parameters for sequential JPEG
 99%|█████████▉| 21249/21495 [5:14:35<02:21,  1.74it/s]

Error processing image URL https://a0.muscache.com/pictures/prohost-api/Hosting-1067275090408807419/original/fee2bd8c-b746-4abb-8e14-154734330d97.jpeg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/prohost-api/Hosting-1067275090408807419/original/fee2bd8c-b746-4abb-8e14-154734330d97.jpeg


100%|█████████▉| 21484/21495 [5:18:10<00:07,  1.42it/s]

Error processing image URL https://a0.muscache.com/pictures/388c8393-4d65-4dda-8f86-a26b78d74b36.jpg: 403 Client Error: Forbidden for url: https://a0.muscache.com/pictures/388c8393-4d65-4dda-8f86-a26b78d74b36.jpg


100%|██████████| 21495/21495 [5:18:27<00:00,  1.12it/s]


In [4]:
# Reload the persisted table. The histogram column comes back from CSV as
# text (a bracketed, whitespace-separated list that may span several lines),
# so parse the first row by hand to check that the round trip works.
merged = pd.read_csv('./NewYork/merged_img_vectors.csv')

vector_string = merged.loc[0, 'photo_vector']
cleaned_string = ' '.join(vector_string.split('\n')).strip('[]')
vector_array = np.fromstring(cleaned_string, sep=' ')

print(vector_array)

[0.99579644 0.05453015 0.00740186 0.01827555 0.01543656 0.00161021
 0.00248193 0.06913844 0.48167908 0.74358845 0.3888742  0.08528185
 0.02424897 0.05904588 0.1697775  0.15383916 0.02413661 0.03813965
 0.18358052 0.3340042  0.36868337 0.77908784 0.32974714 0.03261502]


In [6]:
def convert_str_to_array(photo_vector):
    """Parse a stringified numeric vector back into a NumPy array.

    Parameters
    ----------
    photo_vector : str or numpy.ndarray
        Text such as ``"[0.1 0.2\\n 0.3]"`` (how an array column reads back
        from CSV) or ``"[0.1, 0.2, 0.3]"``. An ``ndarray`` is returned
        unchanged so the conversion can safely be applied twice.

    Returns
    -------
    numpy.ndarray
        1-D float64 array of the parsed values.

    Raises
    ------
    ValueError
        If any token is not a valid number.
    """
    if isinstance(photo_vector, np.ndarray):
        return photo_vector

    # Strip outer whitespace first so the brackets are really at the ends,
    # then split on any whitespace (this also covers embedded newlines).
    tokens = photo_vector.strip().strip('[]').replace(',', ' ').split()

    # Token-wise conversion fails loudly on malformed input instead of
    # silently returning a truncated vector.
    return np.array(tokens, dtype=float)

# Show the raw CSV text for row 0 (`vector_string` is set in the reload cell
# above) to compare against the parsed array.
print(vector_string)

[0.99579644 0.05453015 0.00740186 0.01827555 0.01543656 0.00161021
 0.00248193 0.06913844 0.48167908 0.74358845 0.3888742  0.08528185
 0.02424897 0.05904588 0.1697775  0.15383916 0.02413661 0.03813965
 0.18358052 0.3340042  0.36868337 0.77908784 0.32974714 0.03261502]


In [None]:
# Drop incomplete rows, then turn the stringified histograms back into arrays.
# NOTE(review): dropna() without `subset` removes rows with a NaN in ANY
# column, not only rows whose photo_vector is missing. Confirm this is
# intended, or restrict it with subset=['photo_vector'].
merged = merged.dropna().assign(
    photo_vector=lambda frame: frame['photo_vector'].apply(convert_str_to_array)
)
merged.head()

In [12]:
# Write the filtered table back to the same path it was loaded from,
# overwriting that file. index=False leaves the row index out of the CSV.
# NOTE(review): the array-valued photo_vector column was read back as text
# after the earlier save, so it presumably needs parsing again after this one.
merged.to_csv('./NewYork/merged_img_vectors.csv', index=False)

# Computing example cosine similarity

In [8]:
# Smoke test: compute the histogram for a single remote listing photo.
photo_url = 'https://a0.muscache.com/pictures/011da841-7234-42b9-9865-517a9be34127.jpg'
example_histogram = get_hsv_histogram(photo_url)
print(example_histogram)

[0.9773287  0.05888927 0.05584503 0.09381795 0.1631924  0.01307373
 0.00284551 0.05127763 0.6366191  0.46354893 0.42393783 0.23765714
 0.232824   0.2090717  0.21377032 0.0043123  0.13136593 0.13823447
 0.24698664 0.19147581 0.36979872 0.76164883 0.3202244  0.2157894 ]


In [None]:
# Compare color histograms of three sample photos to see how well cosine
# similarity separates a dissimilar pair from a similar pair.
feature_vector1 = get_hsv_histogram("./airbnb_photos/photo_7.jpg")

feature_vector2 = get_hsv_histogram("./airbnb_photos/photo_0.jpg")
feature_vector3 = get_hsv_histogram("./airbnb_photos/photo_1.jpg")

# get_hsv_histogram reports failure by returning None. Stop with a clear
# message instead of an AttributeError on `.reshape` below.
for _name, _vector in (
    ("feature_vector1", feature_vector1),
    ("feature_vector2", feature_vector2),
    ("feature_vector3", feature_vector3),
):
    if _vector is None:
        raise ValueError(f"get_hsv_histogram returned None for {_name}; cannot compute similarity")

# cosine_similarity works on 2-D inputs, hence the (1, n) reshape; [0][0]
# extracts the single score from the 1x1 result.
print("Cosine Similarity (dissimilar):", compare_cosine(feature_vector1.reshape(1, -1), feature_vector2.reshape(1, -1))[0][0])
print("Cosine Similarity (similar):", compare_cosine(feature_vector3.reshape(1, -1), feature_vector2.reshape(1, -1))[0][0])

Cosine Similarity (dissimilar): 0.80601287
Cosine Similarity (similar): 0.92167395


In [8]:
def calculate_text_similarity(description, model="all-minilm"):
    """Embed a text description with an Ollama embedding model.

    Despite its name, this returns the embedding vector itself, not a
    similarity score; compare two embeddings with ``compare_cosine``.

    Parameters
    ----------
    description : str
        Text to embed.
    model : str, optional
        Name of the Ollama model to use. Defaults to ``"all-minilm"``, the
        model this notebook was originally written against.

    Returns
    -------
    numpy.ndarray
        The ``"embedding"`` field of the Ollama response as a 1-D array.
    """
    desc_vector = ollama.embeddings(model=model, prompt=description)
    return np.array(desc_vector["embedding"])

In [11]:
# Embed the first three listing descriptions and compare neighbouring pairs.
listings = pd.read_csv("./Amsterdam/listings_cleaned.csv").iloc[:10]

descriptions = listings['description']
description_1 = descriptions.iloc[0]
description_2 = descriptions.iloc[1]
description_3 = descriptions.iloc[2]

embedding_1, embedding_2, embedding_3 = (
    calculate_text_similarity(text)
    for text in (description_1, description_2, description_3)
)

# Description 2 is the shared reference for both comparisons.
reference_row = embedding_2.reshape(1, -1)
print("Cosine Similarity:", compare_cosine(embedding_1.reshape(1, -1), reference_row)[0][0])
print("Cosine Similarity:", compare_cosine(embedding_3.reshape(1, -1), reference_row)[0][0])

Cosine Similarity: 0.13983070359820404
Cosine Similarity: 0.11068006771718075
