In [1]:
import pandas as pd
import numpy as np

from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

In [3]:
listings_urls = pd.read_csv('./NewYork/listings_cleaned_extend.csv')
listings_urls = listings_urls[['listing_url']]
listings_urls.reset_index(inplace=True, drop=True)
listings_urls.to_csv('./NewYork/listing_urls.csv', index=False)
listings_urls

Unnamed: 0,listing_url
0,https://www.airbnb.com/rooms/572612125615500056
1,https://www.airbnb.com/rooms/13234457
2,https://www.airbnb.com/rooms/773237631431486588
3,https://www.airbnb.com/rooms/1097125455525075775
4,https://www.airbnb.com/rooms/760086536368275287
...,...
22715,https://www.airbnb.com/rooms/32252006
22716,https://www.airbnb.com/rooms/704255897463350082
22717,https://www.airbnb.com/rooms/943355797386691170
22718,https://www.airbnb.com/rooms/783387491435863626


In [4]:
# merged_df = pd.read_csv('./NewYork/merged_img_vectors.csv')
merged_df = pd.read_csv('./NewYork/full_merged.csv')

In [5]:
vectorizer = TfidfVectorizer(stop_words='english', analyzer='word')
tfidf_matrix = vectorizer.fit_transform(merged_df['description'])

In [6]:
def compare_cosine(feature_vec1, feature_vec2):
    return cosine_similarity(feature_vec1, feature_vec2)

In [19]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [16]:
results = {}
for idx, row in merged_df.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], merged_df['id'][i]) for i in similar_indices]
    results[row['id']] = similar_items[1:]

In [33]:
def recommend(item_id, num):
    name_value = merged_df.loc[merged_df['id'] == item_id, 'name'].values[0]
    print(f'Recommending {str(num)} products similar to {name_value} (id: {item_id})')
    print('---')
    recs = results[item_id][:num]
    for rec in recs:
        rec_name = merged_df.loc[merged_df['id'] == rec[1], 'name'].values[0]
        print(f'\nRecommended: {rec_name} (id: {rec[1]}) \n(score: {str(rec[0])})')

In [34]:
target_listing_index = merged_df.sample()
recommend(item_id = 45690330, num = 5)

Recommending 5 products similar to Home by Sunny & Bay!  Sunset Park & Bayridge! (id: 45690330)
---

Recommended: Room by Sunny & Bay! Sunset Park & Bay Ridge (id: 572612125615500056) 
(score: 0.9221075704867293)

Recommended: Owner’s Private Guest Suite in Bay Ridge (id: 845739414386723045) 
(score: 0.3622873081473927)

Recommended: Bay ridge Shore Road cozy basement apartment (id: 39300260) 
(score: 0.30996498200084605)

Recommended: Sunset Retreat - Oasis near Industry City... (id: 12015378) 
(score: 0.30447215893416457)

Recommended: Near 5th Ave Renovated Studio in Brooklyn (id: 53992289) 
(score: 0.3016073650782212)


# More Criteria

In [7]:
description_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)


In [8]:
def convert_str_to_array_polarity(photo_vector):
    cleaned_string = photo_vector.strip('[]')
    vector_array = np.fromstring(cleaned_string, sep=' ')
    return vector_array
merged_df['polarity'] = merged_df['polarity'].apply(convert_str_to_array_polarity)

  vector_array = np.fromstring(cleaned_string, sep=' ')


In [9]:
polarity = np.array(merged_df['polarity'].tolist())
polarity_similarity = compare_cosine(polarity, polarity)

In [10]:
def convert_str_to_array_img(photo_vector):
    cleaned_string = photo_vector.replace('\n', ' ').strip('[]')
    vector_array = np.fromstring(cleaned_string, sep=' ')
    return vector_array
merged_df['photo_vector'] = merged_df['photo_vector'].apply(convert_str_to_array_img)

In [11]:
photo_vectors = np.array(merged_df['photo_vector'].tolist())
image_similarity = compare_cosine(photo_vectors, photo_vectors)

In [12]:
# description_sim = description_similarity / np.max(description_similarity)
# image_sim = image_similarity / np.max(image_similarity)
# polarity_sim = polarity_similarity / np.max(polarity_similarity)

weights = [0.5, 0.3, 0.2] # change weights
final_similarity = (weights[0] * image_similarity +
                    weights[1] * polarity_similarity +
                    weights[2] * description_similarity)

In [13]:
target_listing_index = merged_df.sample()
def get_recommendations(listing_index, similarity_matrix, k=5):
    similar_indices = np.argsort(similarity_matrix[listing_index])[::-1][1:k+1]
    return similar_indices

recommendations = get_recommendations(target_listing_index.index[0], final_similarity, k=10)

In [14]:
target_listing_index

Unnamed: 0,id,listing_url,name,description,neighborhood_overview,picture_url,host_name,host_since,host_location,host_about,...,reviews_per_month,polarity,comments,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,photo_vector
8143,908179761746967863,https://www.airbnb.com/rooms/908179761746967863,Peaceful Stylish 2+ Bdrm Brooklyn Brownstone,Come stay in a quintessential NYC brownstone w...,The beautiful tree-lined streets of historic B...,https://a0.muscache.com/pictures/e52978c5-9728...,Shelley,2013-03-11,"New York, United States",Having a comfortable place to come home to at ...,...,3.26,[0.9747],['Shelley and Ross were amazing! Loved their ...,4.95,5.0,4.98,5.0,4.72,4.84,"[0.9200153, 0.37697712, 0.014876, 0.01605273, ..."


In [15]:
recommended_listings = merged_df.iloc[recommendations]

In [16]:
recommended_listings

Unnamed: 0,id,listing_url,name,description,neighborhood_overview,picture_url,host_name,host_since,host_location,host_about,...,reviews_per_month,polarity,comments,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,photo_vector
17400,36453417,https://www.airbnb.com/rooms/36453417,Modern 1BR in Vibrant Bedford Stuyvesant Brooklyn,Welcome to our 1-bedroom apartment nestled wit...,Bedford Stuyvesant is one of Brooklyn's most v...,https://a0.muscache.com/pictures/airflow/Hosti...,Angela,2012-01-29,"New York, NY","Mom, wife, entrepreneur. \n\nI was born and ra...",...,1.5,[0.991],['WOW!! Angela’s place is so so so great! It’...,5.0,5.0,4.9,4.95,4.81,4.95,"[0.98031014, 0.18929613, 0.00052682008, 0.0009..."
2325,12973363,https://www.airbnb.com/rooms/12973363,Prospect Park Brownstone,This is Brooklyn charm. A sun-lit brownstone o...,"Cafes, restaurants and bars everywhere! Laundr...",https://a0.muscache.com/pictures/15d7ca43-f7d4...,Paige,2014-10-22,"Oakland, CA","When I travel I seek out great music, museums,...",...,0.45,[0.9877],"[""I must say I'm so happy that I stayed here! ...",4.91,4.89,4.85,4.87,4.83,4.83,"[0.9509225, 0.2735813, 0.06224415, 0.06967939,..."
17507,53687694,https://www.airbnb.com/rooms/53687694,Private bed+bath in luxury apartment!,Experience convenience and privacy with your o...,Safe and vibrant neighborhood just steps away ...,https://a0.muscache.com/pictures/9b8eee5f-8f9b...,Arushi,2015-06-25,"New York, NY",missing,...,0.15,[0.7783],"['Modern, clean apartment in a great location'...",5.0,5.0,4.8,5.0,5.0,5.0,"[0.92789263, 0.31465784, 0.0028524606, 0.01764..."
7140,45480607,https://www.airbnb.com/rooms/45480607,Newly renovated apartment in the heart of Bed-...,Come and get a taste of authentic Brooklyn in ...,missing,https://a0.muscache.com/pictures/f91c497a-be77...,Peter(BROOKE) & Maria,2017-10-05,"New York, United States",missing,...,2.9,[0.9334],['My stay here was amazing ! The host are very...,5.0,4.99,4.99,4.99,4.76,4.93,"[0.9930292, 0.11328306, 0.01798063, 0.01306812..."
5271,50723925,https://www.airbnb.com/rooms/50723925,3 Bedroom Apartment in Brownstone Duplex,3 bedroom garden duplex apartment in a classic...,missing,https://a0.muscache.com/pictures/557b571f-fe3e...,Maria,2015-08-16,"New York, United States",Hello I am an artist from Spain living in New ...,...,1.63,[0.8268],"['Great service and fun stay!', 'The host is a...",4.86,4.92,4.98,4.98,4.62,4.79,"[0.8941598, 0.4100919, 0.05061235, 0.08608343,..."
1252,21434692,https://www.airbnb.com/rooms/21434692,Modern Harlem Hamilton Heights Garden Apartment,Newly-renovated basement/garden-level apartmen...,"Diverse, convenient neighborhood with a range ...",https://a0.muscache.com/pictures/c0257418-15cd...,Sumitra,2017-01-15,"New York, NY",I'm a textile designer and I've lived/worked i...,...,1.12,[0.9879],"[""My husband and I enjoyed our stay at this ne...",4.84,4.77,4.94,4.92,4.75,4.68,"[0.9848799, 0.03662151, 0.01059556, 0.00431432..."
2530,736306604387468764,https://www.airbnb.com/rooms/736306604387468764,Renovated East Village Studio - NYU & Wash Sq ...,"This oversized furnished studio is BRAND NEW, ...",missing,https://a0.muscache.com/pictures/miso/Hosting-...,Jeremy,2020-12-17,"New York, NY",Born and raised on Long Island but living in M...,...,0.19,[0.8934],['Great studio in the heart of the east-villag...,5.0,5.0,5.0,5.0,5.0,4.67,"[0.93042463, 0.34498477, 0.0079997, 0.00624288..."
5044,6758849,https://www.airbnb.com/rooms/6758849,UPPER EAST SIDE 3 BR / 3 Bath APT,"30 day minimum stay required. Spacious 2,500 ...",Exciting Upper East Side of Manhattan is at yo...,https://a0.muscache.com/pictures/01f64677-2792...,Hans,2015-06-09,"New York, NY",Hans is first generation American. His parent...,...,1.01,[0.9363],"[""Hans and Paulette were welcoming, helpful, a...",4.83,4.87,4.95,4.96,4.96,4.79,"[0.86950755, 0.40821317, 0.09098829, 0.0932831..."
3555,21588805,https://www.airbnb.com/rooms/21588805,Sun Drenched Williamsburg 1 BR | Prime Location,Sunny one bedroom in prime Williamsburg on a q...,Williamsburg has it all -- four star restauran...,https://a0.muscache.com/pictures/c8a4d699-d7fa...,Eliza,2015-07-08,"New York, NY",Hi! I live with my husband in Williamsburg and...,...,0.37,[0.9844],"[""Eliza's place is perfect! Looks just like th...",4.9,4.81,4.9,4.9,4.97,4.68,"[0.95537734, 0.16156459, 0.05691298, 0.1020599..."
9478,1131458412663503638,https://www.airbnb.com/rooms/1131458412663503638,"Spacious suite, 10-min to subways",Welcome to your private suite in a landmark br...,missing,https://a0.muscache.com/pictures/hosting/Hosti...,Jiyao,2015-08-20,"New York, NY",missing,...,4.19,[0.9614],['Perfect place for our 2 weeks stay. JoJo was...,5.0,5.0,5.0,5.0,4.5,4.83,"[0.93885773, 0.33896387, 0.03159729, 0.0085690..."
