In [1]:
import pandas as pd
import numpy as np

import re
import string
import nltk

from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

import gensim

In [2]:
vectors = pd.read_csv('nlp_vectors.csv')
vectors.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
1,-0.020985,0.07972,0.014596,0.039373,-0.042453,0.00799,0.044106,-0.10973,0.002894,0.106301,...,-0.139549,-0.022568,-0.140958,-0.000347,-0.032293,0.057663,0.041021,-0.022186,0.039479,0.017561
2,-0.019035,0.029907,0.059304,0.10355,0.005379,0.023773,0.008163,-0.111519,0.104652,0.082924,...,-0.104172,0.053316,-0.093506,0.020936,0.036129,-0.052326,-0.004178,-0.034481,0.027336,-0.041651
3,0.017837,0.03663,-0.004139,0.061884,-0.056658,0.002148,0.03781,-0.086593,0.089868,0.102893,...,-0.061085,0.071887,-0.08144,0.037477,-0.026248,0.00987,0.001176,-0.022683,0.005633,-0.028478
4,0.013963,0.004875,0.019757,0.093845,-0.042801,-0.014613,0.018288,-0.080157,0.095798,0.067508,...,-0.061187,0.057206,-0.080036,0.038635,-0.006875,0.01768,-0.004359,-0.033175,0.026842,-0.05002
5,-0.00306,0.020569,0.042245,0.044713,-0.046801,0.011214,0.02657,-0.08071,0.059676,0.059297,...,-0.110012,0.028893,-0.101961,0.053882,-0.0023,0.041499,-0.004092,-0.040666,0.030417,-0.018074


In [3]:
input_text = 'I would like a luxurious hotel that is clean and quiet. It is important to me that the staff is friendly and attentive.'

In [4]:
# Text-cleaning helpers applied to the free-text request before tokenizing.
# All patterns are raw strings so regex escapes reach `re` unchanged
# (non-raw '\w' / '\d' are invalid string escapes and raise warnings).

def remove_n(x):
    # Replace the two-character sequence backslash + 'n' with a space.
    # The previous pattern '\\n' was interpreted by `re` as a real newline,
    # which duplicated remove_singlen and never matched the literal form.
    return re.sub(r'\\n', ' ', x)


def remove_singlen(x):
    # Replace real newline characters with a space.
    return re.sub(r'\n', ' ', x)


def alphanumeric(x):
    # Replace every token that contains at least one digit with a space.
    return re.sub(r'\w*\d\w*', ' ', x)


def punc_lower(x):
    # Lower-case the text and replace each ASCII punctuation mark with a space.
    return re.sub('[%s]' % re.escape(string.punctuation), ' ', x.lower())


# Order matters: literal backslash-n pairs must be handled before punc_lower
# strips the backslash as punctuation.
input_text1 = remove_n(input_text)
input_text1 = remove_singlen(input_text1)
input_text1 = alphanumeric(input_text1)
input_text1 = punc_lower(input_text1)
input_text1

'i would like a luxurious hotel that is clean and quiet  it is important to me that the staff is friendly and attentive '

In [5]:
# One regex character class covering the code-point ranges to strip;
# the trailing "+" lets a run of consecutive matches be removed in one go.
emoji_pattern = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(x):
    """Return `x` with every match of `emoji_pattern` deleted."""
    return emoji_pattern.sub(r'', x)


input_text1 = remove_emoji(input_text1)

In [6]:
def w2v_tokenizer(document):
    """Lower-case `document`, split it on whitespace and drop stop words.

    Returns the remaining tokens as a list, in their original order
    (duplicates are kept).
    """
    ignored = frozenset('for a an it its very of the and to in'.split())
    kept = []
    for token in document.lower().split():
        if token in ignored:
            continue
        kept.append(token)
    return kept

In [7]:
tokenized = w2v_tokenizer(input_text1)

In [8]:
tokenized

['i',
 'would',
 'like',
 'luxurious',
 'hotel',
 'that',
 'is',
 'clean',
 'quiet',
 'is',
 'important',
 'me',
 'that',
 'staff',
 'is',
 'friendly',
 'attentive']

In [9]:
import os

# Location of the pretrained Google News word2vec binary.
# Set the GOOGLE_W2V_PATH environment variable to point at a local copy;
# the original author's path is kept only as the fallback default so the
# notebook is not tied to one machine's home directory.
google_vec_file = os.environ.get(
    'GOOGLE_W2V_PATH',
    '/Users/lindsayread/Downloads/GoogleNews-vectors-negative300.bin',
)

In [10]:
# Load the word2vec vectors stored in binary format at `google_vec_file`
# into a gensim KeyedVectors object; later cells look words up as model[word].
model = gensim.models.KeyedVectors.load_word2vec_format(google_vec_file, binary=True)

In [11]:
def viable_words(words, keyed_vectors=None):
    """Return the words that have an embedding, in their original order.

    Parameters
    ----------
    words : iterable of str
        Candidate tokens.
    keyed_vectors : mapping-like, optional
        Object indexed as ``keyed_vectors[word]`` that raises ``KeyError``
        for unknown words. Defaults to the notebook-level ``model``.

    Returns
    -------
    list of str
        The tokens found in the vocabulary; duplicates are kept.
    """
    lookup = model if keyed_vectors is None else keyed_vectors
    found = []
    for word in words:
        try:
            lookup[word]
        except KeyError:
            # Out-of-vocabulary word: skip it. Any other exception is a real
            # problem and is allowed to propagate instead of being swallowed.
            continue
        found.append(word)
    return found

In [12]:
viables = viable_words(tokenized)

In [13]:
viables

['i',
 'would',
 'like',
 'luxurious',
 'hotel',
 'that',
 'is',
 'clean',
 'quiet',
 'is',
 'important',
 'me',
 'that',
 'staff',
 'is',
 'friendly',
 'attentive']

In [14]:
def mapped_vectors(viable_words, keyed_vectors=None):
    """Average the embeddings of `viable_words` into a single-row DataFrame.

    Parameters
    ----------
    viable_words : iterable of str
        Words that are all present in the embedding vocabulary.
    keyed_vectors : mapping-like, optional
        Object indexed as ``keyed_vectors[word]`` returning one vector per
        word. Defaults to the notebook-level ``model``.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with one column per embedding dimension, holding
        the column-wise mean of the word vectors.

    Raises
    ------
    ValueError
        If `viable_words` is empty, since there is nothing to average.
    KeyError
        If a word is missing from the vocabulary.
    """
    lookup = model if keyed_vectors is None else keyed_vectors
    # Collect every vector first and build the frame once; concatenating
    # one-row frames inside the loop copied the whole frame on each step.
    rows = [lookup[word] for word in viable_words]
    if not rows:
        raise ValueError('mapped_vectors needs at least one word to average')
    return pd.DataFrame(rows).mean().to_frame().T

In [15]:
text_vector = mapped_vectors(viables)
text_vector

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
0,0.020325,0.056311,0.057435,0.05738,-0.103789,0.054056,0.043453,-0.076567,0.08164,0.078714,...,-0.122852,0.021111,-0.06678,0.050753,-0.010164,0.056668,-0.012494,-0.017348,0.039028,0.018817


In [16]:
text_vector.columns = vectors.columns

In [17]:
vectors.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
1,-0.020985,0.07972,0.014596,0.039373,-0.042453,0.00799,0.044106,-0.10973,0.002894,0.106301,...,-0.139549,-0.022568,-0.140958,-0.000347,-0.032293,0.057663,0.041021,-0.022186,0.039479,0.017561
2,-0.019035,0.029907,0.059304,0.10355,0.005379,0.023773,0.008163,-0.111519,0.104652,0.082924,...,-0.104172,0.053316,-0.093506,0.020936,0.036129,-0.052326,-0.004178,-0.034481,0.027336,-0.041651
3,0.017837,0.03663,-0.004139,0.061884,-0.056658,0.002148,0.03781,-0.086593,0.089868,0.102893,...,-0.061085,0.071887,-0.08144,0.037477,-0.026248,0.00987,0.001176,-0.022683,0.005633,-0.028478
4,0.013963,0.004875,0.019757,0.093845,-0.042801,-0.014613,0.018288,-0.080157,0.095798,0.067508,...,-0.061187,0.057206,-0.080036,0.038635,-0.006875,0.01768,-0.004359,-0.033175,0.026842,-0.05002
5,-0.00306,0.020569,0.042245,0.044713,-0.046801,0.011214,0.02657,-0.08071,0.059676,0.059297,...,-0.110012,0.028893,-0.101961,0.053882,-0.0023,0.041499,-0.004092,-0.040666,0.030417,-0.018074


In [18]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat with its default ignore_index=False keeps each frame's own row
# labels, exactly as append did, so the input-text row stays last.
vectors_all = pd.concat([vectors, text_vector])

In [19]:
vectors_all.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
326,0.031462,0.045366,0.021865,0.096295,-0.054402,-0.004726,0.026638,-0.063134,0.036097,0.05859,...,-0.064844,0.045405,-0.084736,0.050469,-0.01666,0.014252,-0.00632,-0.035374,0.054294,-0.022848
327,-0.029009,0.031826,0.042055,0.063677,-0.046213,-0.030179,0.058998,-0.075694,0.05592,0.084873,...,-0.051368,0.025235,-0.08553,0.051253,-0.005745,-0.037851,0.050975,-0.062034,0.011584,-0.012479
328,-0.028351,0.062371,0.050773,0.069491,-0.044939,-0.024586,0.04051,-0.126087,0.084763,0.052136,...,-0.055389,0.019512,-0.071179,0.039368,-0.036664,-0.087397,-0.031105,-0.027344,0.000923,-0.018604
329,-0.00751,0.038378,0.022828,0.082442,-0.036694,-0.014666,0.067579,-0.066124,0.043483,0.057335,...,-0.094858,0.02564,-0.094372,0.017017,-0.00477,0.025147,0.033896,-0.076249,0.072913,-0.021889
0,0.020325,0.056311,0.057435,0.05738,-0.103789,0.054056,0.043453,-0.076567,0.08164,0.078714,...,-0.122852,0.021111,-0.06678,0.050753,-0.010164,0.056668,-0.012494,-0.017348,0.039028,0.018817


In [57]:
similarity_matrix = pd.DataFrame(cosine_similarity(vectors_all))

In [58]:
similarity_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,320,321,322,323,324,325,326,327,328,329
0,1.000000,0.590262,0.760175,0.696436,0.839844,0.773283,0.684834,0.781843,0.716060,0.804452,...,0.686826,0.646678,0.777756,0.735761,0.768725,0.764621,0.746810,0.740613,0.744490,0.763020
1,0.590262,1.000000,0.725514,0.711866,0.717221,0.695812,0.684057,0.742357,0.700342,0.704514,...,0.752776,0.673115,0.710607,0.765505,0.705583,0.721169,0.747036,0.661576,0.707096,0.716270
2,0.760175,0.725514,1.000000,0.840045,0.892312,0.841961,0.842499,0.890320,0.807070,0.880888,...,0.763239,0.802634,0.811318,0.838234,0.814166,0.840353,0.756447,0.741766,0.771834,0.799795
3,0.696436,0.711866,0.840045,1.000000,0.860659,0.827249,0.897218,0.907629,0.838324,0.816952,...,0.819810,0.914822,0.819653,0.882776,0.854925,0.933905,0.734911,0.719738,0.796822,0.785435
4,0.839844,0.717221,0.892312,0.860659,1.000000,0.848623,0.843288,0.914028,0.861967,0.892854,...,0.827888,0.812050,0.823108,0.870822,0.863316,0.881807,0.818916,0.763138,0.833014,0.858947
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,0.764621,0.721169,0.840353,0.933905,0.881807,0.890402,0.867587,0.910343,0.807560,0.852888,...,0.840407,0.918340,0.863244,0.893900,0.904188,1.000000,0.776059,0.788969,0.818146,0.825975
326,0.746810,0.747036,0.756447,0.734911,0.818916,0.803915,0.676853,0.818269,0.805240,0.784602,...,0.893005,0.704646,0.876438,0.846843,0.843957,0.776059,1.000000,0.814182,0.862721,0.764525
327,0.740613,0.661576,0.741766,0.719738,0.763138,0.808399,0.692630,0.808448,0.711916,0.788065,...,0.835060,0.719060,0.864359,0.820170,0.830970,0.788969,0.814182,1.000000,0.796572,0.736404
328,0.744490,0.707096,0.771834,0.796822,0.833014,0.832915,0.736388,0.845768,0.816832,0.779745,...,0.859948,0.768447,0.846381,0.841748,0.841292,0.818146,0.862721,0.796572,1.000000,0.753779


In [60]:
similarity_matrix[329].nlargest(4)

329    1.000000
166    0.860651
4      0.858947
263    0.852931
Name: 329, dtype: float64

In [62]:
# The input text is the last row appended to vectors_all, so the last column
# holds every row's similarity to it. Drop the final entry, which is the text
# compared with itself. Positional selection replaces the hard-coded label
# 329, which was only correct for one specific number of rows.
text_similarity = similarity_matrix.iloc[:-1, -1]

In [63]:
text_similarity.shape

(329,)

In [64]:
text_similarity.nlargest(3)

166    0.860651
4      0.858947
263    0.852931
Name: 329, dtype: float64

In [65]:
text_similarity.nsmallest(3)

307    0.557917
152    0.563084
235    0.600855
Name: 329, dtype: float64

In [139]:
pd.DataFrame(text_similarity)

Unnamed: 0,329
0,0.76302
1,0.71627
2,0.799795
3,0.785435
4,0.858947
...,...
325,0.825975
326,0.764525
327,0.736404
328,0.753779


In [22]:
all_info = pd.read_csv('all_info_cleaned.csv')
all_info.tail()

Unnamed: 0,business_status,geometry,name_x,place_id,rating_x,user_ratings_total_x,vicinity,international_phone_number,reviews,website,...,top_3_shopping,num_tourist_attract,tourist_mean_rating,top_3_tourist_attract,num_casinos,casinos_mean_rating,top_casinos,num_supermarkets,supermarkets_mean_rating,top_3_supermarkets
324,OPERATIONAL,"{'lat': 20.6778627, 'lng': -105.2852463}",Grand Luxxe Tower 1 at Vidanta Nuevo Vallarta,ChIJ4SJARyNEIYQRx5laF9dUiN0,4.8,371.0,"Boulevard Riviera Nayarit 254, Nuevo Vallarta",+52 322 226 4000,"[{'author_name': 'Diane Tillman', 'author_url'...",,...,"['La Plaza, Vidanta Nuevo Vallarta']",1,4.8,['Lake Show at Vidanta Nuevo Vallarta'],0,,No Nearby Casinos,0,,No Nearby Supermarkets
325,OPERATIONAL,"{'lat': 20.6851287, 'lng': -105.287574}",Grupo Vidanta Headquarters,ChIJK_fqDKNBIYQRU-pYVJ--ZNI,3.8,29.0,"Boulevard Riviera Nayarit #254, Nuevo Vallarta",+52 322 226 4000,"[{'author_name': 'Elsy B', 'author_url': 'http...",https://www.vidanta.com/,...,"['La Plaza, Vidanta Nuevo Vallarta' 'Fishing t...",3,4.4,['Lake Show at Vidanta Nuevo Vallarta' 'Islas ...,0,,No Nearby Casinos,1,,['Market / Tianguis Artesanal Quiosco.']
326,OPERATIONAL,"{'lat': 20.6828908, 'lng': -105.2788986}",Grand Luxxe Tower 5,ChIJI9D-5CVEIYQRyClZqRFcuoM,4.8,437.0,,,"[{'author_name': 'Mary Mackey', 'author_url': ...",https://www.vidanta.com/web/nuevo-vallarta,...,"['Taller SS' 'La Plaza, Vidanta Nuevo Vallarta']",1,4.8,['Lake Show at Vidanta Nuevo Vallarta'],0,,No Nearby Casinos,0,,No Nearby Supermarkets
327,OPERATIONAL,"{'lat': 20.6769383, 'lng': -105.285772}",Grand Luxxe Punta Tower at Vidanta Nuevo Vallarta,ChIJ7zhNNCNEIYQRDyRrWYr8hLk,4.9,63.0,"Boulevard Riviera Nayarit #254, Nuevo Vallarta",+52 322 226 4000,"[{'author_name': 'John Bittle', 'author_url': ...",https://www.vidanta.com/en/web/nuevo-vallarta/...,...,"['La Plaza, Vidanta Nuevo Vallarta']",1,4.8,['Lake Show at Vidanta Nuevo Vallarta'],0,,No Nearby Casinos,0,,No Nearby Supermarkets
328,OPERATIONAL,"{'lat': 20.6843008, 'lng': -105.2780858}",Torre 6 The Grand Luxxe,ChIJo9we6EJFIYQRx9y4RoVO4Bs,4.6,15.0,,,"[{'author_name': 'Dana Nyborg', 'author_url': ...",,...,['Taller SS' 'Mini Super Ñoño'],1,4.8,['Lake Show at Vidanta Nuevo Vallarta'],0,,No Nearby Casinos,0,,No Nearby Supermarkets


In [28]:
all_info.describe()

Unnamed: 0,rating_x,user_ratings_total_x,lat,lng,w2vVector,num_restaurants,rest_mean_rating,num_bars,bars_mean_rating,num_art_galleries,...,num_gyms,gyms_mean_rating,num_shopping,shopping_mean_rating,num_tourist_attract,tourist_mean_rating,num_casinos,casinos_mean_rating,num_supermarkets,supermarkets_mean_rating
count,329.0,329.0,329.0,329.0,329.0,329.0,315.0,329.0,285.0,329.0,...,329.0,268.0,329.0,268.0,329.0,277.0,329.0,0.0,329.0,251.0
mean,4.464438,432.522796,20.620515,-105.24161,-0.002404,48.027356,4.447123,30.130699,4.459019,16.480243,...,9.395137,4.569127,7.191489,4.605138,9.322188,4.589235,0.419453,,5.103343,4.399347
std,0.354044,917.941327,0.034526,0.016557,0.001381,21.221592,0.107048,23.802502,0.121078,21.554251,...,6.602067,0.224651,5.174673,0.209836,12.085297,0.171836,0.81172,,4.176913,0.21075
min,3.6,1.0,20.532667,-105.291951,-0.010857,0.0,4.1,0.0,3.975,0.0,...,0.0,3.7,0.0,3.95,0.0,4.0,0.0,,0.0,3.85
25%,4.2,12.0,20.60015,-105.246627,-0.003113,48.0,4.393998,9.0,4.40625,0.0,...,3.0,4.442857,2.0,4.5,1.0,4.5,0.0,,1.0,4.3
50%,4.5,54.0,20.614356,-105.236168,-0.002124,60.0,4.461818,21.0,4.479661,2.0,...,10.0,4.628571,8.0,4.655556,2.0,4.655556,0.0,,4.0,4.39
75%,4.7,308.0,20.643852,-105.231399,-0.001469,60.0,4.526,60.0,4.525,36.0,...,13.0,4.7,12.0,4.7375,15.0,4.7,1.0,,9.0,4.575
max,5.0,6216.0,20.693907,-105.21772,0.001456,60.0,4.714,60.0,4.825,60.0,...,27.0,5.0,19.0,5.0,39.0,4.9,5.0,,14.0,5.0


In [31]:
oasis = all_info[all_info['name_x']=='Oasis Hostel']
oasis['rest_mean_rating']

236    4.714
Name: rest_mean_rating, dtype: float64

In [79]:
numerical_cols = pd.read_csv('numerical_cols.csv')
numerical_cols.head()

Unnamed: 0,rating_x,user_ratings_total_x,num_restaurants,rest_mean_rating,num_bars,bars_mean_rating,num_art_galleries,art_gallery_mean_rating,num_gyms,gyms_mean_rating,num_shopping,shopping_mean_rating,num_tourist_attract,tourist_mean_rating,num_casinos,num_supermarkets,supermarkets_mean_rating
0,4.6,8.0,48,4.594118,4,4.5,0,4.760621,3,4.466667,7,4.42,2,4.65,0,2,3.85
1,5.0,1.0,38,4.523077,3,4.5,0,4.760621,1,4.3,4,4.3,0,4.589235,0,2,3.85
2,4.0,54.0,48,4.594118,4,4.5,0,4.760621,3,4.466667,6,4.325,2,4.65,0,2,3.85
3,3.9,11.0,60,4.586364,25,4.386364,4,4.8,25,4.678947,5,4.75,0,4.589235,0,7,4.775
4,4.8,11.0,60,4.586667,17,4.32,4,4.8,19,4.653333,4,5.0,0,4.589235,0,6,4.775


## CUSTOMER INPUT STARTS HERE

In [81]:
qual_only = numerical_cols[['rating_x','rest_mean_rating','bars_mean_rating','art_gallery_mean_rating','gyms_mean_rating']]
qual_only.head()

Unnamed: 0,rating_x,rest_mean_rating,bars_mean_rating,art_gallery_mean_rating,gyms_mean_rating
0,4.6,4.594118,4.5,4.760621,4.466667
1,5.0,4.523077,4.5,4.760621,4.3
2,4.0,4.594118,4.5,4.760621,4.466667
3,3.9,4.586364,4.386364,4.8,4.678947
4,4.8,4.586667,4.32,4.8,4.653333


In [84]:
# Ratings the customer asks for, keyed by the qual_only column they map to.
input_fields = {'rating_x':5.0, 'rest_mean_rating':5.0, 'bars_mean_rating':5.0,
                'art_gallery_mean_rating':5.0, 'gyms_mean_rating':5.0}

# DataFrame.append was removed in pandas 2.0: wrap the dict in a one-row
# frame and concatenate it as the final row, renumbering the index.
with_input = pd.concat([qual_only, pd.DataFrame([input_fields])], ignore_index=True)
with_input

Unnamed: 0,rating_x,rest_mean_rating,bars_mean_rating,art_gallery_mean_rating,gyms_mean_rating
0,4.6,4.594118,4.500000,4.760621,4.466667
1,5.0,4.523077,4.500000,4.760621,4.300000
2,4.0,4.594118,4.500000,4.760621,4.466667
3,3.9,4.586364,4.386364,4.800000,4.678947
4,4.8,4.586667,4.320000,4.800000,4.653333
...,...,...,...,...,...
325,3.8,4.589655,4.550000,4.760621,4.569127
326,4.8,4.500000,4.459019,4.760621,4.569127
327,4.9,4.562500,4.750000,4.760621,4.700000
328,4.6,4.350000,4.459019,4.760621,4.569127


In [71]:
# numerical_cols['nlp_info'] = pd.DataFrame(text_similarity)
# numerical_cols

Unnamed: 0,rating_x,user_ratings_total_x,num_restaurants,rest_mean_rating,num_bars,bars_mean_rating,num_art_galleries,art_gallery_mean_rating,num_gyms,gyms_mean_rating,num_shopping,shopping_mean_rating,num_tourist_attract,tourist_mean_rating,num_casinos,num_supermarkets,supermarkets_mean_rating,nlp_info
0,4.6,8.0,48,4.594118,4,4.500000,0,4.760621,3,4.466667,7,4.420,2,4.650000,0,2,3.850000,0.763020
1,5.0,1.0,38,4.523077,3,4.500000,0,4.760621,1,4.300000,4,4.300,0,4.589235,0,2,3.850000,0.716270
2,4.0,54.0,48,4.594118,4,4.500000,0,4.760621,3,4.466667,6,4.325,2,4.650000,0,2,3.850000,0.799795
3,3.9,11.0,60,4.586364,25,4.386364,4,4.800000,25,4.678947,5,4.750,0,4.589235,0,7,4.775000,0.785435
4,4.8,11.0,60,4.586667,17,4.320000,4,4.800000,19,4.653333,4,5.000,0,4.589235,0,6,4.775000,0.858947
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,4.8,371.0,24,4.552941,4,4.750000,0,4.760621,3,4.700000,1,4.600,1,4.800000,0,0,4.399347,0.760367
325,3.8,29.0,34,4.589655,6,4.550000,0,4.760621,2,4.569127,2,4.600,3,4.400000,0,1,4.399347,0.825975
326,4.8,437.0,14,4.500000,1,4.459019,0,4.760621,1,4.569127,2,4.700,1,4.800000,0,0,4.399347,0.764525
327,4.9,63.0,23,4.562500,4,4.750000,0,4.760621,3,4.700000,1,4.600,1,4.800000,0,0,4.399347,0.736404


In [73]:
from sklearn.preprocessing import StandardScaler

In [85]:
scaler = StandardScaler()

In [86]:
scaled = scaler.fit_transform(with_input)

In [87]:
scaled_df = pd.DataFrame(scaled)
scaled_df

Unnamed: 0,0,1,2,3,4
0,0.378151,1.336227,0.338590,-0.006870,-0.510017
1,1.507473,0.682997,0.338590,-0.006870,-1.329199
2,-1.315830,1.336227,0.338590,-0.006870,-0.510017
3,-1.598160,1.264928,-0.639417,0.366069,0.533362
4,0.942812,1.267714,-1.210573,0.366069,0.407466
...,...,...,...,...,...
325,-1.880491,1.295194,0.768913,-0.006870,-0.006418
326,0.942812,0.470802,-0.014109,-0.006870,-0.006418
327,1.225142,1.045498,2.490204,-0.006870,0.636837
328,0.378151,-0.908470,-0.014109,-0.006870,-0.006418


In [88]:
similarity_matrix = pd.DataFrame(cosine_similarity(scaled_df))

In [89]:
similarity_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,320,321,322,323,324,325,326,327,328,329
0,1.000000,0.697463,0.555359,0.175610,0.456518,0.336705,0.228130,0.208741,0.512270,0.408491,...,0.439321,0.610679,0.507891,0.612259,0.489448,0.350933,0.615273,0.516720,-0.717959,0.670560
1,0.697463,1.000000,-0.065638,-0.515512,0.300428,-0.306829,-0.401269,-0.371171,0.468550,0.104934,...,0.259106,0.342919,0.524017,0.284198,0.332345,-0.324629,0.770962,0.392275,-0.022042,0.271404
2,0.555359,-0.065638,1.000000,0.749963,-0.040969,0.490852,0.773744,0.877297,0.154728,0.273838,...,0.133568,0.398239,-0.308065,0.054771,0.097416,0.940179,-0.294796,0.050695,-0.882275,0.347454
3,0.175610,-0.515512,0.749963,1.000000,0.265264,0.516620,0.975944,0.891429,-0.351991,-0.036915,...,-0.207767,-0.067718,-0.309014,0.185670,-0.239526,0.770994,-0.386059,-0.279263,-0.796870,0.174146
4,0.456518,0.300428,-0.040969,0.265264,1.000000,0.054490,0.305350,0.012187,-0.288505,-0.247546,...,-0.193548,-0.216863,0.596625,0.772772,-0.109350,-0.214519,0.688357,-0.044257,-0.385496,0.246744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,0.350933,-0.324629,0.940179,0.770994,-0.214519,0.669650,0.727049,0.829475,0.205332,0.427229,...,0.256877,0.457408,-0.345800,0.014494,0.197193,1.000000,-0.462288,0.131489,-0.800639,0.392575
326,0.615273,0.770962,-0.294796,-0.386059,0.688357,0.001033,-0.360695,-0.532515,0.342839,0.176078,...,0.329883,0.259877,0.916171,0.761155,0.425442,-0.462288,1.000000,0.503018,-0.068348,0.458343
327,0.516720,0.392275,0.050695,-0.279263,-0.044257,0.615480,-0.398074,-0.402789,0.937356,0.930535,...,0.981726,0.918081,0.699639,0.540218,0.996033,0.131489,0.503018,1.000000,-0.176093,0.859328
328,-0.717959,-0.022042,-0.882275,-0.796870,-0.385496,-0.636854,-0.797243,-0.750060,-0.123343,-0.301850,...,-0.192216,-0.388228,-0.090402,-0.498952,-0.193586,-0.800639,-0.068348,-0.176093,1.000000,-0.545849


In [90]:
# The customer's input is the last row of the scaled frame, so the last
# column holds every row's similarity to it. Drop the final entry, which is
# the input compared with itself. Positional selection replaces the
# hard-coded label 329, which was only correct for one specific row count.
input_row = similarity_matrix.iloc[:-1, -1]

In [96]:
input_row_df = pd.DataFrame(input_row)
input_row_df

Unnamed: 0,329
0,0.670560
1,0.271404
2,0.347454
3,0.174146
4,0.246744
...,...
324,0.859605
325,0.392575
326,0.458343
327,0.859328


In [98]:
input_row_df['nlp_portion'] = text_similarity

In [99]:
input_row_df.head()

Unnamed: 0,329,nlp_portion
0,0.67056,0.76302
1,0.271404,0.71627
2,0.347454,0.799795
3,0.174146,0.785435
4,0.246744,0.858947


In [100]:
input_row_df.columns

Index([329, 'nlp_portion'], dtype='object')

In [101]:
input_row_df['mean_similarity'] = (input_row_df[329] + input_row_df['nlp_portion'])/2

In [102]:
input_row_df.head()

Unnamed: 0,329,nlp_portion,mean_similarity
0,0.67056,0.76302,0.71679
1,0.271404,0.71627,0.493837
2,0.347454,0.799795,0.573624
3,0.174146,0.785435,0.479791
4,0.246744,0.858947,0.552846


In [106]:
input_row_df.dtypes

329                float64
nlp_portion         object
mean_similarity     object
dtype: object

In [108]:
# The dtypes output shows mean_similarity stored as object, so cast it to
# float before it is ranked with nlargest.
input_row_df['mean_similarity'] = input_row_df['mean_similarity'].astype(float)

In [110]:
top3hotels = input_row_df.nlargest(3, 'mean_similarity')

In [115]:
idx_numbers = list(top3hotels.index.values)
idx_numbers

[236, 311, 9]

In [122]:
# idx_numbers holds the index values of the three top-scoring rows. .iloc
# reads them as row positions, so this presumably relies on all_info being
# in the same row order as the similarity scores — TODO confirm.
top3df = all_info.iloc[idx_numbers,:]

In [123]:
top3df.columns

Index(['business_status', 'geometry', 'name_x', 'place_id', 'rating_x',
       'user_ratings_total_x', 'vicinity', 'international_phone_number',
       'reviews', 'website', 'all_ratings', 'all_reviews', 'lat', 'lng',
       'w2vVector', 'lat_long', 'num_restaurants', 'rest_mean_rating',
       'top_3_restaurants', 'num_bars', 'bars_mean_rating', 'top_3_bars',
       'num_art_galleries', 'art_gallery_mean_rating', 'top_3_art_galleries',
       'num_gyms', 'gyms_mean_rating', 'top_3_gyms', 'num_shopping',
       'shopping_mean_rating', 'top_3_shopping', 'num_tourist_attract',
       'tourist_mean_rating', 'top_3_tourist_attract', 'num_casinos',
       'casinos_mean_rating', 'top_casinos', 'num_supermarkets',
       'supermarkets_mean_rating', 'top_3_supermarkets'],
      dtype='object')

In [126]:
tb_returned = top3df[['name_x','rating_x','website','top_3_restaurants','top_3_bars','top_3_art_galleries','top_3_gyms']]

In [127]:
tb_returned

Unnamed: 0,name_x,rating_x,website,top_3_restaurants,top_3_bars,top_3_art_galleries,top_3_gyms
236,Oasis Hostel,4.3,http://www.oasishostel.com/,['La Fonda De Maria' 'Souper Sopa' 'The Singin...,['Sea Monkey' '421 PV' 'El Barril Mezcaleria'],['Qulture' 'Genius of Paco Gallery and Museum'...,['Crossfit Zapata' 'Marcelo Mico Pilates Amapa...
311,The Grand Mayan Nuevo Vallarta,4.6,https://www.vidanta.com/web/nuevo-vallarta/the...,['Oasis Bay Nuevo Vallarta.' 'ETXEA' 'IL Padri...,['The Grand Bliss Pool Bar at Vidanta Nuevo Va...,No Nearby Art Galleries,['Brio Gym & Spa - Grand Luxxe Tower 2'\n 'Bri...
9,Villa Lujosa en Puerto Vallarta,4.5,,['El Lonche de Maty' 'Puerto Vallarta' 'Birria...,"['Dig & Roz' 'Deposito ""One de Mayo""' 'Bocados...",No Nearby Art Galleries,['EVOLUTION FITNESS & GYM PUERTO VALLARTA' 'Fe...
