In [18]:
import random
import re, nltk, spacy, gensim

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyLDAvis
import pyLDAvis.sklearn
from nltk.corpus import stopwords
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
%matplotlib inline

# Let wide frames show up to 50 columns before pandas truncates the display.
pd.options.display.max_columns = 50

In [21]:
# Load the raw hotel descriptions and index them by hotel name so that a row
# can be looked up with df.loc[<hotel name>].
df = pd.read_csv('data/Seattle_Hotels_dirty.csv', encoding="latin-1").set_index('name')

# TF-IDF over word uni-, bi- and tri-grams with English stop words removed.
# min_df=1 keeps every term, which is what the former min_df=0 meant; current
# scikit-learn validates constructor parameters and rejects an integer
# min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc'])

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity between descriptions.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [22]:
# Lookup table: row position in the similarity matrix -> hotel name.
indices = pd.Series(df.index)
def recommendations(name, cosine_similarities = cosine_similarities, top_n = 5):
    """Return the names of the hotels whose descriptions are most similar to ``name``.

    Parameters
    ----------
    name : str
        Hotel name exactly as stored in ``indices``.
    cosine_similarities : 2-D array-like, optional
        Pairwise similarity matrix whose row order matches ``indices``. The
        default is bound to the matrix that exists when this cell is executed.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list
        Up to ``top_n`` hotel names, most similar first. The queried hotel is
        never part of the result.

    Raises
    ------
    IndexError
        If ``name`` does not match any hotel.
    """
    # position of the hotel that matches the name
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError(f"Unknown hotel name: {name!r}")
    idx = matches[0]

    # similarity scores in descending order; a stable sort keeps ties deterministic
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False, kind = 'mergesort')

    # Remove the queried hotel by position instead of assuming it sorts first:
    # another hotel with an identical description would tie with it.
    top_positions = score_series.drop(idx).index[:top_n]

    # Resolve names through `indices` (captured next to the matrix) rather than
    # the global frame, which later cells replace with other data.
    return [indices.iloc[position] for position in top_positions]

In [23]:
# Baseline: nearest neighbours of one hotel, computed from the uncleaned descriptions.
recommendations('Hilton Garden Inn Seattle Downtown')

['Hilton Seattle',
 "Mildred's Bed and Breakfast",
 'Seattle Airport Marriott',
 'Days Inn by Wyndham Seattle North of Downtown',
 'Holiday Inn Express & Suites North Seattle - Shoreline']

In [24]:
# Raw description of the query hotel, for comparison with its recommendations.
df.loc['Hilton Garden Inn Seattle Downtown', 'desc']

'Located on the southern tip of Lake Union, the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure. Non-Smoking\nHotel is 100% non-smoking, including e-cigarettes, in all guest rooms and public areas. A fee of up to $250 USD will be assessed for smoking in a non-smoking room. Please ask the Front Desk for locations of designated outdoor smoking areas. Check-in: 4:00 pm. Check-out: 12:00 pm. Cancellation policies may vary depending on the rate or dates of your reservation. Please refer to your reservation confirmation to verify your cancellation policy.\n'

In [25]:
# Raw description of one of the recommended hotels.
df.loc["Mildred's Bed and Breakfast", 'desc']

'A rare find in the heart of Seattle. 100% non-smoking. Check-in: 4:00 pm. Check-out: 12:00 pm. Cancellation policies may vary depending on the rate or dates of your reservation.'

In [26]:
# Raw description of another recommended hotel.
df.loc["Seattle Airport Marriott", 'desc']

'We streamline your travel routine. Explore the local area, brimming with alluring attractions. Reserve one of our 14 versatile event spaces for your next business meeting or wedding reception. We also feature an outdoor atrium, which provides a gorgeous backdrop for intimate gatherings. Check-in: 4:00 PM, Check-out: 12:00 PM. We are committed to providing our guests and associates with a smoke-free environment, and are proud to boast one of the most comprehensive smoke-free hotel policies in the industry. Although smoking is not permitted within hotel buildings themselves, guests who smoke are permitted to do so outside in designated areas.'

In [27]:
# Move the hotel name out of the index and back into a regular 'name' column.
df = df.reset_index()

In [29]:
# One Series per hotel: its index holds the pieces of the description split on
# '. ' and every value is the hotel name.
per_hotel_sentences = [
    pd.Series(str(hotel_name), index=str(description).split('. '))
    for hotel_name, description in zip(df['name'], df['desc'])
]

# Stack them; reset_index turns the sentence index into the first column.
df = pd.concat(per_hotel_sentences).reset_index()
df.columns = ['sentence', 'name']

# Collapse every run of non-word characters into a single space.
non_word_run = re.compile(r'\W+')
df['sentence'] = [non_word_run.sub(' ', sentence) for sentence in df['sentence']]
print('We have ', len(df), 'sentences in total')

We have  1208 sentences in total


In [31]:
# Sentences extracted for the query hotel.
df[df['name'].eq('Hilton Garden Inn Seattle Downtown')]

Unnamed: 0,sentence,name
0,Located on the southern tip of Lake Union the ...,Hilton Garden Inn Seattle Downtown
1,Non Smoking Hotel is 100 non smoking including...,Hilton Garden Inn Seattle Downtown
2,A fee of up to 250 USD will be assessed for sm...,Hilton Garden Inn Seattle Downtown
3,Please ask the Front Desk for locations of des...,Hilton Garden Inn Seattle Downtown
4,Check in 4 00 pm,Hilton Garden Inn Seattle Downtown
5,Check out 12 00 pm,Hilton Garden Inn Seattle Downtown
6,Cancellation policies may vary depending on th...,Hilton Garden Inn Seattle Downtown
7,Please refer to your reservation confirmation ...,Hilton Garden Inn Seattle Downtown


In [32]:
# Print eight consecutive sentences starting at row label `a`, each followed
# by an empty line.
a = 0
for row_label in range(a, a + 8):
    print(df.loc[row_label, 'sentence'], end='\n\n')

Located on the southern tip of Lake Union the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure

Non Smoking Hotel is 100 non smoking including e cigarettes in all guest rooms and public areas

A fee of up to 250 USD will be assessed for smoking in a non smoking room

Please ask the Front Desk for locations of designated outdoor smoking areas

Check in 4 00 pm

Check out 12 00 pm

Cancellation policies may vary depending on the rate or dates of your reservation

Please refer to your reservation confirmation to verify your cancellation policy 



In [41]:
# Bag-of-words counts over the cleaned sentences.
vectorizer = CountVectorizer(
    analyzer='word',
    lowercase=True,                     # fold text to lowercase before tokenising
    stop_words='english',               # drop English stop words
    token_pattern=r'[a-zA-Z0-9]{3,}',   # a token is a run of 3 or more alphanumerics
    min_df=3,                           # ignore terms found in fewer than 3 sentences
    max_features=3000,                  # keep at most the 3000 most frequent terms
)

data_vectorized = vectorizer.fit_transform(df['sentence'])

In [44]:
# Fit a 40-topic LDA model on the sentence/term counts.
lda_model = LatentDirichletAllocation(
    n_components=40,            # number of topics
    learning_method='online',
    random_state=0,             # fixed seed so the fit is repeatable
    n_jobs=-1,                  # use all available CPUs
)

# Fit and return the per-sentence topic weights in one step.
lda_output = lda_model.fit_transform(data_vectorized)

print(lda_model)

LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
                          evaluate_every=-1, learning_decay=0.7,
                          learning_method='online', learning_offset=10.0,
                          max_doc_update_iter=100, max_iter=10,
                          mean_change_tol=0.001, n_components=40, n_jobs=-1,
                          perp_tol=0.1, random_state=0, topic_word_prior=None,
                          total_samples=1000000.0, verbose=0)


In [45]:
# Render pyLDAvis output inline in the notebook.
pyLDAvis.enable_notebook()
# Build the interactive topic view from the fitted LDA model, the document-term
# matrix and the vectorizer; mds='tsne' selects t-SNE for the 2-D topic layout.
# NOTE(review): newer pyLDAvis releases appear to have renamed the `sklearn`
# submodule (to `lda_model`) — confirm against the installed version.
pyLDAvis.sklearn.prepare(lda_model, data_vectorized, vectorizer, mds='tsne')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [46]:
# Show top 20 keywords for each topic
def show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=20):
    """Return the highest-weighted vocabulary terms of every LDA topic.

    Parameters
    ----------
    vectorizer : fitted vectorizer
        Supplies the vocabulary, in the column order of the matrix the model
        was fitted on.
    lda_model : fitted model exposing ``components_``
        One row of term weights per topic.
    n_words : int, optional
        Number of terms to keep per topic (default 20).

    Returns
    -------
    list of numpy.ndarray
        One array per topic holding its terms, highest weight first.
    """
    # scikit-learn 1.0 introduced get_feature_names_out() and 1.2 removed
    # get_feature_names(); use whichever this installation provides.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    keywords = np.asarray(feature_names)

    topic_keywords = []
    for topic_weights in lda_model.components_:
        # Negating the weights makes the ascending argsort list the largest first.
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))
    return topic_keywords

topic_keywords = show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=20)

# Keyword table: one row per topic, one column per keyword rank.
df_topic_keywords = pd.DataFrame(topic_keywords).rename(
    index=lambda topic_no: 'Topic ' + str(topic_no),
    columns=lambda rank: 'Word ' + str(rank),
)
df_topic_keywords

Unnamed: 0,Word 0,Word 1,Word 2,Word 3,Word 4,Word 5,Word 6,Word 7,Word 8,Word 9,Word 10,Word 11,Word 12,Word 13,Word 14,Word 15,Word 16,Word 17,Word 18,Word 19
Topic 0,seattle,hotel,inn,lake,union,stay,experience,offers,south,guests,suites,downtown,offer,modern,comfort,today,original,alfred,unique,urban
Topic 1,stay,make,need,sure,comfortable,time,road,extra,convenient,night,extended,enjoyable,offering,designed,hit,pride,staying,landing,soft,creature
Topic 2,free,wifi,guests,smoke,property,parking,proud,amenities,furnished,designed,motel,airport,stylish,hotel,lifestyle,budget,industry,providing,choice,feature
Topic 3,event,meeting,space,feet,square,events,meetings,000,special,planning,people,team,catering,host,spaces,matter,dedicated,grill,boardroom,private
Topic 4,smoking,non,100,areas,accessible,hotel,suite,reservation,tell,upgrade,public,club,attentive,cancellation,including,guests,need,rate,vary,policies
Topic 5,business,available,free,high,access,internet,speed,center,complimentary,parking,guest,hotel,stay,rooms,wireless,travel,room,offer,services,laundry
Topic 6,named,gateway,grand,sheraton,provides,diverse,core,vibrant,city,pacific,located,northwest,seattle,kitchenette,basketball,bacon,small,spend,athletic,nearby
Topic 7,views,comforts,skyline,stunning,look,scenic,home2,diamond,technology,destinations,landmarks,enjoy,like,space,seattle,iconic,stylish,needle,settle,situated
Topic 8,accommodation,parking,beer,vary,mile,regency,coffee,studios,quiet,history,avenue,enjoy,2018,news,conveniently,meetings,story,accommodations,world,overlooking
Topic 9,queen,hill,anne,come,capitol,suite,neighborhood,door,know,just,volunteer,fireplaces,distinctive,shared,bathrooms,kimpton,shows,bacon,let,like


In [53]:
# Create Document - Topic Matrix: one row per sentence, one column per topic
lda_output = lda_model.transform(data_vectorized)

# Take both dimensions from the matrix itself so the labels always match it
# (the row count used to come from an undefined name `data`, and the topic
# count was a hard-coded 40).
n_docs, n_topics = lda_output.shape

# column names
topicnames = ["Topic" + str(i) for i in range(n_topics)]

# index names
docnames = ["Doc" + str(i) for i in range(n_docs)]

# Make the pandas dataframe (weights rounded to 2 decimals for display)
df_document_topic = pd.DataFrame(np.round(lda_output, 2), columns=topicnames, index=docnames)

# Get dominant topic for each document from the unrounded weights: rounding
# first can create artificial ties that argmax resolves by column order.
dominant_topic = np.argmax(lda_output, axis=1)
df_document_topic['dominant_topic'] = dominant_topic

In [54]:
# Preview the topic weights and dominant topic of the first 10 sentences.
df_document_topic.head(10)

Unnamed: 0,Topic0,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,Topic8,Topic9,Topic10,Topic11,Topic12,Topic13,Topic14,Topic15,Topic16,Topic17,Topic18,Topic19,Topic20,Topic21,Topic22,Topic23,Topic24,Topic25,Topic26,Topic27,Topic28,Topic29,Topic30,Topic31,Topic32,Topic33,Topic34,Topic35,Topic36,Topic37,Topic38,Topic39,dominant_topic
Doc0,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.39,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
Doc1,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
Doc2,0.0,0.0,0.0,0.0,0.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.29,0.0,0.0,0.0,4
Doc3,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,4
Doc4,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.51,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,12
Doc5,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.51,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,12
Doc6,0.0,0.0,0.0,0.0,0.84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
Doc7,0.01,0.01,0.01,0.01,0.67,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,4
Doc8,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.22,0.0,0.0,0.0,0.0,0.0,0.0,6
Doc9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32,0.06,0.0,0.58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25


In [49]:
# Attach each sentence's topic weights by row position.
# The topic frame is re-indexed on a copy, so df_document_topic keeps its
# "DocN" labels and this cell can be re-run safely. errors='ignore' also copes
# with a topic frame whose index was already moved into an 'index' column.
df_sent_topic = pd.merge(
    df,
    df_document_topic.reset_index(drop=True).drop(columns='index', errors='ignore'),
    left_index=True,
    right_index=True,
)

In [55]:
# Preview the first 10 sentences together with their topic weights.
df_sent_topic.head(10)

Unnamed: 0,sentence,name,Topic0,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,Topic8,Topic9,Topic10,Topic11,Topic12,Topic13,Topic14,Topic15,Topic16,Topic17,Topic18,Topic19,Topic20,Topic21,Topic22,Topic23,Topic24,Topic25,Topic26,Topic27,Topic28,Topic29,Topic30,Topic31,Topic32,Topic33,Topic34,Topic35,Topic36,Topic37,Topic38,Topic39,dominant_topic
0,Located on the southern tip of Lake Union the ...,Hilton Garden Inn Seattle Downtown,0.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.39,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,Non Smoking Hotel is 100 non smoking including...,Hilton Garden Inn Seattle Downtown,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
2,A fee of up to 250 USD will be assessed for sm...,Hilton Garden Inn Seattle Downtown,0.0,0.0,0.0,0.0,0.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.29,0.0,0.0,0.0,4
3,Please ask the Front Desk for locations of des...,Hilton Garden Inn Seattle Downtown,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,4
4,Check in 4 00 pm,Hilton Garden Inn Seattle Downtown,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.51,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,12
5,Check out 12 00 pm,Hilton Garden Inn Seattle Downtown,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.51,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,12
6,Cancellation policies may vary depending on th...,Hilton Garden Inn Seattle Downtown,0.0,0.0,0.0,0.0,0.84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
7,Please refer to your reservation confirmation ...,Hilton Garden Inn Seattle Downtown,0.01,0.01,0.01,0.01,0.67,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,4
8,Located in the city s vibrant core the Sherato...,Sheraton Grand Seattle,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.22,0.0,0.0,0.0,0.0,0.0,0.0,6
9,Step out of our front doors to find gourmet di...,Sheraton Grand Seattle,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32,0.06,0.0,0.58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25


In [56]:
# Dominant topic of every sentence belonging to the query hotel.
is_hilton_garden = df_sent_topic['name'] == 'Hilton Garden Inn Seattle Downtown'
df_sent_topic.loc[is_hilton_garden, ['sentence', 'dominant_topic']]

Unnamed: 0,sentence,dominant_topic
0,Located on the southern tip of Lake Union the ...,0
1,Non Smoking Hotel is 100 non smoking including...,4
2,A fee of up to 250 USD will be assessed for sm...,4
3,Please ask the Front Desk for locations of des...,4
4,Check in 4 00 pm,12
5,Check out 12 00 pm,12
6,Cancellation policies may vary depending on th...,4
7,Please refer to your reservation confirmation ...,4


In [57]:
# Inspect up to 20 sentences whose dominant topic is 4.
# The sample size is capped at the number of matching rows (sample() raises
# when asked for more rows than exist) and seeded so re-runs show the same rows.
topic_4_sentences = df_sent_topic.loc[df_sent_topic['dominant_topic'] == 4, ['sentence', 'dominant_topic']]
topic_4_sentences.sample(min(20, len(topic_4_sentences)), random_state=0)

Unnamed: 0,sentence,dominant_topic
1053,The Spa at the WAC will spoil you with a compl...,4
697,Our hotel is completely non smoking,4
70,Please ask the Front Desk for locations of des...,4
7,Please refer to your reservation confirmation ...,4
23,We do not allow smoking in our rooms public ar...,4
1144,100 non smoking,4
605,Non Smoking Hotel,4
69,A fee of up to 250 USD will be assessed for sm...,4
102,The characters Attentive staff members who alw...,4
431,100 non smoking and accessible accommodations ...,4


In [64]:
# Inspect up to 10 sentences whose dominant topic is 12.
# The sample size is capped at the number of matching rows (sample() raises
# when asked for more rows than exist) and seeded so re-runs show the same rows.
topic_12_sentences = df_sent_topic.loc[df_sent_topic['dominant_topic'] == 12, ['sentence', 'dominant_topic']]
topic_12_sentences.sample(min(10, len(topic_12_sentences)), random_state=0)

Unnamed: 0,sentence,dominant_topic
604,Check in 15 00 check out 11 00,12
24,Check in is at 3pm and check out is at 12pm,12
608,Check in 15 00 check out 11 00,12
78,Check in begins at 4 00pm,12
546,Check emails in the Work Zone,12
82,Late check out is subject to availability,12
1146,Check out 12 00 pm,12
4,Check in 4 00 pm,12
5,Check out 12 00 pm,12
71,Check in 4 00 pm Check out 12 00 pm,12


In [65]:
# Report how many sentences fall under each of the two topics slated for removal.
topic_4_count = len(df_sent_topic[df_sent_topic['dominant_topic'] == 4])
topic_12_count = len(df_sent_topic[df_sent_topic['dominant_topic'] == 12])
print('There are', topic_4_count, 'sentences that belong to topic 4 and we will remove')
print('There are', topic_12_count, 'sentences that belong to topic 12 and we will remove')

There are 20 sentences that belong to topic 4 and we will remove
There are 19 sentences that belong to topic 12 and we will remove


In [66]:
# Number of sentences per dominant topic, most frequent topic first.
df_topic_distribution = (
    df_document_topic['dominant_topic']
    .value_counts()
    .rename_axis('Topic #')
    .reset_index(name='Num Sentences')
)
df_topic_distribution

Unnamed: 0,Topic #,Num Sentences
0,23,302
1,0,129
2,21,87
3,5,72
4,39,68
5,22,56
6,37,46
7,10,44
8,34,40
9,24,37


In [67]:
# Keep only sentences whose dominant topic is neither 4 nor 12.
# NOTE(review): these ids refer to the topics of the model fitted above;
# presumably they must be re-checked whenever the model is refitted.
df_sent_topic_clean = df_sent_topic[~df_sent_topic['dominant_topic'].isin([4, 12])]

In [93]:
# Rebuild one description per hotel by joining its remaining sentences with spaces.
df_description = (
    df_sent_topic_clean[['sentence', 'name']]
    .groupby('name')['sentence']
    .agg(' '.join)
    .reset_index()
)

In [94]:
# Preview the rebuilt per-hotel descriptions.
df_description.head()

Unnamed: 0,name,sentence
0,11th Avenue Inn Bed and Breakfast,Walk to the Pike Place Market and to the other...
1,Ace Hotel Seattle,We fell in love with a former maritime workers...
2,Aloft Seattle Redmond,Celebrate your style at Aloft Seattle Redmond ...
3,Americas Best Value Inn Shoreline / Seattle North,Americas Best Value Inn Shoreline Seattle Nort...
4,Ballard Inn,Historic Style with Modern Amenities The Balla...


In [95]:
# Cleaned description stored under row label 45.
df_description.loc[45, 'sentence']

'Located on the southern tip of Lake Union the Hilton Garden Inn Seattle Downtown hotel is perfectly located for business and leisure'

In [96]:
# Index the cleaned descriptions by hotel name. The guard keeps the cell
# re-runnable: once 'name' is the index there is no such column left to move.
if 'name' in df_description.columns:
    df_description = df_description.set_index('name')

# Same TF-IDF setup as for the raw descriptions. min_df=1 keeps every term,
# which is what the former min_df=0 meant; current scikit-learn rejects an
# integer min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df_description['sentence'])

# Rows are L2-normalised by default, so the dot product is the cosine similarity.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [98]:
# Lookup table: row position in the similarity matrix -> hotel name.
indices = pd.Series(df_description.index)
def recommendations(name, cosine_similarities = cosine_similarities, top_n = 5):
    """Return the names of the hotels whose cleaned descriptions are most similar to ``name``.

    Parameters
    ----------
    name : str
        Hotel name exactly as stored in ``indices``.
    cosine_similarities : 2-D array-like, optional
        Pairwise similarity matrix whose row order matches ``indices``. The
        default is bound to the matrix that exists when this cell is executed.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list
        Up to ``top_n`` hotel names, most similar first. The queried hotel is
        never part of the result.

    Raises
    ------
    IndexError
        If ``name`` does not match any hotel.
    """
    # position of the hotel that matches the name
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError(f"Unknown hotel name: {name!r}")
    idx = matches[0]

    # similarity scores in descending order; a stable sort keeps ties deterministic
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False, kind = 'mergesort')

    # Remove the queried hotel by position instead of assuming it sorts first:
    # another hotel with an identical description would tie with it.
    top_positions = score_series.drop(idx).index[:top_n]

    # `indices` already holds the names in matrix order, so no per-iteration
    # copy of the frame index is needed.
    return [indices.iloc[position] for position in top_positions]

In [100]:
# Same query as the baseline, now against the descriptions with topics 4 and 12 removed.
recommendations('Hilton Garden Inn Seattle Downtown')

['Silver Cloud Inn - Seattle Lake Union',
 'Residence Inn by Marriott Seattle Downtown/Lake Union',
 'Staybridge Suites Seattle Downtown - Lake Union',
 'Homewood Suites by Hilton Seattle Downtown',
 'Days Inn by Wyndham Seattle North of Downtown']