<a href="https://colab.research.google.com/github/rodrigu12/Recommender/blob/master/Hotel_Recommender_Systems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
#setup importing libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [28]:
#importing the data
metadata = pd.read_csv('https://raw.githubusercontent.com/rodrigu12/Recommender/master/booking_com-travel_sample.csv')
metadata.head()

Unnamed: 0,address,city,country,crawl_date,hotel_brand,hotel_description,hotel_facilities,hotel_star_rating,image_count,latitude,locality,longitude,pageurl,property_id,property_name,property_type,province,qts,room_count,room_type,similar_hotel,site_review_count,site_review_rating,site_stay_review_rating,sitename,special_tag,state,uniq_id,zone
0,"KHIRSU, 246147 Pauri, India – Great location -",pauri,India,2016-09-01,,Khirsu By GMVN offers accommodation in Pauri. ...,Bathroom:Toilet paper|Linen|Towels|Bathroom|To...,,3.0,30.123749,,78.793774,http://www.booking.com/hotel/in/khirsu-by-gmvn...,1742658.0,Khirsu By GMVN,204,,2016-09-01 11:52:38 +0000,18.0,Economy Double Room,Hotel Mandakini,,,,http://www.booking.com/,Share,Uttarakhand,a5ea72415f8007fcbe65759830fdd3d9,
1,"Kaathadimattam, Balacola Post, NEAR Siva Tea F...",ooty,India,2016-09-01,,"Situated in Ooty in the Tamil Nadu Region, 8 k...",Bathroom:Toilet paper|Linen|Towels|Bidet|Towel...,3 stars,,11.329595,,76.686732,http://www.booking.com/hotel/in/global-village...,1755986.0,Global Village Ooty,212,,2016-09-01 11:52:38 +0000,10.0,British Empire Chalet,Treebo Yantra Leisures|Western Valley Resorts|...,5.0,7.6,Location:8.5|Staff:8|Cleanliness:7.5|Comfort:7...,http://www.booking.com/,Share,Tamil Nadu,7e0b055417271bbd9dae902f3e231ed4,
2,"PIPALKOTI, 246472 Pīpalkoti, India – Show map",pīpalkoti,India,2016-09-01,,TRH Pipalkoti offers accommodation in Pīpalkot...,Bathroom:Toilet paper|Linen|Towels|Bathroom•Vi...,,4.0,30.42954,,79.428116,http://www.booking.com/hotel/in/trh-pipalkoti....,1741747.0,TRH Pipalkoti,204,,2016-09-01 11:52:38 +0000,28.0,Economy Double Room,TRH Joshimath (New),,,,http://www.booking.com/,Share,Uttarakhand,72c0af09827bbb620365aa5df523ba1d,
3,"1 KARIYIL HOUSE KUMARAKOM NORTH PO KOTTAYAM, 6...",kumarakom,India,2016-09-01,,"Swasti house boat 2 is located in Kumarakom, 3...",Bathroom:Toilet paper|Towels|Bath|Shower•Bedro...,,2.0,9.616057,,76.428269,http://www.booking.com/hotel/in/swasti-house-b...,1747392.0,Swasti house boat 2,204,,2016-09-01 11:52:38 +0000,1.0,Deluxe Room,,,,,http://www.booking.com/,Share,Kerala,eb6fd33d99aa4a8088caa8f3ecb08275,
4,"Kavanattinkara, 686563 Kumarakom, India – Show...",kumarakom,India,2016-09-01,,"Amrutham Houseboat 2 is set in Kumarakom, 5 km...",Bathroom:Toilet paper|Linen|Towels|Towels/Shee...,,,9.632854,,76.42357,http://www.booking.com/hotel/in/amrutham-house...,1744962.0,Amrutham Houseboat 2,215,,2016-09-01 11:52:38 +0000,2.0,Mobile Home,Mandala Beach House & Cottages,,,,http://www.booking.com/,Share,Kerala,0814d9af7ad808863c04db8f30437c57,


In [29]:
#print the plot of the hotel description
metadata['hotel_description'].head()

0    Khirsu By GMVN offers accommodation in Pauri. ...
1    Situated in Ooty in the Tamil Nadu Region, 8 k...
2    TRH Pipalkoti offers accommodation in Pīpalkot...
3    Swasti house boat 2 is located in Kumarakom, 3...
4    Amrutham Houseboat 2 is set in Kumarakom, 5 km...
Name: hotel_description, dtype: object

In [30]:
#Defining a TF-IDF Vectorizer OObject. Removes all english stops words such as 'a', 'the'
tfidf= TfidfVectorizer(stop_words='english')

#Replace NaN with an empty string 
metadata ['hotel_description'] = metadata['hotel_description'].fillna('')

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(metadata['hotel_description'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape


(6000, 5249)

In [31]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cosine_sim.shape

(6000, 6000)

In [32]:
indices = pd.Series(metadata.index, index=metadata['property_name']).drop_duplicates()

In [33]:
indices [:10]

property_name
Khirsu By GMVN          0
Global Village Ooty     1
TRH Pipalkoti           2
Swasti house boat 2     3
Amrutham Houseboat 2    4
The Hill Side           5
Hotel Mountain Face     6
Rahi Motel Haridwar     7
Swasti house boat 4     8
NaN                     9
dtype: int64

In [46]:
#create a function that takes in the name of a hotel and outputs recommended hotels

def get_recommendations(property_name, cosine_sim=cosine_sim):
  #get the index of the hotel that matches the hotel_name
    idx = indices[property_name]
  
    if isinstance(idx, pd.core.series.Series):
      idx = idx[0]

  #get the pairwise similarity scores of all hotel with that description
    sim_scores = list(enumerate(cosine_sim[idx]))

  #sort the hotels based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

  #Get the scores of the 10 most similar hotels
    sim_scores = sim_scores[1:11]
  
  #Get the hotel indices
    hotel_indices = [i[0] for i in sim_scores]
  
  #Return the top 10 most similar movies
    return metadata['property_name'].iloc[hotel_indices]





In [47]:
get_recommendations('Vishwaratna Hotel')

1021                       Vishwaratna Hotel
1214                       Vishwaratna Hotel
1429                       Vishwaratna Hotel
2027                       Vishwaratna Hotel
2251                       Vishwaratna Hotel
4245    The Guwahati Address By Centre Point
4447    The Guwahati Address By Centre Point
3494                          Hotel Prince B
3971                          Hotel Prince B
3958                          Hotel Prince B
Name: property_name, dtype: object

In [48]:
get_recommendations('Hotel Mountain Face')

44      Hotel Mountain Face
148     Hotel Mai Vrindavan
187     Hotel Mai Vrindavan
280     Hotel Mai Vrindavan
961       Vishwaratna Hotel
1021      Vishwaratna Hotel
1214      Vishwaratna Hotel
1429      Vishwaratna Hotel
2027      Vishwaratna Hotel
2251      Vishwaratna Hotel
Name: property_name, dtype: object

In [49]:
get_recommendations('Vishwaratna Hotel')

1021                       Vishwaratna Hotel
1214                       Vishwaratna Hotel
1429                       Vishwaratna Hotel
2027                       Vishwaratna Hotel
2251                       Vishwaratna Hotel
4245    The Guwahati Address By Centre Point
4447    The Guwahati Address By Centre Point
3494                          Hotel Prince B
3971                          Hotel Prince B
3958                          Hotel Prince B
Name: property_name, dtype: object