In [1]:
import pandas as pd
import numpy as np
# Read the hotel review dataset from the local data folder into a DataFrame.
clean_reviews = pd.read_csv(filepath_or_buffer='./data/Hotel_Reviews.csv')

In [2]:
# Check whether the latitude or longitude column contains any missing values.
# A notebook cell only displays the value of its last expression, so both
# columns are tested in one expression; the result is True if either column
# has at least one null.
clean_reviews[['lat', 'lng']].isnull().values.any()

True

In [3]:
# Keep only the reviews whose latitude and longitude are both finite numbers
# (np.isfinite is False for NaN and for +/-inf, so those rows are dropped).
clean_reviews = clean_reviews.loc[
    np.isfinite(clean_reviews['lat']) & np.isfinite(clean_reviews['lng'])
]

In [4]:
# Collapse the reviews to one entry per hotel (identified by its name and
# coordinates) holding the mean Reviewer_Score of that hotel's reviews.
Hotel_And_Score = (
    clean_reviews
    .groupby(by=['Hotel_Name', 'lat', 'lng'])['Reviewer_Score']
    .mean()
)

In [5]:
# Inspect the per-hotel mean scores (indexed by Hotel_Name, lat and lng).
Hotel_And_Score

Hotel_Name                                           lat        lng       
11 Cadogan Gardens                                   51.493616  -0.159235     8.845283
1K Hotel                                             48.863932   2.365874     7.861486
25hours Hotel beim MuseumsQuartier                   48.206474   16.354630    8.983309
41                                                   51.498147  -0.143649     9.711650
45 Park Lane Dorchester Collection                   51.506371  -0.151536     9.603571
88 Studios                                           51.499279  -0.209073     8.489107
9Hotel Republique                                    48.870842   2.360586     8.743716
A La Villa Madame                                    48.848861   2.331526     8.853659
ABaC Restaurant Hotel Barcelona GL Monumento         41.410694   2.136294     8.464516
AC Hotel Barcelona Forum a Marriott Lifestyle Hotel  41.410131   2.218805     8.001384
AC Hotel Diagonal L Illa a Marriott Lifestyle Hotel  41

In [6]:
# Build a flat DataFrame with one row per hotel and the columns
# Hotel_Name, lat, lng and Reviewer_Score (mean score of the hotel's reviews).
# The frame is derived from clean_reviews rather than from the previous value
# of Hotel_And_Score so that this cell can be re-run safely: converting the
# variable in place would fail on a second run, because a DataFrame has no
# to_frame() method.
Hotel_And_Score = (
    clean_reviews
    .groupby(['Hotel_Name', 'lat', 'lng'])['Reviewer_Score']
    .mean()
    .reset_index()  # move the group keys out of the index into ordinary columns
)
Hotel_And_Score

Unnamed: 0,Hotel_Name,lat,lng,Reviewer_Score
0,11 Cadogan Gardens,51.493616,-0.159235,8.845283
1,1K Hotel,48.863932,2.365874,7.861486
2,25hours Hotel beim MuseumsQuartier,48.206474,16.354630,8.983309
3,41,51.498147,-0.143649,9.711650
4,45 Park Lane Dorchester Collection,51.506371,-0.151536,9.603571
5,88 Studios,51.499279,-0.209073,8.489107
6,9Hotel Republique,48.870842,2.360586,8.743716
7,A La Villa Madame,48.848861,2.331526,8.853659
8,ABaC Restaurant Hotel Barcelona GL Monumento,41.410694,2.136294,8.464516
9,AC Hotel Barcelona Forum a Marriott Lifestyle ...,41.410131,2.218805,8.001384


In [7]:
#This function calculates the great-circle (haversine) distance in km between two latitude/longitude points; it is used to measure how far each hotel is from the user-specified location.
#https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude
import math
def distance(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371, the mean Earth radius in kilometres.

    Returns:
        The distance along the surface of the sphere as a float.
    """
    dlat = math.radians(lat2 - lat1)  # latitude difference in radians
    dlon = math.radians(lon2 - lon1)  # longitude difference in radians
    sin_half_dlat = math.sin(dlat / 2)
    sin_half_dlon = math.sin(dlon / 2)
    a = sin_half_dlat * sin_half_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_half_dlon * sin_half_dlon
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (nearly) antipodal points, which would make sqrt(1 - a) raise
    # "math domain error"; clamp it to the mathematically valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))  # central angle in radians
    return radius * c

In [8]:
#This function takes a location's text as input and returns the latitude and longitude of the location specified by the user.
#reference:https://geopy.readthedocs.io/en/stable/
from geopy.geocoders import Nominatim
def getlatLng(Location):
    """Geocode a free-text place description into a (latitude, longitude) pair.

    Args:
        Location: Text describing the place to look up (e.g. "Eiffel Tower").

    Returns:
        A tuple (latitude, longitude) taken from the geocoder's match.

    Raises:
        ValueError: If the geocoder finds no match for ``Location``.
    """
    # The Nominatim usage policy requires an application-specific user agent,
    # and current geopy releases refuse to build the geocoder without one.
    geolocator = Nominatim(user_agent="hotel_recommendation_notebook")
    location = geolocator.geocode(Location)
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.
    if location is None:
        raise ValueError("Could not find coordinates for location: {!r}".format(Location))
    return location.latitude, location.longitude

In [9]:
# Example user input: geocode "Eiffel Tower" to get the latitude/longitude
# around which nearby recommended hotels will be searched.
lat2,lon2 = getlatLng("Eiffel Tower")

In [11]:
# For every hotel, compute its distance to the user-specified location
# (lat2, lon2) and collect one record per hotel with its name ('key'),
# that distance ('value') and its mean rating ('value1').
df = [
    {'key': name, 'value': distance(h_lat, h_lng, lat2, lon2), 'value1': score}
    for h_lat, h_lng, name, score in zip(
        Hotel_And_Score.lat,
        Hotel_And_Score.lng,
        Hotel_And_Score.Hotel_Name,
        Hotel_And_Score.Reviewer_Score,
    )
]

In [12]:
# Turn the list of per-hotel records into a DataFrame (one row per record).
Hotel_distance = pd.DataFrame(data=df)

In [13]:
# Inspect the frame built from the records (columns: key, value, value1).
Hotel_distance

Unnamed: 0,key,value,value1
0,11 Cadogan Gardens,341.135094,8.845283
1,1K Hotel,5.259317,7.861486
2,25hours Hotel beim MuseumsQuartier,1036.337107,8.983309
3,41,340.997822,9.711650
4,45 Park Lane Dorchester Collection,342.062733,9.603571
5,88 Studios,343.497043,8.489107
6,9Hotel Republique,5.032567,8.743716
7,A La Villa Madame,2.903667,8.853659
8,ABaC Restaurant Hotel Barcelona GL Monumento,828.223984,8.464516
9,AC Hotel Barcelona Forum a Marriott Lifestyle ...,828.215388,8.001384


In [14]:
# Give the columns descriptive names. Note that index=str also maps every
# index label through str(), so the row labels become strings.
Hotel_distance = Hotel_distance.rename(
    index=str,
    columns={
        "key": "Hotels",
        "value": "Distance",
        "value1": "Ratings",
    },
)

In [15]:
# Inspect the frame after renaming (columns: Hotels, Distance, Ratings).
Hotel_distance

Unnamed: 0,Hotels,Distance,Ratings
0,11 Cadogan Gardens,341.135094,8.845283
1,1K Hotel,5.259317,7.861486
2,25hours Hotel beim MuseumsQuartier,1036.337107,8.983309
3,41,340.997822,9.711650
4,45 Park Lane Dorchester Collection,342.062733,9.603571
5,88 Studios,343.497043,8.489107
6,9Hotel Republique,5.032567,8.743716
7,A La Villa Madame,2.903667,8.853659
8,ABaC Restaurant Hotel Barcelona GL Monumento,828.223984,8.464516
9,AC Hotel Barcelona Forum a Marriott Lifestyle ...,828.215388,8.001384


In [16]:
# Rank the hotels by proximity (closest first), letting the higher rating win
# when two hotels are equally far away, and show the ten best matches.
(
    Hotel_distance
    .sort_values(by=['Distance', 'Ratings'], ascending=[True, False])
    .head(10)
)

Unnamed: 0,Hotels,Distance,Ratings
1155,Pullman Paris Tour Eiffel,0.329445,8.601064
978,Mercure Paris Centre Tour Eiffel,0.409955,7.86511
269,Derby Alma,0.515151,9.053571
1231,Shangri La Hotel Paris,0.610981,9.102174
300,Eiffel Trocad ro,0.689919,7.999219
751,Hotel Sezz Paris,0.699652,8.05
879,Le Marquis Eiffel,0.775675,8.776033
425,H tel Juliana Paris,0.808269,8.577515
470,H tel de la Bourdonnais,0.819728,8.760221
849,La Clef Tour Eiffel,0.834632,8.952063
