# Local Demo 

In [1]:
# Imports 
import os
import pandas as pd
import numpy as np
import pickle
import warnings

# Silence all warnings so library notices don't clutter the demo output
warnings.simplefilter("ignore")

# Paths to every artifact the demo loads
filepath = '../files/review_final.pkl'
hotel_filepath = '../files/hotel_info.pkl'
hotel_model = '../files/hotel.sav'
review_model = '../files/review.sav'
scaler_filepath = '../files/quant_scaled.sav'
vectorizer_filepath = '../files/vectorizer.pkl'


def load_pickle(path):
    """Unpickle and return the object stored at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    loading finishes.

    NOTE: unpickling can execute arbitrary code -- only load files that come
    from a trusted source.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Reading in all of the files
df = pd.read_pickle(filepath)
hotel_info = pd.read_pickle(hotel_filepath)
hotel = load_pickle(hotel_model)            # model used later via hotel.predict(...)
review = load_pickle(review_model)          # model used later via review.predict(...)
ss = load_pickle(scaler_filepath)           # scaler used later via ss.transform(...)
vectorizer = load_pickle(vectorizer_filepath)  # used later via vectorizer.transform(...)

# Will create dictionary to store all values for demo
demo = {}

# Removing a column the demo never uses
df = df.drop(columns=['review_trans'])

# Matching hotel info table with the unique hotels available in our review dataset.
# A right join keeps every hotel name found in the reviews; names that are
# absent from hotel_info end up with NaN in the hotel_info columns.
review_hotels = pd.DataFrame(df['hotel_name'].unique(), columns=['hotel_name'])
hotel_info = hotel_info.merge(review_hotels, how='right', on='hotel_name')

# Predictions were not unpacked at the modeling stage: each entry is indexable,
# so take its first element and store it as a plain int
df['review_preds'] = df['review_preds'].apply(lambda x: int(x[0]))

In [2]:
# Walk over every boolean (amenity) column of hotel_info. When the truncated
# percentage of hotels offering the amenity is at least 70, or at most 30, the
# demo value is defaulted to True or False respectively. Amenities strictly
# between those bounds are left unset here.
amenity_cols = [name for name, dtype in hotel_info.dtypes.items() if dtype == 'bool']
total_hotels = len(hotel_info)

for amenity in amenity_cols:
    # Integer percent (truncated) of hotels that have this amenity
    share = int(hotel_info[amenity].sum() / total_hotels * 100)
    if share >= 70:
        demo[amenity] = True
    elif share <= 30:
        demo[amenity] = False


In [3]:
# Start of the demo
def ask_choice(prompt, choices, retry_msg):
    """Keep prompting until the answer matches a key of `choices`.

    The typed answer is stripped and lower-cased before the lookup, so
    matching is case-insensitive. `retry_msg` is printed after every
    unrecognised answer.

    Returns the value that `choices` maps the matched key to.
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in choices:
            return choices[answer]
        print(retry_msg)


print("Pick Your Prefrence: \nAmenities")
# For every amenity whose truncated percentage lies strictly between 30 and 70
# (i.e. the ones not given a default), the user is prompted for a response.
amenity_cols = hotel_info.dtypes[hotel_info.dtypes == 'bool'].index
true_false = {'true': True, 't': True, 'false': False, 'f': False}
for i in amenity_cols:
    pct_with_amenity = int(sum(hotel_info[i])/len(hotel_info)*100)
    if 30 < pct_with_amenity < 70:
        demo[i] = ask_choice("Would you like " + i +"? (T/F) ", true_false, "Enter T/F")

# Number of amenities that are True. Only amenity columns are counted, so
# re-running this cell does not fold num_amenities / num_rooms /
# hotel_rating_hotel (already stored in demo) into the total.
demo['num_amenities'] = sum(bool(demo[c]) for c in amenity_cols if c in demo)
print("*"*64)

# Hotel size is expressed as a number of rooms; each label maps to one
# representative value (per the original notebook, bins derived from quartiles).
print("Pick your Hotel Size:")
demo['num_rooms'] = ask_choice(
    "Hotel Size? (Large, Medium, Small): ",
    {'large': 243.50, 'medium': 147.70, 'small': 100},
    "Enter Large, Medium, or Small",
)
print("*"*64)

# Hotel quality is expressed as the hotel's overall rating; each label maps to
# one representative rating (per the original notebook, bins derived from quartiles).
print("Pick Quality of Hotel (Based on TripAdvisor): ")
demo['hotel_rating_hotel'] = ask_choice(
    "Quality of hotel? (Great, Good, OK): ",
    {'great': 5, 'good': 4, 'ok': 3.5},
    "Enter Great, Good, or OK",
)

Pick Your Prefrence: 
Amenities
Would you like Banquet Room? (T/F) t
Would you like Bar/Lounge? (T/F) f
Would you like Breakfast Available? (T/F) t
Would you like Breakfast included? (T/F) t
Would you like Conference Facilities? (T/F) f
Would you like Family Rooms? (T/F) f
Would you like Free parking? (T/F) t
Would you like Microwave? (T/F) t
Would you like Outdoor pool? (T/F) f
Would you like Pets Allowed ( Dog / Pet Friendly )? (T/F) f
Would you like Pool? (T/F) t
Would you like Public Wifi? (T/F) f
Would you like Refrigerator in room? (T/F) f
Would you like Restaurant? (T/F) t
Would you like Room service? (T/F) f
Would you like Self-Serve Laundry? (T/F) f
****************************************************************
Pick your Hotel Size:
Hotel Size? (Large, Medium, Small): Large
****************************************************************
Pick Quality of Hotel (Based on TripAdvisor): 
Quality of hotel? (Great, Good, OK): Great


In [4]:
# The feature columns must appear in the same order as when the model was trained:
# the three quantitative features first, then every amenity flag.
quant_features = ['num_amenities', 'num_rooms', 'hotel_rating_hotel']
amenity_features = [
    'Accessible rooms',
    'Air conditioning',
    'Airport transportation',
    'Babysitting',
    'Banquet Room',
    'Bar/Lounge',
    'Breakfast Available',
    'Breakfast included',
    'Business Center with Internet Access',
    'Children Activities (Kid / Family Friendly)',
    'Concierge',
    'Conference Facilities',
    'Dry Cleaning',
    'Electric vehicle charging station',
    'Family Rooms',
    'Fitness Center with Gym / Workout Room',
    'Free High Speed Internet (WiFi)',
    'Free Internet',
    'Free parking',
    'Golf course',
    'Heated pool',
    'Hot Tub',
    'Indoor pool',
    'Kitchenette',
    'Laundry Service',
    'Meeting rooms',
    'Microwave',
    'Minibar',
    'Multilingual Staff',
    'Non-smoking hotel',
    'Non-smoking rooms',
    'Outdoor pool',
    'Paid Internet',
    'Paid Wifi',
    'Pets Allowed ( Dog / Pet Friendly )',
    'Pool',
    'Public Wifi',
    'Refrigerator in room',
    'Restaurant',
    'Room service',
    'Sauna',
    'Self-Serve Laundry',
    'Shuttle Bus Service',
    'Smoking rooms available',
    'Spa',
    'Suites',
    'Tennis Court',
    'Wheelchair access',
]
df_quant_cols = quant_features + amenity_features

# Turn the demo dict into a single-row frame (keys become columns), then pick
# the columns in training order. A key missing from demo raises KeyError here.
demo_frame = pd.DataFrame.from_dict(demo, orient='index').transpose()
to_pred = demo_frame[df_quant_cols]

In [5]:
# Apply the scaler loaded earlier (the one fitted at training time) and wrap
# the result back into a frame carrying the original row index and column labels.
# NOTE: to_pred is overwritten with its scaled version, so re-run the cell that
# builds to_pred before re-running this one; otherwise the data is scaled twice.
scaled_values = ss.transform(to_pred)
to_pred = pd.DataFrame(scaled_values, columns=to_pred.columns, index=to_pred.index)

In [6]:
print("Hotel Based on Preferences")
# Predicts the cluster for the (already scaled) preference row
hotel_predict = hotel.predict(to_pred)[0]

# For every hotel whose hotel_pred equals the predicted cluster, print its
# name, price range and url.
for x in df[df['hotel_pred'] == hotel_predict]['hotel_name'].unique():
    # Filter hotel_info once per hotel and reuse the match for all three fields
    match = hotel_info[hotel_info['hotel_name'] == x]
    url = match['url'].values[0]
    # Show whole dollars. Missing prices are reported as N/A rather than being
    # sliced out of the text "nan"; int(float(...)) truncates the same way for
    # float, integer, and numeric-string prices.
    low, high = (
        "N/A" if pd.isna(price) else "$" + str(int(float(price)))
        for price in (match['low_price'].values[0], match['high_price'].values[0])
    )
    print(x)
    print("Price from "+low+" to " + high)
    print("To know more: ", url)

Hotel Based on Preferences
SpringHill Suites Atlanta Downtown
Price from $147 to $349
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d13451617-Reviews-SpringHill_Suites_Atlanta_Downtown-Atlanta_Georgia.html
Fairfield Inn & Suites Atlanta Downtown
Price from $174 to $270
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d605130-Reviews-Fairfield_Inn_Suites_Atlanta_Downtown-Atlanta_Georgia.html
La Quinta by Wyndham Atlanta Midtown - Buckhead
Price from $119 to $203
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d111330-Reviews-La_Quinta_by_Wyndham_Atlanta_Midtown_Buckhead-Atlanta_Georgia.html
Hampton Inn & Suites Atlanta - Downtown
Price from $144 to $362
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d89508-Reviews-Hampton_Inn_Suites_Atlanta_Downtown-Atlanta_Georgia.html
Residence Inn Atlanta Midtown/Peachtree at 17th
Price from $147 to $375
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d223855-Reviews-Residen

In [7]:
# Prompt for a representative review, predict its cluster, and count, per hotel,
# the reviews whose review_preds equals that cluster.
review_input = input("What was your best review?: ")

# .toarray() produces a plain ndarray holding the same values that .todense()
# would return as a numpy.matrix; recent scikit-learn releases reject
# numpy.matrix input, while ndarray is accepted by old and new versions alike.
review_features = vectorizer.transform([review_input]).toarray()
review_predict = review.predict(review_features)[0]

# One row per hotel: hotel_name plus the number of matching reviews ('review')
review_predict_df = (
    df[df['review_preds'] == review_predict]
    .groupby('hotel_name')
    .agg('count')['review']
    .reset_index()
)


What was your best review?: Many other hotels did not provide the same type of service as this one did. Not only did I have a great time, but my family made many memories that we will never forget.


In [8]:
# For each hotel, divide the number of reviews in the predicted cluster by the
# hotel's total review count (non-null url_x entries) to get a share, then show
# the ten hotels with the highest share.
print("Top 10 Hotels with the most amount of similar reviews")

# Total review count per hotel, indexed by hotel_name
hotel_review_totals = df.groupby('hotel_name')['url_x'].count()

# Plain column assignment instead of re-merging review_predict_df into itself:
# re-running this cell simply overwrites the same two columns, whereas a repeated
# merge would create url_x_x / url_x_y and then fail on the 'url_x' lookup.
review_predict_df['url_x'] = review_predict_df['hotel_name'].map(hotel_review_totals)
review_predict_df['Percent'] = review_predict_df['review']/review_predict_df['url_x']
review_predict_df[['hotel_name','Percent']].sort_values(by = 'Percent', ascending=False).head(10)


Top 10 Hotels with the most amount of similar reviews


Unnamed: 0,hotel_name,Percent
11,Cheshire Motor Inn,0.017544
88,Residence Inn Atlanta Midtown/Georgia Tech,0.015504
10,Budgetel Savannah,0.014599
99,The Westin Atlanta Airport,0.01087
60,Home2 Suites by Hilton Atlanta Downtown,0.010246
102,"The Whitley, a Luxury Collection Hotel, Atlant...",0.010194
94,Stonehurst Place,0.010163
34,Extended Stay America - Atlanta - Vinings,0.01
98,The University Inn at Emory,0.009346
45,Hampton Inn & Suites Atlanta Buckhead Place,0.009174


In [9]:
# List the hotels that have at least one review row matching both the predicted
# hotel cluster and the predicted review cluster.
in_both_clusters = (df['hotel_pred'] == hotel_predict) & (df['review_preds'] == review_predict)
for x in df[in_both_clusters]['hotel_name'].unique():
    # Filter hotel_info once per hotel and reuse the match for all three fields
    match = hotel_info[hotel_info['hotel_name'] == x]
    url = match['url'].values[0]
    # Show whole dollars. Missing prices are reported as N/A rather than being
    # sliced out of the text "nan"; int(float(...)) truncates the same way for
    # float, integer, and numeric-string prices.
    low, high = (
        "N/A" if pd.isna(price) else "$" + str(int(float(price)))
        for price in (match['low_price'].values[0], match['high_price'].values[0])
    )
    print(x)
    print("Price from "+low+" to " + high)
    print("To know more: ", url)

Fairfield Inn & Suites Atlanta Downtown
Price from $174 to $270
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d605130-Reviews-Fairfield_Inn_Suites_Atlanta_Downtown-Atlanta_Georgia.html
La Quinta by Wyndham Atlanta Midtown - Buckhead
Price from $119 to $203
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d111330-Reviews-La_Quinta_by_Wyndham_Atlanta_Midtown_Buckhead-Atlanta_Georgia.html
Hampton Inn & Suites Atlanta - Downtown
Price from $144 to $362
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d89508-Reviews-Hampton_Inn_Suites_Atlanta_Downtown-Atlanta_Georgia.html
Residence Inn Atlanta Midtown/Peachtree at 17th
Price from $147 to $375
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d223855-Reviews-Residence_Inn_Atlanta_Midtown_Peachtree_at_17th-Atlanta_Georgia.html
Holiday Inn Express & Suites Atlanta Downtown
Price from $160 to $336
To know more:  https://www.tripadvisor.com/Hotel_Review-g60898-d89527-Reviews-Holiday_Inn_E