### Import libraries

In [48]:
import pandas as pd
import numpy as np
import pickle

### Define useful functions

In [336]:
def extract_zip_code(hotel_address):
    """Return the ZIP code found in a comma-separated address string.

    The ZIP code is read from the last space-separated token of the
    second-to-last comma-separated field, e.g. ``'NY 14202'`` in
    ``'120 Church St, Buffalo, NY 14202, USA'``.

    Parameters
    ----------
    hotel_address : str
        Address with at least two comma-separated fields.

    Returns
    -------
    str
        The first five characters of the ZIP token. When the state code is
        glued to the ZIP (token starting with ``'NY1'``, e.g. ``'NY10001'``),
        the two-letter prefix is skipped and the following five characters
        are returned.

    Raises
    ------
    IndexError
        If the address has fewer than two comma-separated fields.
    """
    state_zip_field = hotel_address.split(',')[-2].strip(' ')
    last_token = state_zip_field.split(' ')[-1]

    # The previous check compared a 5-character slice against 'NY1', which
    # never matched glued tokens such as 'NY10001'; test the prefix instead.
    if last_token.startswith('NY1'):
        return last_token[2:7]
    return last_token[:5]

In [298]:
def display_info_hotel(df_hotels_item):
    """Print a hotel's name and its negative-mention count for each topic.

    Parameters
    ----------
    df_hotels_item : pandas.DataFrame
        Frame whose first row describes the hotel. It must contain the
        ``hotel_name`` column and the count columns ``0_neg`` ... ``7_neg``.

    Returns
    -------
    None
        The report is written to standard output.

    Raises
    ------
    IndexError
        If the frame has no rows.

    Notes
    -----
    Only the first row is reported, and the ``-1_neg`` column is not shown.
    """
    # (label, column) pairs in display order; label spacing is kept exactly
    # as it was printed before.
    topic_labels = (
        ('(0)  Noise/Smell/AC-Heat: ', '0_neg'),
        ('(1)  Staff/Check-in/out: ', '1_neg'),
        ('(2)  Breakfast: ', '2_neg'),
        ('(3)  Facilities: ', '3_neg'),
        ('(4)  Location: ', '4_neg'),
        ('(5) Bathroom: ', '5_neg'),
        ('(6) Room amenities: ', '6_neg'),
        ('(7) Bed: ', '7_neg'),
    )

    print('Hotel name: ', df_hotels_item['hotel_name'].iloc[0])
    print('---------------------')
    print('Negative mentions per topic: ')
    for label, column in topic_labels:
        # Extract the scalar first: int() on a one-element Series is
        # deprecated in recent pandas releases.
        print(label, str(int(df_hotels_item[column].iloc[0])))
    
def change_topic_counts(df_hotel_item,counts_delta_vec):
    """Return a copy of the hotel row with shifted negative-mention counts.

    Parameters
    ----------
    df_hotel_item : pandas.DataFrame
        Hotel frame; the row that ends up at position 0 is modified. It must
        contain ``-1_neg`` ... ``7_neg`` and ``sentences_count_neg``.
    counts_delta_vec : sequence of numbers
        Deltas added to ``0_neg`` ... ``7_neg``; elements 0 to 7 are read.

    Returns
    -------
    pandas.DataFrame
        A re-indexed (0-based) copy with updated counts and recomputed
        ``-1_pc_neg`` ... ``7_pc_neg`` percentage columns. The input frame
        is left untouched.

    Notes
    -----
    ``sentences_count_neg`` is not adjusted, so the percentages are taken
    relative to the original sentence total.
    """
    adjusted = df_hotel_item.copy().reset_index(drop=True)

    # Shift each of the eight topic counters by its delta.
    for topic in range(8):
        adjusted.at[0, '{}_neg'.format(topic)] += counts_delta_vec[topic]

    # Recompute the per-topic percentages (topic -1 included).
    topic_ids = range(-1, 8)
    count_cols = ['{}_neg'.format(topic) for topic in topic_ids]
    share_cols = ['{}_pc_neg'.format(topic) for topic in topic_ids]
    adjusted[share_cols] = 100 * adjusted[count_cols].div(adjusted.sentences_count_neg, axis=0)

    return adjusted

def get_price_prediction(model,cat_feat,feat_compl_list,feat_compl_list_dum,df_hotel_item):
    """Predict the price for the first row of a hotel frame.

    The selected features are laid out in the dummy-encoded column order the
    model is given, each categorical value is one-hot encoded by hand, and
    the model is queried.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``; its result is indexed as
        ``[0][0]`` (assumes a 2-D prediction - TODO confirm for other models).
    cat_feat : iterable of str
        Names of the categorical feature columns.
    feat_compl_list : list of str
        Feature columns to read from ``df_hotel_item`` (before encoding).
    feat_compl_list_dum : list of str
        Columns passed to the model, with dummy columns named
        ``'<feature>_<value>'``.
    df_hotel_item : pandas.DataFrame
        Hotel frame; only the first row's categorical values are encoded.

    Returns
    -------
    scalar
        ``model.predict(...)[0][0]``.

    Notes
    -----
    A categorical value with no matching dummy column leaves all dummies of
    that feature at 0. The target price column is not required in the input.
    """
    input_features = df_hotel_item[feat_compl_list]

    # Columns absent from the raw features (the dummy columns) start at 0;
    # raw columns not in the dummy layout are dropped by the reindex.
    model_input = input_features.reindex(columns=feat_compl_list_dum, fill_value=0)
    for cat in cat_feat:
        dummy_cat = cat + '_' + str(input_features[cat].values[0])
        if dummy_cat in model_input.columns:
            model_input.iloc[0, model_input.columns.get_loc(dummy_cat)] = 1

    return model.predict(model_input)[0][0]
    
def update_price(model,cat_feat,feat_compl_list,feat_compl_list_dum,df_hotel_item,counts_delta_vec):
    """Compare the predicted price before and after changing topic counts.

    Parameters
    ----------
    model, cat_feat, feat_compl_list, feat_compl_list_dum
        Forwarded unchanged to ``get_price_prediction``.
    df_hotel_item : pandas.DataFrame
        Hotel frame used for the baseline prediction.
    counts_delta_vec : sequence of numbers
        Per-topic deltas forwarded to ``change_topic_counts``.

    Returns
    -------
    list
        ``[baseline_price, new_price, per_cent_change]`` where the last
        entry is the change relative to the baseline, in percent.
    """
    predictor_args = (model, cat_feat, feat_compl_list, feat_compl_list_dum)

    # Model price for the hotel as it is.
    price_before = get_price_prediction(*predictor_args, df_hotel_item)

    # Model price once the negative-mention counts have been shifted.
    adjusted_item = change_topic_counts(df_hotel_item, counts_delta_vec)
    price_after = get_price_prediction(*predictor_args, adjusted_item)

    relative_change_pct = 100 * (price_after - price_before) / price_before
    return [price_before, price_after, relative_change_pct]

### Import dataset

In [418]:
# Load the hotel table from the app's input folder (path is relative to the working directory).
df_hotels = pd.read_csv('./datasets/input_app/df_hotels.csv')

### Create dataset with unique names for app

In [419]:
# Work on a copy so the columns added below do not modify df_hotels.
df_hotels_unique_names = df_hotels.copy()

In [420]:
# Derive a ZIP code column by applying extract_zip_code to every address string.
df_hotels_unique_names['hotel_zip_code'] = df_hotels_unique_names['hotel_address'].apply(extract_zip_code)

In [421]:
# Build a label of the form "<hotel name> (<zip code>)".
# NOTE(review): presumably meant to tell apart hotels that share a name - confirm the result is actually unique.
df_hotels_unique_names['hotel_unique_name'] = df_hotels_unique_names['hotel_name'] + ' (' + df_hotels_unique_names['hotel_zip_code'] + ')'

In [422]:
# Save the table with the added ZIP code and unique-name columns; index=False keeps the row index out of the file.
df_hotels_unique_names.to_csv('./datasets/input_app/df_hotels_unique_names.csv',index=False)

### Import regression model and aux files

In [424]:
# NOTE(security): pickle.load can execute arbitrary code while loading;
# only load these .sav files when they come from a trusted source.
# Each file is opened in a `with` block so its handle is closed after loading.
with open('price_regression_model.sav','rb') as pickle_file:
    reg = pickle.load(pickle_file)
with open('categorical_features.sav','rb') as pickle_file:
    categorical_features = pickle.load(pickle_file)
with open('features_complete_list.sav','rb') as pickle_file:
    features_complete_list = pickle.load(pickle_file)
with open('features_complete_list_dummies.sav','rb') as pickle_file:
    features_complete_list_dummies = pickle.load(pickle_file)

### Input and test

In [432]:
# Select the hotel to analyse by exact name match.
# NOTE(review): this filters on hotel_name, not hotel_unique_name, so it returns several rows if the name is shared.
input_hotel = df_hotels[df_hotels['hotel_name']=='Buffalo Grand Hotel']

In [433]:
# Print the selected hotel's name and negative-mention counts per topic.
display_info_hotel(input_hotel)

Hotel name:  Buffalo Grand Hotel
---------------------
Negative mentions per topic: 
(0)  Noise/Smell/AC-Heat:  14
(1)  Staff/Check-in/out:  34
(2)  Breakfast:  30
(3)  Facilities:  46
(4)  Location:  32
(5) Bathroom:  53
(6) Room amenities:  129
(7) Bed:  48


In [437]:
# Scenario: subtract 46 negative mentions from topic 3 (Facilities), i.e. the
# full count displayed for this hotel; all other topics stay unchanged.
counts_delta_vec = [0, 0, 0, -46, 0, 0, 0, 0]
test_output = update_price(
    reg,
    categorical_features,
    features_complete_list,
    features_complete_list_dummies,
    input_hotel,
    counts_delta_vec,
)

In [438]:
# [baseline predicted price, predicted price after the count change, percent change vs. baseline]
test_output

[64.37007636661016, 67.90071356352766, 5.484904471464783]

In [436]:
# Price value stored in the dataset for this hotel, shown for comparison with the model's baseline prediction.
input_hotel['hotel_room_price_per_person_avg']

573    71.5
Name: hotel_room_price_per_person_avg, dtype: float64