In [1]:
import pandas as pd
import ipywidgets as w
from IPython.display import display, IFrame
from __future__ import print_function
import re
from utils import Util
from rbm import RBM
import math

In [2]:
# Load the attractions records from the ETL output into a DataFrame.
att_df = pd.read_json(
    'etl/attractions.json',
    orient='records',
)

In [3]:
# Trip-details form: traveller name, destination and acceptable price range.
name = w.Text(description="User Name")
place = w.Text(description="Destination")

# The range slider holds integers only, so round the observed price bounds
# outwards (floor the minimum, ceil the maximum). Passing a fractional maximum
# straight to the slider would cut it down to the next lower integer and leave
# the most expensive prices outside the selectable range.
# int(...) keeps the bounds integral even where math.floor/ceil return floats.
min_price = int(math.floor(att_df.price.min()))
max_price = int(math.ceil(att_df.price.max()))

budget = w.IntRangeSlider(
    min=min_price,
    max=max_price,
    step=10,
    value=[min_price, max_price],  # start with the full range selected
    description="Budget",
)
out = w.VBox([name, place, budget])
display(out)

VBox(children=(Text(value='', description='User Name'), Text(value='', description='Destination'), IntRangeSli…

In [4]:
# Travel-date pickers; both are created first and then shown start-before-end.
start = w.DatePicker(disabled=False, description='Start Date')
end = w.DatePicker(disabled=False, description='End Date')
display(start)
display(end)

DatePicker(value=None, description='Start Date')

DatePicker(value=None, description='End Date')

In [7]:
# Count attractions per category, order by that count (largest first) and keep
# the first 18 rows; the count column produced by size() is labelled 0.
category_df = (
    att_df.groupby('category')
    .size()
    .reset_index()
    .sort_values([0], ascending=False)[:18]
)
categories = list(category_df['category'].values)

# Filled by on_button_clicked: category name -> the rating slider shown for it.
cat_rat = {}
def on_button_clicked(b):
    """Show a 0-5 rating slider for the category of the clicked button.

    ``b`` is the clicked button; its ``description`` is used as the category
    name. A category that already has a slider in ``cat_rat`` is ignored, so
    clicking the same button again does nothing. While fewer than five
    categories have a slider, a reminder with the remaining count is printed.
    """
    category = b.description
    if category not in cat_rat:
        print(category)
        rating_slider = w.IntSlider(min=0, max=5, step=1, description='Rate')
        display(rating_slider)
        cat_rat[category] = rating_slider

        still_needed = 5 - len(cat_rat)
        if still_needed > 0:
            print("Rate {x} more!\n".format(x=still_needed))
    
# One full-width, 100px-high button per category, all sharing one layout object.
but_layout = w.Layout(height='100px', width='100%')
but_items = [w.Button(layout=but_layout, description=c) for c in categories]

# Register the click handler on every button (on_clk keeps the return values).
on_clk = [button.on_click(on_button_clicked) for button in but_items]

# Arrange the buttons as three side-by-side columns: 6, 6 and the remainder.
r1 = w.VBox(but_items[:6])
r2 = w.VBox(but_items[6:12])
r3 = w.VBox(but_items[12:])
buttons = w.HBox([r1, r2, r3])

print("Select and rate atleast 5 categories and rate them:")
display(buttons)

Select and rate atleast 5 categories and rate them:


HBox(children=(VBox(children=(Button(description='private_&_custom_tours', layout=Layout(height='100px', width…

water_sports


IntSlider(value=0, description='Rate', max=5)

Rate 4 more!

tours_&_sightseeing


IntSlider(value=0, description='Rate', max=5)

Rate 3 more!

private_&_custom_tours


IntSlider(value=0, description='Rate', max=5)

Rate 2 more!

walking_&_biking_tours


IntSlider(value=0, description='Rate', max=5)

Rate 1 more!

family_friendly


IntSlider(value=0, description='Rate', max=5)

food,_wine_&_nightlife


IntSlider(value=0, description='Rate', max=5)

air,_helicopter_&_balloon_tours


IntSlider(value=0, description='Rate', max=5)

In [6]:
# Snapshot the current widget values into plain Python objects.
# Names are lower-cased and spaces become underscores.
user_name = name.value.lower().replace(' ', '_')
province = place.value.lower().replace(' ', '_')

# budget.value is unpacked as a (lower, upper) pair.
low, high = (float(bound) for bound in budget.value)

begin_date, end_date = str(start.value), str(end.value)

# Category name -> rating currently set on that category's slider, as a float.
cat_rating = {category: float(slider.value) for category, slider in cat_rat.items()}
cat_rating

{'private_&_custom_tours': 5.0,
 'walking_&_biking_tours': 4.0,
 'recommended_experiences': 1.0,
 'water_sports': 3.0,
 'food,_wine_&_nightlife': 4.0,
 'transfers_&_ground_transport': 2.0}

In [7]:
def f(row):
    """Average the ratings in ``row`` per category.

    ``row['category']`` and ``row['rating']`` are read as parallel sequences:
    the rating at position ``i`` belongs to the category at position ``i``.

    Returns a dict mapping every category that occurs to the mean of its
    ratings, keyed in order of first appearance.
    """
    ratings_by_category = {}
    for position, category in enumerate(row['category']):
        ratings_by_category.setdefault(category, []).append(row['rating'][position])
    return {
        category: sum(scores) / len(scores)
        for category, scores in ratings_by_category.items()
    }

def sim_score(row):
    """Score how far ``row['user_data']`` is from ``row['cat_rat']`` (lower is closer).

    Both entries are dicts of category -> rating. For the categories of
    ``user_data`` that also appear in ``cat_rat``, the square root of the summed
    squared rating differences is divided by the number of such categories;
    one point is then added for every ``user_data`` category that is missing
    from ``cat_rat``. With no shared category at all the result is 100.
    """
    reference = row['cat_rat']
    wanted = row['user_data']

    shared = 0
    squared_error = 0.0
    for category, rating in wanted.items():
        if category not in reference:
            continue
        shared += 1
        squared_error += (rating - reference[category]) ** 2

    if shared == 0:
        return 100

    unmatched = len(wanted) - shared
    return (math.sqrt(squared_error) / shared) + unmatched

In [15]:
def get_recc(cat_rating):
    """Produce recommendations for ``cat_rating`` via the closest existing user.

    ``cat_rating`` is a dict of category -> rating. Each user found in the
    ratings data gets a dict of per-category average ratings (built with ``f``),
    which is compared against ``cat_rating`` using ``sim_score``. The user with
    the smallest score is passed to ``rbm.load_predict``; the resulting scores
    go through ``rbm.calculate_scores`` and are written out by ``rbm.export``
    under ``'recommendations/' + filename``.

    Returns ``(filename, user, rbm_att)``: the run name built from the settings
    below, the selected user id, and the first value returned by
    ``util.preprocess``.
    """
    # Run settings; their values are encoded in `filename` further down.
    epochs, rows, batch_size = 50, 40000, 16
    alpha, H = 0.01, 128
    data_dir = 'etl/'

    util = Util()
    ratings, attractions = util.read_data(data_dir)
    rbm_att, train = util.preprocess(ratings)
    rbm = RBM(alpha, H, len(ratings))

    # Attach the category of each rated attraction to its rating row.
    category_lookup = attractions[["attraction_id", "category"]].set_index("attraction_id")
    joined = ratings.set_index('attraction_id').join(category_lookup).reset_index('attraction_id')

    # Per user: one list of categories and one list of ratings.
    by_user = joined.groupby('user_id')
    user_categories = by_user['category'].apply(list).reset_index().set_index('user_id')
    user_ratings = by_user['rating'].apply(list).reset_index().set_index('user_id')
    cat_rat_df = user_categories.join(user_ratings)
    cat_rat_df['cat_rat'] = cat_rat_df.apply(f, axis=1)
    cat_rat_df = cat_rat_df.reset_index()[['user_id', 'cat_rat']]

    # Every row gets the same cat_rating dict so sim_score can compare row-wise.
    cat_rat_df['user_data'] = [cat_rating] * len(cat_rat_df)
    cat_rat_df['sim_score'] = cat_rat_df.apply(sim_score, axis=1)
    ranked = cat_rat_df.sort_values(['sim_score'])
    user = ranked.values[0][0]  # first column (user_id) of the lowest-scoring row

    filename = "e{}_r{}_lr{}_hu{}_bs{}".format(epochs, rows, alpha, H, batch_size)
    reco, weights, vb, hb = rbm.load_predict(filename, train, user)
    unseen, seen = rbm.calculate_scores(ratings, attractions, reco, user)
    rbm.export(unseen, seen, 'recommendations/' + filename, str(user))
    return filename, user, rbm_att

In [16]:
# Run the recommender for the collected category ratings, then read the
# selected user's "unseen" CSV from the run's recommendations folder.
filename, user, rbm_att = get_recc(cat_rating)
recc_df = pd.read_csv(
    'recommendations/{f}/user{u}_unseen.csv'.format(f=filename, u=user)
)

Reading the data
Preprocessing the dataset
Model restored


In [24]:
# Relabel the recommendation columns, then preview the first 11 rows.
recc_df.columns = [
    'attraction_id',
    'att_name',
    'att_cat',
    'att_price',
    'score',
]
recc_df.head(n=11)

Unnamed: 0,attraction_id,att_name,att_cat,att_price,score
0,994,full-day_mont-tremblant_rouge_river_rafting_to...,water_sports,98.29,5.0
1,1053,medieval_times_dinner_and_tournament_toronto,family_friendly,68.99,3.477647
2,379,full-day_niagara_ice_wine_festival_tour_with_l...,"food,_wine_&_nightlife",395.0,1.598323
3,831,private_arrival_transfer:_calgary_internationa...,transfers_&_ground_transport,528.26,1.540151
4,65,3_hour_whale_watching_tour,"cruises,_sailing_&_water_tours",129.0,1.491627
5,1376,ultimate_day_tour_to_victoria,recommended_experiences,363.56,1.383766
6,862,ultimate_day_tour_to_victoria,walking_&_biking_tours,363.56,1.120624
7,1181,ultimate_day_tour_to_victoria,luxury_&_special_occasions,363.56,1.084641
8,759,call_of_the_wild_atv_tour,tours_&_sightseeing,156.45,0.949624
9,54,ultimate_day_tour_to_victoria,"cruises,_sailing_&_water_tours",363.56,0.88214
