In [3]:
import os
import pandas as pd
import numpy as np
import random
from time import sleep

import json
import time

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import NMF

from nltk.corpus import stopwords

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from sklearn.metrics.pairwise import cosine_similarity

%matplotlib inline

In [4]:
attr = pd.read_pickle('attractions.pkl')
host = pd.read_pickle('hostels.pkl')
attractions = pd.read_pickle('attractions_all.pkl')
hostels = pd.read_pickle('hostels_all.pkl')
user_reviews = pd.read_pickle('userreviews.pkl')

### Aggregate Reviews by Attraction/Hostel

In [5]:
def aggReviews(df):
    reviews = []
    urls = []
    for url in df['url'].unique():
        reviews.append(df[df['url']==url].review.str.cat())
        urls.append(url)
        
    return reviews,urls
        
att_reviews,att_urls = aggReviews(attractions)
host_reviews,host_urls = aggReviews(hostels)

### Topic Modeling -- Attractions

In [6]:
def attractionTM(documents,max_df,min_df,n_topics,n_terms,show_topics=1):
    
    att_stopwords = ['however','medellin','marta','santa','cali', 'santa marta','juan','tayrona','rosario','bogotá',
                     'recommend','monserrate','villa','leyva','botero','simon','walled','city','town',
                     'salento','cartagena','san andres','si','gracias','andres','bolivar','candelaria','salt']
    stop_words = stopwords.words('english') + stopwords.words('spanish') + att_stopwords

    vectorizer = TfidfVectorizer(max_df=max_df,min_df=min_df, stop_words=stop_words,use_idf=True, ngram_range=(1,3))

    dtm = vectorizer.fit_transform(documents)
    nmf_model = NMF(n_topics,verbose=0)

    dtm_nmf = nmf_model.fit_transform(dtm)
    dtm_nmf = Normalizer(copy=False).fit_transform(dtm_nmf)

    wt = pd.DataFrame(nmf_model.components_,columns=vectorizer.get_feature_names()).T.reset_index()
    wt = wt.set_index('index')
    wt.index.name = None

    if show_topics == 1:
        for column in wt.columns:
            print("Topic",column+1,": Top",n_terms,"Terms\n-----------------------")
            print(wt.iloc[:,column].sort_values(ascending=False)[:n_terms])
            print("\n-----------------------")
        
    return vectorizer, nmf_model, dtm_nmf

In [8]:
att_vectorizer, att_nmf_model, att_dtm_nmf = attractionTM(documents = att_reviews,
                                             max_df = 0.25,
                                             min_df = 0.05,
                                             n_topics = 18,
                                             n_terms = 5,
                                             show_topics = 1)

Topic 1 : Top 5 Terms
-----------------------
hike        2.174523
trek        1.251086
trail       0.823993
hiking      0.783370
mountain    0.519883
Name: 0, dtype: float64

-----------------------
Topic 2 : Top 5 Terms
-----------------------
church              1.770954
churches            0.270285
altar               0.229164
beautiful church    0.177247
mass                0.176788
Name: 1, dtype: float64

-----------------------
Topic 3 : Top 5 Terms
-----------------------
mall          1.561154
stores        0.641038
shopping      0.631102
food court    0.461913
court         0.429531
Name: 2, dtype: float64

-----------------------
Topic 4 : Top 5 Terms
-----------------------
beach      2.080315
beaches    0.408861
sand       0.280960
playa      0.231297
blanca     0.139052
Name: 3, dtype: float64

-----------------------
Topic 5 : Top 5 Terms
-----------------------
dive           1.267024
diving         1.132910
instructor     0.362882
equipment      0.207892
instructors  

### Topic Modeling -- Hostels

In [130]:
def hostelTM(documents,max_df,min_df,n_topics,n_terms,show_topics=1):
    hostel_stopwords = [
                        'park','square',
                        'however','000','10','15','super','20','beach',
                        'bogota','candelaria','cartagena','bogotá','santa','marta','palomino',
                        'taganga','salento','santa marta','tayrona','medellin','poblado','medellín'
                        'san','andres','cali',
                        'quite','great','could','many','much','sheraton','westin','taipei','enough',
                        'front','back','side','would','tuk','like','cbd','lambeth','handily','stay','stayed','also',
                        'buena','excelente','buen','bueno','san juan','juan','personas','really','nice','recommend',
                        'south america','make sure','days','little','sure','one','two','bit',
                        'recommended','minute','three','year','mosquito','though','next',
                        'one','pool','wi']
    stop_words = stopwords.words('english') + stopwords.words('spanish') + hostel_stopwords

    vectorizer = TfidfVectorizer(max_df=max_df,min_df=min_df, stop_words=stop_words,use_idf=True, ngram_range=(2,2))

    dtm = vectorizer.fit_transform(documents)
    nmf_model = NMF(n_topics,verbose=0)

    dtm_nmf = nmf_model.fit_transform(dtm)
    dtm_nmf = Normalizer(copy=False).fit_transform(dtm_nmf)

    wt = pd.DataFrame(nmf_model.components_,columns=vectorizer.get_feature_names()).T.reset_index()
    wt = wt.set_index('index')
    wt.index.name = None

    if show_topics == 1:
        for column in wt.columns:
            print("Topic",column+1,": Top",n_terms,"Terms\n-----------------------")
            print(wt.iloc[:,column].sort_values(ascending=False)[:n_terms])
            print("\n-----------------------")
        
    return vectorizer, nmf_model, dtm_nmf

In [131]:
host_vectorizer, host_nmf_model, host_dtm_nmf = hostelTM(documents = host_reviews,
                                                         max_df = 0.25,
                                                         min_df = 0.05,
                                                         n_topics = 12,
                                                         n_terms = 10,
                                                         show_topics = 1)

Topic 1 : Top 10 Terms
-----------------------
party hostel        0.717859
hostel clean        0.564197
bed dorm            0.536750
best hostel         0.478431
meet people         0.460052
hostel staff        0.419315
common areas        0.390671
hostel good         0.367147
well equipped       0.353813
beds comfortable    0.329244
Name: 0, dtype: float64

-----------------------
Topic 2 : Top 10 Terms
-----------------------
food good          0.746752
food delicious     0.536810
lunch dinner       0.470720
good food          0.421066
day trip           0.404040
per person         0.307460
well worth         0.273912
told us            0.260111
spent nights       0.259275
breakfast lunch    0.257437
Name: 1, dtype: float64

-----------------------
Topic 3 : Top 10 Terms
-----------------------
feel home        1.201017
make feel        1.114695
made feel        0.202290
away home        0.184791
home away        0.160484
every morning    0.131933
want leave       0.129725
living ro

### Get Address Coordinates

In [132]:
def getGeocodeData(address):
    import googlemaps
    gmaps = googlemaps.Client(key='AIzaSyAhUS5R-l9enAdv6Kug_nAE8gGOdC4_X_0')
    result = gmaps.geocode(address)
    
    return result 

#### Make Attraction Addresses

In [133]:
# att_add = attractions[['url','name','address','locality','country']].drop_duplicates()

# full_adds = []
# full_adds_exname = []
# for index,row in att_add.iterrows():
#     full_add =  att_add.loc[index]['name'] + ', ' + \
#                 att_add.loc[index]['address'] + ', ' + \
#                 att_add.loc[index]['locality'] + ', ' + \
#                 att_add.loc[index]['country']
#     full_adds.append(full_add)
    
#     full_add_exname =   att_add.loc[index]['address'] + ', ' + \
#                         att_add.loc[index]['locality'] + ', ' + \
#                         att_add.loc[index]['country']
                
#     full_adds_exname.append(full_add_exname)
    
# att_add['full_add'] = full_adds
# att_add['add_only'] = full_adds_exname

# ## Attraction Geocodes
# geocodes = []
# for addname,addonly in zip(att_add.full_add,att_add.add_only):
#     geocode = getGeocodeData(addonly)
    
#     if len(geocode) == 0:
#         geocode = getGeocodeData(addname)
      
#     if len(geocode) == 0:
#         geocode = ''
    
#     geocodes.append(geocode)

# att_add['geocode'] = geocodes

#### Make Hostel Addresses

In [134]:
# host_add = hostels[['url','name','address','ext_address','locality','country']].drop_duplicates()

# full_adds = []
# full_adds_exname = []
# for index,row in host_add.iterrows():
#     full_add =  host_add.loc[index]['name'] + ', ' + \
#                 host_add.loc[index]['address'] + ', ' + \
#                 host_add.loc[index]['ext_address'] + ', ' + \
#                 host_add.loc[index]['locality'] + ', ' + \
#                 host_add.loc[index]['country']
#     full_adds.append(full_add)
    
#     full_add_exname =   host_add.loc[index]['address'] + ', ' + \
#                         host_add.loc[index]['ext_address'] + ', ' + \
#                         host_add.loc[index]['locality'] + ', ' + \
#                         host_add.loc[index]['country']
                
#     full_adds_exname.append(full_add_exname)
    
# host_add['full_add'] = full_adds
# host_add['add_only'] = full_adds_exname

# ## Hostel Geocodes
# geocodes = []
# for addname,addonly in zip(host_add.full_add,host_add.add_only):
#     geocode = getGeocodeData(addonly)
    
#     if len(geocode) == 0:
#         geocode = getGeocodeData(addname)
        
#     if len(geocode) == 0:
#         geocode = ''
    
#     geocodes.append(geocode)

# host_add['geocode'] = geocodes

### Add Clean Address and Coordinates to Address Dataframes

In [135]:
def addCleanGeo(dataframe):
    
    address_clean = []
    lat = []
    lng = []
    
    for geocode in dataframe.geocode:
        if len(geocode) > 0:
            i = 0       
            while True:
                try:
                    if 'Colombia' in geocode[i]['formatted_address']:
                        try:
                            add_clean = geocode[i]['formatted_address']
                        except:
                            add_clean = ''

                        try:
                            lt = geocode[i]['geometry']['location']['lat']
                        except:
                            lt = ''

                        try:
                            lg = geocode[i]['geometry']['location']['lng']
                        except:
                            lg = ''

                        address_clean.append(add_clean) 
                        lat.append(lt)
                        lng.append(lg)

                        break

                    else:
                        i += 1
                except:
                    address_clean.append('')
                    lat.append('')
                    lng.append('')
                    break

        else:
            address_clean.append('')
            lat.append('')
            lng.append('')
            next
            
    dataframe['address_clean'] = address_clean
    dataframe['lat'] = lat
    dataframe['lng'] = lng

In [136]:
# addCleanGeo(host_add)
# addCleanGeo(att_add)

In [137]:
def addCityNames(df):
    city = []
    for loc in df.locality:
        result = ''.join([i for i in loc if not i.isdigit()])
        result = result.split(',')[0].strip().\
                                 replace(' -',''). \
                                 replace(' *','')

        if 'Bogota' in loc:
            result = 'Bogota'
        if 'Taganga' in loc:
            result = 'Taganga'
        if 'Choachi' in loc:
            result = 'Choachi'
        if 'Medellin' in loc:
            result = 'Medellin'
        if 'Minca' in loc:
            result = 'Minca'
        if 'San Andres' in loc:
            result = 'San Andres'

        city.append(result)

    df['city'] = city

In [138]:
addCityNames(att_add)
addCityNames(host_add)

In [13]:
att_cities = list(att_add.city.drop_duplicates().sort_values().values)
host_cities = list(host_add.city.drop_duplicates().sort_values().values)

In [14]:
all_cities = att_cities+host_cities

In [42]:
# ## City Geocodes
# geocodes = []
# for city in all_cities:
#     geocode = getGeocodeData(city+' Colombia')
    
#     if len(city) == 0:
#         geocode = ''
        
#     if len(geocode) == 0:
#         geocode = ''
    
#     geocodes.append(geocode)

In [15]:
all_cities = pd.DataFrame(all_cities,columns=['city'])

In [142]:
all_cities['geocode'] = geocodes

NameError: name 'geocodes' is not defined

In [83]:
addCleanGeo(all_cities)

In [84]:
all_cities.columns = ['city','geocode','cityname','city_lat','city_lng']
all_cities.drop(['geocode','cityname'],axis=1,inplace=True)

In [87]:
all_cities.city_lat = pd.to_numeric(all_cities.city_lat)
all_cities.city_lng = pd.to_numeric(all_cities.city_lng)
all_cities.drop_duplicates(inplace=True)
# all_cities.dropna(axis=0,inplace=False)

In [89]:
all_cities.sort_values('city_lat').head()

Unnamed: 0,city,city_lat,city_lng
44,Leticia,-4.203165,-69.935907
220,San Martin de Amacayacu,-3.787286,-70.296751
201,Puerto Narino,-3.7702,-70.38306
37,Ipiales,0.825542,-77.639504
86,Tuquerres,1.085739,-77.618641


In [12]:
att_add = pd.merge(att_add, all_cities, how = 'inner', on = 'city')
host_add = pd.merge(host_add, all_cities, how = 'inner', on = 'city')

NameError: name 'all_cities' is not defined

### Store All

In [120]:
# pd.to_pickle(att_add,'att_add.pkl')
# pd.to_pickle(att_add,'att_add_BACKUP.pkl')
# pd.to_pickle(host_add,'host_add.pkl')
# pd.to_pickle(host_add,'host_add_BACKUP.pkl')
# pd.to_pickle(clean_cities,'clean_cities.pkl')
# pd.to_pickle(clean_cities,'clean_cities_BACKUP.pkl')

In [11]:
att_add = pd.read_pickle('att_add.pkl')
host_add = pd.read_pickle('host_add.pkl')

## Fix missing addresses

In [144]:
host_add[host_add.geocode==''] # 121 missing

Unnamed: 0,url,name,address,ext_address,locality,country,full_add,add_only,geocode,address_clean,lat,lng,city,city_lat,city_lng
4,Hotel_Review-g3754359-d7656787-Reviews-La_Ola_...,La Ola Hostel,Via Playa Donaire,200 mts antes de la playa,"Palomino,",Colombia,"La Ola Hostel, Via Playa Donaire, 200 mts ante...","Via Playa Donaire, 200 mts antes de la playa, ...",,,,,Palomino,11.245121,-73.559397
5,Hotel_Review-g3754359-d8744008-Reviews-Cabanas...,Cabanas San Sebastian,"Frente a la playa, a 15 minutos del rio Palomino",sansebastianpalomino@hotmail.com. Nelson Marti...,"Palomino,",Colombia,"Cabanas San Sebastian, Frente a la playa, a 15...","Frente a la playa, a 15 minutos del rio Palomi...",,,,,Palomino,11.245121,-73.559397
9,Hotel_Review-g3754359-d10394223-Reviews-Ecohot...,Ecohotel Brisas de la Sierra,Sendero Indigena via a la Sierra,Km. 1,"Palomino 446009,",Colombia,"Ecohotel Brisas de la Sierra, Sendero Indigena...","Sendero Indigena via a la Sierra, Km. 1, Palom...",,,,,Palomino,11.245121,-73.559397
12,Hotel_Review-g3754359-d6855228-Reviews-Jaguar_...,Jaguar Azul,Barrio La Sierrita,,"Palomino 570001,",Colombia,"Jaguar Azul, Barrio La Sierrita, , Palomino 57...","Barrio La Sierrita, , Palomino 570001,, Colombia",,,,,Palomino,11.245121,-73.559397
19,Hotel_Review-g3754359-d12405502-Reviews-Makao-...,Makao,Calle 8,"Lote 3, In font of the beach","Palomino 446009,",Colombia,"Makao, Calle 8, Lote 3, In font of the beach, ...","Calle 8, Lote 3, In font of the beach, Palomin...",,,,,Palomino,11.245121,-73.559397
21,Hotel_Review-g3754359-d11044603-Reviews-Bella_...,Bella Flor Hostel,Carrera 6,along via principal that leads to the Donaires...,"Palomino 446009,",Colombia,"Bella Flor Hostel, Carrera 6, along via princi...","Carrera 6, along via principal that leads to t...",,,,,Palomino,11.245121,-73.559397
30,Hotel_Review-g297475-d3731193-Reviews-La_Fonda...,La Fonda Pance,Metros Antes del Desvio Al Topacio 200,,"Cali 760010,",Colombia,"La Fonda Pance, Metros Antes del Desvio Al Top...","Metros Antes del Desvio Al Topacio 200, , Cali...",,,,,Cali,3.451647,-76.531985
33,Hotel_Review-g297475-d9877185-Reviews-Ari_Muna...,Ari Munani La Castellana Pance,Pueblo Pance,La Castellana Corregimiento,"Cali 0076,",Colombia,"Ari Munani La Castellana Pance, Pueblo Pance, ...","Pueblo Pance, La Castellana Corregimiento, Cal...",,,,,Cali,3.451647,-76.531985
71,Hotel_Review-g297478-d7142489-Reviews-Rancho_l...,Rancho los Carrieles,"Vereda el Cerro, Corregimiento de Santa Elena",,"Medellin,",Colombia,"Rancho los Carrieles, Vereda el Cerro, Corregi...","Vereda el Cerro, Corregimiento de Santa Elena,...",,,,,Medellin,6.244203,-75.581212
106,Hotel_Review-g297478-d2042873-Reviews-Arcadia_...,Arcadia Hostel,Calle 10a 11a No. 31A-188,"Poblado, 5 min walk from zona rosa, parque Lleras","Medellin 050021,",Colombia,"Arcadia Hostel, Calle 10a 11a No. 31A-188, Pob...","Calle 10a 11a No. 31A-188, Poblado, 5 min walk...",,,,,Medellin,6.244203,-75.581212


In [145]:
att_add[att_add.geocode==''] # 29 missing

Unnamed: 0,url,name,address,locality,country,full_add,add_only,geocode,address_clean,lat,lng,city,city_lat,city_lng
0,/Attraction_Review-g3860359-d315403-Reviews-Lo...,Lost City,El Mamey,470007,Colombia,"Lost City, El Mamey, 470007,, Colombia","El Mamey, 470007,, Colombia",,,,,,,
15,/Attraction_Review-g297476-d7212576-Reviews-Fr...,Free Tour Cartagena,"Plaza Santa Teresa, just outside Museo Naval d...","Cartagena 095,",Colombia,"Free Tour Cartagena, Plaza Santa Teresa, just ...","Plaza Santa Teresa, just outside Museo Naval d...",,,,,Cartagena,10.391049,-75.479426
30,/Attraction_Review-g297476-d315386-Reviews-San...,San Felipe de Barajas Castle,Avenida Antonio Arevalo,"Cartagena,",Colombia,"San Felipe de Barajas Castle, Avenida Antonio ...","Avenida Antonio Arevalo, Cartagena,, Colombia",,,,,Cartagena,10.391049,-75.479426
32,/Attraction_Review-g297476-d3952259-Reviews-La...,Las Bovedas,Between the Santa Catalina bastion and the for...,"Cartagena,",Colombia,"Las Bovedas, Between the Santa Catalina bastio...",Between the Santa Catalina bastion and the for...,,,,,Cartagena,10.391049,-75.479426
44,/Attraction_Review-g297476-d8282518-Reviews-Mo...,Monumento Pedro de Heredia,Praca dos Coches,"Cartagena,",Colombia,"Monumento Pedro de Heredia, Praca dos Coches, ...","Praca dos Coches, Cartagena,, Colombia",,,,,Cartagena,10.391049,-75.479426
49,/Attraction_Review-g297476-d6542709-Reviews-Vi...,Via Apia,Calle Santo Domingo #33-45,"Cartagena 130000,",Colombia,"Via Apia, Calle Santo Domingo #33-45, Cartagen...","Calle Santo Domingo #33-45, Cartagena 130000,,...",,,,,Cartagena,10.391049,-75.479426
84,/Attraction_Review-g297476-d8754893-Reviews-To...,Tobacco and Rum,Centro Calle del Curato #38-08,"Cartagena,",Colombia,"Tobacco and Rum, Centro Calle del Curato #38-0...","Centro Calle del Curato #38-08, Cartagena,, Co...",,,,,Cartagena,10.391049,-75.479426
88,/Attraction_Review-g297476-d6537864-Reviews-Ve...,Veleros Colombia,Av. Miramar,"Cartagena 00000,",Colombia,"Veleros Colombia, Av. Miramar, Cartagena 00000...","Av. Miramar, Cartagena 00000,, Colombia",,,,,Cartagena,10.391049,-75.479426
89,/Attraction_Review-g297476-d7609757-Reviews-Na...,Navega Colombia,Avenida miramar 19-50,"Cartagena 0000,",Colombia,"Navega Colombia, Avenida miramar 19-50, Cartag...","Avenida miramar 19-50, Cartagena 0000,, Colombia",,,,,Cartagena,10.391049,-75.479426
96,/Attraction_Review-g297476-d8331650-Reviews-Ca...,Cartagena Explore,El Carmelo Manzana 3,"Cartagena,",Colombia,"Cartagena Explore, El Carmelo Manzana 3, Carta...","El Carmelo Manzana 3, Cartagena,, Colombia",,,,,Cartagena,10.391049,-75.479426


### Recommendation Engine

In [146]:
def getRecs(username,
            num_recs=30,
            rec_type='attractions',
            rec_rating_filter=4.0,
            user_rating_threshold=0.0,
            aggregate_reviews=0):
    
    reviews = user_reviews[(user_reviews.category==rec_type) &
                           (user_reviews.username==username) &
                           (user_reviews.rating>=user_rating_threshold)
                          ].review 
    
    labels = reviews.index
    
    if aggregate_reviews == 1:
        reviews = [''.join(reviews)]
        labels = [username]
        
    if rec_type == 'attractions':
        vectorizer = att_vectorizer
        nmf_model = att_nmf_model
        dtm_nmf = att_dtm_nmf
        all_labels = att_urls
        info = attractions
        addresses = att_add

    if rec_type == 'hotels':
        vectorizer = host_vectorizer
        nmf_model = host_nmf_model
        dtm_nmf = host_dtm_nmf
        all_labels = host_urls
        info = hostels
        addresses = host_add
        
    user_dtm = vectorizer.transform(reviews)
    user_dtm_nmf = nmf_model.transform(user_dtm)
    user_dtm_nmf = Normalizer(copy=False).fit_transform(user_dtm_nmf)

    all_scores = pd.DataFrame(dtm_nmf,index = all_labels)
    user_scores = pd.DataFrame(user_dtm_nmf,index = labels)
    sims = pd.DataFrame(
            cosine_similarity(all_scores,user_scores),
            columns=user_scores.index,index=all_scores.index)
   
    urls = []
    recscores = []
    for i in range(sims.shape[1]):    
        urls += list(sims.iloc[:,i].sort_values(ascending=False)[:num_recs].index)
        recscores += list(sims.iloc[:,i].sort_values(ascending=False)[:num_recs].values)
        
    if aggregate_reviews == 0: 
        urls = urls[:num_recs]
        recscores = recscores[:num_recs]

    recommendations = pd.DataFrame([urls,recscores]).T
    recommendations.columns = ['url','recscore']
    recommendations.drop_duplicates(inplace=True)
    
    if rec_type == 'attractions':
        mergeinfo = pd.merge(recommendations, info, how='inner', on='url')\
        [['url', 'recscore','categories', 'description','days', 'hoursopen', 'recstay', 
          'numreviews','overallrating']].drop_duplicates()
        
        output = pd.merge(mergeinfo, addresses, how = 'inner', on = 'url')\
        [['url','name', 'recscore', 'categories', 'description', 'days', 'hoursopen',
           'recstay', 'numreviews','overallrating','address_clean','lat','lng','city',
          'city_lat','city_lng']]
    if rec_type == 'hotels':
        mergeinfo = pd.merge(recommendations, info, how='inner', on='url')\
        [['url', 'recscore','numreviews','overallrating']].drop_duplicates()
        
        output = pd.merge(mergeinfo, addresses, how = 'inner', on = 'url')\
        [['url','name','recscore','numreviews', 'overallrating','address_clean', 'lat',
          'lng','city','city_lat','city_lng']]
    
    return output[(output.address_clean != '') & (output.overallrating >= rec_rating_filter)]

### Generate Attraction & Hostel Recommendations for All Users

In [147]:
users = user_reviews.username.unique()
user_hostel_recs = {}
user_attraction_recs = {}
for user in users:
    user_hostel_recs[user] = getRecs(user,num_recs=1000,rec_type='hotels',rec_rating_filter=4.5,\
                                user_rating_threshold=4.0,aggregate_reviews=1)
    user_attraction_recs[user] = getRecs(user,num_recs=1000,rec_type='attractions',rec_rating_filter=4.5,\
                                    user_rating_threshold=4.0,aggregate_reviews=1)

In [148]:
pd.to_pickle(user_hostel_recs,'user_hostel_recs.pkl')
pd.to_pickle(user_attraction_recs,'user_attraction_recs.pkl')