In [22]:
import pandas as pd
import numpy as np
import re, math
from collections import Counter
from googlemaps import convert
from googlemaps import Client
from googlemaps.convert import as_list

In [23]:
WORD = re.compile(r'\w+')

In [24]:
def get_cosine(vec1, vec2):
     intersection = set(vec1.keys()) & set(vec2.keys())
     numerator = sum([vec1[x] * vec2[x] for x in intersection])
     sum1 = sum([vec1[x]**2 for x in vec1.keys()])
     sum2 = sum([vec2[x]**2 for x in vec2.keys()])
     denominator = math.sqrt(sum1) * math.sqrt(sum2)
     if not denominator:
        return 0.0
     else:
        return float(numerator) / denominator

In [25]:
def text_to_vector(text):
     words = WORD.findall(text)
     return Counter(words)

In [26]:
#remove spaces from the category column of dataset
def clean_data(x):
    if isinstance(x, list):
        return [str.lower(i.replace(" ", "")) for i in x]
    else:
        if isinstance(x, str):
            return str.lower(x.replace(" ", ""))
        else:
            return ''

In [27]:
metadata = pd.read_csv('data_content.csv', low_memory=False)
#print(metadata.head())
print("Select your preferred category:\n1.wildlife \n2.heritage \n3.pilgirmage\n4.park\n5.museum")
text1 = input("Enter User Interests: ")   #user preference
vector1 = text_to_vector(text1)
C = metadata['p_rating'].mean()
m = metadata['count'].quantile(0.75)

Select your preferred category:
1.wildlife 
2.heritage 
3.pilgirmage
4.park
5.museum
Enter User Interests: pilgirmage


In [28]:
def weighted_rating(x, m=m, C=C):
    v = x['count']
    R = x['p_rating']
    # Calculation based on the Bayesian Rating Formula
    return (v/(v+m) * R) + (m/(m+v) * C)

In [10]:
metadata['category'] = metadata['category'].apply(clean_data)
metadata['score'] = metadata.apply(weighted_rating, axis=1)
#print(metadata.head())
cos=[]
for i in list(metadata['category']):
    #print(type(i))
    text2 = i
    vector2 = text_to_vector(text2)
    cosine = get_cosine(vector1, vector2)
    cos.append(cosine)
metadata['cosine']=cos
x=metadata['cosine']>0.0
rec=pd.DataFrame(metadata[x])
rec=rec.sort_values('score',ascending=False)
src=input("Enter your location: ")
dest=list(rec['title'])

Enter your location: amer fort


In [29]:
metadata['category'] = metadata['category'].apply(clean_data)
metadata['score'] = metadata.apply(weighted_rating, axis=1)
metadata.head()

Unnamed: 0,category,distance,duration,nearby_places,title,url,p_rating,count,itemId,score
0,wildlife,6 Kms,1-2 Hours,,Jaipur Zoo,/rajasthan/jaipur/jaipur-zoo,2.2,158,1,3.244683
1,heritage,6 Kms,1-2 Hours,,Nahargarh Fort,/rajasthan/jaipur/nahargarh-fort,4.6,652,2,4.047382
2,heritage,13 Kms,2-3 Hours,"Jaigarh Fort (1 km by walk), Srijagat Siromani...",Amer Fort / Amber Fort,/rajasthan/jaipur/amer-fort-amber-fort,5.0,783,3,4.315621
3,pilgrimage,6 Kms,1-2 Hours,,Birla Mandir,/rajasthan/jaipur/birla-mandir,4.9,482,4,4.091953
4,heritage,6 Kms,30 Mins,,Hawa Mahal,/rajasthan/jaipur/hawa-mahal,4.2,890,5,3.902043


In [33]:
cos=[]
for i in list(metadata['category']):
    #print(i)
    text2 = i
    vector2 = text_to_vector(text2)
    cosine = get_cosine(vector1, vector2)
    cos.append(cosine)
cos

[0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [34]:
metadata['cosine']=cos
x=metadata['cosine']>0.0
rec=pd.DataFrame(metadata[x])
rec=rec.sort_values('score',ascending=False)

In [35]:
rec.head()

Unnamed: 0,category,distance,duration,nearby_places,title,url,p_rating,count,itemId,score,cosine
3,pilgrimage,6 Kms,1-2 Hours,,Birla Mandir,/rajasthan/jaipur/birla-mandir,4.9,482,4,4.091953,1
9,pilgrimage,6.5 Kms,1-2 Hours,,Akshardham Temple,/rajasthan/jaipur/akshardham-temple,5.0,345,10,4.015713,1
6,pilgrimage,6 Kms,30 Mins,,Moti Dungri Ganesh Temple,/rajasthan/jaipur/moti-dungri-ganesh-temple,4.5,350,7,3.846396,1
23,pilgrimage,14 Kms,2 Hours,,ISKCON Temple,/rajasthan/jaipur/iskcon-temple,3.9,412,24,3.652606,1
12,pilgrimage,11 Kms,1-2 Hours,,Khole Ke Hanuman Ji Temple,/rajasthan/jaipur/khole-ke-hanuman-ji-temple,3.5,990,13,3.498756,1


In [36]:
src=input("Enter your location: ")
dest=list(rec['title'])

Enter your location: amer fort


In [37]:
def distance_matrix(client,origins, destinations,
                    mode=None, language=None, avoid=None, units=None,
                    departure_time=None, arrival_time=None, transit_mode=None,
                    transit_routing_preference=None, traffic_model=None, region=None):
    params = {
        "origins": convert.location_list(origins),
        "destinations": convert.location_list(destinations)
    }

    if mode:
        # NOTE(broady): the mode parameter is not validated by the Maps API
        # server. Check here to prevent silent failures.
        if mode not in ["driving", "walking", "bicycling", "transit"]:
            raise ValueError("Invalid travel mode.")
        params["mode"] = mode

    if language:
        params["language"] = language

    if avoid:
        if avoid not in ["tolls", "highways", "ferries"]:
            raise ValueError("Invalid route restriction.")
        params["avoid"] = avoid

    if units:
        params["units"] = units

    if departure_time:
        params["departure_time"] = convert.time(departure_time)

    if arrival_time:
        params["arrival_time"] = convert.time(arrival_time)

    if departure_time and arrival_time:
        raise ValueError("Should not specify both departure_time and"
                         "arrival_time.")

    if transit_mode:
        params["transit_mode"] = convert.join_list("|", transit_mode)

    if transit_routing_preference:
        params["transit_routing_preference"] = transit_routing_preference

    if traffic_model:
        params["traffic_model"] = traffic_model

    if region:
        params["region"] = region
    #print(client._request("/maps/api/distancematrix/json", params))
    return client._request("/maps/api/distancematrix/json", params)

In [38]:
client = Client(key='AIzaSyB0CIzNJY23OQHbrE-7HBvHCtStfzMADG4')
dist=[]
dur=[]
for d in dest:
    d=d+",Jaipur"
    #print(d)
    output=distance_matrix(client,src,d)
    #print(output)
    a1=(output['rows'][0]['elements'][0]['distance']['text'])
    a2=(output['rows'][0]['elements'][0]['duration']['text'])
    dist.append(a1)
    dur.append(a2)

In [42]:
final=pd.DataFrame(rec,index=None,columns=['title','category','score','distance','duration'])
final

Unnamed: 0,title,category,score,distance,duration
3,Birla Mandir,pilgrimage,4.091953,6 Kms,1-2 Hours
9,Akshardham Temple,pilgrimage,4.015713,6.5 Kms,1-2 Hours
6,Moti Dungri Ganesh Temple,pilgrimage,3.846396,6 Kms,30 Mins
23,ISKCON Temple,pilgrimage,3.652606,14 Kms,2 Hours
12,Khole Ke Hanuman Ji Temple,pilgrimage,3.498756,11 Kms,1-2 Hours
14,Garh Ganesh Temple,pilgrimage,3.37377,7 Kms,30 Mins
