# Itinerary Algorithm 

Below are the functions modularized to add to the itinerary and create the order of attractions based on user preferences. The algorithm functions below only consider attractions at the moment, restuarant data will need to be added in. 

Libraries used:
- numpy
- pandas
- matplotlib
- sklearn
- collections
- datetime
- math
- geopy

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from collections import Counter
from datetime import date
import math
import geopy
from geopy.distance import vincenty

In [2]:
def attraction_clustering(start_date, end_date, city_name, attractions_df):
    """
    Inputs: start date, end date, city name user is traveling to, and the attractions dataframe
    Returns the filtered data with the clusters in a separate column based on KMeans
    Start and end date should already be in date(2020, 5, 15) type of format for this function to work
    """
    num_days = end_date - start_date
    num_days = num_days.days
    print("Number of days: ", num_days)
    print("City: ", city_name)

    filtered_data = attractions_df[attractions_df["City"] == city_name]
    index_names = filtered_data[filtered_data['Address'] == "No Address Found"].index
    filtered_data.drop(index_names, inplace=True)

    X = filtered_data.loc[:,['Latitude','Longitude']]

    kmeans = KMeans(n_clusters=num_days, random_state=0, max_iter = 1000).fit(X)
    id_label=kmeans.labels_

    filtered_data["cluster"] = kmeans.labels_
    return filtered_data


In [3]:
def get_start_per_cluster(df):
    """
    Take in the dataframe that is the result of attraction clustering (each attraction in a cluster)
    Returns a dataframe that has num_days number of rows (or one row per cluster) 
    based on the top ranked attraction in the cluster
    """
    idx = df.groupby(["cluster"])["Rank"].transform(min) == df['Rank'] 
    starting_attractions = df[idx]
    return starting_attractions

In [4]:
# anjali's function
def distances(starting_lat, starting_long, dataframe):
    """
    Function written by Anjali which gets the distances from the input starting lat and long to different places
    The dataframe is sorted based on distance from nearest to furthest
    """
    dataframe["Latitude"] = [-1 if lat == "No Address Found" else pd.to_numeric(lat) 
                             for lat in dataframe["Latitude"]]
    dataframe["Longitude"] = [-1 if long == "No Address Found" else pd.to_numeric(long)
                              for long in dataframe["Longitude"]]
    
    coords_1 = (starting_lat, starting_long)
    coords_2 = [(dataframe["Latitude"][i], dataframe["Longitude"][i]) 
                for i in range(len(dataframe["Latitude"]))]
    dists = [-1 if (math.isnan(coords_2[i][0]) & math.isnan(coords_2[i][1])) 
             else (vincenty(coords_1, coords_2[i]).miles)
             for i in range(len(coords_2))]
    dataframe_copy = dataframe 
    dataframe_copy['distance_from_start'] = dists
    dataframe_copy = dataframe_copy.sort_values(by='distance_from_start', ascending=True)
    return dataframe_copy

In [26]:
def rank_user_pref(user_pref_list, df):
    """
    Take in the df that has already been ranked by distance
    Takes into account the user preferences and adds a flag based on if an attraction follows with the user preference list or not
    The dataframe is then sorted by user preferences and then distances, within a cluster (return this)
    """

    preferences_mapping = {
    "Museums" : ["Museums"],
    "Famous Sites": ["Nature & Parks", "Sights & Landmarks"],
    "History": ["Transportation", "Events"],
    "Thrill-seeking adventures": ["Concerts & Shows", "Casinos & Gambling", "Water & Amusement Parks"],
    "Food" : ["Food & Drink"],
    "Shopping" : ["Shopping"],
    }
    
    if "A bit of everything" in user_pref_list:
        return df

    elif len(user_pref_list) != 0:
        # to get all the preferences corresponding to Type of Attraction
        top_prefs = []
        for p in user_pref_list:
            top_prefs += preferences_mapping[p]
            
        # put a flag 1 if attraction is something in user preferences, 0 otherwise
        df["user_pref_flag"] = df["Type"].apply(lambda x: 0 if x in top_prefs else 1)
        
        # sort by cluster, top_prefs (desc), dist
        df = df.sort_values(by=['cluster', 'user_pref_flag', 'Distance Ranking Order'])
        return df
      
    else:
        return df

In [31]:
# inputs we have: start and end date, what city user wants to go 
# random start, end dates and city name for testing purposes
start_date = date(2020, 5, 15)
end_date = date(2020, 5, 18)
city_name = "San Francisco"
user_preferences = ['Food', 'Shopping', 'Museums']
attractions = pd.read_csv("top_thirty_attraction_tripadvisor_sf_no_la.csv")

clustered_df = attraction_clustering(start_date, end_date, city_name, attractions)
start_att_df = get_start_per_cluster(clustered_df)

Number of days:  3
City:  San Francisco


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [32]:
dist_columns = list(clustered_df.columns) + ["Distance Ranking Order"]
cluster_distance_df = pd.DataFrame(data=[], columns=dist_columns)

for index, row in start_att_df.iterrows(): 
    start_lat = row["Latitude"]
    start_long = row["Longitude"]
    cluster_df = clustered_df[clustered_df["cluster"] == row["cluster"]].reset_index()
    end_df = distances(start_lat, start_long, cluster_df)
    distance_ranking_order = [i for i in range(1, end_df.shape[0] + 1)]
    end_df["Distance Ranking Order"] = distance_ranking_order
    cluster_distance_df = cluster_distance_df.append(end_df)

  from ipykernel import kernelapp as app


The result is a dataframe that has all the original attraction information but also includes what cluster the attraction is in based on the KMeans clustering done above, and the order the sites should be visited in based on looking at the top attraction in each cluster and ranking based on nearest distance.

In [33]:
cluster_distance_df

Unnamed: 0,Address,Attraction Name,City,Distance Ranking Order,Latitude,Link to Attraction Reviews,Longitude,Number of Reviews,Price,Rank,Type,cluster,distance_from_start,index
0,"Alcatraz Island, Parade Ground, San Francisco,...",Alcatraz Island,San Francisco,1,37.826721,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.422759,"55,267 reviews",$105.00,1,Nature & Parks,0,0.0,0.0
11,"Musée Mécanique, Fishermans Wharf, Russian Hil...",Musee Mecanique,San Francisco,2,37.809449,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.416283,"2,869 reviews",No Price,23,Museums,0,1.242812,22.0
12,"Fishermans Wharf, Russian Hill, San Francisco,...",Fisherman's Wharf,San Francisco,3,37.809167,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.416599,"19,301 reviews",$20.00,27,Sights & Landmarks,0,1.25672,26.0
10,"Pier 39, North Beach, San Francisco, San Franc...",Pier 39,San Francisco,4,37.809785,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.410266,"22,474 reviews",No Price,20,Sights & Landmarks,0,1.35333,19.0
4,"Lombard Street, Russian Hill, San Francisco, S...",Lombard Street,San Francisco,5,37.802076,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.418809,"18,111 reviews",No Price,13,Sights & Landmarks,0,1.71342,12.0
7,"Coit Tower, Telegraph Hill Boulevard, Telegrap...",Coit Tower,San Francisco,6,37.802379,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.405834,"4,477 reviews",No Price,16,Sights & Landmarks,0,1.917285,15.0
2,"Exploratorium, Herb Caen Way, Northeast Waterf...",Exploratorium,San Francisco,7,37.800906,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.398523,"3,491 reviews",$19.95,7,Museums,0,2.219988,6.0
9,"Cable Car Museum, 1201, Mason Street, Nob Hill...",Cable Car Museum,San Francisco,8,37.79481,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.411863,"3,700 reviews",No Price,19,Museums,0,2.280188,18.0
3,"1 Ferry Building, San Francisco, CA 94111",Ferry Building Marketplace,San Francisco,9,37.7958,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.3938,"7,358 reviews",No Price,12,Shopping,0,2.656803,11.0
5,"Museum of Ice Cream, 1, Grant Avenue, Union Sq...",Museum of Ice Cream,San Francisco,10,37.787008,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.405166,"1,591 reviews",No Price,14,Museums,0,2.903211,13.0


In [35]:
user_pref_and_clustered_distance = rank_user_pref(user_preferences, cluster_distance_df)
user_pref_and_clustered_distance

Unnamed: 0,Address,Attraction Name,City,Distance Ranking Order,Latitude,Link to Attraction Reviews,Longitude,Number of Reviews,Price,Rank,Type,cluster,distance_from_start,index,user_pref_flag
11,"Musée Mécanique, Fishermans Wharf, Russian Hil...",Musee Mecanique,San Francisco,2,37.809449,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.416283,"2,869 reviews",No Price,23,Museums,0,1.242812,22.0,0
2,"Exploratorium, Herb Caen Way, Northeast Waterf...",Exploratorium,San Francisco,7,37.800906,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.398523,"3,491 reviews",$19.95,7,Museums,0,2.219988,6.0,0
9,"Cable Car Museum, 1201, Mason Street, Nob Hill...",Cable Car Museum,San Francisco,8,37.79481,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.411863,"3,700 reviews",No Price,19,Museums,0,2.280188,18.0,0
3,"1 Ferry Building, San Francisco, CA 94111",Ferry Building Marketplace,San Francisco,9,37.7958,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.3938,"7,358 reviews",No Price,12,Shopping,0,2.656803,11.0,0
5,"Museum of Ice Cream, 1, Grant Avenue, Union Sq...",Museum of Ice Cream,San Francisco,10,37.787008,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.405166,"1,591 reviews",No Price,14,Museums,0,2.903211,13.0,0
6,"151 3rd St, San Francisco, CA 94103",San Francisco Museum of Modern Art (SFMOMA),San Francisco,11,37.7857,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.4011,"2,036 reviews",$25.00,15,Museums,0,3.067366,14.0,0
0,"Alcatraz Island, Parade Ground, San Francisco,...",Alcatraz Island,San Francisco,1,37.826721,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.422759,"55,267 reviews",$105.00,1,Nature & Parks,0,0.0,0.0,1
12,"Fishermans Wharf, Russian Hill, San Francisco,...",Fisherman's Wharf,San Francisco,3,37.809167,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.416599,"19,301 reviews",$20.00,27,Sights & Landmarks,0,1.25672,26.0,1
10,"Pier 39, North Beach, San Francisco, San Franc...",Pier 39,San Francisco,4,37.809785,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.410266,"22,474 reviews",No Price,20,Sights & Landmarks,0,1.35333,19.0,1
4,"Lombard Street, Russian Hill, San Francisco, S...",Lombard Street,San Francisco,5,37.802076,www.tripadvisor.com/Attraction_Review-g60713-d...,-122.418809,"18,111 reviews",No Price,13,Sights & Landmarks,0,1.71342,12.0,1
