## Configure and install libraries

In [None]:
#!pip install bs4
#!conda install -c conda-forge folium=0.5.0 --yes

import requests
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from folium.features import DivIcon
pd.set_option('display.max_rows', None)

In [None]:
df = pd.read_csv('worldcities.csv')
df = df.drop(['city_ascii', 'iso2', 'iso3', 'admin_name', 'capital', 'id'], axis='columns')

In [None]:
df[df['country']=='United States']

## User inputs: current city and destination country (for comparison)

In [None]:
current_city = 'Paris'

destination_country = 'United States'

###### Getting location data for current_city and top 20 cities in destination_country

In [None]:
current_df = df[df['city']=='Paris'].head(1)

destination_df = df[df['country']== destination_country].head(20)

print(current_df)
print()
print(destination_df)

## Importing data from foursquare

In [None]:
# Hidden sensitive info

#CLIENT_ID = ''
#CLIENT_SECRET = ''
#ACCESS_TOKEN = ''
#VERSION = ''
#LIMIT = 

#### Function to get venues around a specific location

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['city', 
                  'city Latitude', 
                  'city Longitude', 
                  'venue', 
                  'venue Latitude', 
                  'venue Longitude', 
                  'venue Category']
    
    return(nearby_venues)

#### Getting foursquare data for current & destination locations

In [None]:
current_venues = getNearbyVenues(current_df['city'], current_df['lat'], current_df['lng'])

destination_venues = getNearbyVenues(destination_df['city'], destination_df['lat'], destination_df['lng'])

In [273]:
all_venues = pd.concat([current_venues, destination_venues])

In [None]:
# one hot encoding
venues_onehot = pd.get_dummies(all_venues[['venue Category']], prefix="", prefix_sep="")

# add city column back to dataframe
venues_onehot['city'] = all_venues['city'] 

# group results by city
venues_grouped = venues_onehot.groupby('city').mean().reset_index()

# check intermediate results
venues_grouped

In [284]:
venues_grouped

Unnamed: 0,city,Alsatian Restaurant,Alternative Healer,American Restaurant,Animal Shelter,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,...,Turkish Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Atlanta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Boston,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Brooklyn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.011236,0.011236,0.0,0.0,0.0,0.011236,0.011236,0.011236
3,Chicago,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Dallas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,...,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0
5,Denver,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0
6,Detroit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Houston,0.0,0.0,0.0375,0.0,0.0125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0125,0.0,0.0,0.0125
8,Los Angeles,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Miami,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Define function to calculate euclidian distance between cities

In [275]:
def euclidian_distance(venueslist, cityname):

    venueslist = venues_grouped
    cityname = 'Paris'
    point1 = venueslist[venueslist['city']==cityname].reset_index().drop(['index'], axis = 'columns')
    ans = pd.DataFrame(columns=['city','distance'])

    for i in venueslist['city']:
        if i != cityname:
            point2 = venueslist[venueslist['city']==i].reset_index().drop(['index'], axis = 'columns')
            dist = 0
            for j in point1.columns.drop(['city']):
                dist = dist + abs(point1.loc[0,j] - point2.loc[0,j])
            ans = ans.append({'city':i, 'distance':dist},ignore_index=True)

    return ans

# Final Outputs

### List of top 20 cities by highest similarity (lowest Euclidian distance)

In [276]:
x = euclidian_distance(venues_grouped, 'Paris')
x['lat'] = destination_df[['city', 'lat', 'lng']].sort_values('city').reset_index().drop(['index'], axis = 'columns')['lat']
x['lng'] = destination_df[['city', 'lat', 'lng']].sort_values('city').reset_index().drop(['index'], axis = 'columns')['lng']
print('List of top 20 cities by highest similarity (lowest Euclidian distance):')
x[['city','distance']].sort_values('distance')

List of top 20 cities by highest similarity (lowest Euclidian distance):


Unnamed: 0,city,distance
16,San Francisco,1.4
7,Houston,1.415
19,Washington,1.58
17,Seattle,1.58
11,New York,1.66
5,Denver,1.713043
9,Miami,1.733043
2,Brooklyn,1.750112
14,Queens,1.755
10,Minneapolis,1.76


### Plotting results in map

In [282]:
# create map
map_clusters = folium.Map(location=[x['lat'].mean(),x['lng'].mean()], zoom_start=4)

# set color scheme for the clusters
colors_array = cm.rainbow(np.linspace(0, 1, 20))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, dist in zip(x['lat'], x['lng'], x['city'], x['distance']):
    string = poi + ': ' + str(round(dist,2))
    folium.map.Marker(
    [lat -.5, lon -1],
    icon=DivIcon(
        icon_size=(50,50),
        icon_anchor=(0,0),
        html='<div style="font-size: 8pt">%s</div>' % string,
        )
    ).add_to(map_clusters)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup = string,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters