# This is the notebook for my final IBM Data Science capstone project

In [1]:
import pandas as pd
import numpy as np

import folium

In [2]:
from geopy.geocoders import ArcGIS

### Collecting Basic Data for the Neighborhoods

In [3]:
# List of convention centers, with the first one being the venue of the 
# upcoming conference in San Antonio and the last three entries being the
# names of convention centers I have attended conferences at in the past

conv_center = ['Henry B. Gonzalez Convention Center, San Antonio, TX',
               'Huntington Convention Center, Cleveland, OH',
               'Cobo Center, Detroit, MI',
               'Anaheim Convention Center, Anaheim, CA']

In [4]:
# find and store latitudes and longitudes of the convention centers

conv_lat = []
conv_long = []

geolocator = ArcGIS()

for cc in conv_center:
    
    location = None
    
    while location == None:
        location = geolocator.geocode(cc)
    
    conv_lat.append(location.latitude)
    conv_long.append(location.longitude)

In [5]:
print(conv_lat)
print(conv_long)

[29.42222000000004, 41.504099996104934, 42.32713997446484, 33.80297995830737]
[-98.48568999999998, -81.69429997056957, -83.04813998167812, -117.91910000403843]


In [6]:
# names of San Antonio neighorhoods around the convention center

sa_tx_neighs = ['Downtown','Midtown','Southtown','Alamo Heights',
                'North Central', 'Inner West Side','East Side']

In [7]:
sa_tx_lats = []
sa_tx_longs = []

In [8]:
from geopy.exc import GeocoderTimedOut

In [9]:
# find and store latitudes and longitudes of the San Antonio neighborhoods

for neigh in sa_tx_neighs:
    
    location = None
    
    while location == None:
        try:
            location = geolocator.geocode(neigh + ', San Antonio, TX')
        except:
            location = None
    
    sa_tx_lats.append(location.latitude)
    sa_tx_longs.append(location.longitude)

In [10]:
print(sa_tx_lats)
print(sa_tx_longs)

[29.426510000000064, 29.42458000000005, 29.29591000000005, 29.48667501170863, 29.503120000000024, 29.37438042756018, 29.405950000000075]
[-98.48857999999996, -98.49460999999997, -98.41862999999995, -98.4699997706225, -98.52595999999994, -98.71032430037744, -98.42792999999995]


In [11]:
from geopy.distance import geodesic

In [12]:
# find the distance between each neighborhood and the convention center

stay_to_conv = []

for neigh_lat, neigh_long in zip(sa_tx_lats, sa_tx_longs):
    stay_to_conv.append(geodesic( (neigh_lat, neigh_long), (conv_lat[0], conv_long[0]) ).miles)

In [13]:
stay_to_conv

[0.3430295392223524,
 0.5618768744777035,
 9.59433791567158,
 4.538959788818945,
 6.077702008194346,
 13.94308344262236,
 3.658913309040918]

In [14]:
prev_lat = []
prev_long = []

In [15]:
# find and store the latitudes and longitudes I have stayed in on past trips

prev_stays = ['Playhouse Square, Cleveland, OH','North Corktown, Detroit, MI','Northwest Anaheim, Anaheim, CA']

for loc in prev_stays:
    
    location = None
    
    while location == None:
        location = geolocator.geocode(loc)
    
    prev_lat.append(location.latitude)
    prev_long.append(location.longitude)

In [16]:
print(prev_lat)
print(prev_long)

[41.50100000000003, 42.33143000000007, 33.84340000000003]
[-81.68094999999994, -83.06666999999999, -117.95460999999995]


In [17]:
# find the distance between each of my previous stays and the corresponding convention center

for neigh_lat, neigh_long, cc_lat, cc_long in zip(prev_lat, prev_long, conv_lat[1:], conv_long[1:]):
    stay_to_conv.append(geodesic( (neigh_lat, neigh_long), (cc_lat, cc_long) ).miles)

In [18]:
stay_to_conv

[0.3430295392223524,
 0.5618768744777035,
 9.59433791567158,
 4.538959788818945,
 6.077702008194346,
 13.94308344262236,
 3.658913309040918,
 0.7248861956115624,
 0.9941331544847948,
 3.454470063357804]

In [19]:
# re-format the names of San Antonio neighborhoods to include the city and state name

sa_tx_neighs = ["{}, San Antonio, TX".format(neigh) for neigh in sa_tx_neighs]

In [20]:
# combine the lists latitudes, longitudes, and location names
# for San Antonio neighborhoods and previous stays

all_lats = sa_tx_lats + prev_lat
all_longs = sa_tx_longs + prev_long

all_locations = sa_tx_neighs + prev_stays

In [21]:
# get San Antonio city latitude and longitude

sa_location = geolocator.geocode('San Antonio, TX')
sa_lat = sa_location.latitude
sa_long = sa_location.longitude

In [22]:
# plot San Antonio map with blue markers for the neighborhoods
# and a red marker for the convention center

sa_tx_map = folium.Map([sa_lat, sa_long], zoom_start=11)

for neigh, lat, long in zip(sa_tx_neighs, sa_tx_lats, sa_tx_longs):
    folium.CircleMarker([lat,long],
                        radius=4,
                        popup=folium.Popup(neigh, parse_html=True),
                        color='blue',
                        fill=True,
                        fill_color='#3186cc',
                        fill_opacity='0.8').add_to(sa_tx_map)

folium.CircleMarker([conv_lat[0],conv_long[0]],
                    radius=4,
                    popup=folium.Popup(conv_center[0], parse_html=True),
                    color='red',
                    fill=True,
                    fill_color='#a43c2a',
                    fill_opacity='0.8').add_to(sa_tx_map)

sa_tx_map

In [23]:
# create a dataframe containing all the data collected so far

df = pd.DataFrame(data={'Neighborhood':all_locations,
                               'Latitude':all_lats,
                               'Longitude': all_longs,
                               'Distance':stay_to_conv})

In [24]:
df

Unnamed: 0,Neighborhood,Latitude,Longitude,Distance
0,"Downtown, San Antonio, TX",29.42651,-98.48858,0.34303
1,"Midtown, San Antonio, TX",29.42458,-98.49461,0.561877
2,"Southtown, San Antonio, TX",29.29591,-98.41863,9.594338
3,"Alamo Heights, San Antonio, TX",29.486675,-98.47,4.53896
4,"North Central, San Antonio, TX",29.50312,-98.52596,6.077702
5,"Inner West Side, San Antonio, TX",29.37438,-98.710324,13.943083
6,"East Side, San Antonio, TX",29.40595,-98.42793,3.658913
7,"Playhouse Square, Cleveland, OH",41.501,-81.68095,0.724886
8,"North Corktown, Detroit, MI",42.33143,-83.06667,0.994133
9,"Northwest Anaheim, Anaheim, CA",33.8434,-117.95461,3.45447


### Collecting Venue Information for the Neighborhoods

In [25]:
import requests

In [26]:
# define parameters for foursquare api requests

CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

radius = 5000
LIMIT = 50

venues = []

In [27]:
# collect information regarding venues in and around the locations in our dataset

for lat, long, neigh, dist in zip(df['Latitude'], df['Longitude'], df['Neighborhood'], stay_to_conv):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
                
        venues.append((
            neigh,
            dist,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [28]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Distance', 'NeighborhoodLatitude', 'NeighborhoodLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

venues_df

Unnamed: 0,Neighborhood,Distance,NeighborhoodLatitude,NeighborhoodLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,The Majestic Theatre,29.426083,-98.490634,Theater
1,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,The San Antonio River Walk,29.424311,-98.488558,Pedestrian Plaza
2,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,La Panaderia,29.426518,-98.489689,Bakery
3,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Bohanan's Prime Steaks and Seafood,29.426559,-98.490567,Steakhouse
4,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Texas de Brazil - San Antonio,29.426465,-98.488971,Restaurant
5,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Fortress Alamo: The Key To Texas,29.425306,-98.486392,History Museum
6,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Tobin Center for the Performing Arts,29.430331,-98.488762,Concert Hall
7,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Rocky Mountain Chocolate Factory,29.424405,-98.488331,Dessert Shop
8,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,Alamo Plaza,29.425484,-98.486613,Plaza
9,"Downtown, San Antonio, TX",0.34303,29.42651,-98.48858,The Empire Theater,29.425981,-98.491150,Theater


In [29]:
venues_df.shape[0]

427

### Analyzing and Processing the Data

In [30]:
# check the number of results returned for each venue

venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Distance,NeighborhoodLatitude,NeighborhoodLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Alamo Heights, San Antonio, TX",50,50,50,50,50,50,50
"Downtown, San Antonio, TX",50,50,50,50,50,50,50
"East Side, San Antonio, TX",50,50,50,50,50,50,50
"Inner West Side, San Antonio, TX",11,11,11,11,11,11,11
"Midtown, San Antonio, TX",50,50,50,50,50,50,50
"North Central, San Antonio, TX",50,50,50,50,50,50,50
"North Corktown, Detroit, MI",50,50,50,50,50,50,50
"Northwest Anaheim, Anaheim, CA",50,50,50,50,50,50,50
"Playhouse Square, Cleveland, OH",50,50,50,50,50,50,50
"Southtown, San Antonio, TX",16,16,16,16,16,16,16


In [31]:
# remove the data for two neighborhoods with exceptionally low venues returned
# in order to maintain fair comparison when performing clustering later on
# also, these two neighborhoods happen to be the farthest from the convention
# center, and are therefore of less interest to us anyways

venues_df = venues_df[~venues_df.isin(['Inner West Side, San Antonio, TX', 'Southtown, San Antonio, TX'])]

In [32]:
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Distance,NeighborhoodLatitude,NeighborhoodLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Alamo Heights, San Antonio, TX",50,50,50,50,50,50,50
"Downtown, San Antonio, TX",50,50,50,50,50,50,50
"East Side, San Antonio, TX",50,50,50,50,50,50,50
"Midtown, San Antonio, TX",50,50,50,50,50,50,50
"North Central, San Antonio, TX",50,50,50,50,50,50,50
"North Corktown, Detroit, MI",50,50,50,50,50,50,50
"Northwest Anaheim, Anaheim, CA",50,50,50,50,50,50,50
"Playhouse Square, Cleveland, OH",50,50,50,50,50,50,50


In [33]:
# perform one hot encoding for the venue categories
onehot_df = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood and distance columns back to dataframe
onehot_df['Neighborhood'] = venues_df['Neighborhood']
onehot_df['Distance'] = venues_df['Distance']

# move neighborhood and distance columns to the beginning
fixed_columns = list(onehot_df.columns[-2:]) + list(onehot_df.columns[:-2])
onehot_df = onehot_df[fixed_columns]

onehot_df.head()

Unnamed: 0,Wings Joint,Distance,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,...,Theme Park,Trail,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Wine Bar
0,0,0.34303,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0.34303,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0.34303,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0.34303,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0.34303,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
grouped_df = onehot_df.groupby("Neighborhood").mean().reset_index()

grouped_df

Unnamed: 0,Neighborhood,Wings Joint,Distance,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,...,Theme Park,Trail,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Wine Bar
0,"Alamo Heights, San Antonio, TX",0.0,4.53896,0.1,0.0,0.02,0.02,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
1,"Downtown, San Antonio, TX",0.0,0.34303,0.02,0.0,0.02,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"East Side, San Antonio, TX",0.04,3.658913,0.04,0.02,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Midtown, San Antonio, TX",0.0,0.561877,0.02,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"North Central, San Antonio, TX",0.0,6.077702,0.06,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0
5,"North Corktown, Detroit, MI",0.0,0.994133,0.02,0.0,0.0,0.02,0.0,0.02,0.02,...,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.02
6,"Northwest Anaheim, Anaheim, CA",0.0,3.45447,0.02,0.0,0.0,0.0,0.02,0.0,0.0,...,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Playhouse Square, Cleveland, OH",0.02,0.724886,0.08,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0


In [35]:
# the distance column seems to be off postion,
# in the middle of the venue categories
# we move it back, before the categories columns

dist = grouped_df['Distance']

grouped_df.drop(columns=['Distance'], inplace=True)
grouped_df.insert(1,'Distance',dist)
grouped_df

Unnamed: 0,Neighborhood,Distance,Wings Joint,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,...,Theme Park,Trail,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Wine Bar
0,"Alamo Heights, San Antonio, TX",4.53896,0.0,0.1,0.0,0.02,0.02,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
1,"Downtown, San Antonio, TX",0.34303,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"East Side, San Antonio, TX",3.658913,0.04,0.04,0.02,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Midtown, San Antonio, TX",0.561877,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"North Central, San Antonio, TX",6.077702,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0
5,"North Corktown, Detroit, MI",0.994133,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.02,...,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.02
6,"Northwest Anaheim, Anaheim, CA",3.45447,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,...,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Playhouse Square, Cleveland, OH",0.724886,0.02,0.08,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0


In [36]:
# we now find out the top 5 venue categories for each neighborhood by count

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# Create columns for top 10 venues
columns = ['Neighborhood','Distance']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped_df['Neighborhood']
neighborhoods_venues_sorted['Distance'] = grouped_df['Distance']

for ind in np.arange(grouped_df.shape[0]):
    row_categories = grouped_df.iloc[ind, :].iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 2:] = row_categories_sorted.index.values[0:num_top_venues]

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Alamo Heights, San Antonio, TX",4.53896,American Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Pizza Place
1,"Downtown, San Antonio, TX",0.34303,Hotel,Theater,Steakhouse,Concert Hall,Plaza
2,"East Side, San Antonio, TX",3.658913,Mexican Restaurant,Burger Joint,Fast Food Restaurant,Wings Joint,Southern / Soul Food Restaurant
3,"Midtown, San Antonio, TX",0.561877,Hotel,Theater,Plaza,Mexican Restaurant,Park
4,"North Central, San Antonio, TX",6.077702,Burger Joint,Thai Restaurant,American Restaurant,Sushi Restaurant,Mexican Restaurant
5,"North Corktown, Detroit, MI",0.994133,Hotel,Coffee Shop,Steakhouse,Park,Mexican Restaurant
6,"Northwest Anaheim, Anaheim, CA",3.45447,Mexican Restaurant,Burger Joint,Bakery,Diner,Brewery
7,"Playhouse Square, Cleveland, OH",0.724886,American Restaurant,Theater,Coffee Shop,Brewery,Steakhouse


### Clustering

In [37]:
from sklearn.cluster import KMeans

In [38]:
# set number of clusters
kclusters = 3

grouped_clustering = grouped_df.set_index("Neighborhood")

In [39]:
# perform k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=21).fit(grouped_clustering)

# check generated cluster labels
kmeans.labels_

array([0, 1, 0, 1, 2, 1, 0, 1])

In [40]:
# insert column for which cluster the neighborhood belongs to
neighborhoods_venues_sorted.insert(0, 'ClusterLabel', kmeans.labels_)

neighborhoods_venues_sorted

Unnamed: 0,ClusterLabel,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,0,"Alamo Heights, San Antonio, TX",4.53896,American Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Pizza Place
1,1,"Downtown, San Antonio, TX",0.34303,Hotel,Theater,Steakhouse,Concert Hall,Plaza
2,0,"East Side, San Antonio, TX",3.658913,Mexican Restaurant,Burger Joint,Fast Food Restaurant,Wings Joint,Southern / Soul Food Restaurant
3,1,"Midtown, San Antonio, TX",0.561877,Hotel,Theater,Plaza,Mexican Restaurant,Park
4,2,"North Central, San Antonio, TX",6.077702,Burger Joint,Thai Restaurant,American Restaurant,Sushi Restaurant,Mexican Restaurant
5,1,"North Corktown, Detroit, MI",0.994133,Hotel,Coffee Shop,Steakhouse,Park,Mexican Restaurant
6,0,"Northwest Anaheim, Anaheim, CA",3.45447,Mexican Restaurant,Burger Joint,Bakery,Diner,Brewery
7,1,"Playhouse Square, Cleveland, OH",0.724886,American Restaurant,Theater,Coffee Shop,Brewery,Steakhouse


In [41]:
merged_df = neighborhoods_venues_sorted

In [42]:
# view just the San Antonio neighborhoods
merged_df.iloc[:6]

Unnamed: 0,ClusterLabel,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,0,"Alamo Heights, San Antonio, TX",4.53896,American Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Pizza Place
1,1,"Downtown, San Antonio, TX",0.34303,Hotel,Theater,Steakhouse,Concert Hall,Plaza
2,0,"East Side, San Antonio, TX",3.658913,Mexican Restaurant,Burger Joint,Fast Food Restaurant,Wings Joint,Southern / Soul Food Restaurant
3,1,"Midtown, San Antonio, TX",0.561877,Hotel,Theater,Plaza,Mexican Restaurant,Park
4,2,"North Central, San Antonio, TX",6.077702,Burger Joint,Thai Restaurant,American Restaurant,Sushi Restaurant,Mexican Restaurant
5,1,"North Corktown, Detroit, MI",0.994133,Hotel,Coffee Shop,Steakhouse,Park,Mexican Restaurant


In [43]:
# set the neighborhood column as the index in the original dataframe
# to help retrieve their latitudes and longitudes for plotting below
df = df.set_index('Neighborhood')

df

Unnamed: 0_level_0,Latitude,Longitude,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Downtown, San Antonio, TX",29.42651,-98.48858,0.34303
"Midtown, San Antonio, TX",29.42458,-98.49461,0.561877
"Southtown, San Antonio, TX",29.29591,-98.41863,9.594338
"Alamo Heights, San Antonio, TX",29.486675,-98.47,4.53896
"North Central, San Antonio, TX",29.50312,-98.52596,6.077702
"Inner West Side, San Antonio, TX",29.37438,-98.710324,13.943083
"East Side, San Antonio, TX",29.40595,-98.42793,3.658913
"Playhouse Square, Cleveland, OH",41.501,-81.68095,0.724886
"North Corktown, Detroit, MI",42.33143,-83.06667,0.994133
"Northwest Anaheim, Anaheim, CA",33.8434,-117.95461,3.45447


In [44]:
# plot San Antonio map, showing the clusters by color

map_clusters = folium.Map(location=[sa_lat+0.01, sa_long], zoom_start=12)

# Set color scheme for the clusters
rainbow = ['#1b7397','#3e8a5e','#c1031d']

# Add markers to the map
markers_colors = []
for neigh, cluster in zip(merged_df.iloc[:6]['Neighborhood'], merged_df.iloc[:6]['ClusterLabel']):
    label = folium.Popup('{}, Cluster: {}'.format(neigh, cluster), parse_html=True)
    folium.CircleMarker(
        [df.loc[neigh]['Latitude'], df.loc[neigh]['Longitude']],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.8).add_to(map_clusters)

folium.CircleMarker(
        [conv_lat[0], conv_long[0]],
        radius=5,
        popup=folium.Popup(conv_center[0], parse_html=True),
        color='#ffb959',
        fill=True,
        fill_color='#ffb959',
        fill_opacity=0.8).add_to(map_clusters)
       
map_clusters

### Discussion

#### Cluster 1

In [45]:
merged_df[merged_df.ClusterLabel == 0]

Unnamed: 0,ClusterLabel,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,0,"Alamo Heights, San Antonio, TX",4.53896,American Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Pizza Place
2,0,"East Side, San Antonio, TX",3.658913,Mexican Restaurant,Burger Joint,Fast Food Restaurant,Wings Joint,Southern / Soul Food Restaurant
6,0,"Northwest Anaheim, Anaheim, CA",3.45447,Mexican Restaurant,Burger Joint,Bakery,Diner,Brewery


#### Cluster 2

In [46]:
merged_df[merged_df.ClusterLabel == 1]

Unnamed: 0,ClusterLabel,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,1,"Downtown, San Antonio, TX",0.34303,Hotel,Theater,Steakhouse,Concert Hall,Plaza
3,1,"Midtown, San Antonio, TX",0.561877,Hotel,Theater,Plaza,Mexican Restaurant,Park
5,1,"North Corktown, Detroit, MI",0.994133,Hotel,Coffee Shop,Steakhouse,Park,Mexican Restaurant
7,1,"Playhouse Square, Cleveland, OH",0.724886,American Restaurant,Theater,Coffee Shop,Brewery,Steakhouse


#### Cluster 3

In [47]:
merged_df[merged_df.ClusterLabel == 2]

Unnamed: 0,ClusterLabel,Neighborhood,Distance,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,2,"North Central, San Antonio, TX",6.077702,Burger Joint,Thai Restaurant,American Restaurant,Sushi Restaurant,Mexican Restaurant


Of the past experiences, the best I had was during my stay in Cleveland. This was due to my hotel's location; it was in downtown Cleveland, a walk-friendly area with lots to explore and was very close to the convention center. In Anaheim, I stayed in a very *typical* neighborhood, surrounded by plazas of everyday businesses and shops in a random part of town. There was little to explore in the area, and the convention center was close by car but not by foot. In Detroit, I was once again close to downtown, however the neighborhood itself did not feel all too welcoming, and the fun, walkable part of town, as well as the convention center, were again closer by car than by foot.

Cleveland was by quite some margin the best experience I had, and therefore its cluster is the one that interests me most. According to the results of clustering, Downtown and Midtown San Antonio, which ended up in the same cluster as Downtown Cleveland, would offer the most similar experience, while also being the closest to the convention center.