In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
import json # library to handle JSON files

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
from uszipcode import SearchEngine
import googlemaps
pd.set_option('display.max_rows', 500)

### Get list of 50 top college towns

In [2]:
url = "http://www.collegeranker.com/features/best-college-towns/"

In [3]:
r = requests.get(url)

In [4]:
soup = BeautifulSoup(r.text, "html.parser")

In [5]:
post_table = soup.find("div",class_="awr-i")

In [6]:
town_list = []
for row in post_table.find_all('h2'):
    town = row.text.split('.', 1)[1].lstrip()
    town_list.append(town)

In [7]:
# Dictionary to conver State Name to two letter code
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [8]:
town_df = pd.DataFrame([(x.split(',')[0], x.split(',')[1].lstrip()) for x in town_list])
town_df.columns = ['City', 'State']

In [9]:
# Convert States to Two-Letter Code
town_df['State'] = [us_state_abbrev[x] if x in list(us_state_abbrev.keys()) else x for x in town_df['State']]

In [10]:
town_df['Town'] = town_df['City'] + ', ' + town_df['State'].map(str)

### Use geocoder to get latitiude and longitude

In [11]:
def get_coordinates(df):
    lat_list = []
    lng_list = []
    API_KEY = 'AIzaSyDfaz-Hluid0RfZy6NDa3z9l26Hcuws93M'
    gmaps = googlemaps.Client(key=API_KEY)
    for x in df['Town']:
        geocode_result = gmaps.geocode(x)
        lat_list.append(geocode_result[0]['geometry']['location']['lat'])
        lng_list.append(geocode_result[0]['geometry']['location']['lng'])
    df['lat'] = lat_list
    df['lng'] = lng_list
    return df

In [12]:
get_coordinates(town_df)

Unnamed: 0,City,State,Town,lat,lng
0,Troy,AL,"Troy, AL",31.808768,-85.969951
1,Wellesley,MA,"Wellesley, MA",42.296797,-71.292388
2,Columbus,MS,"Columbus, MS",33.495674,-88.427263
3,Berkeley,CA,"Berkeley, CA",37.871593,-122.272747
4,Lawton,OK,"Lawton, OK",34.603567,-98.395929
5,Vermillion,SD,"Vermillion, SD",42.779442,-96.92921
6,Brunswick,GA,"Brunswick, GA",31.149953,-81.491489
7,Williamsburg,VA,"Williamsburg, VA",37.270702,-76.707457
8,Boulder,CO,"Boulder, CO",40.014986,-105.270546
9,Monroe,LA,"Monroe, LA",32.509311,-92.119301


### Get Foursquare data on each town

In [13]:
# Credentials for URL Scrape
CLIENT_ID = 'LI4K1OOUAFPJJSTNNZ23CZ05ROFAGZ5LLOOM1WNWWE0PN0P0' # your Foursquare ID
CLIENT_SECRET = 'P1N0RHNW1IMWEVOT205NDGHN1MIGCGAKN52HCH3JVTZ4AVRF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 200 # limit of number of venues returned by Foursquare API

In [14]:
def getNearbyVenues(towns):
    count = 1
    venues_list=[]
    for town in towns:
        print(town, count)
        count += 1
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&near={}&limit={}'.format(
                    CLIENT_ID, 
                    CLIENT_SECRET, 
                    VERSION, 
                    town,
                    LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(x['venue']['name'],
                            x['venue']['location']['lat'], 
                            x['venue']['location']['lng'],
                            town,
                            x['venue']['categories'][0]['name']) for x in results])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Venue',
                                'Venue Latitude', 
                                'Venue Longitude',
                                'City',
                                'Venue Category']
        
    return(nearby_venues)

In [15]:
all_venues = getNearbyVenues(town_df['Town'])

Troy, AL 1
Wellesley, MA 2
Columbus, MS 3
Berkeley, CA 4
Lawton, OK 5
Vermillion, SD 6
Brunswick, GA 7
Williamsburg, VA 8
Boulder, CO 9
Monroe, LA 10
Charlottesville, VA 11
Gainesville, FL 12
Burlington, VT 13
Long Beach, CA 14
Flagstaff, AZ 15
Ithaca, NY 16
Durham, NC 17
Auburn, AL 18
Savannah, GA 19
Panama City, FL 20
East Lansing, MI 21
Eugene, OR 22
State College, PA 23
Fayetteville, AR 24
Manhattan, KS 25
Logan, UT 26
Morgantown, WV 27
Asheville, NC 28
Duluth, MN 29
Santa Cruz, CA 30
Ann Arbor, MI 31
Syracuse, NY 32
Fargo, ND 33
Athens, GA 34
Madison, WI 35
Provo, UT 36
Bozeman, MT 37
Oxford, MS 38
Lincoln, NE 39
Corvallis, OR 40
Beaufort, SC 41
Malibu, CA 42
Tempe, AZ 43
South Bend, IN 44
Bloomington, IN 45
Ames, IA 46
Athens, OH 47
Fort Collins, CO 48
Lawrence, KS 49
St. Augustine, FL 50


### Map of all locations

In [16]:
map_df = all_venues.groupby('City').mean().reset_index()
map_df.columns = ['City', 'Latitude', 'Longitude']

In [17]:
map_df.head()

Unnamed: 0,City,Latitude,Longitude
0,"Ames, IA",42.023742,-93.632158
1,"Ann Arbor, MI",42.273732,-83.745716
2,"Asheville, NC",35.574073,-82.553108
3,"Athens, GA",33.946499,-83.390763
4,"Athens, OH",39.331427,-82.088832


In [18]:
# create map of US using latitude and longitude values
map_us = folium.Map(location=[48, -102], zoom_start=4)

# add markers to map
for lat, lng, city in zip(town_df['lat'], town_df['lng'], town_df['City']):
    label = '{}'.format(city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_us)  
    
map_us

### Analyze each neighborhood

In [19]:
# one hot encoding
venues_onehot = pd.get_dummies(all_venues[['Venue Category']], prefix="", prefix_sep="")

# add city and state column back to dataframe
venues_onehot['City'] = all_venues['City']

# move city column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,City,Accessories Store,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Arcade,...,Water Park,Waterfall,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Troy, AL",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Troy, AL",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Troy, AL",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Troy, AL",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Troy, AL",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
venues_grouped = venues_onehot.groupby('City').mean().reset_index()
venues_grouped

Unnamed: 0,City,Accessories Store,African Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Arcade,...,Water Park,Waterfall,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Ames, IA",0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.0,0.010753,...,0.0,0.0,0.0,0.010753,0.0,0.0,0.010753,0.0,0.0,0.0
1,"Ann Arbor, MI",0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
2,"Asheville, NC",0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.0
3,"Athens, GA",0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Athens, OH",0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0
5,"Auburn, AL",0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Beaufort, SC",0.0,0.0,0.0,0.096386,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.024096,0.0,0.0,0.0
7,"Berkeley, CA",0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
8,"Bloomington, IN",0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
9,"Boulder, CO",0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
len(venues_grouped)

50

In [22]:
num_top_venues = 10

for town in venues_grouped['City']:
    print("----"+town+"----")
    temp = venues_grouped[venues_grouped['City'] == town].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ames, IA----
                  venue  freq
0           Pizza Place  0.09
1                   Bar  0.08
2         Grocery Store  0.05
3                  Café  0.05
4           Coffee Shop  0.04
5            Restaurant  0.03
6        Sandwich Place  0.03
7  Fast Food Restaurant  0.02
8                  Park  0.02
9  Gym / Fitness Center  0.02


----Ann Arbor, MI----
            venue  freq
0   Grocery Store  0.06
1     Coffee Shop  0.05
2     Pizza Place  0.04
3  Ice Cream Shop  0.04
4             Bar  0.03
5            Park  0.03
6         Brewery  0.03
7     Music Venue  0.03
8      Steakhouse  0.02
9  Sandwich Place  0.02


----Asheville, NC----
                 venue  freq
0              Brewery  0.13
1  American Restaurant  0.05
2                Hotel  0.04
3        Grocery Store  0.04
4       Breakfast Spot  0.04
5                  Bar  0.04
6          Coffee Shop  0.03
7               Garden  0.03
8            BBQ Joint  0.03
9                 Park  0.02


----Athens, GA----
 

                 venue  freq
0        Grocery Store  0.10
1   Mexican Restaurant  0.05
2  American Restaurant  0.05
3          Pizza Place  0.05
4          Coffee Shop  0.04
5                  Bar  0.04
6   Italian Restaurant  0.04
7            BBQ Joint  0.04
8       Breakfast Spot  0.03
9       Sandwich Place  0.03


----Ithaca, NY----
                     venue  freq
0              Coffee Shop  0.05
1                     Park  0.04
2           Ice Cream Shop  0.04
3          Thai Restaurant  0.03
4           Sandwich Place  0.03
5                   Bakery  0.03
6               Bagel Shop  0.03
7  New American Restaurant  0.03
8      American Restaurant  0.03
9       Italian Restaurant  0.03


----Lawrence, KS----
                venue  freq
0         Coffee Shop  0.09
1        Burger Joint  0.05
2                 Bar  0.05
3         Pizza Place  0.04
4  Mexican Restaurant  0.04
5      Breakfast Spot  0.03
6             Brewery  0.03
7      Ice Cream Shop  0.03
8      Sandwich Place 

In [23]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
town_venues_sorted = pd.DataFrame(columns=columns)
town_venues_sorted['City'] = venues_grouped['City']

for ind in np.arange(venues_grouped.shape[0]):
    town_venues_sorted.iloc[ind, 1:] = return_most_common_venues(venues_grouped.iloc[ind, :], num_top_venues)

town_venues_sorted

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Ames, IA",Pizza Place,Bar,Grocery Store,Café,Coffee Shop,Restaurant,Sandwich Place,Gym,BBQ Joint,Mexican Restaurant
1,"Ann Arbor, MI",Grocery Store,Coffee Shop,Pizza Place,Ice Cream Shop,Brewery,Bar,Park,Music Venue,Sandwich Place,Cocktail Bar
2,"Asheville, NC",Brewery,American Restaurant,Grocery Store,Breakfast Spot,Bar,Hotel,Coffee Shop,BBQ Joint,Garden,Wine Bar
3,"Athens, GA",Mexican Restaurant,Bar,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,American Restaurant,Liquor Store,Brewery,New American Restaurant
4,"Athens, OH",Pizza Place,Coffee Shop,Dive Bar,Hotel,Bar,Mexican Restaurant,Chinese Restaurant,Sandwich Place,Beer Bar,American Restaurant
5,"Auburn, AL",American Restaurant,Mexican Restaurant,Coffee Shop,BBQ Joint,Grocery Store,Sandwich Place,Pharmacy,Pizza Place,Burger Joint,Seafood Restaurant
6,"Beaufort, SC",American Restaurant,Hotel,Grocery Store,Seafood Restaurant,Discount Store,Fast Food Restaurant,BBQ Joint,Bar,Sandwich Place,Coffee Shop
7,"Berkeley, CA",Park,Coffee Shop,Ice Cream Shop,Brewery,Japanese Restaurant,Trail,Mexican Restaurant,Theater,Thai Restaurant,Flower Shop
8,"Bloomington, IN",Brewery,American Restaurant,Gym / Fitness Center,Pizza Place,Ice Cream Shop,Mexican Restaurant,Grocery Store,Bakery,Sandwich Place,Coffee Shop
9,"Boulder, CO",Trail,Brewery,Ice Cream Shop,Sandwich Place,American Restaurant,Pizza Place,Breakfast Spot,Café,Bakery,Coffee Shop


### Cluster neighborhoods

In [25]:
# set number of clusters
kclusters = 5

venues_grouped_clustering = venues_grouped.drop('City', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(venues_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 2, 0, 4, 4, 4, 1, 2, 4, 2], dtype=int32)

### Evaluate Clusters

In [45]:
# Fit several clustering models that take n_clusters
from sklearn import cluster
from sklearn.preprocessing import StandardScaler

X = StandardScaler().fit_transform(venues_grouped_clustering)

models = [cluster.SpectralClustering(n_clusters=5), 
          cluster.KMeans(n_clusters=5), 
          cluster.MiniBatchKMeans(n_clusters=5), 
          cluster.AgglomerativeClustering(n_clusters=5)]

for model in models:
    model.fit(X)
    print(model.__class__.__name__)
    print("\tFirst 10 labels", model.labels_[:10])
    print("\t", len(model.labels_))

SpectralClustering
	First 10 labels [0 0 0 0 0 4 0 0 0 0]
	 50
KMeans
	First 10 labels [1 3 3 1 1 1 1 1 1 3]
	 50
MiniBatchKMeans
	First 10 labels [3 3 3 3 3 3 3 3 3 3]
	 50
AgglomerativeClustering
	First 10 labels [0 0 0 0 0 0 0 0 0 0]
	 50


In [46]:
# ... then measure their pairwise similarity
from sklearn import metrics
from itertools import combinations

for clust1, clust2 in combinations(models, 2):
    print(clust1.__class__.__name__, "versus", clust2.__class__.__name__)
    print("\tRand score:", metrics.adjusted_rand_score(clust1.labels_, clust2.labels_))
    print("\tMutual info:", metrics.adjusted_mutual_info_score(clust1.labels_, clust2.labels_))

SpectralClustering versus KMeans
	Rand score: -0.0736862516967229
	Mutual info: -0.04492752084537756
SpectralClustering versus MiniBatchKMeans
	Rand score: -0.11041258516330876
	Mutual info: -0.06157292834619116
SpectralClustering versus AgglomerativeClustering
	Rand score: -0.10679660930917206
	Mutual info: -0.06668319136534008
KMeans versus MiniBatchKMeans
	Rand score: 0.03250599631607545
	Mutual info: 0.13717046666222324
KMeans versus AgglomerativeClustering
	Rand score: 0.017655255823736705
	Mutual info: 0.1438674688851498
MiniBatchKMeans versus AgglomerativeClustering
	Rand score: 0.31332365779680815
	Mutual info: 0.21853287747643182


### Visualize Clusters

In [27]:
venues_merged = town_df[['Town', 'lat', 'lng']]
venues_merged.columns = ['City', 'Latitude', 'Longitude']
# add clustering labels
venues_merged['Cluster Labels'] = kmeans.labels_

# merge venues_grouped with to add latitude/longitude for each neighborhood
venues_merged = venues_merged.join(town_venues_sorted.set_index('City'), on='City')

venues_merged.head() # check the last columns!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Troy, AL",31.808768,-85.969951,4,Fast Food Restaurant,American Restaurant,Sandwich Place,Pharmacy,Pizza Place,Mexican Restaurant,Hotel,BBQ Joint,Discount Store,Grocery Store
1,"Wellesley, MA",42.296797,-71.292388,2,Pizza Place,Italian Restaurant,Donut Shop,Coffee Shop,Thai Restaurant,Japanese Restaurant,Grocery Store,Chinese Restaurant,Pharmacy,Bakery
2,"Columbus, MS",33.495674,-88.427263,0,Discount Store,Fast Food Restaurant,American Restaurant,Sandwich Place,Coffee Shop,Mexican Restaurant,Steakhouse,Hotel,BBQ Joint,Sporting Goods Shop
3,"Berkeley, CA",37.871593,-122.272747,4,Park,Coffee Shop,Ice Cream Shop,Brewery,Japanese Restaurant,Trail,Mexican Restaurant,Theater,Thai Restaurant,Flower Shop
4,"Lawton, OK",34.603567,-98.395929,4,Fast Food Restaurant,Mexican Restaurant,Burger Joint,American Restaurant,Ice Cream Shop,BBQ Joint,Pizza Place,Cosmetics Shop,Sandwich Place,Clothing Store


In [28]:
# create map
map_clusters = folium.Map(location=[48, -102], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_merged['Latitude'], venues_merged['Longitude'], venues_merged['City'], venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [29]:
venues_merged.loc[venues_merged['Cluster Labels'] == 0]

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Columbus, MS",33.495674,-88.427263,0,Discount Store,Fast Food Restaurant,American Restaurant,Sandwich Place,Coffee Shop,Mexican Restaurant,Steakhouse,Hotel,BBQ Joint,Sporting Goods Shop
10,"Charlottesville, VA",38.029306,-78.476678,0,Hotel,Mexican Restaurant,American Restaurant,Pizza Place,French Restaurant,Bagel Shop,BBQ Joint,Sandwich Place,Brewery,Burger Joint
15,"Ithaca, NY",42.443961,-76.501881,0,Coffee Shop,Ice Cream Shop,Park,Italian Restaurant,Thai Restaurant,Bakery,Bagel Shop,Sandwich Place,New American Restaurant,American Restaurant
16,"Durham, NC",35.994033,-78.898619,0,Beer Store,Burger Joint,Sandwich Place,Brewery,Gastropub,Coffee Shop,Ice Cream Shop,Hotel,Concert Hall,Bakery
20,"East Lansing, MI",42.736979,-84.483865,0,Sushi Restaurant,Pizza Place,Coffee Shop,Bar,Sandwich Place,American Restaurant,Restaurant,Ice Cream Shop,Golf Course,Juice Bar
22,"State College, PA",40.793395,-77.860001,0,American Restaurant,Breakfast Spot,Pizza Place,Park,Gas Station,Café,Hotel,Sandwich Place,Salon / Barbershop,Bar
23,"Fayetteville, AR",36.082156,-94.171854,0,Burger Joint,Coffee Shop,Bakery,Fast Food Restaurant,Brewery,Sandwich Place,Pizza Place,BBQ Joint,Grocery Store,Fried Chicken Joint
26,"Morgantown, WV",39.629526,-79.955897,0,Coffee Shop,Restaurant,Fast Food Restaurant,American Restaurant,Gas Station,Bar,Pizza Place,Italian Restaurant,Mexican Restaurant,Trail
33,"Athens, GA",33.951935,-83.357567,0,Mexican Restaurant,Bar,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,American Restaurant,Liquor Store,Brewery,New American Restaurant
39,"Corvallis, OR",44.564566,-123.262044,0,Coffee Shop,Park,Sandwich Place,Pizza Place,Brewery,Grocery Store,Bakery,Mexican Restaurant,Vegetarian / Vegan Restaurant,Burger Joint


In [30]:
venues_merged.loc[venues_merged['Cluster Labels'] == 1]

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Brunswick, GA",31.149953,-81.491489,1,American Restaurant,Sandwich Place,Grocery Store,Mexican Restaurant,BBQ Joint,Pizza Place,Department Store,Pharmacy,Clothing Store,Coffee Shop
14,"Flagstaff, AZ",35.198284,-111.651302,1,Coffee Shop,Brewery,Pizza Place,American Restaurant,Mexican Restaurant,Hotel,Restaurant,Dessert Shop,Sporting Goods Shop,Burger Joint
27,"Asheville, NC",35.595058,-82.551487,1,Brewery,American Restaurant,Grocery Store,Breakfast Spot,Bar,Hotel,Coffee Shop,BBQ Joint,Garden,Wine Bar
34,"Madison, WI",43.073052,-89.40123,1,Pizza Place,Liquor Store,American Restaurant,Gastropub,Park,Brewery,Grocery Store,Café,Coffee Shop,Restaurant
37,"Oxford, MS",34.366495,-89.519248,1,Coffee Shop,Sandwich Place,Discount Store,Restaurant,Pizza Place,Hotel,Bookstore,American Restaurant,Bar,Bakery
46,"Athens, OH",39.32924,-82.101255,1,Pizza Place,Coffee Shop,Dive Bar,Hotel,Bar,Mexican Restaurant,Chinese Restaurant,Sandwich Place,Beer Bar,American Restaurant


In [31]:
venues_merged.loc[venues_merged['Cluster Labels'] == 2]

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Wellesley, MA",42.296797,-71.292388,2,Pizza Place,Italian Restaurant,Donut Shop,Coffee Shop,Thai Restaurant,Japanese Restaurant,Grocery Store,Chinese Restaurant,Pharmacy,Bakery
7,"Williamsburg, VA",37.270702,-76.707457,2,Theme Park,Theme Park Ride / Attraction,Seafood Restaurant,History Museum,Gift Shop,Coffee Shop,Fast Food Restaurant,Historic Site,Golf Course,American Restaurant
9,"Monroe, LA",32.509311,-92.119301,2,Seafood Restaurant,Fast Food Restaurant,Mexican Restaurant,Sandwich Place,American Restaurant,Department Store,Coffee Shop,Cosmetics Shop,Cajun / Creole Restaurant,Chinese Restaurant
12,"Burlington, VT",44.475882,-73.212072,2,Pizza Place,Brewery,Bakery,Coffee Shop,Vegetarian / Vegan Restaurant,Sandwich Place,Café,Park,Gym,Breakfast Spot
17,"Auburn, AL",32.609857,-85.480782,2,American Restaurant,Mexican Restaurant,Coffee Shop,BBQ Joint,Grocery Store,Sandwich Place,Pharmacy,Pizza Place,Burger Joint,Seafood Restaurant
19,"Panama City, FL",30.158813,-85.660206,2,Seafood Restaurant,Fast Food Restaurant,Italian Restaurant,Sandwich Place,American Restaurant,Grocery Store,Breakfast Spot,Pizza Place,BBQ Joint,Mexican Restaurant
25,"Logan, UT",41.73698,-111.833836,2,Pizza Place,Mexican Restaurant,Sandwich Place,Fast Food Restaurant,Coffee Shop,American Restaurant,Bakery,Burger Joint,Ice Cream Shop,Clothing Store
30,"Ann Arbor, MI",42.280826,-83.743038,2,Grocery Store,Coffee Shop,Pizza Place,Ice Cream Shop,Brewery,Bar,Park,Music Venue,Sandwich Place,Cocktail Bar
31,"Syracuse, NY",43.048122,-76.147424,2,Pizza Place,Bakery,Italian Restaurant,Pub,Diner,Coffee Shop,Ice Cream Shop,Clothing Store,Bar,American Restaurant
40,"Beaufort, SC",32.431581,-80.669829,2,American Restaurant,Hotel,Grocery Store,Seafood Restaurant,Discount Store,Fast Food Restaurant,BBQ Joint,Bar,Sandwich Place,Coffee Shop


In [32]:
venues_merged.loc[venues_merged['Cluster Labels'] == 3]

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,"Fargo, ND",46.877186,-96.789803,3,Coffee Shop,Pizza Place,Burger Joint,American Restaurant,Sandwich Place,Mexican Restaurant,Bakery,Brewery,Gym,Asian Restaurant


In [33]:
venues_merged.loc[venues_merged['Cluster Labels'] == 4]

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Troy, AL",31.808768,-85.969951,4,Fast Food Restaurant,American Restaurant,Sandwich Place,Pharmacy,Pizza Place,Mexican Restaurant,Hotel,BBQ Joint,Discount Store,Grocery Store
3,"Berkeley, CA",37.871593,-122.272747,4,Park,Coffee Shop,Ice Cream Shop,Brewery,Japanese Restaurant,Trail,Mexican Restaurant,Theater,Thai Restaurant,Flower Shop
4,"Lawton, OK",34.603567,-98.395929,4,Fast Food Restaurant,Mexican Restaurant,Burger Joint,American Restaurant,Ice Cream Shop,BBQ Joint,Pizza Place,Cosmetics Shop,Sandwich Place,Clothing Store
5,"Vermillion, SD",42.779442,-96.92921,4,Pizza Place,Fast Food Restaurant,Bar,Hotel,Sandwich Place,Mexican Restaurant,Video Store,Sports Bar,Park,Bakery
8,"Boulder, CO",40.014986,-105.270546,4,Trail,Brewery,Ice Cream Shop,Sandwich Place,American Restaurant,Pizza Place,Breakfast Spot,Café,Bakery,Coffee Shop
11,"Gainesville, FL",29.651634,-82.324826,4,Grocery Store,American Restaurant,Pizza Place,Mexican Restaurant,Italian Restaurant,BBQ Joint,Coffee Shop,Bar,Sandwich Place,Breakfast Spot
13,"Long Beach, CA",33.77005,-118.19374,4,Beach,American Restaurant,Grocery Store,Ice Cream Shop,Seafood Restaurant,Mexican Restaurant,Burger Joint,Brewery,Coffee Shop,Park
18,"Savannah, GA",32.080899,-81.091203,4,Plaza,Museum,Café,Southern / Soul Food Restaurant,Coffee Shop,Restaurant,Dessert Shop,Park,Grocery Store,Gourmet Shop
21,"Eugene, OR",44.052069,-123.086754,4,Brewery,Pizza Place,Park,Coffee Shop,Breakfast Spot,Vegetarian / Vegan Restaurant,Grocery Store,Thai Restaurant,Café,Vineyard
24,"Manhattan, KS",39.183608,-96.571669,4,Coffee Shop,American Restaurant,Grocery Store,Breakfast Spot,Sandwich Place,Mexican Restaurant,Fast Food Restaurant,Pizza Place,Bar,Golf Course


##### There are four other locations that are similar to Flagstaff<br>
1. Asheville, NC
2. Madison, WI
3. Athens, OH
4. Fort Collins, CO

Madison, WI does not have coffee shops as the top 10 most common Venues. <br>Now, we need to look for the best location to open a coffee shop in Madison, WI.

# Explore Asheville, NC

### Find all zip codes in Ashville

In [34]:
search = SearchEngine(simple_zipcode=True)

In [35]:
res = search.by_city_and_state('Asheville', 'NC')
[x.zipcode for x in res]

['28801', '28803', '28804', '28805', '28806']

In [36]:
ash_zips = [x.zipcode for x in res]

### Find all places that serve coffee

In [37]:
def getVenuesByZip(zipcodes):
    count = 1
    venues_list=[]
    for zipcode in zipcodes:
        print(zipcode, count)
        count += 1
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&near={}&limit={}&query={}'.format(
                    CLIENT_ID, 
                    CLIENT_SECRET, 
                    VERSION, 
                    zipcode,
                    LIMIT,
                    'coffee')

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(x['venue']['name'],
                            x['venue']['location']['lat'], 
                            x['venue']['location']['lng'],
                            zipcode,
                            x['venue']['categories'][0]['name']) for x in results])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Venue',
                                'Venue Latitude', 
                                'Venue Longitude',
                                'Zip Code',
                                'Venue Category']
        
    return(nearby_venues)

In [38]:
ash_venues = getVenuesByZip(ash_zips)

28801 1
28803 2
28804 3
28805 4
28806 5


In [39]:
ash_venues

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Zip Code,Venue Category
0,French Broad Chocolate Lounge,35.595231,-82.551178,28801,Chocolate Shop
1,Posana Cafe,35.594699,-82.551753,28801,Café
2,Corner Kitchen,35.566289,-82.54127,28801,Café
3,High Five Coffee,35.595801,-82.553448,28801,Coffee Shop
4,High Five Coffee Bar,35.601041,-82.555424,28801,Coffee Shop
5,Dobra Tea,35.597427,-82.553548,28801,Tea Room
6,Double D's Coffee & Desserts,35.593223,-82.551365,28801,Coffee Shop
7,Hole,35.580767,-82.573272,28801,Donut Shop
8,Vortex Doughnuts,35.58919,-82.553733,28801,Donut Shop
9,Battle Cat Coffee,35.577951,-82.578697,28801,Coffee Shop


### Map out all spots that serve coffee

In [40]:
# Get Latitude and logitude for Asheville, NC
API_KEY = 'AIzaSyDfaz-Hluid0RfZy6NDa3z9l26Hcuws93M'
gmaps = googlemaps.Client(key=API_KEY)
geocode_result = gmaps.geocode('Asheville, NC')
latitude = geocode_result[0]['geometry']['location']['lat']
longitude = geocode_result[0]['geometry']['location']['lng']

In [41]:
np.arange(len(ash_venues['Zip Code'].unique()))

array([0, 1, 2, 3, 4])

In [42]:
# create map
map_venues = folium.Map(location=[latitude, longitude], zoom_start=12)
# set color scheme for the clusters
zip_dic = {i:x+1 for x,i in enumerate(ash_venues['Zip Code'].unique())}
x = np.arange(len(zip_dic))
ys = [i+x+(i*x)**2 for i in range(len(zip_dic))]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, venue, zipcode in zip(ash_venues['Venue Latitude'], ash_venues['Venue Longitude'], ash_venues['Venue'], ash_venues['Zip Code']):
    label = folium.Popup(str(venue) + ' Zip Code: ' + str(zipcode), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[zip_dic[zipcode]-1],
        fill=True,
        fill_color=rainbow[zip_dic[zipcode]-1],
        fill_opacity=0.7).add_to(map_venues)

map_venues

Observations:
1. University of North Carolina, Asheville is located in zip code 28804
2. There is a lot of competition in the 28801 area coe
3. 28805 and 28803 seem to be far from the University.
4. There aren't many coffee shops in area code 28804. Seems like the best place to open that is near the university