# Capstone Project Jupyter Notebook

### Import libraries

In [1]:
import pandas as pd
import numpy as np

### Read in zip code data

In [93]:
RI_zips = pd.read_csv('https://opendata.arcgis.com/datasets/79ccc9b901684a958ac7134199f82b9f_0.csv')

In [95]:
RI_zips.drop(['OBJECTID','Shape__Area','Shape__Length'],axis = 1, inplace = True)

In [96]:
RI_zips.columns = ['PostalCode']

### Import libraries for mapping, plotting, working with coordinates

In [7]:
from geopy.geocoders import Nominatim

In [9]:
from pandas.io.json import json_normalize

import json

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

In [10]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Folium installed')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

### Format Postal Code information and Pull location data for each code

In [98]:
RI_zips['PostalCode'].astype(str)
RI_zips.head()

Unnamed: 0,PostalCode
0,2885
1,2809
2,2806
3,2921
4,2920


In [99]:
RI_zips['PostalCode'] = '0' + RI_zips['PostalCode'].astype(str)

In [66]:
latitudes = [];
longitudes = [];
addresses = [];

for row in RI_zips['PostalCode']:
    geolocator = Nominatim(user_agent="RI_explorer")
    location = geolocator.geocode(row)
    latitudes.append(location.latitude)
    longitudes.append(location.longitude)
    addresses.append(location.address)

In [86]:
addresses = pd.DataFrame(addresses)

In [87]:
RI_address = addresses[0].str.contains('Rhode Island', regex = True)
RI_address.shape

(77,)

In [101]:
RI_zips['Address'] = addresses[0]
RI_zips.head()

Unnamed: 0,PostalCode,Address
0,2885,"Warren, Rhode Island, 02885, United States of ..."
1,2809,"Bristol, Rhode Island, 02809, United States of..."
2,2806,"Barrington, Rhode Island, 02806, United States..."
3,2921,"Cranston, Providence, Rhode Island, 02921, Uni..."
4,2920,"Pohjois-Espoo, Espoo, Helsingin seutukunta, Uu..."


In [102]:
RI_zips['Latitude'] = latitudes
RI_zips['Longitude'] = longitudes
RI_zips.head()

Unnamed: 0,PostalCode,Address,Latitude,Longitude
0,2885,"Warren, Rhode Island, 02885, United States of ...",41.728366,-71.27021
1,2809,"Bristol, Rhode Island, 02809, United States of...",41.710403,-71.28161
2,2806,"Barrington, Rhode Island, 02806, United States...",41.744013,-71.331062
3,2921,"Cranston, Providence, Rhode Island, 02921, Uni...",41.76081,-71.503702
4,2920,"Pohjois-Espoo, Espoo, Helsingin seutukunta, Uu...",60.265632,24.732453


### Remove Postal Codes with addresses outside of Rhode Island returned by Geocode

In [103]:
RI_zips['RI Address Returned?'] = RI_address
RI_zips.head()

Unnamed: 0,PostalCode,Address,Latitude,Longitude,RI Address Returned?
0,2885,"Warren, Rhode Island, 02885, United States of ...",41.728366,-71.27021,True
1,2809,"Bristol, Rhode Island, 02809, United States of...",41.710403,-71.28161,True
2,2806,"Barrington, Rhode Island, 02806, United States...",41.744013,-71.331062,True
3,2921,"Cranston, Providence, Rhode Island, 02921, Uni...",41.76081,-71.503702,True
4,2920,"Pohjois-Espoo, Espoo, Helsingin seutukunta, Uu...",60.265632,24.732453,False


In [104]:
RI_zips = RI_zips[RI_zips['RI Address Returned?'] == True]
RI_zips

Unnamed: 0,PostalCode,Address,Latitude,Longitude,RI Address Returned?
0,02885,"Warren, Rhode Island, 02885, United States of ...",41.728366,-71.270210,True
1,02809,"Bristol, Rhode Island, 02809, United States of...",41.710403,-71.281610,True
2,02806,"Barrington, Rhode Island, 02806, United States...",41.744013,-71.331062,True
3,02921,"Cranston, Providence, Rhode Island, 02921, Uni...",41.760810,-71.503702,True
5,02919,"Johnston, Rhode Island, 02919, United States o...",41.839760,-71.495033,True
6,02917,"Smithfield, Rhode Island, 02917, United States...",41.895466,-71.522751,True
7,02916,"East Providence, Bristol, Rhode Island, 02916,...",41.845920,-71.356046,True
8,02915,"East Providence, Bristol, Rhode Island, 02915,...",41.788613,-71.366901,True
9,02914,"East Providence, Bristol, Rhode Island, 02914,...",41.818357,-71.365960,True
10,02912,"Providence, Rhode Island, 02912, United States...",41.826846,-71.401021,True


In [106]:
RI_zips = RI_zips.drop('RI Address Returned?',1)

### Extract City Name(s) from the geocode address associated with each zip code. Some cities may be associated to multiple zip codes

In [133]:
RI_zips['City','Rest of Address'] = RI_zips['Address'].str.split(', Rhode')

In [160]:
RI_zips['City'] = cities
RI_zips

Unnamed: 0,PostalCode,Address,Latitude,Longitude,"(City, Rest of Address)",City
0,02885,"Warren, Rhode Island, 02885, United States of ...",41.728366,-71.270210,"[Warren, Island, 02885, United States of Amer...",Warren
1,02809,"Bristol, Rhode Island, 02809, United States of...",41.710403,-71.281610,"[Bristol, Island, 02809, United States of Ame...",Bristol
2,02806,"Barrington, Rhode Island, 02806, United States...",41.744013,-71.331062,"[Barrington, Island, 02806, United States of ...",Barrington
3,02921,"Cranston, Providence, Rhode Island, 02921, Uni...",41.760810,-71.503702,"[Cranston, Providence, Island, 02921, United ...","Cranston, Providence"
5,02919,"Johnston, Rhode Island, 02919, United States o...",41.839760,-71.495033,"[Johnston, Island, 02919, United States of Am...",Smithfield
6,02917,"Smithfield, Rhode Island, 02917, United States...",41.895466,-71.522751,"[Smithfield, Island, 02917, United States of ...","East Providence, Bristol"
7,02916,"East Providence, Bristol, Rhode Island, 02916,...",41.845920,-71.356046,"[East Providence, Bristol, Island, 02916, Uni...","East Providence, Bristol"
8,02915,"East Providence, Bristol, Rhode Island, 02915,...",41.788613,-71.366901,"[East Providence, Bristol, Island, 02915, Uni...","East Providence, Bristol"
9,02914,"East Providence, Bristol, Rhode Island, 02914,...",41.818357,-71.365960,"[East Providence, Bristol, Island, 02914, Uni...",Providence
10,02912,"Providence, Rhode Island, 02912, United States...",41.826846,-71.401021,"[Providence, Island, 02912, United States of ...",North Providence


In [167]:
RI_zips.reset_index(inplace = True)
RI_zips = RI_zips.drop('index',1)

In [174]:
city = [i[0] for i in RI_zips['City', 'Rest of Address']]

In [177]:
RI_zips['City'] = city

In [183]:
RI_data = RI_zips[['PostalCode','City','Latitude','Longitude']]
RI_data.head()

Unnamed: 0,PostalCode,City,Latitude,Longitude
0,2885,Warren,41.728366,-71.27021
1,2809,Bristol,41.710403,-71.28161
2,2806,Barrington,41.744013,-71.331062
3,2921,"Cranston, Providence",41.76081,-71.503702
4,2919,Johnston,41.83976,-71.495033


### Find the geographical coordinates of Rhode Island for mapping purposes. Then use folium to plot the the zip codes across the state.

In [185]:
address = 'Rhode Island, US'

geolocator = Nominatim(user_agent="RI_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Rhode Island are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Rhode Island are 41.7962409, -71.5992372.


In [189]:
map_RI = folium.Map(location=[latitude, longitude], zoom_start=9)

for lat, lng, label in zip(RI_data['Latitude'], RI_data['Longitude'], RI_data['City']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_RI)  
    
map_RI

### Find addresses of the two major airports nearby, and determine the distance from each zip code to each airport.

In [190]:
PVD_address = '2000 Post Rd, Warwick, RI 02886'

pvd_location = geolocator.geocode(PVD_address)
pvd_latitude = pvd_location.latitude
pvd_longitude = pvd_location.longitude

In [191]:
BOS_address = '1 Harborside Dr, Boston, MA 02128'

bos_location = geolocator.geocode(BOS_address)
bos_latitude = bos_location.latitude
bos_longitude = bos_location.longitude

In [192]:
from geopy.distance import vincenty

In [194]:
bos_airport_dist = []
pvd_airport_dist = []

for i in range(0, RI_data.shape[0]):
    bos_dist = vincenty((RI_data['Latitude'][i],RI_data['Longitude'][i]),(bos_latitude,bos_longitude)).miles
    bos_airport_dist.append(bos_dist)
    pvd_dist = vincenty((RI_data['Latitude'][i],RI_data['Longitude'][i]),(pvd_latitude,pvd_longitude)).miles
    pvd_airport_dist.append(pvd_dist)



In [197]:
min_airport_dist = []

for i in range(0,RI_data.shape[0]):
    min_airport_dist.append(min(bos_airport_dist[i],pvd_airport_dist[i]))

In [201]:
RI_data['Proximity to Airport [mi]'] = min_airport_dist
RI_data = RI_data.drop('Proximity to Airport',1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [203]:
RI_data = RI_data[RI_data['PostalCode'] != '02842']
RI_data

Unnamed: 0,PostalCode,City,Latitude,Longitude,Proximity to Airport [mi]
0,02885,Warren,41.728366,-71.270210,8.198661
1,02809,Bristol,41.710403,-71.281610,7.645372
2,02806,Barrington,41.744013,-71.331062,5.256922
3,02921,"Cranston, Providence",41.760810,-71.503702,4.699649
4,02919,Johnston,41.839760,-71.495033,8.794294
5,02917,Smithfield,41.895466,-71.522751,12.893345
6,02916,"East Providence, Bristol",41.845920,-71.356046,9.309410
7,02915,"East Providence, Bristol",41.788613,-71.366901,5.569499
8,02914,"East Providence, Bristol",41.818357,-71.365960,7.368240
9,02912,Providence,41.826846,-71.401021,7.344975


### Define a function to pull nearby venues 

In [211]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode', 
                  'PostalCode Latitude', 
                  'PostalCode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Import additional libraries

In [207]:
import requests
from pandas.io.json import json_normalize
import json

### Store Foursquare credentials

In [205]:
# The code was removed by Watson Studio for sharing.

### Get venue data for all RI postal codes

In [213]:
LIMIT = 100
radius = 5000
RI_venues = getNearbyVenues(names=RI_data['PostalCode'],
                                   latitudes=RI_data['Latitude'],
                                   longitudes=RI_data['Longitude']
                                  )

02885
02809
02806
02921
02919
02917
02916
02915
02914
02912
02911
02909
02908
02907
02906
02905
02904
02903
02896
02895
02876
02865
02864
02863
02861
02859
02858
02857
02839
02838
02831
02825
02816
02815
02814
02802
02817
02818
02886
02888
02889
02893
02835
02837
02841
02871
02872
02878
02898
02892
02891
02882
02881
02879
02875
02874
02873
02852
02836
02833
02832
02822
02813
02812
02808
02807
02804


In [214]:
RI_onehot = pd.get_dummies(RI_venues[['Venue Category']], prefix="", prefix_sep="")

RI_onehot['PostalCode'] = RI_venues['PostalCode'] 

fixed_columns = [RI_onehot.columns[-1]] + list(RI_onehot.columns[:-1])
RI_onehot = RI_onehot[fixed_columns]

RI_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Airport,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Arts & Crafts Store,...,Video Store,Vietnamese Restaurant,Vineyard,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,2885,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [215]:
RI_grouped = RI_onehot.groupby('PostalCode').mean().reset_index()
RI_grouped

Unnamed: 0,PostalCode,Accessories Store,Airport,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Arts & Crafts Store,...,Video Store,Vietnamese Restaurant,Vineyard,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,02802,0.010000,0.010000,0.00,0.000000,0.010000,0.000000,0.00,0.00,0.000000,...,0.010000,0.00,0.000000,0.00,0.010000,0.000000,0.000000,0.000000,0.000000,0.000000
1,02804,0.000000,0.000000,0.00,0.000000,0.050000,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,02806,0.000000,0.000000,0.00,0.000000,0.055556,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.013889,0.000000,0.000000
3,02807,0.000000,0.015385,0.00,0.000000,0.046154,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.015385,0.000000
4,02808,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,02809,0.000000,0.000000,0.00,0.000000,0.070000,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.010000,0.010000,0.000000
6,02812,0.000000,0.030303,0.00,0.000000,0.000000,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,02813,0.000000,0.000000,0.00,0.000000,0.062500,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.031250,0.000000,0.000000,0.000000,0.000000
8,02814,0.000000,0.000000,0.00,0.000000,0.045455,0.045455,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,02815,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.00,0.00,0.000000,...,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


### Define a function to determine the common venues and run it

In [216]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [217]:
num_top_venues = 15

indicators = ['st', 'nd', 'rd']

columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = RI_grouped['PostalCode']

for ind in np.arange(RI_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(RI_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,2802,Donut Shop,Pizza Place,Gas Station,Pharmacy,Breakfast Spot,Fast Food Restaurant,Diner,Sandwich Place,Coffee Shop,Golf Course,Discount Store,Gym,Supermarket,Department Store,Movie Theater
1,2804,Pizza Place,Gas Station,Post Office,Liquor Store,Ski Area,Golf Course,Campground,Sandwich Place,Donut Shop,Rental Service,Ice Cream Shop,Home Service,American Restaurant,Recreation Center,Deli / Bodega
2,2806,Donut Shop,Pizza Place,Seafood Restaurant,American Restaurant,Coffee Shop,Bank,Bagel Shop,Grocery Store,Beach,Park,Sandwich Place,Bakery,Restaurant,Breakfast Spot,Diner
3,2807,Seafood Restaurant,Beach,Bar,Hotel,Ice Cream Shop,American Restaurant,Boat or Ferry,Hotel Bar,Lighthouse,Harbor / Marina,Farm,Movie Theater,Bed & Breakfast,Sandwich Place,Surf Spot
4,2808,Pizza Place,Baseball Field,Home Service,Nature Preserve,Snack Place,Liquor Store,Farm,Chinese Restaurant,Golf Driving Range,Trail,Food,Rental Service,Deli / Bodega,Donut Shop,Post Office


### Apply K-means Clustering to determine the common venues for each postalcode

In [218]:
kclusters = 4

RI_grouped_clustering = RI_grouped.drop('PostalCode', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(RI_grouped_clustering)

kmeans.labels_[0:10]

array([3, 1, 3, 0, 1, 3, 1, 3, 1, 2], dtype=int32)

In [219]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

RI_merged = RI_data

RI_merged = RI_merged.merge(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

RI_merged.head()

Unnamed: 0,PostalCode,City,Latitude,Longitude,Proximity to Airport [mi],Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,2885,Warren,41.728366,-71.27021,8.198661,3,American Restaurant,Donut Shop,Pharmacy,Restaurant,...,Grocery Store,Bakery,Ice Cream Shop,Sandwich Place,Seafood Restaurant,Shipping Store,Pizza Place,Baseball Field,Bank,Bagel Shop
1,2809,Bristol,41.710403,-71.28161,7.645372,3,Pizza Place,American Restaurant,Seafood Restaurant,Coffee Shop,...,Donut Shop,Sandwich Place,Restaurant,Bagel Shop,Convenience Store,Bank,Park,Discount Store,Shipping Store,Café
2,2806,Barrington,41.744013,-71.331062,5.256922,3,Donut Shop,Pizza Place,Seafood Restaurant,American Restaurant,...,Bank,Bagel Shop,Grocery Store,Beach,Park,Sandwich Place,Bakery,Restaurant,Breakfast Spot,Diner
3,2921,"Cranston, Providence",41.76081,-71.503702,4.699649,0,Pizza Place,Italian Restaurant,Bar,American Restaurant,...,Furniture / Home Store,Lingerie Store,Cosmetics Shop,Breakfast Spot,Golf Course,Restaurant,Bakery,Convenience Store,Chinese Restaurant,Café
4,2919,Johnston,41.83976,-71.495033,8.794294,3,Italian Restaurant,Pharmacy,Bakery,American Restaurant,...,Mexican Restaurant,Coffee Shop,Liquor Store,Breakfast Spot,Sushi Restaurant,Japanese Restaurant,Burger Joint,Pub,Sandwich Place,Convenience Store


### Use folium to plot the clusters on the maps

In [221]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=9)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(RI_merged['Latitude'], RI_merged['Longitude'], RI_merged['PostalCode'], RI_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7
    ).add_to(map_clusters)
       
map_clusters

### Display data for each cluster and store it

In [357]:
cluster0 = RI_merged.loc[RI_merged['Cluster Labels'] == 0, RI_merged.columns[[0] + list(range(5, RI_merged.shape[1]))]]
cluster0

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
3,2921,0,Pizza Place,Italian Restaurant,Bar,American Restaurant,Clothing Store,Furniture / Home Store,Lingerie Store,Cosmetics Shop,Breakfast Spot,Golf Course,Restaurant,Bakery,Convenience Store,Chinese Restaurant,Café
6,2916,0,American Restaurant,Café,Pizza Place,Bakery,Park,New American Restaurant,Thai Restaurant,Coffee Shop,Italian Restaurant,Bar,Bagel Shop,Japanese Restaurant,Donut Shop,Bowling Alley,Liquor Store
7,2915,0,American Restaurant,Café,Coffee Shop,Park,Mexican Restaurant,Pizza Place,Liquor Store,Bakery,Restaurant,Italian Restaurant,Sushi Restaurant,Baseball Field,Donut Shop,Steakhouse,Department Store
8,2914,0,American Restaurant,Café,Italian Restaurant,Coffee Shop,New American Restaurant,Bakery,Donut Shop,Park,Grocery Store,Thai Restaurant,Sushi Restaurant,Bar,Performing Arts Venue,Mexican Restaurant,Pizza Place
9,2912,0,Café,New American Restaurant,Bar,Italian Restaurant,Park,Hotel,American Restaurant,Coffee Shop,Seafood Restaurant,Restaurant,Bakery,Brewery,Thai Restaurant,Theater,Bagel Shop
11,2909,0,Italian Restaurant,Bar,Café,Mexican Restaurant,American Restaurant,New American Restaurant,Bakery,Seafood Restaurant,Restaurant,Cocktail Bar,Coffee Shop,Brewery,Hotel,Plaza,Pizza Place
12,2908,0,Café,Italian Restaurant,Coffee Shop,Bakery,Bar,American Restaurant,New American Restaurant,Mexican Restaurant,Pizza Place,Restaurant,Hotel,Brewery,Seafood Restaurant,Thai Restaurant,Theater
13,2907,0,Café,Italian Restaurant,Pizza Place,Bar,Restaurant,Park,New American Restaurant,Coffee Shop,Cocktail Bar,Mexican Restaurant,Brewery,American Restaurant,Seafood Restaurant,Diner,Vegetarian / Vegan Restaurant
14,2906,0,Café,Bar,New American Restaurant,Park,Italian Restaurant,Coffee Shop,Hotel,American Restaurant,Restaurant,Seafood Restaurant,Bakery,Bagel Shop,Thai Restaurant,Theater,Brewery
15,2905,0,Café,Bar,Italian Restaurant,Restaurant,Pizza Place,Park,New American Restaurant,Hookah Bar,Coffee Shop,American Restaurant,Seafood Restaurant,Cocktail Bar,Mexican Restaurant,Middle Eastern Restaurant,Bakery


In [358]:
cluster1 = RI_merged.loc[RI_merged['Cluster Labels'] == 1, RI_merged.columns[[0] + list(range(5, RI_merged.shape[1]))]]
cluster1

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
25,2859,1,Pizza Place,Golf Course,Café,Skating Rink,Sports Club,Chinese Restaurant,Liquor Store,Sandwich Place,Pharmacy,Mexican Restaurant,Donut Shop,Thai Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant
31,2825,1,Campground,Outdoors & Recreation,Park,Gun Shop,Golf Course,Construction & Landscaping,Zoo,Fast Food Restaurant,Fish & Chips Shop,Fishing Spot,Fish Market,Flea Market,Flight School,Flower Shop,Food
34,2814,1,Liquor Store,Pizza Place,Trail,Bank,Pharmacy,Sporting Goods Shop,Food,Discount Store,Golf Course,Bar,Breakfast Spot,Grocery Store,Spiritual Center,American Restaurant,Intersection
48,2898,1,Golf Course,Donut Shop,Pizza Place,Pharmacy,Liquor Store,Fair,Bank,Bar,Sandwich Place,Gift Shop,Discount Store,Juice Bar,Supermarket,Fast Food Restaurant,Chinese Restaurant
54,2875,1,Golf Course,Pizza Place,Post Office,Liquor Store,Pub,Seafood Restaurant,Furniture / Home Store,Farm,Fair,Gift Shop,Park,Trail,Juice Bar,Athletics & Sports,Donut Shop
56,2873,1,Campground,Deli / Bodega,Trail,Sporting Goods Shop,Golf Course,Liquor Store,Bakery,State / Provincial Park,American Restaurant,Eastern European Restaurant,Dive Bar,Diner,Food Truck,Food & Drink Shop,Food
58,2836,1,Pizza Place,Golf Course,Liquor Store,Seafood Restaurant,Post Office,Airport,Donut Shop,Furniture / Home Store,Farm,Gym,Gift Shop,Pub,Lake,Juice Bar,Trail
60,2832,1,Golf Course,Liquor Store,Donut Shop,Pharmacy,Sandwich Place,Chinese Restaurant,Supermarket,Campground,Baseball Field,Café,Bank,Bakery,Factory,Trail,Bar
61,2822,1,Trail,Campground,New American Restaurant,Historic Site,Golf Course,Food & Drink Shop,Food,Flower Shop,Flight School,Flea Market,Fishing Spot,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
63,2812,1,Golf Course,Pizza Place,Post Office,Donut Shop,Liquor Store,Bank,Trail,Fast Food Restaurant,Furniture / Home Store,Business Service,Supermarket,Café,Fair,Gift Shop,Park


In [359]:
cluster2 = RI_merged.loc[RI_merged['Cluster Labels'] == 2, RI_merged.columns[[0] + list(range(5, RI_merged.shape[1]))]]
cluster2

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
33,2815,2,Business Service,Trail,Food,Construction & Landscaping,Outdoors & Recreation,Jewelry Store,Fish Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Fishing Spot,Farm,Flight School,Flower Shop,Food & Drink Shop


In [360]:
cluster3 = RI_merged.loc[RI_merged['Cluster Labels'] == 3, RI_merged.columns[[0] + list(range(5, RI_merged.shape[1]))]]
cluster3

Unnamed: 0,PostalCode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,2885,3,American Restaurant,Donut Shop,Pharmacy,Restaurant,Coffee Shop,Grocery Store,Bakery,Ice Cream Shop,Sandwich Place,Seafood Restaurant,Shipping Store,Pizza Place,Baseball Field,Bank,Bagel Shop
1,2809,3,Pizza Place,American Restaurant,Seafood Restaurant,Coffee Shop,Pharmacy,Donut Shop,Sandwich Place,Restaurant,Bagel Shop,Convenience Store,Bank,Park,Discount Store,Shipping Store,Café
2,2806,3,Donut Shop,Pizza Place,Seafood Restaurant,American Restaurant,Coffee Shop,Bank,Bagel Shop,Grocery Store,Beach,Park,Sandwich Place,Bakery,Restaurant,Breakfast Spot,Diner
4,2919,3,Italian Restaurant,Pharmacy,Bakery,American Restaurant,Gym / Fitness Center,Mexican Restaurant,Coffee Shop,Liquor Store,Breakfast Spot,Sushi Restaurant,Japanese Restaurant,Burger Joint,Pub,Sandwich Place,Convenience Store
5,2917,3,American Restaurant,Donut Shop,Pizza Place,Italian Restaurant,Fast Food Restaurant,Coffee Shop,Sandwich Place,Sports Bar,Mexican Restaurant,Pharmacy,Department Store,Breakfast Spot,Accessories Store,Cosmetics Shop,Convenience Store
10,2911,3,Italian Restaurant,Pharmacy,Gym / Fitness Center,Bakery,American Restaurant,Japanese Restaurant,Liquor Store,Coffee Shop,Ice Cream Shop,Mexican Restaurant,Breakfast Spot,Cosmetics Shop,Convenience Store,Pub,Sandwich Place
18,2896,3,American Restaurant,Sandwich Place,Pharmacy,Convenience Store,Donut Shop,Pet Store,Bar,Food,Gas Station,Park,Discount Store,Liquor Store,Japanese Restaurant,Grocery Store,Pizza Place
19,2895,3,American Restaurant,Pizza Place,Pharmacy,Diner,Donut Shop,Convenience Store,Bar,Department Store,Discount Store,Pub,Fast Food Restaurant,Sandwich Place,Pet Store,Office,Ice Cream Shop
20,2876,3,American Restaurant,Liquor Store,Bar,Convenience Store,Intersection,Flower Shop,Bagel Shop,Martial Arts Dojo,Night Market,Gas Station,Sports Bar,Sandwich Place,Gift Shop,Business Service,Construction & Landscaping
21,2865,3,Donut Shop,Pizza Place,Brewery,American Restaurant,Sandwich Place,Pharmacy,Mexican Restaurant,Italian Restaurant,Gas Station,Bank,Deli / Bodega,Restaurant,Grocery Store,Breakfast Spot,Diner


### Define a function to count the instances of the preferred venue types showing up in the common venues for each cluster.

In [294]:
def topVenueCounter(cluster):
    gym_count = 0
    grocery_count = 0
    restaurant_count = 0
    hotel_count = 0
    
    for i in range(2,17):
        for n in range(0,cluster.shape[0]):
            if ('Gym' in cluster.iloc[n,i]) == True or ('Fitness' in cluster.iloc[n,i]) == True:
                gym_count += 1
            if ('Grocery' in cluster.iloc[n,i]) == True:
                grocery_count += 1
            if ('Restaurant' in cluster.iloc[n,i]) == True:
                restaurant_count += 1
            if ('Hotel' in cluster.iloc[n,i]) == True:
                hotel_count += 1
                
    return gym_count, grocery_count, restaurant_count, hotel_count

In [295]:
cluster_names = [cluster0,cluster1,cluster2,cluster3]
gyms = []
grocery_stores = []
restaurants = []
hotels = []

for name in cluster_names:
    gym_count, grocery_count, restaurant_count, hotel_count = topVenueCounter(name)
    gyms.append(gym_count)
    grocery_stores.append(grocery_count)
    restaurants.append(restaurant_count)
    hotels.append(hotel_count)

In [303]:
cluster_info = pd.DataFrame(columns = ['Cluster No.','Instances of Gym as Common Venue','Instances of Grocery Store as Common Venue','Instances of Restaurant as Common Venue','Instances of Hotel as Common Venue'])

In [304]:
cluster_info['Cluster No.'] = [0,1,2,3]

In [306]:
cluster_info = cluster_info.set_index('Cluster No.')

In [310]:
cluster_info.iloc[:,0] = gyms

In [312]:
cluster_info.iloc[:,1] = grocery_stores
cluster_info.iloc[:,2] = restaurants
cluster_info.iloc[:,3] = hotels
cluster_info

Unnamed: 0_level_0,Instances of Gym as Common Venue,Instances of Grocery Store as Common Venue,Instances of Restaurant as Common Venue,Instances of Hotel as Common Venue
Cluster No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2,2,83,10
1,1,1,18,0
2,0,0,1,0
3,12,10,98,4


### Apply weights to the instances of the preferred venues to get a score for each cluster

In [325]:
weights = np.array([[4],[2],[2],[3]])

In [333]:
scores = []

for n in range(0,cluster_info.shape[0]):
    cluster_score = cluster_info.iloc[n,:].dot(weights)
    scores.append(cluster_score[0])

In [335]:
cluster_info['Score'] = scores

In [336]:
cluster_info

Unnamed: 0_level_0,Instances of Gym as Common Venue,Instances of Grocery Store as Common Venue,Instances of Restaurant as Common Venue,Instances of Hotel as Common Venue,Score
Cluster No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2,2,83,10,208
1,1,1,18,0,42
2,0,0,1,0,2
3,12,10,98,4,276


In [339]:
print('The best locations to stay in RI will be found in Cluster',cluster_info['Score'].idxmax())

The best locations to stay in RI will be found in Cluster 3


### Now that the highest-scoring cluster has been identifed, we will figure out which zip code within that cluster has the closest proximity to an airport to determine the city in which we should stay.

In [350]:
RI_cluster3 = RI_data.merge(cluster3,on = 'PostalCode',how = 'inner')

In [356]:
print('When traveling to Rhode Island, based on your preferences you should plan on staying in',RI_cluster3.iloc[RI_cluster3['Proximity to Airport [mi]'].idxmin()][1])

When traveling to Rhode Island, based on your preferences you should plan on staying in Warwick, Bristol


# Warwick / Bristol it is!