# IBM Applied Data Science Specialization by Coursera
## Coursera Capstone Notebook
<hr>

## Install and import all the required Python libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans

#!pip install geocoder
import geocoder # import geocoder

#!pip install folium 
import folium # map rendering library

print('All reqiured libraries are imported!')

All reqiured libraries are imported!


## 1. Getting a List of Boston Neighborhoods and their Coordinates

### Getting a List of Boston Neighborhoods from Wikipedia Page

In [2]:
# web page URL
url = r"https://en.wikipedia.org/wiki/Neighborhoods_in_Boston"

# read web page into beautifulsoup object
html_page = requests.get(url).text
soup = BeautifulSoup(html_page, 'html.parser')

In [3]:
# extract list containing neighbourhoods
toc = soup.find('ul')
list_items = toc.find_all('li')

neighborhoods = []
for item in list_items:
    neighborhoods.append(item.find('a').text)
    
neighborhood_names = pd.DataFrame(data=neighborhoods, columns=['Neighborhood_name']) 
neighborhood_names

Unnamed: 0,Neighborhood_name
0,Allston
1,Back Bay
2,Bay Village
3,Beacon Hill
4,Brighton
5,Charlestown
6,Chinatown
7,Dorchester
8,Downtown
9,East Boston


### Using geocoder to get neighborhood coordinates (executed in IBM Watson Studio)

In [None]:
import geocoder # import geocoder

def get_coordinate(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Boston, Massachusetts'.format(neighborhood))
        lat_lng_coords = g.latlng
        
    return lat_lng_coords

In [None]:
# Testing the function

neighborhood = 'Allston'

neighborhood_coords= get_coordinate(neighborhood)
print(neighborhood_coords)

In [None]:
bs_coords = []

for neighborhood in neighborhood_names['Neighborhood_name'].values:
    neighborhood_coords = get_coordinate(neighborhood)
    latitude = neighborhood_coords[0]
    longitude = neighborhood_coords[1]
    bs_coords.append([neighborhood, latitude, longitude])
    print([neighborhood, latitude, longitude])
bs_coords

### Reading pre-extracted neighborhood coordinates from CSV file
#### Above function using geocoder didn't work in local. Therefore, it is executed in IBM Watson Studio to create the CSV file used below.

In [4]:
neighborhood_coords = pd.read_csv('Boston_Neighborhoods.csv')
neighborhood_coords.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude
0,Allston,42.350531,-71.111091
1,Back Bay,42.34999,-71.08765
2,Bay Village,42.348165,-71.06847
3,Beacon Hill,42.35842,-71.0686
4,Brighton,42.352134,-71.124925


### Initialize map

In [5]:
# Boston City cooriditae for map initialization
latitude = 42.35866
longitude = -71.05674

map_boston = folium.Map(location=[latitude, longitude], zoom_start=12)

### Mark Neighborhoods in the Map

In [6]:
for index, row in neighborhood_coords.iterrows():
    print(index, row['Neighborhood_name'], row['Latitude'], row['Longitude'])
    
    neighborhood = row['Neighborhood_name']
    latitude =  row['Latitude']
    longitude = row['Longitude']
    
    color = 'red'
    label = folium.Popup(str(neighborhood), parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

map_boston

0 Allston 42.35053081552873 -71.1110910326945
1 Back Bay 42.34999000000005 -71.08764999999994
2 Bay Village 42.348165031218976 -71.06846991510525
3 Beacon Hill 42.35842000000007 -71.06859999999995
4 Brighton 42.35213365368456 -71.12492527560583
5 Charlestown 42.36777120956261 -71.05901636136193
6 Chinatown 42.352510000000045 -71.06089999999995
7 Dorchester 42.351354908126154 -71.05284849998098
8 Downtown 42.358290000000075 -71.05662999999998
9 East Boston 42.35141817326235 -71.05671435784329
10 Fenway Kenmore 42.34355000000005 -71.10156999999998
11 Hyde Park 42.27477308432304 -71.119898451483
12 Jamaica Plain 42.30584890846422 -71.11909201668145
13 Mattapan 42.278222288859574 -71.0960831569464
14 Mission Hill 42.33571000000006 -71.10979999999995
15 North End 42.36549000000008 -71.05296999999996
16 Roslindale 42.28182009628248 -71.13710364030405
17 Roxbury 42.330303515648225 -71.08946869163573
18 South Boston 42.3522498538783 -71.05568998397878
19 South End 42.34256000000005 -71.0735799

## 2. Getting neighborhood venues using Foursqure API  

### Setting API parameters

In [7]:
CLIENT_ID = '' 
CLIENT_SECRET = '' 
VERSION = ''
LIMIT = 100

### Getting Venues in neighborhoods using Foursqure explore API

In [8]:
def getNeighborhoodVenues(neighborhood, latitude, longitude, radius=1600):
    
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        latitude, 
        longitude, 
        radius, 
        LIMIT)   
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']

    venues_list=[]
    for v in results:    
        venue_name = v['venue']['name']
        venue_latitude = v['venue']['location']['lat']
        venue_longitude = v['venue']['location']['lng']
        venue_category = v['venue']['categories'][0]['name']
        #venue_postalCode = v['venue']['location']['postalCode']
        #venue_city = None #v['venue']['location']['city']
        venue_distance = v['venue']['location']['distance']

        venues_list.append([neighborhood, latitude, longitude, venue_name, venue_latitude, venue_longitude, venue_category, venue_city, venue_distance])

        venues= pd.DataFrame(data=venues_list, columns= ["Neighborhood_name", "Latitude", "Longitude", "Venue_name", "Venue_latitude", "Venue_longitude", "Venue_category", "Venue_city", "Venue_distance"])
        
    return venues

In [None]:
neighborhood_venues = pd.DataFrame()

for index, row in neighborhood_coords.iterrows():
    print(index, row['Neighborhood_name'], row['Latitude'], row['Longitude'])

    vl = getNeighborhoodVenues(neighborhood=row['Neighborhood_name'], latitude=row['Latitude'], longitude=row['Longitude'], radius=500)
    neighborhood_venues = neighborhood_venues.append(vl, ignore_index=True)
    
neighborhood_venues.head()

#### Write Venue data to a file

In [None]:
neighborhood_venues.to_csv('Boston_Venues.csv', index=False)

#### Reading from pre-saved venue file

In [9]:
neighborhood_venues = pd.read_csv('Boston_Venues.csv')

## 3. Explore Venues Dataset

In [10]:
neighborhood_venues.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude,Venue_name,Venue_latitude,Venue_longitude,Venue_category,Venue_distance
0,Allston,42.350531,-71.111091,Boston House of Pizza,42.350281,-71.113864,Pizza Place,229
1,Allston,42.350531,-71.111091,OTTO,42.350388,-71.115236,Pizza Place,341
2,Allston,42.350531,-71.111091,Amazon@Boston,42.350761,-71.114298,Shipping Store,265
3,Allston,42.350531,-71.111091,Pavement Coffeehouse,42.35003,-71.10702,Café,339
4,Allston,42.350531,-71.111091,Starbucks,42.350691,-71.114521,Coffee Shop,282


In [11]:
len(neighborhood_venues['Neighborhood_name'].unique())

22

### How many unique categories can be extracted from all the returned venues

In [12]:
len(neighborhood_venues['Venue_category'].unique())

207

### Vanue Counts per Neighborhood

In [13]:
neighborhood_venues['Neighborhood_name'].value_counts().to_frame()

Unnamed: 0,Neighborhood_name
Dorchester,100
Back Bay,100
Chinatown,100
East Boston,97
Downtown,96
West End,90
South Boston,85
South End,77
North End,65
Fenway Kenmore,60


### Counts By Category (Top 20)

In [14]:
neighborhood_venues['Venue_category'].value_counts().to_frame().head(20)

Unnamed: 0,Venue_category
Coffee Shop,48
Pizza Place,43
Italian Restaurant,43
Bakery,39
Sandwich Place,36
American Restaurant,36
Park,31
Café,30
Hotel,29
Chinese Restaurant,29


### Counts By Selected Categories

In [15]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Restaurant')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Italian Restaurant,43
American Restaurant,36
Chinese Restaurant,29
Asian Restaurant,26
Mexican Restaurant,21
Seafood Restaurant,17
French Restaurant,16
Sushi Restaurant,14
Vegetarian / Vegan Restaurant,10
Thai Restaurant,10


In [16]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Juice|Smoothie')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Juice Bar,6
Smoothie Shop,1


In [17]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Gym|Fitness|Yoga|Rec Center')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Gym / Fitness Center,13
Gym,8
Yoga Studio,8
Boxing Gym,1
College Gym,1
College Rec Center,1


In [18]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Park')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Park,31


In [19]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Bus')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Bus Station,1
Bus Stop,1
Bus Line,1


In [20]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Train')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Train Station,1


In [21]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Bubble Tea Shop|Ice Cream Shop|Food & Drink Shop')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Bubble Tea Shop,8
Ice Cream Shop,8
Food & Drink Shop,4


In [22]:
neighborhood_venues.loc[neighborhood_venues['Venue_category'].str.contains('Car')]['Venue_category'].value_counts().to_frame()

Unnamed: 0,Venue_category
Rental Car Location,3
Caribbean Restaurant,1


## 4. Analyze each Neighborhood

In [23]:
neighborhood_venues.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude,Venue_name,Venue_latitude,Venue_longitude,Venue_category,Venue_distance
0,Allston,42.350531,-71.111091,Boston House of Pizza,42.350281,-71.113864,Pizza Place,229
1,Allston,42.350531,-71.111091,OTTO,42.350388,-71.115236,Pizza Place,341
2,Allston,42.350531,-71.111091,Amazon@Boston,42.350761,-71.114298,Shipping Store,265
3,Allston,42.350531,-71.111091,Pavement Coffeehouse,42.35003,-71.10702,Café,339
4,Allston,42.350531,-71.111091,Starbucks,42.350691,-71.114521,Coffee Shop,282


### Onehot Encode Venue_category column

In [24]:
Venue_category_list = list(neighborhood_venues['Venue_category'].unique())

In [25]:
neighborhood_venues_onehot = pd.get_dummies(neighborhood_venues, columns=['Venue_category'], prefix="", prefix_sep="")
neighborhood_venues_onehot.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude,Venue_name,Venue_latitude,Venue_longitude,Venue_distance,Accessories Store,African Restaurant,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Allston,42.350531,-71.111091,Boston House of Pizza,42.350281,-71.113864,229,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Allston,42.350531,-71.111091,OTTO,42.350388,-71.115236,341,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Allston,42.350531,-71.111091,Amazon@Boston,42.350761,-71.114298,265,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Allston,42.350531,-71.111091,Pavement Coffeehouse,42.35003,-71.10702,339,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Allston,42.350531,-71.111091,Starbucks,42.350691,-71.114521,282,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
neighborhood_venues_onehot = neighborhood_venues_onehot[['Neighborhood_name','Latitude','Longitude'] + Venue_category_list]
neighborhood_venues_onehot.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude,Pizza Place,Shipping Store,Café,Coffee Shop,Thai Restaurant,Mexican Restaurant,Gym / Fitness Center,...,Arepa Restaurant,Ethiopian Restaurant,Flea Market,Health & Beauty Service,Science Museum,Boat or Ferry,Planetarium,Zoo Exhibit,Bus Stop,Train Station
0,Allston,42.350531,-71.111091,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Allston,42.350531,-71.111091,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Allston,42.350531,-71.111091,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Allston,42.350531,-71.111091,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Allston,42.350531,-71.111091,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
neighborhood_venues_counts = neighborhood_venues_onehot.groupby(by=['Neighborhood_name','Latitude','Longitude']).sum()
neighborhood_venues_counts.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Pizza Place,Shipping Store,Café,Coffee Shop,Thai Restaurant,Mexican Restaurant,Gym / Fitness Center,Sandwich Place,Track,Donut Shop,...,Arepa Restaurant,Ethiopian Restaurant,Flea Market,Health & Beauty Service,Science Museum,Boat or Ferry,Planetarium,Zoo Exhibit,Bus Stop,Train Station
Neighborhood_name,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Allston,42.350531,-71.111091,2,1,2,2,1,2,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
Back Bay,42.34999,-71.08765,1,0,0,5,1,1,1,2,0,0,...,0,0,0,0,0,0,0,0,0,0
Bay Village,42.348165,-71.06847,2,0,0,0,0,0,0,2,0,1,...,0,0,0,0,0,0,0,0,0,0
Beacon Hill,42.35842,-71.0686,3,0,0,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Brighton,42.352134,-71.124925,1,0,0,2,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Isolating Interested Venues

In [28]:
selected_venues = ['Juice Bar', 
                   'Smoothie Shop', 
                   'Gym / Fitness Center',
                   'Gym' ,
                   'Yoga Studio' ,
                   'College Rec Center',
                   'College Gym', 
                   'Boxing Gym', 
                   'Park', 
                   'Ice Cream Shop', 
                   'Bubble Tea Shop', 
                   'Food & Drink Shop',
                    'Bus Stop',
                    'Bus Station',
                    'Train Station',
                    'Rental Car Location',
                    'Big Box Store',
                    'Grocery Store',
                    'Department Store',
                    'Clothing Store',
                    'Sporting Goods Shop',
                    'Mobile Phone Shop',
                    'Automotive Shop',
                    'Gift Shop',
                    'Comic Shop',
                    'Bookstore'          
                  ]

neighborhood_venues_counts_select = neighborhood_venues_counts[selected_venues]
neighborhood_venues_counts_select

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Juice Bar,Smoothie Shop,Gym / Fitness Center,Gym,Yoga Studio,College Rec Center,College Gym,Boxing Gym,Park,Ice Cream Shop,...,Big Box Store,Grocery Store,Department Store,Clothing Store,Sporting Goods Shop,Mobile Phone Shop,Automotive Shop,Gift Shop,Comic Shop,Bookstore
Neighborhood_name,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Allston,42.350531,-71.111091,0,0,1,0,0,1,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
Back Bay,42.34999,-71.08765,1,0,1,0,2,0,0,0,0,3,...,0,2,1,10,1,1,0,0,1,3
Bay Village,42.348165,-71.06847,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Beacon Hill,42.35842,-71.0686,0,0,0,0,0,0,0,0,1,1,...,0,1,0,0,0,0,0,1,0,0
Brighton,42.352134,-71.124925,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
Charlestown,42.367771,-71.059016,0,0,0,0,0,0,0,0,5,0,...,0,0,0,0,0,0,0,0,0,0
Chinatown,42.35251,-71.0609,0,0,3,1,1,0,0,0,2,0,...,0,0,0,1,0,0,0,0,0,1
Dorchester,42.351355,-71.052848,1,0,0,0,0,0,0,0,1,1,...,0,0,0,1,0,1,0,0,0,1
Downtown,42.35829,-71.05663,1,0,1,1,0,0,0,0,4,0,...,0,0,0,2,0,0,0,0,0,0
East Boston,42.351418,-71.056714,1,0,2,1,1,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [29]:
len(selected_venues)

26

In [30]:
neighborhood_venues_counts_select.shape

(22, 26)

### Grouping Selected Venues

In [31]:
Smoothie_Juice = ['Smoothie Shop','Juice Bar']
Gym_Fitness = ['Gym / Fitness Center', 'Gym', 'Yoga Studio', 'College Rec Center', 'College Gym', 'Boxing Gym']
Dessert_Drink = ['Ice Cream Shop', 'Bubble Tea Shop', 'Food & Drink Shop']
Transportation = ['Bus Stop', 'Bus Station', 'Train Station', 'Rental Car Location']
Books_Gifts = ['Gift Shop', 'Comic Shop', 'Bookstore']
Shopping = ['Big Box Store', 'Grocery Store', 'Department Store', 'Clothing Store', 'Sporting Goods Shop', 'Mobile Phone Shop', 'Automotive Shop']
Park = ['Park']

### Map Selected Venues

In [32]:
neighborhood_venues_to_map = neighborhood_venues.loc[neighborhood_venues['Venue_category'].isin(selected_venues)]
neighborhood_venues_to_map['Venue_Group'] = None

neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Smoothie_Juice), 'Venue_Group'] = 'Smoothie_Juice'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Gym_Fitness), 'Venue_Group'] = 'Gym_Fitness'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Dessert_Drink), 'Venue_Group'] = 'Dessert_Drink'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Transportation), 'Venue_Group'] = 'Transportation'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Books_Gifts), 'Venue_Group'] = 'Books_Gifts'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Shopping), 'Venue_Group'] = 'Shopping'
neighborhood_venues_to_map.loc[neighborhood_venues['Venue_category'].isin(Park), 'Venue_Group'] = 'Park'

neighborhood_venues_to_map = neighborhood_venues_to_map[['Neighborhood_name', 'Venue_name', 'Venue_latitude', 'Venue_longitude', 'Venue_category', 'Venue_Group']]
neighborhood_venues_to_map = neighborhood_venues_to_map.reset_index(drop=True)
venue_groups = list(neighborhood_venues_to_map['Venue_Group'].unique())
neighborhood_venues_to_map.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,Neighborhood_name,Venue_name,Venue_latitude,Venue_longitude,Venue_category,Venue_Group
0,Allston,GymIt,42.350867,-71.11707,Gym / Fitness Center,Gym_Fitness
1,Allston,Target,42.350645,-71.114038,Big Box Store,Shopping
2,Allston,BU Sailing Pavilion,42.352024,-71.110263,College Rec Center,Gym_Fitness
3,Allston,BU Boat House,42.353368,-71.107648,College Gym,Gym_Fitness
4,Back Bay,UNIQLO,42.348495,-71.08687,Clothing Store,Shopping


In [33]:
# Boston City cooriditae for map initialization
latitude = 42.35866
longitude = -71.05674

map_boston = folium.Map(location=[latitude, longitude], zoom_start=12)

color = 'gray'

for index, row in neighborhood_coords.iterrows():
    print(index, row['Neighborhood_name'], row['Latitude'], row['Longitude'])
    neighborhood = row['Neighborhood_name']
    latitude =  row['Latitude']
    longitude = row['Longitude']
        
    label = folium.Popup(str(neighborhood), parse_html=True)

    folium.Marker(
        location=[latitude, longitude],
        popup=label,
        icon=folium.Icon(color=color)
    ).add_to(map_boston)
    
color = 'green'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Gym_Fitness', 'Transportation', 'Shopping'])]

for index, row in neighborhood_venues_to_map_x.iterrows():
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)
    
color = 'orange'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Park', 'Books_Gifts'])]

for index, row in neighborhood_venues_to_map_x.iterrows():   
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

color = 'red'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Smoothie_Juice'])]

for index, row in neighborhood_venues_to_map_x.iterrows():    
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=8,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Dessert_Drink'])]
for index, row in neighborhood_venues_to_map_x.iterrows():    
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)
    
map_boston

0 Allston 42.35053081552873 -71.1110910326945
1 Back Bay 42.34999000000005 -71.08764999999994
2 Bay Village 42.348165031218976 -71.06846991510525
3 Beacon Hill 42.35842000000007 -71.06859999999995
4 Brighton 42.35213365368456 -71.12492527560583
5 Charlestown 42.36777120956261 -71.05901636136193
6 Chinatown 42.352510000000045 -71.06089999999995
7 Dorchester 42.351354908126154 -71.05284849998098
8 Downtown 42.358290000000075 -71.05662999999998
9 East Boston 42.35141817326235 -71.05671435784329
10 Fenway Kenmore 42.34355000000005 -71.10156999999998
11 Hyde Park 42.27477308432304 -71.119898451483
12 Jamaica Plain 42.30584890846422 -71.11909201668145
13 Mattapan 42.278222288859574 -71.0960831569464
14 Mission Hill 42.33571000000006 -71.10979999999995
15 North End 42.36549000000008 -71.05296999999996
16 Roslindale 42.28182009628248 -71.13710364030405
17 Roxbury 42.330303515648225 -71.08946869163573
18 South Boston 42.3522498538783 -71.05568998397878
19 South End 42.34256000000005 -71.0735799

## 5. Get Demographic Data

In [34]:
neighborhood_demographic = pd.read_csv('Boston_DemographicData.csv')
neighborhood_demographic.head()

Unnamed: 0,Location,Median Income,Total Households,Total Population,Median Age
0,United States,57652.0,118825921,321004407,38
1,Massachusetts,74167.0,2585715,6789319,39
2,Boston,62021.0,263229,669158,32
3,Allston,46982.76,6457,19363,26
4,Back Bay,102070.55,9824,18176,33


### Seperate Row for Boston Demographics and Neighborhood Deographics

In [35]:
total_demographic = neighborhood_demographic.loc[neighborhood_demographic['Location']=='Boston']
neighborhood_demographic = neighborhood_demographic.iloc[3:].reset_index(drop=True)
total_demographic

Unnamed: 0,Location,Median Income,Total Households,Total Population,Median Age
2,Boston,62021.0,263229,669158,32


In [36]:
neighborhood_demographic['Median Income'] = neighborhood_demographic['Median Income']/total_demographic.loc[2,'Median Income']
neighborhood_demographic['Total Households'] = neighborhood_demographic['Total Households']/total_demographic.loc[2,'Total Households']
neighborhood_demographic['Total Population'] = neighborhood_demographic['Total Population']/total_demographic.loc[2,'Total Population']
neighborhood_demographic['Median Age'] = neighborhood_demographic['Median Age']/total_demographic.loc[2,'Median Age']

demo_features = ['Median Income', 'Total Households', 'Total Population', 'Median Age'] 

neighborhood_demographic

Unnamed: 0,Location,Median Income,Total Households,Total Population,Median Age
0,Allston,0.75753,0.02453,0.028936,0.8125
1,Back Bay,1.645742,0.037321,0.027162,1.03125
2,Beacon Hill,1.581226,0.020735,0.014572,1.0
3,Brighton,1.000326,0.082077,0.077388,0.90625
4,Charlestown,1.664648,0.033929,0.028246,1.09375
5,Dorchester,0.800735,0.167482,0.188217,1.03125
6,Downtown,1.086198,0.02869,0.026273,1.03125
7,East Boston,0.853507,0.06187,0.069722,1.0625
8,Fenway Kenmore,0.637685,0.041508,0.048715,0.71875
9,Harbor Islands,0.0,0.0,0.000481,1.625


#### Merge neighborhood demographics data with venues dataset

In [37]:
neighborhood_venues_counts_select_demo = neighborhood_venues_counts_select.reset_index().merge(neighborhood_demographic, left_on='Neighborhood_name', right_on='Location', how='left')
neighborhood_venues_counts_select_demo = neighborhood_venues_counts_select_demo.set_index(['Neighborhood_name','Latitude','Longitude'])
# set the indexes back
neighborhood_venues_counts_select_demo

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Juice Bar,Smoothie Shop,Gym / Fitness Center,Gym,Yoga Studio,College Rec Center,College Gym,Boxing Gym,Park,Ice Cream Shop,...,Mobile Phone Shop,Automotive Shop,Gift Shop,Comic Shop,Bookstore,Location,Median Income,Total Households,Total Population,Median Age
Neighborhood_name,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Allston,42.350531,-71.111091,0,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,Allston,0.75753,0.02453,0.028936,0.8125
Back Bay,42.34999,-71.08765,1,0,1,0,2,0,0,0,0,3,...,1,0,0,1,3,Back Bay,1.645742,0.037321,0.027162,1.03125
Bay Village,42.348165,-71.06847,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,Bay Village,1.402667,0.00318,0.001961,1.09375
Beacon Hill,42.35842,-71.0686,0,0,0,0,0,0,0,0,1,1,...,0,0,1,0,0,Beacon Hill,1.581226,0.020735,0.014572,1.0
Brighton,42.352134,-71.124925,0,0,0,1,2,0,0,0,0,0,...,0,1,0,0,0,Brighton,1.000326,0.082077,0.077388,0.90625
Charlestown,42.367771,-71.059016,0,0,0,0,0,0,0,0,5,0,...,0,0,0,0,0,Charlestown,1.664648,0.033929,0.028246,1.09375
Chinatown,42.35251,-71.0609,0,0,3,1,1,0,0,0,2,0,...,0,0,0,0,1,Chinatown,1.086198,0.008031,0.006641,1.03125
Dorchester,42.351355,-71.052848,1,0,0,0,0,0,0,0,1,1,...,1,0,0,0,1,Dorchester,0.800735,0.167482,0.188217,1.03125
Downtown,42.35829,-71.05663,1,0,1,1,0,0,0,0,4,0,...,0,0,0,0,0,Downtown,1.086198,0.02869,0.026273,1.03125
East Boston,42.351418,-71.056714,1,0,2,1,1,0,0,1,1,1,...,0,0,0,0,0,East Boston,0.853507,0.06187,0.069722,1.0625


## 6. Cluster Neighborhoods

In [38]:
neighborhood_venues_counts_select_demo.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Juice Bar,Smoothie Shop,Gym / Fitness Center,Gym,Yoga Studio,College Rec Center,College Gym,Boxing Gym,Park,Ice Cream Shop,...,Mobile Phone Shop,Automotive Shop,Gift Shop,Comic Shop,Bookstore,Location,Median Income,Total Households,Total Population,Median Age
Neighborhood_name,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Allston,42.350531,-71.111091,0,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,Allston,0.75753,0.02453,0.028936,0.8125
Back Bay,42.34999,-71.08765,1,0,1,0,2,0,0,0,0,3,...,1,0,0,1,3,Back Bay,1.645742,0.037321,0.027162,1.03125
Bay Village,42.348165,-71.06847,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,Bay Village,1.402667,0.00318,0.001961,1.09375
Beacon Hill,42.35842,-71.0686,0,0,0,0,0,0,0,0,1,1,...,0,0,1,0,0,Beacon Hill,1.581226,0.020735,0.014572,1.0
Brighton,42.352134,-71.124925,0,0,0,1,2,0,0,0,0,0,...,0,1,0,0,0,Brighton,1.000326,0.082077,0.077388,0.90625


In [39]:
neighborhood_venues_counts_select_demo.columns

Index(['Juice Bar', 'Smoothie Shop', 'Gym / Fitness Center', 'Gym',
       'Yoga Studio', 'College Rec Center', 'College Gym', 'Boxing Gym',
       'Park', 'Ice Cream Shop', 'Bubble Tea Shop', 'Food & Drink Shop',
       'Bus Stop', 'Bus Station', 'Train Station', 'Rental Car Location',
       'Big Box Store', 'Grocery Store', 'Department Store', 'Clothing Store',
       'Sporting Goods Shop', 'Mobile Phone Shop', 'Automotive Shop',
       'Gift Shop', 'Comic Shop', 'Bookstore', 'Location', 'Median Income',
       'Total Households', 'Total Population', 'Median Age'],
      dtype='object')

### Grouping venue categories based on business domain

In [40]:
neighborhood_venues_counts_combined = neighborhood_venues_counts_select_demo

neighborhood_venues_counts_combined['Smoothie_Juice'] = neighborhood_venues_counts_combined[['Smoothie Shop','Juice Bar']].sum(axis=1)
neighborhood_venues_counts_combined['Gym_Fitness'] = neighborhood_venues_counts_combined[['Gym / Fitness Center', 'Gym',
       'Yoga Studio', 'College Rec Center', 'College Gym', 'Boxing Gym']].sum(axis=1)
neighborhood_venues_counts_combined['Dessert_Drink'] = neighborhood_venues_counts_combined[['Ice Cream Shop', 'Bubble Tea Shop', 'Food & Drink Shop']].sum(axis=1)

neighborhood_venues_counts_combined['Smoothie_Juice_per_Gym_Fitness'] = neighborhood_venues_counts_combined['Smoothie_Juice']/neighborhood_venues_counts_combined['Gym_Fitness']
neighborhood_venues_counts_combined['Smoothie_Juice_per_Gym_Fitness'] = neighborhood_venues_counts_combined['Smoothie_Juice_per_Gym_Fitness'].replace([np.inf, -np.inf], np.nan)
neighborhood_venues_counts_combined['Smoothie_Juice_per_Gym_Fitness'] = neighborhood_venues_counts_combined['Smoothie_Juice_per_Gym_Fitness'].fillna(-1)

neighborhood_venues_counts_combined['Transportation'] = neighborhood_venues_counts_combined[['Bus Stop', 'Bus Station', 'Train Station', 'Rental Car Location']].sum(axis=1)
neighborhood_venues_counts_combined['Shopping'] = neighborhood_venues_counts_combined[['Big Box Store', 'Grocery Store', 'Department Store', 'Clothing Store',
       'Sporting Goods Shop', 'Mobile Phone Shop', 'Automotive Shop']].sum(axis=1)
neighborhood_venues_counts_combined['Books_Gifts'] = neighborhood_venues_counts_combined[['Gift Shop', 'Comic Shop', 'Bookstore']].sum(axis=1)

neighborhood_venues_counts_combined['Smoothie_Juice_per_Shopping'] = neighborhood_venues_counts_combined['Smoothie_Juice']/neighborhood_venues_counts_combined['Shopping']
neighborhood_venues_counts_combined['Smoothie_Juice_per_Shopping'] = neighborhood_venues_counts_combined['Smoothie_Juice_per_Shopping'].replace([np.inf, -np.inf], np.nan)
neighborhood_venues_counts_combined['Smoothie_Juice_per_Shopping'] = neighborhood_venues_counts_combined['Smoothie_Juice_per_Shopping'].fillna(-1)

venue_features = ['Smoothie_Juice', 'Gym_Fitness', 'Dessert_Drink', 'Park', 'Transportation', 'Shopping', 'Books_Gifts']
neighborhood_venues_counts_combined['TotalVenues'] = neighborhood_venues_counts_combined[venue_features].sum(axis=1)

for vf in venue_features:
    neighborhood_venues_counts_combined[vf] = neighborhood_venues_counts_combined[vf]/neighborhood_venues_counts_combined['TotalVenues']

neighborhood_venues_counts_combined= neighborhood_venues_counts_combined.replace([np.inf, -np.inf], np.nan)
neighborhood_venues_counts_combined = neighborhood_venues_counts_combined.fillna(-1)

cluster_features = demo_features + venue_features + ['Smoothie_Juice_per_Gym_Fitness','Smoothie_Juice_per_Shopping']

neighborhood_venues_counts_combined[cluster_features].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Median Income,Total Households,Total Population,Median Age,Smoothie_Juice,Gym_Fitness,Dessert_Drink,Park,Transportation,Shopping,Books_Gifts,Smoothie_Juice_per_Gym_Fitness,Smoothie_Juice_per_Shopping
Neighborhood_name,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Allston,42.350531,-71.111091,0.75753,0.02453,0.028936,0.8125,0.0,0.75,0.0,0.0,0.0,0.25,0.0,0.0,0.0
Back Bay,42.34999,-71.08765,1.645742,0.037321,0.027162,1.03125,0.037037,0.111111,0.111111,0.0,0.037037,0.555556,0.148148,0.333333,0.066667
Bay Village,42.348165,-71.06847,1.402667,0.00318,0.001961,1.09375,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-1.0,-1.0
Beacon Hill,42.35842,-71.0686,1.581226,0.020735,0.014572,1.0,0.0,0.0,0.4,0.2,0.0,0.2,0.2,-1.0,0.0
Brighton,42.352134,-71.124925,1.000326,0.082077,0.077388,0.90625,0.0,0.428571,0.285714,0.0,0.142857,0.142857,0.0,0.0,0.0


#### Apply KMeans Clustering

In [41]:
#neighborhood_clustered.drop(columns=['Cluster Labels'],inplace=True)

kclusters = 5

kmeans = KMeans(n_clusters=kclusters, random_state=0, tol=1e-6).fit(neighborhood_venues_counts_combined[cluster_features])

ClusterLabels = kmeans.labels_

neighborhood_clustered = neighborhood_venues_counts_combined
neighborhood_clustered['Cluster'] = ClusterLabels

print(neighborhood_clustered.shape)

neighborhood_clustered = neighborhood_clustered.reset_index()
neighborhood_clustered.head()

(22, 41)


Unnamed: 0,Neighborhood_name,Latitude,Longitude,Juice Bar,Smoothie Shop,Gym / Fitness Center,Gym,Yoga Studio,College Rec Center,College Gym,...,Smoothie_Juice,Gym_Fitness,Dessert_Drink,Smoothie_Juice_per_Gym_Fitness,Transportation,Shopping,Books_Gifts,Smoothie_Juice_per_Shopping,TotalVenues,Cluster
0,Allston,42.350531,-71.111091,0,0,1,0,0,1,1,...,0.0,0.75,0.0,0.0,0.0,0.25,0.0,0.0,4,3
1,Back Bay,42.34999,-71.08765,1,0,1,0,2,0,0,...,0.037037,0.111111,0.111111,0.333333,0.037037,0.555556,0.148148,0.066667,27,3
2,Bay Village,42.348165,-71.06847,0,0,0,0,0,0,0,...,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,-1.0,1,2
3,Beacon Hill,42.35842,-71.0686,0,0,0,0,0,0,0,...,0.0,0.0,0.4,-1.0,0.0,0.2,0.2,0.0,5,4
4,Brighton,42.352134,-71.124925,0,0,0,1,2,0,0,...,0.0,0.428571,0.285714,0.0,0.142857,0.142857,0.0,0.0,7,3


In [42]:
neighborhood_clustered['Cluster'].value_counts()

3    9
2    5
4    3
1    3
0    2
Name: Cluster, dtype: int64

In [43]:
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Boston City cooriditae for map initialization
latitude = 42.35866
longitude = -71.05674

map_boston = folium.Map(location=[latitude, longitude], zoom_start=12)

for index, row in neighborhood_clustered.iterrows():
    print(index, row['Neighborhood_name'], row['Latitude'], row['Longitude'], row['Cluster'])
    
    neighborhood = row['Neighborhood_name']
    latitude =  row['Latitude']
    longitude = row['Longitude']
    cluster = row['Cluster']
    
    color = rainbow[cluster-1]
    label = folium.Popup(str(neighborhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=8,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

map_boston

0 Allston 42.3505308155287 -71.1110910326945 3
1 Back Bay 42.349990000000005 -71.08764999999991 3
2 Bay Village 42.348165031219004 -71.0684699151053 2
3 Beacon Hill 42.3584200000001 -71.0686 4
4 Brighton 42.352133653684604 -71.12492527560579 3
5 Charlestown 42.367771209562605 -71.0590163613619 2
6 Chinatown 42.352509999999995 -71.0609 3
7 Dorchester 42.351354908126204 -71.052848499981 4
8 Downtown 42.3582900000001 -71.05663 1
9 East Boston 42.3514181732624 -71.05671435784329 3
10 Fenway Kenmore 42.3435500000001 -71.10157 3
11 Hyde Park 42.274773084323 -71.119898451483 3
12 Jamaica Plain 42.305848908464206 -71.1190920166815 4
13 Mattapan 42.2782222888596 -71.0960831569464 2
14 Mission Hill 42.3357100000001 -71.1098 2
15 North End 42.3654900000001 -71.05297 1
16 Roslindale 42.2818200962825 -71.1371036403041 0
17 Roxbury 42.330303515648204 -71.0894686916357 0
18 South Boston 42.3522498538783 -71.0556899839788 1
19 South End 42.3425600000001 -71.07357999999991 3
20 West End 42.363940000000

In [44]:
neighborhood_clustered = neighborhood_clustered.set_index('Neighborhood_name')
neighborhood_clustered.head()

Unnamed: 0_level_0,Latitude,Longitude,Juice Bar,Smoothie Shop,Gym / Fitness Center,Gym,Yoga Studio,College Rec Center,College Gym,Boxing Gym,...,Smoothie_Juice,Gym_Fitness,Dessert_Drink,Smoothie_Juice_per_Gym_Fitness,Transportation,Shopping,Books_Gifts,Smoothie_Juice_per_Shopping,TotalVenues,Cluster
Neighborhood_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Allston,42.350531,-71.111091,0,0,1,0,0,1,1,0,...,0.0,0.75,0.0,0.0,0.0,0.25,0.0,0.0,4,3
Back Bay,42.34999,-71.08765,1,0,1,0,2,0,0,0,...,0.037037,0.111111,0.111111,0.333333,0.037037,0.555556,0.148148,0.066667,27,3
Bay Village,42.348165,-71.06847,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,-1.0,1,2
Beacon Hill,42.35842,-71.0686,0,0,0,0,0,0,0,0,...,0.0,0.0,0.4,-1.0,0.0,0.2,0.2,0.0,5,4
Brighton,42.352134,-71.124925,0,0,0,1,2,0,0,0,...,0.0,0.428571,0.285714,0.0,0.142857,0.142857,0.0,0.0,7,3


### Cluster 1

In [45]:
neighborhood_clustered.loc[neighborhood_clustered['Cluster']==0][cluster_features].transpose().style.background_gradient(cmap='viridis')

Neighborhood_name,Roslindale,Roxbury
Median Income,1.236141,0.446967
Total Households,0.043331,0.073723
Total Population,0.043646,0.07912
Median Age,1.21875,1.0
Smoothie_Juice,-1.0,-1.0
Gym_Fitness,-1.0,-1.0
Dessert_Drink,-1.0,-1.0
Park,-1.0,-1.0
Transportation,-1.0,-1.0
Shopping,-1.0,-1.0


#### Observations
* No data about the focused venues are available. Therefore, it is hard to make a recommendation on the neighborhoods Roslindale and Roxbury in this cluster.

### Cluster 2

In [46]:
neighborhood_clustered.loc[neighborhood_clustered['Cluster']==1][cluster_features].transpose().style.background_gradient(cmap='viridis')

Neighborhood_name,Downtown,North End,South Boston
Median Income,1.086198,1.565766,1.500743
Total Households,0.02869,0.020279,0.063169
Total Population,0.026273,0.013855,0.054116
Median Age,1.03125,0.9375,1.0
Smoothie_Juice,0.111111,0.125,0.090909
Gym_Fitness,0.222222,0.125,0.181818
Dessert_Drink,0.0,0.0,0.363636
Park,0.444444,0.625,0.181818
Transportation,0.0,0.0,0.0
Shopping,0.222222,0.125,0.090909


#### Observations
* Neighborhoods in this cluster have both types of the venues which can increase business opportunity as well as the competitive business such as Smoothie and Juice shops.

### Cluster 3

In [47]:
neighborhood_clustered.loc[neighborhood_clustered['Cluster']==2][cluster_features].transpose().style.background_gradient(cmap='viridis')

Neighborhood_name,Bay Village,Charlestown,Mattapan,Mission Hill,West Roxbury
Median Income,1.402667,1.664648,0.777106,0.57573,1.302856
Total Households,0.00318,0.033929,0.033682,0.02382,0.052262
Total Population,0.001961,0.028246,0.038236,0.026012,0.050706
Median Age,1.09375,1.09375,1.15625,0.8125,1.34375
Smoothie_Juice,0.0,0.0,0.0,0.0,0.0
Gym_Fitness,0.0,0.0,0.0,0.0,0.0
Dessert_Drink,0.0,0.0,1.0,0.0,0.0
Park,0.0,1.0,0.0,1.0,0.0
Transportation,1.0,0.0,0.0,0.0,1.0
Shopping,0.0,0.0,0.0,0.0,0.0


#### Observations
* The neighborhoods in this cluster have no venues which can increase business opportunity such as Gym/Fitness Centers and Shopping Centers.

### Cluster 4

In [48]:
neighborhood_clustered.loc[neighborhood_clustered['Cluster']==3][cluster_features].transpose().style.background_gradient(cmap='viridis')

Neighborhood_name,Allston,Back Bay,Brighton,Chinatown,East Boston,Fenway Kenmore,Hyde Park,South End,West End
Median Income,0.75753,1.645742,1.000326,1.086198,0.853507,0.637685,1.141712,1.402667,1.560554
Total Households,0.02453,0.037321,0.082077,0.008031,0.06187,0.041508,0.048973,0.061517,0.011906
Total Population,0.028936,0.027162,0.077388,0.006641,0.069722,0.048715,0.055434,0.047881,0.009225
Median Age,0.8125,1.03125,0.90625,1.03125,1.0625,0.71875,1.21875,1.09375,1.0625
Smoothie_Juice,0.0,0.037037,0.0,0.0,0.1,0.0,0.0,0.0,0.0
Gym_Fitness,0.75,0.111111,0.428571,0.454545,0.5,0.333333,1.0,0.090909,0.571429
Dessert_Drink,0.0,0.111111,0.285714,0.181818,0.3,0.0,0.0,0.090909,0.0
Park,0.0,0.0,0.0,0.181818,0.1,0.166667,0.0,0.363636,0.142857
Transportation,0.0,0.037037,0.142857,0.0,0.0,0.166667,0.0,0.0,0.0
Shopping,0.25,0.555556,0.142857,0.090909,0.0,0.333333,0.0,0.181818,0.142857


#### Observations
* The neighborhoods in this cluster have many venues including the ones can increase business opportunity such as Gym/Fitness Centers and Shopping Centers.
* The neighborhoods in this cluster can be highly considered to open a Smoothie and Juice shop.
* The neighborhoods Allston, Brighton, Fenway Kenmore, South End and West End are highly recommended to open a Smoothie and Juice shop based on the analysis with available data.

### Cluster 5

In [49]:
neighborhood_clustered.loc[neighborhood_clustered['Cluster']==4][cluster_features].transpose().style.background_gradient(cmap='viridis')

Neighborhood_name,Beacon Hill,Dorchester,Jamaica Plain
Median Income,1.581226,0.800735,1.361569
Total Households,0.020735,0.167482,0.061133
Total Population,0.014572,0.188217,0.058751
Median Age,1.0,1.03125,1.0625
Smoothie_Juice,0.0,0.142857,0.166667
Gym_Fitness,0.0,0.0,0.0
Dessert_Drink,0.4,0.285714,0.0
Park,0.2,0.142857,0.5
Transportation,0.0,0.0,0.0
Shopping,0.2,0.285714,0.166667


#### Observations
* The neighborhoods in this cluster have Shopping Centers but none of them have Gym/Fitness centers.
* These neighborhoods also have competition from other shops.

## Dashboard Map

In [50]:
# Boston City cooriditae for map initialization
latitude = 42.35866
longitude = -71.05674

map_boston = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
cluster_colors = ['lightblue', 'blue', 'darkblue', 'cadetblue', 'gray']

neighborhood_venues_to_map_x = neighborhood_clustered.reset_index()
for index, row in neighborhood_venues_to_map_x.iterrows():
    print(index, row['Neighborhood_name'], row['Latitude'], row['Longitude'], row['Cluster'])
    neighborhood = row['Neighborhood_name']
    latitude =  row['Latitude']
    longitude = row['Longitude']
    cluster= row['Cluster']
    
    color = cluster_colors[cluster]
    
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) +  ' / [' +  str(cluster+1) + ']', parse_html=True)

    folium.Marker(
        location=[latitude, longitude],
        popup=label,
        icon=folium.Icon(color=color)
    ).add_to(map_boston)
    
color = 'green'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Gym_Fitness', 'Park', 'Transportation', 'Shopping'])]

for index, row in neighborhood_venues_to_map_x.iterrows():
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

color = 'orange'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Park', 'Books_Gifts'])]

for index, row in neighborhood_venues_to_map_x.iterrows():   
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

color = 'red'
neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Smoothie_Juice'])]

for index, row in neighborhood_venues_to_map_x.iterrows():    
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=8,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)

neighborhood_venues_to_map_x = neighborhood_venues_to_map.loc[neighborhood_venues_to_map['Venue_Group'].isin(['Dessert_Drink'])]
for index, row in neighborhood_venues_to_map_x.iterrows():    
    neighborhood = row['Neighborhood_name']
    venue = row['Venue_name']
    latitude =  row['Venue_latitude']
    longitude = row['Venue_longitude']
    group = row['Venue_Group']
    group_number = venue_groups.index(group)
    label = folium.Popup(str(neighborhood) + ' / ' + str(venue) + ' / [' +  str(group) + ']', parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(map_boston)
    
map_boston

0 Allston 42.3505308155287 -71.1110910326945 3
1 Back Bay 42.349990000000005 -71.08764999999991 3
2 Bay Village 42.348165031219004 -71.0684699151053 2
3 Beacon Hill 42.3584200000001 -71.0686 4
4 Brighton 42.352133653684604 -71.12492527560579 3
5 Charlestown 42.367771209562605 -71.0590163613619 2
6 Chinatown 42.352509999999995 -71.0609 3
7 Dorchester 42.351354908126204 -71.052848499981 4
8 Downtown 42.3582900000001 -71.05663 1
9 East Boston 42.3514181732624 -71.05671435784329 3
10 Fenway Kenmore 42.3435500000001 -71.10157 3
11 Hyde Park 42.274773084323 -71.119898451483 3
12 Jamaica Plain 42.305848908464206 -71.1190920166815 4
13 Mattapan 42.2782222888596 -71.0960831569464 2
14 Mission Hill 42.3357100000001 -71.1098 2
15 North End 42.3654900000001 -71.05297 1
16 Roslindale 42.2818200962825 -71.1371036403041 0
17 Roxbury 42.330303515648204 -71.0894686916357 0
18 South Boston 42.3522498538783 -71.0556899839788 1
19 South End 42.3425600000001 -71.07357999999991 3
20 West End 42.363940000000

### Limitations of the Current Analysis and Future Improvements

* Only 100 vanues per gievn location can be obtained using free version of the Foursqure API 
* This will make some important locations were undetected in the analysis.

<hr>
Maheesha Tennakoon - IBM Applied Data Science Capstone Project