# **Battle of the Neighborhoods - Coursera Final Project**

## **Find the best part of Miami to open a Pizza Shoppe**

### Part 1 - Cleaning Data for Miami

In [462]:
# import packages 

import pandas as pd
import numpy as np


In [463]:
# scraping the table from wikipedia and creating a dataframe

df_read = pd.read_html('https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Miami', match='Neighborhood', flavor='bs4', header=[0])

df = pd.DataFrame(data=df_read[0])

df.head(2)

Unnamed: 0,Neighborhood,Demonym,Population2010,Population/Km²,Sub-neighborhoods,Coordinates
0,Allapattah,,54289,4401,,25.815-80.224
1,Arts & Entertainment District,,11033,7948,,25.799-80.190


In [464]:
# rename and drop the columns 

df = df.rename(columns={'Population2010': 'Population'})

df = df.drop(['Demonym', 'Population/Km²', 'Sub-neighborhoods'], axis=1)

df

Unnamed: 0,Neighborhood,Population,Coordinates
0,Allapattah,54289,25.815-80.224
1,Arts & Entertainment District,11033,25.799-80.190
2,Brickell,31759,25.758-80.193
3,Buena Vista,9058,25.813-80.192
4,Coconut Grove,20076,25.712-80.257
5,Coral Way,35062,25.750-80.283
6,Design District,3573,25.813-80.193
7,Downtown,"71,000 (13,635 CBD only)",25.774-80.193
8,Edgewater,15005,25.802-80.190
9,Flagami,50834,25.762-80.316


In [465]:
# split the coordinates into lat and long

df['Coordinates'] = df['Coordinates'].astype(str)

df[['Latitude','Longitude']] = df.Coordinates.str.split("-",expand=True)

df.drop('Coordinates', axis=1, inplace=True)

df

Unnamed: 0,Neighborhood,Population,Latitude,Longitude
0,Allapattah,54289,25.815,80.224
1,Arts & Entertainment District,11033,25.799,80.19
2,Brickell,31759,25.758,80.193
3,Buena Vista,9058,25.813,80.192
4,Coconut Grove,20076,25.712,80.257
5,Coral Way,35062,25.75,80.283
6,Design District,3573,25.813,80.193
7,Downtown,"71,000 (13,635 CBD only)",25.774,80.193
8,Edgewater,15005,25.802,80.19
9,Flagami,50834,25.762,80.316


In [466]:
# clean up missing data and misc



df.iloc[7,1]= 71000

df.iloc[16,1]= 24870

df.iloc[21,1]= 228

df.drop([11, 25], inplace=True)

df['Population'] = df['Population'].astype(np.int64)

df['Latitude'] = df['Latitude'].astype(np.float64)

df['Longitude'] = df['Longitude'].astype(np.float64)

df['Longitude'] = df['Longitude'] * -1

df.reset_index(drop=True , inplace=True)

df

Unnamed: 0,Neighborhood,Population,Latitude,Longitude
0,Allapattah,54289,25.815,-80.224
1,Arts & Entertainment District,11033,25.799,-80.19
2,Brickell,31759,25.758,-80.193
3,Buena Vista,9058,25.813,-80.192
4,Coconut Grove,20076,25.712,-80.257
5,Coral Way,35062,25.75,-80.283
6,Design District,3573,25.813,-80.193
7,Downtown,71000,25.774,-80.193
8,Edgewater,15005,25.802,-80.19
9,Flagami,50834,25.762,-80.316


### Part 2 - Segmenting the Venue Data

In [361]:
# import data viz libraries and maps

import matplotlib.cm as cm                                 #for colors etc...
import matplotlib.colors as colors


!conda install -c conda-forge folium=0.5.0 --yes            #conda installer

import folium                                               #for maps

import json
from pandas.io.json import json_normalize

import requests 


print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


In [467]:
# The code was removed by Watson Studio for sharing.

In [468]:
# function to find the venues in any neighborhood


LIMIT = 100


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [469]:
# apply the function to miami neighborhoods

miami_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Allapattah
Arts & Entertainment District
Brickell
Buena Vista
Coconut Grove
Coral Way
Design District
Downtown
Edgewater
Flagami
Grapeland Heights
Liberty City
Little Haiti
Little Havana
Lummus Park
Midtown
Overtown
Park West
The Roads
Upper Eastside
Venetian Islands
Virginia Key
West Flagler
Wynwood


In [510]:
# analyze each neighborhood

# one hot encoding
miami_onehot = pd.get_dummies(miami_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
miami_onehot['Neighborhood'] = miami_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [miami_onehot.columns[-1]] + list(miami_onehot.columns[:-1])
miami_onehot = miami_onehot[fixed_columns]

miami_onehot.head()


Unnamed: 0,Yoga Studio,Accessories Store,American Restaurant,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Asian Restaurant,...,Tennis Court,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [511]:
# find the neighborhods with the most pizza places

miami_pizza = miami_onehot[['Neighborhood','Pizza Place']]

miami_pizza = miami_pizza.groupby(['Neighborhood']).count()

miami_pizza = miami_pizza.reset_index()

miami_pizza = miami_pizza.join(df['Population'])

miami_pizza = miami_pizza.sort_values(by=['Pizza Place'])


miami_pizza



Unnamed: 0,Neighborhood,Pizza Place,Population
21,Virginia Key,2,14
11,Liberty City,3,19725
4,Coconut Grove,3,20076
16,Overtown,4,6736
0,Allapattah,4,54289
20,Venetian Islands,5,228
19,Upper Eastside,6,12525
9,Flagami,6,50834
22,West Flagler,7,31407
18,The Roads,8,7327


In [512]:
# group rows by neighborhood and mean freq

miami_grouped = miami_onehot.groupby('Neighborhood').mean().reset_index()

miami_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,American Restaurant,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,...,Tennis Court,Theater,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,Allapattah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Arts & Entertainment District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0
2,Brickell,0.0,0.0,0.035088,0.0,0.0,0.0,0.035088,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
3,Buena Vista,0.0,0.0,0.02381,0.0,0.02381,0.02381,0.0,0.047619,0.047619,...,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381
4,Coconut Grove,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [513]:
# function to sort venues in descending order


def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [514]:
#each neighborhoods top ten venues

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = miami_grouped['Neighborhood']

for ind in np.arange(miami_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(miami_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allapattah,Convenience Store,Boxing Gym,Department Store,Lounge,Women's Store,Eastern European Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store
1,Arts & Entertainment District,Restaurant,Ice Cream Shop,Paper / Office Supplies Store,Moving Target,Salon / Barbershop,Sandwich Place,Beach,Smoothie Shop,Pizza Place,Spa
2,Brickell,Hotel,Italian Restaurant,Café,Japanese Restaurant,Juice Bar,Restaurant,Bar,Bank,Salon / Barbershop,Sandwich Place
3,Buena Vista,Café,Boutique,Art Gallery,Coffee Shop,Clothing Store,Jewelry Store,Italian Restaurant,Furniture / Home Store,Art Museum,Nightclub
4,Coconut Grove,American Restaurant,Park,Boat or Ferry,Women's Store,Donut Shop,Fast Food Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store


### Part 3 - Clustering the Neighborhoods

In [515]:
#cluster the neighborhoods

from sklearn.cluster import KMeans

# set number of clusters
kclusters = 6

miami_cluster = miami_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, init='k-means++', random_state=42).fit(miami_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([5, 0, 0, 0, 3, 0, 0, 0, 0, 0], dtype=int32)

In [516]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

miami_merged = df


# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
miami_merged = miami_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood', how='right')

miami_merged['Cluster Labels'] = miami_merged['Cluster Labels'].astype(int)


miami_merged # check the last columns!

Unnamed: 0,Neighborhood,Population,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allapattah,54289,25.815,-80.224,5,Convenience Store,Boxing Gym,Department Store,Lounge,Women's Store,Eastern European Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store
1,Arts & Entertainment District,11033,25.799,-80.19,0,Restaurant,Ice Cream Shop,Paper / Office Supplies Store,Moving Target,Salon / Barbershop,Sandwich Place,Beach,Smoothie Shop,Pizza Place,Spa
2,Brickell,31759,25.758,-80.193,0,Hotel,Italian Restaurant,Café,Japanese Restaurant,Juice Bar,Restaurant,Bar,Bank,Salon / Barbershop,Sandwich Place
3,Buena Vista,9058,25.813,-80.192,0,Café,Boutique,Art Gallery,Coffee Shop,Clothing Store,Jewelry Store,Italian Restaurant,Furniture / Home Store,Art Museum,Nightclub
4,Coconut Grove,20076,25.712,-80.257,3,American Restaurant,Park,Boat or Ferry,Women's Store,Donut Shop,Fast Food Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store
5,Coral Way,35062,25.75,-80.283,0,Liquor Store,Historic Site,Dive Bar,Café,Burger Joint,Electronics Store,Seafood Restaurant,Mobile Phone Shop,Pharmacy,Golf Course
6,Design District,3573,25.813,-80.193,0,Café,Boutique,Art Gallery,Coffee Shop,Clothing Store,Jewelry Store,Italian Restaurant,Furniture / Home Store,Art Museum,Nightclub
7,Downtown,71000,25.774,-80.193,0,Italian Restaurant,Hotel,Peruvian Restaurant,Lounge,Coffee Shop,Pharmacy,Cocktail Bar,Café,Sandwich Place,Brazilian Restaurant
8,Edgewater,15005,25.802,-80.19,0,Restaurant,Sandwich Place,French Restaurant,Breakfast Spot,Pizza Place,Gym,Peruvian Restaurant,Coffee Shop,Art Gallery,Cuban Restaurant
9,Flagami,50834,25.762,-80.316,0,Seafood Restaurant,Department Store,Spanish Restaurant,Bakery,Fast Food Restaurant,Women's Store,Donut Shop,Event Service,Empanada Restaurant,Electronics Store


In [517]:
# create map


latitude = 25.761681
longitude = -80.191788



map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(miami_merged['Latitude'], miami_merged['Longitude'], miami_merged['Neighborhood'], miami_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster -1],
        fill=True,
        fill_color=rainbow[cluster -1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Cluster 1

In [518]:
miami_merged.loc[miami_merged['Cluster Labels'] == 0, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Arts & Entertainment District,Restaurant,Ice Cream Shop,Paper / Office Supplies Store,Moving Target,Salon / Barbershop,Sandwich Place,Beach,Smoothie Shop,Pizza Place,Spa
2,Brickell,Hotel,Italian Restaurant,Café,Japanese Restaurant,Juice Bar,Restaurant,Bar,Bank,Salon / Barbershop,Sandwich Place
3,Buena Vista,Café,Boutique,Art Gallery,Coffee Shop,Clothing Store,Jewelry Store,Italian Restaurant,Furniture / Home Store,Art Museum,Nightclub
5,Coral Way,Liquor Store,Historic Site,Dive Bar,Café,Burger Joint,Electronics Store,Seafood Restaurant,Mobile Phone Shop,Pharmacy,Golf Course
6,Design District,Café,Boutique,Art Gallery,Coffee Shop,Clothing Store,Jewelry Store,Italian Restaurant,Furniture / Home Store,Art Museum,Nightclub
7,Downtown,Italian Restaurant,Hotel,Peruvian Restaurant,Lounge,Coffee Shop,Pharmacy,Cocktail Bar,Café,Sandwich Place,Brazilian Restaurant
8,Edgewater,Restaurant,Sandwich Place,French Restaurant,Breakfast Spot,Pizza Place,Gym,Peruvian Restaurant,Coffee Shop,Art Gallery,Cuban Restaurant
9,Flagami,Seafood Restaurant,Department Store,Spanish Restaurant,Bakery,Fast Food Restaurant,Women's Store,Donut Shop,Event Service,Empanada Restaurant,Electronics Store
10,Grapeland Heights,Gym / Fitness Center,Bakery,Hotel,Hotel Pool,Bus Station,Golf Course,Restaurant,Bar,Auto Garage,Event Space
12,Little Haiti,Yoga Studio,Record Shop,Caribbean Restaurant,Pizza Place,Pilates Studio,Pharmacy,Coffee Shop,Dance Studio,Donut Shop,Liquor Store


# Cluster 2

In [519]:
miami_merged.loc[miami_merged['Cluster Labels'] == 1, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Virginia Key,Beach,Women's Store,Donut Shop,Fast Food Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dog Run


# Cluster 3

In [520]:
miami_merged.loc[miami_merged['Cluster Labels'] == 2, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Liberty City,Southern / Soul Food Restaurant,Grocery Store,Park,Women's Store,Dive Bar,Event Service,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


# Cluster 4

In [521]:
miami_merged.loc[miami_merged['Cluster Labels'] == 3, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Coconut Grove,American Restaurant,Park,Boat or Ferry,Women's Store,Donut Shop,Fast Food Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store


# Cluster 5

In [522]:
miami_merged.loc[miami_merged['Cluster Labels'] == 4, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Overtown,Wings Joint,Athletics & Sports,Southern / Soul Food Restaurant,Bar,Fast Food Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store,Eastern European Restaurant


# Cluster 6

In [523]:
miami_merged.loc[miami_merged['Cluster Labels'] == 5, miami_merged.columns[[0] + list(range(5, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allapattah,Convenience Store,Boxing Gym,Department Store,Lounge,Women's Store,Eastern European Restaurant,Event Space,Event Service,Empanada Restaurant,Electronics Store
