# Coursera IBM Data Science Professional Certificate
### Applied Data Science Capstone Project Week 3 (Part 3, Plotting)
The following notebook is the code and mardown for the IBM Data Science Professional Certificate Applied Data Science
- Explore, segment, and cluster the neighborhoods in the city of Toronto. 


## Import Dependencies 

In [1]:
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import requests
import json
from geopy.geocoders import Nominatim
import matplotlib.cm as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
from sklearn.cluster import KMeans
import folium
from config import client_id, client_secret

## Assign URL to variable

In [2]:
post_codes_url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text

# Use BeautifulSoup to read in data

In [3]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(post_codes_url,'lxml')

## Loop through data and assign Table Data HTML tags to empty lists

In [4]:
post_codes = []
boroughs = []
neighborhoods = []
counter = 1
for i in soup.table.find_all('td'):
    if counter == 1:
        post_codes.append(i.text)
    if counter == 2:
        boroughs.append(i.text)
    if counter == 3:
        neighborhoods.append(i.text)
    counter += 1
    if counter == 4:
        counter = 1 

## Create DataFrame and assign lists to columns

In [5]:
toronto_df = pd.DataFrame(columns=['PostalCode','Borough','Neighborhood'])
toronto_df['PostalCode'] = post_codes
toronto_df['Borough'] = boroughs
toronto_df['Neighborhood'] = neighborhoods
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n


## Clean up DataFrame by getting rid of 'Not Assigned" Boroughs and getting rid of the '\n' newlines

In [6]:
toronto_df = toronto_df[toronto_df['Borough'] != 'Not assigned']
toronto_df.reset_index(inplace=True)
toronto_df['Neighborhood'] = toronto_df['Neighborhood'].str.replace("\n","")
toronto_df.head()

Unnamed: 0,index,PostalCode,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,Harbourfront
3,5,M5A,Downtown Toronto,Regent Park
4,6,M6A,North York,Lawrence Heights


## Get the Shape of the DataFrame

In [7]:
toronto_df.shape

(211, 4)

## Get Geo Data

In [8]:
geo_df = pd.read_csv('https://cocl.us/Geospatial_data')
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge DataFrames

In [9]:
toronto_df_merged = pd.merge(toronto_df,geo_df,left_on="PostalCode",\
                             right_on="Postal Code",how='left').drop('Postal Code',axis=1)
del toronto_df_merged['index']
toronto_df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


## Exploritory Analysis

#### Count how many neighborhoods there are 

In [10]:
toronto_df_merged['Borough'].value_counts()

Etobicoke           45
North York          38
Downtown Toronto    37
Scarborough         37
Central Toronto     17
West Toronto        13
York                 9
East Toronto         7
East York            6
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

In [11]:
toronto_df_merged['Neighborhood'].value_counts()

Runnymede                   2
St. James Town              2
Bathurst Quay               1
Ionview                     1
Rathnelly                   1
Downsview Northwest         1
CFB Toronto                 1
Little Portugal             1
Island airport              1
Commerce Court              1
Roncesvalles                1
Scarborough Village         1
Woodbine Heights            1
Victoria Hotel              1
East Birchmount Park        1
Silverstone                 1
Mimico South                1
Swansea                     1
West Hill                   1
Forest Hill West            1
Toronto Islands             1
Union Station               1
Martin Grove                1
Cliffside                   1
Lawrence Park               1
Cabbagetown                 1
Ryerson                     1
The Kingsway                1
The Beaches West            1
The Queensway West          1
                           ..
Parkview Hill               1
Thistletown                 1
Exhibition

#### Group DataFrame by Borough and their neighborhoods

In [12]:
toronto_df_merged.groupby(['Borough','Neighborhood']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Latitude,Longitude
Borough,Neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1
Central Toronto,Davisville,43.704324,-79.388790
Central Toronto,Davisville North,43.712751,-79.390197
Central Toronto,Deer Park,43.686412,-79.400049
Central Toronto,Forest Hill North,43.696948,-79.411307
Central Toronto,Forest Hill SE,43.686412,-79.400049
Central Toronto,Forest Hill West,43.696948,-79.411307
Central Toronto,Lawrence Park,43.728020,-79.388790
Central Toronto,Moore Park,43.689574,-79.383160
Central Toronto,North Midtown,43.672710,-79.405678
Central Toronto,North Toronto West,43.715383,-79.405678


### Plotting

In [13]:
city = 'Toronto, ON'
geo = Nominatim()
location = geo.geocode(city)
lat = location.latitude
long = location.longitude

  


In [14]:
toronto_map = folium.Map(location=[lat,long],zoom_start=11)
for latitude, longitude, borough, hood in zip(toronto_df_merged['Latitude'],toronto_df_merged['Longitude'],toronto_df_merged['Borough'],toronto_df_merged['Neighborhood']):
    label = f"{hood}, {borough}"
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [latitude,longitude],radius=4,popup=label,color="green",fill=True, fill_color='#008000',fill_opacity=0.6,parse_html=False).add_to(toronto_map)
toronto_map

### Explore the University of Toronto Neighborhood 

#### Establish Foursquare credentials

In [15]:
CLIENT_ID = client_id
CLIENT_SECRET = client_secret
VERSION = '20180605'

#### Get the index of University of Toronto and get the LAT/LONG

In [16]:
index = toronto_df_merged[toronto_df_merged['Neighborhood']=='University of Toronto'].index
uTor_index = index[0]
uTor_index

144

In [17]:
uTor_lat = toronto_df_merged.loc[uTor_index, 'Latitude']
uTor_long = toronto_df_merged.loc[uTor_index, 'Longitude']

#### Set up URL and limits

In [18]:
limit = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    uTor_lat, 
    uTor_long, 
    radius, 
    limit)

#### Get results from JSON

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d21a1b79ba3e5002cef2737'},
 'response': {'headerLocation': 'University of Toronto',
  'headerFullLocation': 'University of Toronto, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 34,
  'suggestedBounds': {'ne': {'lat': 43.6671956045, 'lng': -79.39384042790832},
   'sw': {'lat': 43.6581955955, 'lng': -79.4062581720917}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5362c366498e602fbe1db395',
       'name': 'Yasu',
       'location': {'address': '81 Harbord St.',
        'lat': 43.66283719650635,
        'lng': -79.40321739973975,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.66283719650635,
          'lng': -79.40321739973975}],
        'distance': 255,
        'postalCode': 'M5S 1G

In [20]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Yasu,Japanese Restaurant,43.662837,-79.403217
1,Rasa,Restaurant,43.662757,-79.403988
2,Piano Piano,Italian Restaurant,43.662949,-79.402898
3,The Dessert Kitchen,Dessert Shop,43.662823,-79.402746
4,Cafe Cancan,French Restaurant,43.662735,-79.403447


In [22]:
print(f"{nearby_venues.shape[0]} venues returned by Foursquare or University of Toronto Neighborhood")

34 venues returned by Foursquare or University of Toronto Neighborhood


In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_venues = getNearbyVenues(names=toronto_df_merged['Neighborhood'],
                                   latitudes=toronto_df_merged['Latitude'],
                                   longitudes=toronto_df_merged['Longitude'])

Parkwoods
Victoria Village
Harbourfront
Regent Park
Lawrence Heights
Lawrence Manor
Not assigned
Islington Avenue
Rouge
Malvern
Don Mills North
Woodbine Gardens
Parkview Hill
Ryerson
Garden District
Glencairn
Cloverdale
Islington
Martin Grove
Princess Gardens
West Deane Park
Highland Creek
Rouge Hill
Port Union
Flemingdon Park
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens
Eringate
Markland Wood
Old Burnhamthorpe
Guildwood
Morningside
West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor
Downsview North
Wilson Heights
Thorncliffe Park
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Scarborough Village
Fairview
Henry Farm
Oriole
Northwood Park
York University
East Toronto
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
East Birchmount Park
Ionview
Kennedy Park
Bayview Village
CFB Toronto
Downsview East
The Danforth West
Riverdale
Design E

In [27]:
toronto_venues.shape

(4457, 7)

In [28]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [29]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
Agincourt North,3,3,3,3,3,3
Albion Gardens,11,11,11,11,11,11
Alderwood,10,10,10,10,10,10
Bathurst Manor,18,18,18,18,18,18
Bathurst Quay,16,16,16,16,16,16
Bayview Village,4,4,4,4,4,4
Beaumond Heights,11,11,11,11,11,11
Bedford Park,25,25,25,25,25,25


In [30]:
print(f"There are {len(toronto_venues['Venue Category'].unique())} unique venue categories")

There are 280 unique venue categories


## Analyze Neighborhoods

In [31]:
# one hot encoding
tor_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

tor_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
tor_onehot.shape

(4457, 280)

In [33]:
tor_grouped = tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Adelaide,0.000000,0.01,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.010000,0.000000,0.000000,0.000000,0.000000,0.010000,0.000000,0.010000
1,Agincourt,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Agincourt North,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,Albion Gardens,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000
4,Alderwood,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,Bathurst Manor,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.055556,0.000000,0.000000,0.000000,0.000000,0.000000
6,Bathurst Quay,0.000000,0.00,0.000000,0.000000,0.062500,0.0625,0.0625,0.125,0.125,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,Bayview Village,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,Beaumond Heights,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000
9,Bedford Park,0.000000,0.00,0.000000,0.000000,0.000000,0.0000,0.0000,0.000,0.000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [34]:
tor_grouped.shape

(205, 280)

In [35]:
num_top_venues = 5

for hood in tor_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = tor_grouped[tor_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2                  Bar  0.04
3           Steakhouse  0.04
4  American Restaurant  0.04


----Agincourt----
                venue  freq
0      Sandwich Place  0.25
1              Lounge  0.25
2      Breakfast Spot  0.25
3  Chinese Restaurant  0.25
4         Yoga Studio  0.00


----Agincourt North----
                venue  freq
0          Playground  0.33
1    Asian Restaurant  0.33
2                Park  0.33
3         Yoga Studio  0.00
4  Mexican Restaurant  0.00


----Albion Gardens----
                  venue  freq
0         Grocery Store  0.18
1           Pizza Place  0.09
2           Coffee Shop  0.09
3        Sandwich Place  0.09
4  Fast Food Restaurant  0.09


----Alderwood----
            venue  freq
0     Pizza Place   0.2
1    Skating Rink   0.1
2  Sandwich Place   0.1
3    Dance Studio   0.1
4             Pub   0.1


----Bathurst Manor----
                       venue  fre

                             venue  freq
0                    Shopping Mall  0.33
1                    Grocery Store  0.33
2                             Bank  0.33
3               Mexican Restaurant  0.00
4  Molecular Gastronomy Restaurant  0.00


----Dufferin----
         venue  freq
0  Supermarket  0.10
1     Pharmacy  0.10
2       Bakery  0.10
3          Bar  0.05
4         Café  0.05


----East Birchmount Park----
                       venue  freq
0           Department Store  0.25
1             Discount Store  0.25
2                 Playground  0.25
3                Coffee Shop  0.25
4  Middle Eastern Restaurant  0.00


----East Toronto----
                venue  freq
0         Coffee Shop  0.33
1                Park  0.33
2   Convenience Store  0.33
3         Yoga Studio  0.00
4  Mexican Restaurant  0.00


----Emery----
                             venue  freq
0                   Baseball Field   1.0
1                      Yoga Studio   0.0
2  Molecular Gastronomy Restaurant   0

                             venue  freq
0                   Baseball Field   1.0
1                      Yoga Studio   0.0
2  Molecular Gastronomy Restaurant   0.0
3       Modern European Restaurant   0.0
4                Mobile Phone Shop   0.0


----Kingsview Village----
                venue  freq
0         Pizza Place  0.25
1                Park  0.25
2            Bus Line  0.25
3   Mobile Phone Shop  0.25
4  Mexican Restaurant  0.00


----Kingsway Park South East----
                             venue  freq
0                   Baseball Field   1.0
1                      Yoga Studio   0.0
2  Molecular Gastronomy Restaurant   0.0
3       Modern European Restaurant   0.0
4                Mobile Phone Shop   0.0


----Kingsway Park South West----
             venue  freq
0      Flower Shop  0.08
1     Burger Joint  0.08
2   Discount Store  0.08
3  Supplement Shop  0.08
4   Sandwich Place  0.08


----L'Amoreaux East----
                venue  freq
0          Playground  0.33
1    Asian

                        venue  freq
0  Construction & Landscaping   0.5
1                         Bar   0.5
2                 Yoga Studio   0.0
3   Middle Eastern Restaurant   0.0
4         Monument / Landmark   0.0


----Princess Gardens----
                             venue  freq
0                             Bank   1.0
1                      Yoga Studio   0.0
2               Mexican Restaurant   0.0
3  Molecular Gastronomy Restaurant   0.0
4       Modern European Restaurant   0.0


----Railway Lands----
              venue  freq
0  Airport Terminal  0.12
1    Airport Lounge  0.12
2   Airport Service  0.12
3   Harbor / Marina  0.06
4  Sculpture Garden  0.06


----Rathnelly----
                 venue  freq
0          Coffee Shop  0.14
1                  Pub  0.14
2   Light Rail Station  0.07
3  Fried Chicken Joint  0.07
4     Sushi Restaurant  0.07


----Regent Park----
            venue  freq
0     Coffee Shop  0.17
1          Bakery  0.06
2            Park  0.06
3             Pub  

                       venue  freq
0                Pizza Place  0.25
1                   Bus Line  0.25
2              Grocery Store  0.25
3          Convenience Store  0.25
4  Middle Eastern Restaurant  0.00


----The Junction South----
                 venue  freq
0                  Bar  0.09
1   Mexican Restaurant  0.09
2                 Café  0.09
3            Bookstore  0.04
4  Arts & Crafts Store  0.04


----The Kingsway----
                             venue  freq
0                            River  0.33
1                             Park  0.33
2                             Pool  0.33
3                    Metro Station  0.00
4  Molecular Gastronomy Restaurant  0.00


----The Queensway East----
                             venue  freq
0                   Baseball Field   1.0
1                      Yoga Studio   0.0
2  Molecular Gastronomy Restaurant   0.0
3       Modern European Restaurant   0.0
4                Mobile Phone Shop   0.0


----The Queensway West----
             v

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Bar,American Restaurant,Thai Restaurant,Steakhouse,Hotel,Cosmetics Shop,Gym,Burger Joint
1,Agincourt,Lounge,Breakfast Spot,Sandwich Place,Chinese Restaurant,Electronics Store,Eastern European Restaurant,Empanada Restaurant,Dumpling Restaurant,Drugstore,Department Store
2,Agincourt North,Park,Asian Restaurant,Playground,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
3,Albion Gardens,Grocery Store,Beer Store,Fried Chicken Joint,Fast Food Restaurant,Liquor Store,Pharmacy,Pizza Place,Sandwich Place,Coffee Shop,Video Store
4,Alderwood,Pizza Place,Gym,Pharmacy,Pub,Sandwich Place,Pool,Dance Studio,Skating Rink,Coffee Shop,Drugstore


### Clustering Neighborhoods 

In [38]:

kclusters = 5

tor_grouped_cluster = tor_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_cluster)

kmeans.labels_[0:10] 

array([0, 0, 2, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [39]:

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


toronto_df_merged_clust = toronto_df_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_df_merged_clust.head() 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Fast Food Restaurant,Park,Food & Drink Shop,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Intersection,Coffee Shop,Hockey Arena,Portuguese Restaurant,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0.0,Coffee Shop,Park,Pub,Bakery,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Café,Performing Arts Venue
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636,0.0,Coffee Shop,Park,Pub,Bakery,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Café,Performing Arts Venue
4,M6A,North York,Lawrence Heights,43.718518,-79.464763,0.0,Clothing Store,Furniture / Home Store,Women's Store,Coffee Shop,Event Space,Miscellaneous Shop,Fraternity House,Boutique,Vietnamese Restaurant,Accessories Store


# Create Map

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = plt.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_df_merged_clust['Latitude'], toronto_df_merged_clust['Longitude'], toronto_df_merged_clust['Neighborhood'], toronto_df_merged_clust['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

AttributeError: module 'matplotlib.pyplot' has no attribute 'rainbow'

## Examine Clusters

In [43]:
toronto_df_merged_clust.loc[toronto_df_merged_clust['Cluster Labels'] == 0, toronto_df_merged_clust.columns[[1] + list(range(5, toronto_df_merged_clust.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0.0,Intersection,Coffee Shop,Hockey Arena,Portuguese Restaurant,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
2,Downtown Toronto,0.0,Coffee Shop,Park,Pub,Bakery,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Café,Performing Arts Venue
3,Downtown Toronto,0.0,Coffee Shop,Park,Pub,Bakery,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Café,Performing Arts Venue
4,North York,0.0,Clothing Store,Furniture / Home Store,Women's Store,Coffee Shop,Event Space,Miscellaneous Shop,Fraternity House,Boutique,Vietnamese Restaurant,Accessories Store
5,North York,0.0,Clothing Store,Furniture / Home Store,Women's Store,Coffee Shop,Event Space,Miscellaneous Shop,Fraternity House,Boutique,Vietnamese Restaurant,Accessories Store
6,Queen's Park,0.0,Coffee Shop,Park,Gym,Japanese Restaurant,Sushi Restaurant,Smoothie Shop,Seafood Restaurant,Sandwich Place,Burger Joint,Burrito Place
8,Scarborough,0.0,Fast Food Restaurant,Print Shop,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
9,Scarborough,0.0,Fast Food Restaurant,Print Shop,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
10,North York,0.0,Gym / Fitness Center,Caribbean Restaurant,Café,Japanese Restaurant,Baseball Field,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dessert Shop
11,East York,0.0,Fast Food Restaurant,Pizza Place,Gym / Fitness Center,Café,Bank,Athletics & Sports,Gastropub,Intersection,Pharmacy,Pet Store


In [44]:
toronto_df_merged_clust.loc[toronto_df_merged_clust['Cluster Labels'] == 1, toronto_df_merged_clust.columns[[1] + list(range(5, toronto_df_merged_clust.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Etobicoke,1.0,Bank,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Department Store
17,Etobicoke,1.0,Bank,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Department Store
18,Etobicoke,1.0,Bank,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Department Store
19,Etobicoke,1.0,Bank,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Department Store
20,Etobicoke,1.0,Bank,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore,Department Store


In [45]:
toronto_df_merged_clust.loc[toronto_df_merged_clust['Cluster Labels'] == 2, toronto_df_merged_clust.columns[[1] + list(range(5, toronto_df_merged_clust.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2.0,Fast Food Restaurant,Park,Food & Drink Shop,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
15,North York,2.0,Park,Pizza Place,Japanese Restaurant,Pub,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
38,York,2.0,Park,Women's Store,Pharmacy,Fast Food Restaurant,Market,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
60,East York,2.0,Convenience Store,Park,Coffee Shop,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
70,North York,2.0,Park,Other Repair Shop,Airport,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
71,North York,2.0,Park,Other Repair Shop,Airport,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
112,Central Toronto,2.0,Park,Bus Line,Swim School,Women's Store,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
116,York,2.0,Park,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
120,North York,2.0,Park,Bank,Convenience Store,Bar,Women's Store,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant
122,Central Toronto,2.0,Park,Jewelry Store,Sushi Restaurant,Trail,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Deli / Bodega


In [46]:
toronto_df_merged_clust.loc[toronto_df_merged_clust['Cluster Labels'] == 3, toronto_df_merged_clust.columns[[1] + list(range(5, toronto_df_merged_clust.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
106,North York,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
107,North York,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
198,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
199,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
200,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
201,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
202,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
203,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
204,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store
205,Etobicoke,3.0,Baseball Field,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Department Store


In [47]:
toronto_df_merged_clust.loc[toronto_df_merged_clust['Cluster Labels'] == 4, toronto_df_merged_clust.columns[[1] + list(range(5, toronto_df_merged_clust.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Scarborough,4.0,Bar,Construction & Landscaping,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
22,Scarborough,4.0,Bar,Construction & Landscaping,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
23,Scarborough,4.0,Bar,Construction & Landscaping,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
