# Creation a new Notebook




In [1]:
import sys
import requests
import pandas as pd
import requests # library to handle requests

from bs4 import BeautifulSoup as bs

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize 

# import k-means from clustering stage
from sklearn.cluster import KMeans

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize 

# Use the Notebook to build the code to scrape the following Wikipedia page
###  https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
can_ps = requests.get(url)

### Reading the data from the url and creating the dataframe

In [3]:
df_toronto = pd.DataFrame()

toronto_soup = bs(can_ps.text, 'lxml')

toronto_tab = toronto_soup.find_all('table')[0] 

row_marker = 0

for row in toronto_tab.find_all('tr'):
    
    column_marker = 0
    
    columns = row.find_all('td')
    
    for column in columns:
        
        df_toronto.loc[row_marker,column_marker] = column.get_text()
        
        column_marker += 1
        
    row_marker += 1
    
df_toronto.head()
df_toronto.shape

(180, 3)

### Cleaning the data set 

In [4]:
#The column name is not as per the standard. Replace the column name 
#the dataframe will consist of three columns: PostalCode, Borough, and Neighborhood(One of the condition in assignment)
df_toronto.rename(columns={0:'PostalCode',1:'Borough',2:'Neighborhood'}, inplace=True)

#Cleaning new line character from the dataframe
df_toronto = df_toronto.replace('\n','', regex=True)

#Before removing the row where Bourough equals "Not assigned", check how many such value are there. 
df_toronto_Borough_na = df_toronto[df_toronto["Borough"] == "Not assigned" ]

print("number of rows where 'Borough' is not assigned is %d" %(df_toronto_Borough_na.shape[0]))
print("Remaining rows count is %d" %(df_toronto.shape[0] - df_toronto_Borough_na.shape[0]))


df_toronto.head()

number of rows where 'Borough' is not assigned is 77
Remaining rows count is 103


Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Sub query:- Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.(One of the condition in assignment)

In [5]:
#Cleaning the data set and removing the not assigned value
df_toronto = df_toronto[df_toronto["Borough"] !='Not assigned']

#Setting the index again after removal of the row 
df_toronto.reset_index(drop=True,inplace=True)

In [6]:
# Expectation is there will be no record with df_toronto["Borough"] = "Not assigned"
df_toronto[df_toronto["Borough"] == "Not assigned"]

Unnamed: 0,PostalCode,Borough,Neighborhood



## If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

## More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above tab

In [7]:
# Check how many neighborhood are there where it is duplicate 
df_toronto_nei = df_toronto[df_toronto['Neighborhood'] == 'Not assigned']
df_toronto_nei

Unnamed: 0,PostalCode,Borough,Neighborhood


In [8]:
# Creating new dataframe for question 1 
df_toronto_Q1= pd.DataFrame(columns = ['PostalCode','Borough','Neighborhood'])


for ii in range(df_toronto.shape[0]):
    
    df_borough = df_toronto.loc[ii,'Borough']
    df_postalcode = df_toronto.loc[ii,'PostalCode']
    df_neighborhood = df_toronto.loc[ii,'Neighborhood'][:-1]
    
    # If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
    # ( One of the condition in the assignment )
    if df_neighborhood == 'Not assigned':
        df_neighborhood = borough
    
    #More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, 
    #you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park.
    #These two rows will be combined into one row with the neighborhoods separated with 
    #a comma as shown in row 11 in the above table
    
    if df_postalcode in df_toronto_Q1.PostalCode.values:
        df_old_neighborhood = df_toronto_Q1[df_toronto_Q1.PostalCode == df_postalcode].Neighborhood
        new_value = (old_neighborhood + ',' + df_neighborhood)
        df_toronto_Q1.loc[df_toronto_Q1.PostalCode == postalcode,'Neighborhood'] = new_value 
    
    else:
        df_toronto_Q1 = df_toronto_Q1.append({'PostalCode': df_postalcode,
                                                'Borough': df_borough,
                                                'Neighborhood': df_neighborhood,
                                               }, ignore_index=True)



df_toronto_Q1

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwood
1,M4A,North York,Victoria Villag
2,M5A,Downtown Toronto,"Regent Park, Harbourfron"
3,M6A,North York,"Lawrence Manor, Lawrence Height"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Governmen"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Villag"
6,M1B,Scarborough,"Malvern, Roug"
7,M3B,North York,Don Mill
8,M4B,East York,"Parkview Hill, Woodbine Garden"
9,M5B,Downtown Toronto,"Garden District, Ryerso"


## use the .shape method to print the number of rows of your dataframe.

In [9]:
df_toronto_Q1.shape

(103, 3)

## Get the latitude and the longitude coordinates of each neighborhood
## Create dataframe with latitude and the longitude

In [10]:
from geopy.geocoders import Nominatim  # import geocoder
import pgeocode

nomi = pgeocode.Nominatim('ca')

for zip_cd in df_toronto_Q1["PostalCode"]:
    # loop until you get the coordinates
    
    #geolocator = Nominatim(user_agent="Toronto_Exp")
    #location = geolocator.geocode({"postalcode": 'M1T' , 'countryRegion': 'Canada'})
    location = nomi.query_postal_code(zip_cd)
    
    latitude = location.latitude
    longitude = location.longitude
    
    df_toronto_Q1.loc[df_toronto_Q1["PostalCode"] == zip_cd, "latitude" ] = latitude
    df_toronto_Q1.loc[df_toronto_Q1["PostalCode"] == zip_cd, "longitude" ] = longitude
    
    #print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))
    
df_toronto_Q1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwood,43.7545,-79.33
1,M4A,North York,Victoria Villag,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfron",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Height",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Governmen",43.6641,-79.3889


In [11]:
df_toronto_Q1 = df_toronto_Q1.dropna()
df_toronto_Q1 = df_toronto_Q1.reset_index(drop=True)

## Mapping neighbourhood present in the dataframe with Folium

In [12]:
import numpy as np
df_toronto_Q1[df_toronto_Q1['longitude'] == np.nan]

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude


In [13]:
df_toronto_Q1[df_toronto_Q1["PostalCode"] == "M8V"]

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
87,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shore",43.6075,-79.5013


In [14]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_Q1['latitude'], df_toronto_Q1['longitude'], df_toronto_Q1['Borough'], df_toronto_Q1['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [15]:
CLIENT_ID = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' # your Foursquare ID
CLIENT_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Find the location within location which has all the required facilities that user is looking for within Toronto 
#### In order to get that, below are the steps which are required 

1. Explore all the venues that against the latitude and longitude 
2. Read the Json response and get that into a dataframe 
3. Append the dataframe into existing dataframe or copy of the existing dataframe
4. Query the dataframe with required user requirement 
5. Get the result set 

In [17]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Lets find out how many unique district are there in the data set 
Toronto_District_NM = df_toronto_Q1["Borough"].unique()
Toronto_District_NM

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

### Steps which are carried out in the next step ###

1. **Explore all the venues that against the latitude and longitude**

    a. Set the radius limit as 500 meters 
    
    b. Number of venues to explore for is 100
    

2. ***Read the Json response and get that into a dataframe***

3. ***Append the dataframe into existing dataframe or copy of the existing dataframe***

In [18]:
radius = 500 
LIMIT = 50

In [19]:
#Lets get the number of vanues in each district and neighbourhood 
#In order to get the result set, iteerate through the data set and pass the latitude and longitude value to the foursquare 
#Explore venue API 

df_concat = pd.DataFrame()
temp_pd = pd.DataFrame()

for tornonto_pc, tornonto_borough, tornonto_neigh, tornonto_lat, tornonto_long in zip(df_toronto_Q1['PostalCode'],df_toronto_Q1['Borough'], df_toronto_Q1['Neighborhood'],df_toronto_Q1['latitude'], df_toronto_Q1['longitude']):
    
    print("Postal Code is ",tornonto_pc)
    print("District Name is ",tornonto_borough)
    print("Neighbourhood Name is ",tornonto_neigh)
    
    neighborhood_latitude  = tornonto_lat
    neighborhood_longitude = tornonto_long
    
    try:
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, radius, LIMIT) 

        results = requests.get(url).json()
    
        venues = results['response']['groups'][0]['items']
    
        nearby_venues = json_normalize(venues) # flatten JSON
        
        # filter columns
        filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
        nearby_venues =nearby_venues.loc[:, filtered_columns]
        
        
        # filter the category for each row
        nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
        
        # clean columns
        nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
        
        print(nearby_venues)
    
        print("#############################################################################")
        
        # Get the total number of place for each categories for the given postal code and transpose the value
        # to create a dataframe with index as postal code
        
        temp_pd = pd.DataFrame(nearby_venues.categories.value_counts()).transpose()
        temp_pd["PostalCode"] = tornonto_pc
        temp_pd.set_index("PostalCode",inplace = True)
        
        
    except:
        print("Error while processing data for postalcode",tornonto_pc)
        print("Error received is ",sys.exc_info())
        
        pass
    
    df_concat = pd.concat([df_concat, temp_pd])
        


Postal Code is  M3A
District Name is  North York
Neighbourhood Name is  Parkwood
              name         categories        lat        lng
0  Brookbanks Park               Park  43.751976 -79.332140
1  GTA Restoration    Fireworks Store  43.753396 -79.333477
2    Variety Store  Food & Drink Shop  43.751974 -79.333114
#############################################################################
Postal Code is  M4A
District Name is  North York
Neighbourhood Name is  Victoria Villag
                                        name             categories  \
0                     Victoria Village Arena           Hockey Arena   
1                                Tim Hortons            Coffee Shop   
2                                  Portugril  Portuguese Restaurant   
3  Eglinton Ave E & Sloane Ave/Bermondsey Rd           Intersection   
4                                   The Frig      French Restaurant   
5                                 Pizza Nova            Pizza Place   
6               

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




                         name            categories        lat        lng
0            Roselle Desserts                Bakery  43.653447 -79.362017
1               Tandem Coffee           Coffee Shop  43.653559 -79.361809
2      Figs Breakfast & Lunch        Breakfast Spot  43.655675 -79.364503
3          Morning Glory Cafe        Breakfast Spot  43.653947 -79.361149
4             The Yoga Lounge           Yoga Studio  43.655515 -79.364955
5         Body Blitz Spa East                   Spa  43.654735 -79.359874
6             Berkeley Church           Event Space  43.655123 -79.365873
7             Sumach Espresso           Coffee Shop  43.658135 -79.359515
8                   Sukhothai       Thai Restaurant  43.658444 -79.365681
9              Impact Kitchen            Restaurant  43.656369 -79.356980
10             Rooster Coffee           Coffee Shop  43.651900 -79.365609
11   Dominion Pub and Kitchen                   Pub  43.656919 -79.358967
12                  Starbucks         

                    name     categories        lat        lng
0        TD Canada Trust           Bank  43.662545 -79.531749
1     Shoppers Drug Mart       Pharmacy  43.663067 -79.531753
2     Humber Valley Park           Park  43.664825 -79.524999
3     Humber Valley Rink   Skating Rink  43.664826 -79.524873
4  Thorncrest Drug Store       Pharmacy  43.662988 -79.531817
5     Foodland - Toronto  Grocery Store  43.662724 -79.531984
#############################################################################
Postal Code is  M1B
District Name is  Scarborough
Neighbourhood Name is  Malvern, Roug
                               name    categories        lat        lng
0  Canadian Appliance Source Whitby  Home Service  43.808353 -79.191331
#############################################################################
Postal Code is  M3B
District Name is  North York
Neighbourhood Name is  Don Mill
                   name                  categories        lat        lng
0        Elleffe Design 

                        name          categories        lat        lng
0                     Subway      Sandwich Place  43.648594 -79.548694
1                Pizza Pizza         Pizza Place  43.648755 -79.548726
2                 Pizza Nova         Pizza Place  43.649092 -79.549000
3      Far East Chinese Food  Chinese Restaurant  43.649244 -79.548866
4  Victorian Garden Tea Room            Tea Room  43.648143 -79.549124
5             My Daily Grind         Coffee Shop  43.650140 -79.545660
#############################################################################
Postal Code is  M1C
District Name is  Scarborough
Neighbourhood Name is  Rouge Hill, Port Union, Highland Cree
             name categories        lat        lng
0  Fox and Fiddle        Bar  43.789082 -79.154459
#############################################################################
Postal Code is  M3C
District Name is  North York
Neighbourhood Name is  Don Mill
                        name    categories        lat

                 name      categories        lat        lng
0                LCBO    Liquor Store  43.642099 -79.576592
1           Starbucks     Coffee Shop  43.641312 -79.576924
2      The Beer Store      Beer Store  43.641313 -79.576925
3  Shoppers Drug Mart        Pharmacy  43.641312 -79.576924
4           Pizza Hut     Pizza Place  43.641845 -79.576556
5      Cafe Sympatico            Café  43.641820 -79.576721
6  Burnhamthorpe Mall  Shopping Plaza  43.641741 -79.576891
7            Pet Valu       Pet Store  43.641667 -79.577050
#############################################################################
Postal Code is  M1E
District Name is  Scarborough
Neighbourhood Name is  Guildwood, Morningside, West Hil
                            name              categories        lat        lng
0                    Chick-N-Joy     Fried Chicken Joint  43.768752 -79.187982
1           Little Caesars Pizza             Pizza Place  43.769046 -79.184386
2                           LCBO       

                           name           categories        lat        lng
0    Doce Minho Pastry & Bakery               Bakery  43.691893 -79.448191
1                       Rebozos   Mexican Restaurant  43.684773 -79.449588
2   Al Flaherty's Outdoor Store  Sporting Goods Shop  43.687126 -79.446927
3  York Academy of Martial Arts                  Gym  43.687621 -79.446476
4        Fairbank Memorial Park                 Park  43.692028 -79.448924
5                The Beer Store           Beer Store  43.686707 -79.445493
6                    Nairn Park                 Park  43.690654 -79.456300
7                 Maximum Woman        Women's Store  43.690651 -79.456333
#############################################################################
Postal Code is  M1G
District Name is  Scarborough
Neighbourhood Name is  Wobur
                 name         categories        lat        lng
0  Korean Grill House  Korean Restaurant  43.770812 -79.214502
1    Convenience Plus  Convenience Store  

                    name     categories        lat        lng
0           Fiesta Farms  Grocery Store  43.668471 -79.420485
1   Scout and Cash Caffe           Café  43.667360 -79.419938
2     Christie Pits Park           Park  43.664177 -79.420466
3            Contra Cafe           Café  43.669107 -79.426105
4            Faema Caffe           Café  43.671046 -79.419297
5              Starbucks    Coffee Shop  43.671530 -79.421400
6     Karma Co-operative  Grocery Store  43.668185 -79.414504
7    Vermont Square Park     Playground  43.670493 -79.415399
8      Stubbe Chocolates    Candy Store  43.671566 -79.421289
9                Loblaws  Grocery Store  43.671657 -79.421364
10  Marlenes Just Babies     Baby Store  43.671824 -79.420499
#############################################################################
Postal Code is  M1H
District Name is  Scarborough
Neighbourhood Name is  Cedarbra
                                         name                  categories  \
0                  

                                  name                 categories        lat  \
0   Rosie Robin A Touch Of Convenience                       Café  43.663182   
1                 The Greater Good Bar                        Bar  43.669409   
2                             Parallel  Middle Eastern Restaurant  43.669516   
3                      Dovercourt Park                       Park  43.664915   
4                              FreshCo              Grocery Store  43.667918   
5              Happy Bakery & Pastries                     Bakery  43.667050   
6                      Nova Era Bakery                     Bakery  43.669886   
7                 Wallace Emerson Park                       Park  43.666933   
8                      TD Canada Trust                       Bank  43.667934   
9                               Rexall                   Pharmacy  43.667504   
10                TTC Bus #29 Dufferin                   Bus Line  43.664738   
11             Galleria Smokers Choice  

                                     name                     categories  \
0                       Bellwoods Brewery                        Brewery   
1                     Artscape Youngplace                    Art Gallery   
2           Bellwoods Brewery Bottle Shop                     Beer Store   
3                           Foxley Bistro               Asian Restaurant   
4                                Reposado                            Bar   
5            Bang Bang Ice Cream & Bakery                 Ice Cream Shop   
6                                OddSeoul              Korean Restaurant   
7                       Pizzeria Libretto                    Pizza Place   
8            Pho Rua Vang (Golden Turtle)          Vietnamese Restaurant   
9                               YogaSpace                    Yoga Studio   
10                              La Cubana               Cuban Restaurant   
11                              Gift Shop                   Cocktail Bar   
12          

                                         name                     categories  \
0                                       Canoe                     Restaurant   
1                                       WVRST                       Beer Bar   
2                          Equinox Bay Street                            Gym   
3                                   DAVIDsTEA                       Tea Room   
4                                       Maman                           Café   
5                       Adelaide Club Toronto           Gym / Fitness Center   
6                     The Fairmont Royal York                          Hotel   
7                       Pilot Coffee Roasters                    Coffee Shop   
8                 Sam James Coffee Bar (SJCB)                           Café   
9                       Union Pearson Express                  Train Station   
10                     Walrus Pub & Beer Hall                            Pub   
11                      Dineen @Commerce

                            name     categories        lat        lng
0                    Tim Hortons    Coffee Shop  43.716288 -79.283337
1   Warden Ave & St. Clair Ave E   Intersection  43.712057 -79.281005
2             TTC Bus #68 Warden       Bus Line  43.711778 -79.279714
3         Clairlea Futbol Centre   Soccer Field  43.715234 -79.286506
4       TTC Bus 102 Markham Road       Bus Line  43.711381 -79.279588
5          Warden Subway Station  Metro Station  43.711229 -79.279602
6        Warden Station Bus Loop    Bus Station  43.711241 -79.279576
7               Bakery On The Go         Bakery  43.711271 -79.279506
8                 Cafe on the go         Bakery  43.711151 -79.279469
9      Warden Ave & Fairfax Cres   Intersection  43.716187 -79.282936
10             Warden Woods Park           Park  43.710527 -79.278966
#############################################################################
Postal Code is  M2L
District Name is  North York
Neighbourhood Name is  York Mills

                     name        categories        lat        lng
0           Rustic Bakery            Bakery  43.715414 -79.490300
1  Queen's GreenBelt Park             Trail  43.712355 -79.484754
2                  Sporty  Basketball Court  43.716503 -79.489838
#############################################################################
Postal Code is  M9L
District Name is  North York
Neighbourhood Name is  Humber Summi
                                      name              categories        lat  \
0  Bad Boy Furniture - Distribution Centre  Furniture / Home Store  43.762881   
1                    Alton Windows & Doors            Home Service  43.760565   

         lng  
0 -79.556444  
1 -79.551368  
#############################################################################
Postal Code is  M1M
District Name is  Scarborough
Neighbourhood Name is  Cliffside, Cliffcrest, Scarborough Village Wes
                     name      categories        lat        lng
0  Vi Pei Bistro - Blu

                         name             categories        lat        lng
0                   No Frills          Grocery Store  43.758178 -79.519680
1            Pho Com Viet Nam  Vietnamese Restaurant  43.756631 -79.518336
2          Shoppers Drug Mart               Pharmacy  43.756147 -79.515843
3                         KFC   Fast Food Restaurant  43.756600 -79.518100
4                   Pizza Hut            Pizza Place  43.756340 -79.517818
5   Popeyes Louisiana Kitchen    Fried Chicken Joint  43.756604 -79.516047
6                Petro-Canada            Gas Station  43.757187 -79.517778
7                 Tim Hortons            Coffee Shop  43.756128 -79.516266
8              The Beer Store             Beer Store  43.756094 -79.516239
9                      Subway         Sandwich Place  43.756171 -79.518251
10                Pizza Pizza            Pizza Place  43.756186 -79.515460
11             Planet Fitness   Gym / Fitness Center  43.757538 -79.519610
12                  Dolla

                          name                     categories        lat  \
0                 Ezra's Pound                           Café  43.675153   
1                     Big Crow                      BBQ Joint  43.675896   
2                 Playa Cabana             Mexican Restaurant  43.676112   
3        Roti Cuisine of India              Indian Restaurant  43.674618   
4                      Fet Zun      Middle Eastern Restaurant  43.675147   
5        Madame Boeuf And Flea                   Burger Joint  43.675240   
6        Live Organic Food Bar  Vegetarian / Vegan Restaurant  43.675053   
7                      Mistura             Italian Restaurant  43.674285   
8                   Le Paradis              French Restaurant  43.675007   
9             Toronto Archives                 History Museum  43.676447   
10  Krispy Kreme Doughnut Cafe                     Donut Shop  43.674732   
11                Haute Coffee                           Café  43.675818   
12        Je

                          name                 categories        lat  \
0  Panagio's Breakfast & Lunch             Breakfast Spot  43.792370   
1                El Pulgarcito  Latin American Restaurant  43.792648   
2              Commander Arena               Skating Rink  43.794867   
3          Commander Badminton            Badminton Court  43.793546   

         lng  
0 -79.260203  
1 -79.259208  
2 -79.267989  
3 -79.269835  
#############################################################################
Postal Code is  M4S
District Name is  Central Toronto
Neighbourhood Name is  Davisvill
                                                 name            categories  \
0                              Marigold Indian Bistro     Indian Restaurant   
1                               Jules Cafe Patisserie          Dessert Shop   
2   GoodLife Fitness Toronto Mount Pleasant and Da...                   Gym   
3                                  June Rowlands Park                  Park   
4   

                                  name            categories        lat  \
0               Remezzo Italian Bistro    Italian Restaurant  43.778649   
1   The Royal Chinese Restaurant 避風塘小炒    Chinese Restaurant  43.780505   
2                      TD Canada Trust                  Bank  43.779169   
3                             Kub Khao       Thai Restaurant  43.780438   
4                         Petro-Canada           Gas Station  43.779337   
5            Popeyes Louisiana Kitchen   Fried Chicken Joint  43.780476   
6                 Little Caesars Pizza           Pizza Place  43.780563   
7                                  KFC  Fast Food Restaurant  43.780400   
8                Enterprise Rent-A-Car   Rental Car Location  43.779506   
9                               Rexall              Pharmacy  43.780900   
10                         Gusto Pizza           Pizza Place  43.783607   

          lng  
0  -79.308264  
1  -79.298844  
2  -79.303617  
3  -79.299837  
4  -79.307682  
5  

                                    name               categories        lat  \
0                         Sansotei Ramen         Ramen Restaurant  43.639176   
1                 Victoria Memorial Park                     Park  43.642785   
2                                 Stackt                   Market  43.640815   
3                         Thompson Hotel                    Hotel  43.642753   
4                               Roywoods     Caribbean Restaurant  43.639099   
5                   Moksha Yoga Downtown              Yoga Studio  43.642353   
6                      Thor Espresso Bar                     Café  43.641998   
7                               Bar Buca       Italian Restaurant  43.643918   
8                              The Cloak                Speakeasy  43.643422   
9                   Belgian Moon Brewery                  Brewery  43.640516   
10                                  Cykl     Gym / Fitness Center  43.642778   
11                         Forno Cultura

                                         name               categories  \
0                         Real Sports Apparel      Sporting Goods Shop   
1                           Maple Leaf Square                    Plaza   
2                            Scotiabank Arena       Basketball Stadium   
3                             Air Canada Club                   Lounge   
4                            Le Germain Hotel                    Hotel   
5                       Pilot Coffee Roasters              Coffee Shop   
6                                        Miku      Japanese Restaurant   
7                                       WVRST                 Beer Bar   
8                                       TELUS              IT Services   
9                                  iQ Food Co              Salad Place   
10                  Longo's Maple Leaf Square              Supermarket   
11                                    Mos Mos              Coffee Shop   
12                      Union Pearson 

                                           name  \
0                         Adelaide Club Toronto   
1                                         Canoe   
2                           Brick Street Bakery   
3                         Pilot Coffee Roasters   
4                      John & Sons Oyster House   
5        The Keg Steakhouse + Bar - York Street   
6                            Equinox Bay Street   
7                                         Maman   
8                               Hy's Steakhouse   
9                         Dineen @CommerceCourt   
10                                 Indigospirit   
11                                    King Taps   
12                          Pumpernickel's Deli   
13                                 Cafe Landwer   
14                             Cactus Club Cafe   
15                                    Rosalinda   
16                               Mos Mos Coffee   
17                     John & Sons Oyster House   
18                     Boxcar S

                                name           categories        lat  \
0                      Fukuoka Sushi     Sushi Restaurant  43.780679   
1                             Cora's       Breakfast Spot  43.783417   
2         Faith Family Books & Gifts            Bookstore  43.783330   
3                   Timothy's Coffee          Coffee Shop  43.776855   
4           Jack Astor's Bar & Grill           Restaurant  43.777502   
5   Scaddabush Italian Kitchen & Bar   Italian Restaurant  43.777460   
6                     RBC Royal Bank                 Bank  43.776224   
7                     Pumpernickel's        Deli / Bodega  43.776882   
8                          Starbucks          Coffee Shop  43.783742   
9                     Mitra Hot Yoga          Yoga Studio  43.776812   
10               Teriyaki Experience  Japanese Restaurant  43.783539   
11                        Milestones           Restaurant  43.778060   
12             Ultimate Martial Arts    Martial Arts Dojo  43.77

### Dataframe cleanup and querying with user requirement 

#### User Requirement are :- 

    a. Asian or Indian Restaurant
    
    b. Gas station
    
    c. Bank
    
    d. Market or SuperMarket

4. Query the dataframe with required user requirement
5. Get the result set

In [21]:
# Creating copy of the dataframe and replacing space in the column name with "_"

df_tmp = df_toronto_Q1.copy()
df_concat_tmp = df_concat.copy()

df_concat_tmp.columns = df_concat_tmp.columns.str.replace(' ', '_')
df_tmp.set_index("PostalCode",inplace=True)

df_concat_tmp.head()


Unnamed: 0_level_0,Accessories_Store,Afghan_Restaurant,Airport,American_Restaurant,Art_Gallery,Art_Museum,Arts_&_Crafts_Store,Asian_Restaurant,Athletics_&_Sports,Auto_Dealership,...,Vegetarian_/_Vegan_Restaurant,Video_Game_Store,Video_Store,Vietnamese_Restaurant,Warehouse_Store,Wine_Bar,Wine_Shop,Wings_Joint,Women's_Store,Yoga_Studio
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M3A,,,,,,,,,,,...,,,,,,,,,,
M4A,,,,,,,,,,,...,,,,,,,,,,
M5A,,,,,,,,,,,...,,,,,,,1.0,,,1.0
M6A,,,,1.0,,,,,,,...,,1.0,,,,,,,3.0,
M7A,,,,,,,1.0,,,,...,,,,,,,,1.0,,


#### Concatenate original dataframe with that of Venues dataframe 

In [22]:
df_tmp.head()
df_with_venue = df_tmp.join(df_concat_tmp,on = ["PostalCode"] , rsuffix='_left')
df_with_venue.reset_index(inplace=True)
#df_with_venue.fillna(0,inplace=True)
df_with_venue.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Accessories_Store,Afghan_Restaurant,Airport,American_Restaurant,Art_Gallery,...,Vegetarian_/_Vegan_Restaurant,Video_Game_Store,Video_Store,Vietnamese_Restaurant,Warehouse_Store,Wine_Bar,Wine_Shop,Wings_Joint,Women's_Store,Yoga_Studio
0,M3A,North York,Parkwood,43.7545,-79.33,,,,,,...,,,,,,,,,,
1,M4A,North York,Victoria Villag,43.7276,-79.3148,,,,,,...,,,,,,,,,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfron",43.6555,-79.3626,,,,,,...,,,,,,,1.0,,,1.0
3,M6A,North York,"Lawrence Manor, Lawrence Height",43.7223,-79.4504,,,,1.0,,...,,1.0,,,,,,,3.0,
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Governmen",43.6641,-79.3889,,,,,,...,,,,,,,,1.0,,


#### User Requirement are :- 

    a. Asian or Indian Restaurant
    
    b. Gas station
    
    c. Bank
    
    d. Market or SuperMarket

In [23]:
df_venue_reqr = df_with_venue.loc[:,df_with_venue.columns.str.contains('postalcode|Borough|Neighborhood|latitude|longitude|india|asian|bank|gas|department|market', case=False,regex=True)]
df_venue_final = df_venue_reqr[(df_venue_reqr["Indian_Restaurant"].notnull() | df_venue_reqr["Asian_Restaurant"].notnull() ) & df_venue_reqr["Gas_Station"].notnull() & df_venue_reqr.Bank.notnull() & ( df_venue_reqr.Market.notnull() | df_venue_reqr.Supermarket.notnull() )  ]
df_venue_final.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Asian_Restaurant,Bank,Department_Store,Farmers_Market,Fish_Market,Flea_Market,Gas_Station,Gastropub,Indian_Restaurant,Market,Neighborhood_left,Supermarket
29,M4H,East York,Thorncliffe Par,43.7059,-79.3464,1.0,2.0,,,,,1.0,,3.0,,,1.0


### Other Scenario is that user is looking for the options in "North York"district and user wants to weigh in location within the district

#### Create a new dataframe where district is "North York"

In [24]:
NorthYork_data = df_toronto_Q1[df_toronto_Q1['Borough'] == 'North York'].reset_index(drop=True)
NorthYork_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwood,43.7545,-79.33
1,M4A,North York,Victoria Villag,43.7276,-79.3148
2,M6A,North York,"Lawrence Manor, Lawrence Height",43.7223,-79.4504
3,M3B,North York,Don Mill,43.745,-79.359
4,M6B,North York,Glencair,43.7081,-79.4479


#### Creating a map to look for all the location marked by the zip code in North York district

In [25]:
# create map of Totonto using latitude and longitude values
map_NorthYork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(NorthYork_data['latitude'], NorthYork_data['longitude'], NorthYork_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_NorthYork)  
    
map_NorthYork

#### Creating a function to get the nearby venues for the given latitude and longitudes within a given radius

##### User required input are 

        1. Name of the neighbourhood 
        2. Latitude
        3. Longitude
        4. Radius as optional since default value is set to 500
        
        Return values :- Set of nearby venues within defined radius

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Getting the nearby venues for the district "North York"

In [27]:
NorthYork_venues = getNearbyVenues(names=NorthYork_data['Neighborhood'],
                                   latitudes=NorthYork_data['latitude'],
                                   longitudes=NorthYork_data['longitude']
                                  )


Parkwood
Victoria Villag
Lawrence Manor, Lawrence Height
Don Mill
Glencair
Don Mill
Hillcrest Villag
Bathurst Manor, Wilson Heights, Downsview Nort
Fairview, Henry Farm, Oriol
Northwood Park, York Universit
Bayview Villag
Downsvie
York Mills, Silver Hill
Downsvie
North Park, Maple Leaf Park, Upwood Par
Humber Summi
Willowdale, Newtonbroo
Downsvie
Bedford Park, Lawrence Manor Eas
Humberlea, Emer
Willowdale, Willowdale Eas
Downsvie
York Mills Wes
Willowdale, Willowdale Wes


In [28]:
print(NorthYork_venues.shape)
NorthYork_venues.head()

(274, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwood,43.7545,-79.33,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwood,43.7545,-79.33,GTA Restoration,43.753396,-79.333477,Fireworks Store
2,Parkwood,43.7545,-79.33,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Villag,43.7276,-79.3148,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Villag,43.7276,-79.3148,Tim Hortons,43.725517,-79.313103,Coffee Shop


##### Get the venue count from the result set in the "North York" district 

In [29]:
NorthYork_venues.loc[:,["Neighborhood","Venue"]].groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue
Neighborhood,Unnamed: 1_level_1
"Bathurst Manor, Wilson Heights, Downsview Nort",8
Bayview Villag,3
"Bedford Park, Lawrence Manor Eas",24
Don Mill,9
Downsvie,28
"Fairview, Henry Farm, Oriol",50
Glencair,14
Hillcrest Villag,2
Humber Summi,2
"Humberlea, Emer",6


In [30]:
print('There are {} uniques categories.'.format(len(NorthYork_venues['Venue Category'].unique())))

There are 108 uniques categories.


In [31]:
# one hot encoding
NorthYork_onehot = pd.get_dummies(NorthYork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
NorthYork_onehot['Neighborhood'] = NorthYork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [NorthYork_onehot.columns[-1]] + list(NorthYork_onehot.columns[:-1])
NorthYork_onehot = NorthYork_onehot[fixed_columns]

NorthYork_onehot.head()

Unnamed: 0,Neighborhood,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Baseball Field,Basketball Court,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Women's Store,Yoga Studio
0,Parkwood,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwood,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwood,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Villag,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Villag,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Normalization of the dataset 

In [32]:
NorthYork_grouped = NorthYork_onehot.groupby('Neighborhood').mean().reset_index()
NorthYork_grouped

Unnamed: 0,Neighborhood,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Baseball Field,Basketball Court,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview Nort",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Villag,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor Eas",0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Don Mill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111
4,Downsvie,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0
5,"Fairview, Henry Farm, Oriol",0.0,0.02,0.0,0.02,0.02,0.04,0.02,0.04,0.0,...,0.0,0.02,0.0,0.02,0.02,0.0,0.02,0.0,0.04,0.0
6,Glencair,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Villag,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emer",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Get the top five location against each venue present in "North York" district against the frequency of number of appearance of the shop categories

In [33]:
num_top_venues = 5

for hood in NorthYork_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = NorthYork_grouped[NorthYork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview Nort----
                       venue  freq
0   Mediterranean Restaurant  0.12
1              Deli / Bodega  0.12
2                Coffee Shop  0.12
3  Middle Eastern Restaurant  0.12
4        Fried Chicken Joint  0.12


----Bayview Villag----
         venue  freq
0  Gas Station  0.33
1         Park  0.33
2        Trail  0.33
3      Airport  0.00
4    Pet Store  0.00


----Bedford Park, Lawrence Manor Eas----
                 venue  freq
0   Italian Restaurant  0.08
1       Sandwich Place  0.08
2          Coffee Shop  0.08
3           Restaurant  0.08
4  Japanese Restaurant  0.04


----Don Mill----
                    venue  freq
0                    Park  0.22
1            Home Service  0.22
2             Yoga Studio  0.11
3  Furniture / Home Store  0.11
4                    Pool  0.11


----Downsvie----
            venue  freq
0  Discount Store  0.07
1   Grocery Store  0.07
2     Coffee Shop  0.07
3     Pizza Place  0.07
4   Shopping Mall

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### clustering the dataframe on the most comman venue type using K-means clustering

In [35]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = NorthYork_grouped['Neighborhood']

for ind in np.arange(NorthYork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(NorthYork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview Nort",Spa,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Grocery Store,Mediterranean Restaurant,Fried Chicken Joint,Deli / Bodega,Food & Drink Shop,Dessert Shop
1,Bayview Villag,Park,Trail,Gas Station,Yoga Studio,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store
2,"Bedford Park, Lawrence Manor Eas",Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Liquor Store,Pub,Butcher,Pizza Place,Pharmacy,Hobby Shop
3,Don Mill,Park,Home Service,Gym,Construction & Landscaping,Pool,Furniture / Home Store,Yoga Studio,Gym / Fitness Center,Fireworks Store,Ice Cream Shop
4,Downsvie,Vietnamese Restaurant,Discount Store,Coffee Shop,Grocery Store,Pizza Place,Shopping Mall,Airport,Shoe Store,Home Service,Pharmacy


In [36]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

NorthYork_grouped_clustering = NorthYork_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(NorthYork_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 1, 2, 0])

In [37]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

NorthYork_merged = NorthYork_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
NorthYork_merged = NorthYork_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

NorthYork_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwood,43.7545,-79.33,0,Park,Fireworks Store,Food & Drink Shop,Yoga Studio,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store
1,M4A,North York,Victoria Villag,43.7276,-79.3148,0,Intersection,Portuguese Restaurant,Hockey Arena,French Restaurant,Park,Coffee Shop,Pizza Place,Bar,Baseball Field,Dessert Shop
2,M6A,North York,"Lawrence Manor, Lawrence Height",43.7223,-79.4504,0,Clothing Store,Coffee Shop,Women's Store,Men's Store,Cosmetics Shop,Toy / Game Store,Food Court,Electronics Store,Pharmacy,Kitchen Supply Store
3,M3B,North York,Don Mill,43.745,-79.359,0,Park,Home Service,Gym,Construction & Landscaping,Pool,Furniture / Home Store,Yoga Studio,Gym / Fitness Center,Fireworks Store,Ice Cream Shop
4,M6B,North York,Glencair,43.7081,-79.4479,0,Pizza Place,Sushi Restaurant,Grocery Store,Mediterranean Restaurant,Gas Station,Latin American Restaurant,Fast Food Restaurant,Fish Market,Japanese Restaurant,Italian Restaurant


In [38]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(NorthYork_merged['latitude'], NorthYork_merged['longitude'], NorthYork_merged['Neighborhood'], NorthYork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [39]:
NorthYork_merged.loc[NorthYork_merged['Cluster Labels'] == 0, NorthYork_merged.columns[[1] + list(range(5, NorthYork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Park,Fireworks Store,Food & Drink Shop,Yoga Studio,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store
1,North York,0,Intersection,Portuguese Restaurant,Hockey Arena,French Restaurant,Park,Coffee Shop,Pizza Place,Bar,Baseball Field,Dessert Shop
2,North York,0,Clothing Store,Coffee Shop,Women's Store,Men's Store,Cosmetics Shop,Toy / Game Store,Food Court,Electronics Store,Pharmacy,Kitchen Supply Store
3,North York,0,Park,Home Service,Gym,Construction & Landscaping,Pool,Furniture / Home Store,Yoga Studio,Gym / Fitness Center,Fireworks Store,Ice Cream Shop
4,North York,0,Pizza Place,Sushi Restaurant,Grocery Store,Mediterranean Restaurant,Gas Station,Latin American Restaurant,Fast Food Restaurant,Fish Market,Japanese Restaurant,Italian Restaurant
5,North York,0,Park,Home Service,Gym,Construction & Landscaping,Pool,Furniture / Home Store,Yoga Studio,Gym / Fitness Center,Fireworks Store,Ice Cream Shop
7,North York,0,Spa,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Grocery Store,Mediterranean Restaurant,Fried Chicken Joint,Deli / Bodega,Food & Drink Shop,Dessert Shop
8,North York,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Women's Store,Baseball Field,Bank,Juice Bar,Burrito Place,Food Court
9,North York,0,Pizza Place,Middle Eastern Restaurant,Metro Station,Massage Studio,Sports Bar,Sandwich Place,Yoga Studio,Fish Market,Department Store,Dessert Shop
10,North York,0,Park,Trail,Gas Station,Yoga Studio,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store


In [40]:
NorthYork_merged.loc[[0,6,22],:]

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwood,43.7545,-79.33,0,Park,Fireworks Store,Food & Drink Shop,Yoga Studio,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store
6,M2H,North York,Hillcrest Villag,43.8015,-79.3577,1,Park,Residential Building (Apartment / Condo),Yoga Studio,Cosmetics Shop,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store,Fast Food Restaurant
22,M2P,North York,York Mills Wes,43.75,-79.3978,1,Convenience Store,Park,French Restaurant,Department Store,Dessert Shop,Discount Store,Distribution Center,Electronics Store,Fast Food Restaurant,Fireworks Store


### Conclusion 

#### There were two scenario which was applied above

        1. When user provided the requirement, and requirement was to find out the location which fulfills its criteria. This was fulfilled by location having postalcode as M4H 
        
        2. When user is looking for a place within a given district and user wants to find out the details around that district. So,finally against the PostalCode "M3A","M2H","M2P", the most command place have been clustered and each venue type has been ranked against the number of occurance with that postal location