<h1>Grocery Dealer Recommendation System</h1>

In [1]:
import sys
!{sys.executable} -m pip install geocoder
!{sys.executable} -m pip install folium

import numpy as np
import pandas as pd
import geocoder
import requests 
from bs4 import BeautifulSoup 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
print('Libraries are imported.')

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 15.1MB/s ta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 14.0MB/s eta 0:00:01
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any

<h3>Scraping the Wikipedia page<h3>

In [2]:
website_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(website_url) 
  
soup = BeautifulSoup(r.content, 'html5lib') 
table = soup.find('div', attrs = {'id':'container'}) 
print("Scraped and got the data") 

Scraped and got the data


<h3>Wragling and Cleaning the data</h3>

In [3]:
postalCodes = [];
boroughs= [];
neighborhoods = [];
columnNum = 1;
selected = False

for row in soup.find_all('td'):
    for cell in row:
        if cell.string and cell.string[0].isalpha() and len(cell.string) > 2:
            selected = False
            if columnNum == 1:
                if selected == False and cell.string[1].isdigit():
                    postalCodes.append(cell.string);   
                    columnNum = 2
                else:
                    continue
            elif columnNum == 2 :
                if cell.string == 'Not assigned':
                    selected = True
                    del postalCodes[-1]
                    columnNum = 1
                    continue
                else:
                    boroughs.append(cell.string);      
                    columnNum = 3
            elif columnNum == 3 :
                if cell.string == 'Not assigned\n':
                    neighborhoods.append(boroughs[-1])
                else:
                    neighborhoods.append(cell.string); 
                columnNum = 1

print('Data cleaned')

Data cleaned


<h3>Creating dataframe<h3>

In [4]:
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
lat_lng_coords = None

neighborhood_df = pd.DataFrame(columns=column_names)

for data in range(len(neighborhoods)):
    code = postalCodes[data]
    borough = boroughs[data]
    neighborhood_name = neighborhoods[data]
    
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    lat_lng_coords = g.latlng
    
    neighborhood_df = neighborhood_df.append({ 'PostalCode': code,
                                   'Borough': borough,
                                   'Neighborhood': neighborhood_name,
                                   'Latitude': lat_lng_coords[0],
                                   'Longitude': lat_lng_coords[1]}, ignore_index=True)
    
neighborhood_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75242,-79.329242
1,M4A,North York,Victoria Village,43.7306,-79.313265
2,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166
3,M6A,North York,Lawrence Heights,43.72327,-79.451286
4,M6A,North York,Lawrence Manor,43.72327,-79.451286


<h3>Focusing on the "North York" Borough in Toronto</h3>

In [5]:
# selecting only neighborhoods regarding to "North York" borough.
north_york_data = neighborhood_df[neighborhood_df['Borough'] == 'North York']
north_york_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75242,-79.329242
1,M4A,North York,Victoria Village,43.7306,-79.313265
3,M6A,North York,Lawrence Heights,43.72327,-79.451286
4,M6A,North York,Lawrence Manor,43.72327,-79.451286
9,M3B,North York,Don Mills North,43.749055,-79.362227


<h3>Create a Map of North York and Its Neighbourhoods</h3>

In [6]:
address_scar = 'North York, Toronto'
latitude_scar = 43.7615
longitude_scar = -79.4111

map_north_york = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(north_york_data['Latitude'], north_york_data['Longitude'], north_york_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_north_york)  
    
map_north_york

<h3>Fetching from Foursquare API</h3>

In [7]:
def foursquareapi (postal_code_list, neighborhood_list, lat_list, lng_list):
    result_ds = []
    counter = 0
    VERSION = '20180605' # Foursquare API version
    radius = 500 # define radius
    LIMIT = 100 # limit of number of venues returned by Foursquare API
    CLIENT_ID = '3X1QIGDCUX3QU31VA5MIWGB54V1RFKR3I2EF02JLVSNNYZVO' # your Foursquare ID
    CLIENT_SECRET = 'NNJHGVZJ4FMUV5ARX0NN1HIEWDIO2DIQPPZI4XHNABXRLB1I' # your Foursquare Secret
    
    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighborhood(s)'] = neighborhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds;

<h3>Venues in the Neighborhoods inside "North York"</h3>

In [8]:
north_york_foursquare_dataset = foursquareapi(list(north_york_data['PostalCode']),
                                                   list(north_york_data['Neighborhood']),
                                                   list(north_york_data['Latitude']),
                                                   list(north_york_data['Longitude']),)

1.
Data is Obtained, for the Postal Code M3A (and Neighborhoods Parkwoods) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M4A (and Neighborhoods Victoria Village) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M6A (and Neighborhoods Lawrence Heights) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M6A (and Neighborhoods Lawrence Manor) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M3B (and Neighborhoods Don Mills North
) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M6B (and Neighborhoods Glencairn
) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M3C (and Neighborhoods Flemingdon Park) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M3C (and Neighborhoods Don Mills South
) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M2H (and Neighborhoods Hillcrest Village) SUCCESSFULLY.
10.
Data is Obtained, for the Postal Code M3H (and Neighborhoods Bathurst Manor) SUCCESSFULLY.
11.
Data is Obtained, for the Postal Code M3H (and Neigh

<h3>Cleaning the RAW Data and Exploring Neighborhoods Received from Foursquare Database</h3>

In [9]:
# This function is created to connect to the saved list which is the received database. It will extract each venue 
# for every neighborhood inside the database

def get_venue_dataset(foursquare_dataset):
    result_df = pd.DataFrame(columns = ['Postal Code', 'Neighborhood', 
                                           'Neighborhood Latitude', 'Neighborhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']; neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']; lng = neigh_dict['Longitude']
        
        for venue_dict in neigh_dict['Crawling_result']:
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            dist = venue_dict['venue']['location']['distance']
            cat =  venue_dict['venue']['categories'][0]['name']
            
            result_df = result_df.append({'Postal Code': postal_code, 'Neighborhood': neigh, 
                              'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': cat, 'Distance': dist}, ignore_index = True)
    
    return(result_df)

In [10]:
north_york_venues = get_venue_dataset(north_york_foursquare_dataset)
print("Explore Complete")

Explore Complete


<h3>Displaying Venues for Each Neighborhood in North York</h3>

In [11]:
north_york_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M3A,Parkwoods,43.75242,-79.329242,Brookbanks Park,This spot is popular,Park,238
1,M3A,Parkwoods,43.75242,-79.329242,Variety Store,This spot is popular,Food & Drink Shop,315
2,M4A,Victoria Village,43.7306,-79.313265,Wigmore Park,This spot is popular,Park,206
3,M4A,Victoria Village,43.7306,-79.313265,Memories of Africa,This spot is popular,Grocery Store,450
4,M4A,Victoria Village,43.7306,-79.313265,Guardian Drug,This spot is popular,Pharmacy,469


<h3>Summary Information about Neighborhoods inside "North York"</h3>

In [12]:
neighbor_list = list(north_york_venues['Neighborhood'].unique())
print('Number of Neighborhoods inside North York:')
print(len(neighbor_list))
print('List of Neighborhoods inside North York:')
neighbor_list

Number of Neighborhoods inside North York:
33
List of Neighborhoods inside North York:


['Parkwoods',
 'Victoria Village',
 'Lawrence Heights',
 'Lawrence Manor',
 'Don Mills North\n',
 'Glencairn\n',
 'Flemingdon Park',
 'Don Mills South\n',
 'Hillcrest Village',
 'Fairview\n',
 'Henry Farm',
 'Oriole\n',
 'Northwood Park',
 'York University',
 'Bayview Village',
 'CFB Toronto',
 'Downsview East\n',
 'Downsview West',
 'Downsview',
 'North Park\n',
 'Upwood Park\n',
 'Humber Summit',
 'Newtonbrook',
 'Willowdale',
 'Downsview Central\n',
 'Bedford Park',
 'Lawrence Manor East\n',
 'Emery',
 'Humberlea',
 'Willowdale South\n',
 'Downsview Northwest\n',
 'York Mills West\n',
 'Willowdale West']

<h3> After Grouping</h3>

In [13]:
neighbor_venue_summary = north_york_venues.groupby('Neighborhood').count()
neighbor_venue_summary.head()

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bayview Village,4,4,4,4,4,4,4
Bedford Park,19,19,19,19,19,19,19
CFB Toronto,4,4,4,4,4,4,4
Don Mills North,3,3,3,3,3,3,3
Don Mills South,8,8,8,8,8,8,8


<h3>Unique Categories</h3>

In [14]:
print('There are {} uniques categories.'.format(len(north_york_venues['Venue Category'].unique())))

print('Here is the list of different categories:')
list(north_york_venues['Venue Category'].unique())

There are 110 uniques categories.
Here is the list of different categories:


['Park',
 'Food & Drink Shop',
 'Grocery Store',
 'Pharmacy',
 'Clothing Store',
 'Electronics Store',
 'Cosmetics Shop',
 'Toy / Game Store',
 'Shopping Mall',
 "Men's Store",
 'Jewelry Store',
 'American Restaurant',
 'Tea Room',
 'Kitchen Supply Store',
 'Furniture / Home Store',
 'Restaurant',
 'Chocolate Shop',
 'Leather Goods Store',
 'Coffee Shop',
 'Dessert Shop',
 'Food Court',
 'Greek Restaurant',
 'Stationery Store',
 'Boutique',
 'Bookstore',
 'Mediterranean Restaurant',
 'Bakery',
 'Department Store',
 'Movie Theater',
 'Frozen Yogurt Shop',
 'Burger Joint',
 'Fried Chicken Joint',
 'Café',
 'Smoothie Shop',
 'Sushi Restaurant',
 'Sporting Goods Shop',
 'Platform',
 'Metro Station',
 'Rental Car Location',
 'Convenience Store',
 'Gas Station',
 'Latin American Restaurant',
 'Japanese Restaurant',
 'Fast Food Restaurant',
 'Pizza Place',
 'Asian Restaurant',
 'Supermarket',
 'Gym',
 'Beer Store',
 'Intersection',
 'Smoke Shop',
 'Bubble Tea Shop',
 'Residential Building (Ap

<h3>One-hot Encoding the "categroies" Column into Every Unique Categorical Feature</h3>

In [15]:
# one hot encoding
north_york_onehot = pd.get_dummies(data = north_york_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
north_york_onehot.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,Airport,American Restaurant,Arts & Crafts Store,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,M3A,Parkwoods,43.75242,-79.329242,Brookbanks Park,This spot is popular,238,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,Parkwoods,43.75242,-79.329242,Variety Store,This spot is popular,315,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4A,Victoria Village,43.7306,-79.313265,Wigmore Park,This spot is popular,206,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4A,Victoria Village,43.7306,-79.313265,Memories of Africa,This spot is popular,450,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4A,Victoria Village,43.7306,-79.313265,Guardian Drug,This spot is popular,469,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<h3>Getting Columns</h3>

In [16]:
for col in north_york_onehot.columns: 
    print(col) 

Postal Code
Neighborhood
Neighborhood Latitude
Neighborhood Longitude
Venue
Venue Summary
Distance
Airport
American Restaurant
Arts & Crafts Store
Asian Restaurant
Bakery
Bank
Bar
Basketball Court
Beer Store
Bookstore
Boutique
Bubble Tea Shop
Burger Joint
Burrito Place
Bus Line
Business Service
Butcher
Café
Camera Store
Candy Store
Caribbean Restaurant
Chocolate Shop
Clothing Store
Coffee Shop
Comfort Food Restaurant
Construction & Landscaping
Convenience Store
Cosmetics Shop
Deli / Bodega
Department Store
Dessert Shop
Discount Store
Dog Run
Eastern European Restaurant
Electronics Store
Falafel Restaurant
Fast Food Restaurant
Food & Drink Shop
Food Court
Fried Chicken Joint
Frozen Yogurt Shop
Furniture / Home Store
Gas Station
Gift Shop
Greek Restaurant
Grocery Store
Gym
Gym / Fitness Center
Hockey Arena
Home Service
Hookah Bar
Hotel
Ice Cream Shop
Indian Restaurant
Indonesian Restaurant
Intersection
Italian Restaurant
Japanese Restaurant
Jewelry Store
Juice Bar
Kitchen Supply Store
Ko

<h3>Manually Selecting (Subsetting) Related Features for the Groceries Contractor</h3>

In [17]:
# This list is created manually 
important_list_of_features = [

'Neighborhood',
'Neighborhood Latitude',
'Neighborhood Longitude',
'Bakery',
'Bar',
'Bubble Tea Shop',
'Burger Joint',
'Burrito Place',
'Café',
'Caribbean Restaurant',
'Coffee Shop',
'Comfort Food Restaurant',
'Falafel Restaurant',
'Fast Food Restaurant',
'Food & Drink Shop',
'Food Court',
'Fried Chicken Joint',
'Greek Restaurant',
'Grocery Store',
'Indian Restaurant',
'Indonesian Restaurant',
'Italian Restaurant',
'Japanese Restaurant',
'Juice Bar',
'Kitchen Supply Store',
'Korean Restaurant',
'Latin American Restaurant',
'Market',
'Mediterranean Restaurant',
'Middle Eastern Restaurant',
'Pizza Place',
'Ramen Restaurant',
'Restaurant',
'Sandwich Place',
'Smoothie Shop',
'Supermarket',
'Sushi Restaurant',
'Tea Room',
'Thai Restaurant',
'Vietnamese Restaurant',
'Wings Joint']

<h3>Updating the One-hot Encoded DataFrame and Grouping the Data by Neighborhoods</h3>

In [18]:
north_york_onehot = north_york_onehot[important_list_of_features].drop(
    columns = ['Neighborhood Latitude', 'Neighborhood Longitude']).groupby(
    'Neighborhood').sum()


north_york_onehot.head()

Unnamed: 0_level_0,Bakery,Bar,Bubble Tea Shop,Burger Joint,Burrito Place,Café,Caribbean Restaurant,Coffee Shop,Comfort Food Restaurant,Falafel Restaurant,...,Ramen Restaurant,Restaurant,Sandwich Place,Smoothie Shop,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Vietnamese Restaurant,Wings Joint
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bayview Village,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Bedford Park,0,0,0,0,0,1,0,2,1,0,...,0,1,1,0,0,1,0,1,0,0
CFB Toronto,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Don Mills North,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Don Mills South,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0


<h3>Integrating Different Restaurants and Different Joints </h3><br>
<h4>(Assuming Different Resaturants Use the Same Raw Groceries) </h4>

In [19]:
feat_name_list = list(north_york_onehot.columns)
restaurant_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
        
north_york_onehot['Total Restaurants'] = north_york_onehot[restaurant_list].sum(axis = 1)
north_york_onehot = north_york_onehot.drop(columns = restaurant_list)


feat_name_list = list(north_york_onehot.columns)
joint_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
north_york_onehot['Total Joints'] = north_york_onehot[joint_list].sum(axis = 1)
north_york_onehot = north_york_onehot.drop(columns = joint_list)

<h3>Showing the Fully-Processed DataFrame about Neighborhoods inside North York.</h3><br>
<h3>This Dataset is Ready for any Machine Learning Algorithm.</h3>

In [20]:
north_york_onehot.head()

Unnamed: 0_level_0,Bakery,Bar,Bubble Tea Shop,Burrito Place,Café,Coffee Shop,Food & Drink Shop,Food Court,Grocery Store,Juice Bar,Kitchen Supply Store,Market,Pizza Place,Sandwich Place,Smoothie Shop,Supermarket,Tea Room,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Bayview Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Bedford Park,0,0,0,0,1,2,0,0,0,1,0,0,0,1,0,0,0,9,0
CFB Toronto,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0
Don Mills North,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
Don Mills South,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0


<h3>Run k-means to Cluster Neighborhoods into 5 Clusters</h3>

In [21]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(north_york_onehot)

<h3>Showing Centers of Each Cluster</h3>

In [22]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = north_york_onehot.columns
means_df.index = ['G1','G2','G3','G4','G5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Bakery,Bar,Bubble Tea Shop,Burrito Place,Café,Coffee Shop,Food & Drink Shop,Food Court,Grocery Store,Juice Bar,Kitchen Supply Store,Market,Pizza Place,Sandwich Place,Smoothie Shop,Supermarket,Tea Room,Total Restaurants,Total Joints,Total Sum
G3,0.0,0.0,1.387779e-17,1.0,5.5511150000000004e-17,3.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,6.0,2.0,18.0
G2,0.4,0.0,0.2,0.0,2.0,1.8,0.0,0.0,0.6,0.6,0.0,0.2,0.6,0.8,0.0,0.4,0.0,8.6,0.6,16.8
G5,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,4.0,2.0,14.0
G4,0.0,0.5,0.0,0.0,0.0,0.75,0.0,0.0,0.75,0.0,0.0,0.0,1.75,0.25,0.0,0.0,0.0,4.75,0.25,9.0
G1,0.263158,3.4694470000000005e-17,0.1052632,2.775558e-17,1.110223e-16,0.526316,0.052632,0.105263,0.210526,5.5511150000000004e-17,3.4694470000000005e-17,1.734723e-17,0.052632,2.775558e-17,2.775558e-17,0.105263,1.387779e-16,0.105263,0.052632,1.578947


<h3>Result:</h3><br>
 1. Best Group is G3 <br>
 2. Second Best Group is G2 <br>
 3. Third Best Group is G5 <br>
 <br>
**Inserting "kmeans.labels_" into the Original Scarborough DataFrame.
Finding the Corresponding Group for Each Neighborhood.**

In [25]:
neigh_summary = pd.DataFrame([north_york_onehot.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighborhood', 'Group']
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Bayview Village,1
1,Bedford Park,2
2,CFB Toronto,1
3,Don Mills North,1
4,Don Mills South,1
5,Downsview,1
6,Downsview Central,1
7,Downsview East,1
8,Downsview Northwest,4
9,Downsview West,1


<h3>Deducing Results:</h3><br>
<h3>Best Neighborhood Are...</h3>

In [34]:
neigh_summary[neigh_summary['Group'] == 3]

Unnamed: 0,Neighborhood,Group
11,Fairview,3
14,Henry Farm,3
24,Oriole,3


<h3> Second Best Neighborhood Are..</h3>

In [36]:
neigh_summary[neigh_summary['Group'] == 2]

Unnamed: 0,Neighborhood,Group
1,Bedford Park,2
20,Lawrence Manor East,2
21,Newtonbrook,2
28,Willowdale,2
29,Willowdale South,2


<h3> Third Best Neighborhood Are..</h3>

In [37]:
neigh_summary[neigh_summary['Group'] == 5]

Unnamed: 0,Neighborhood,Group
18,Lawrence Heights,5
19,Lawrence Manor,5


<h3> Thus the grocery dealer who opens the dealer ship in this particular are will profit a lot when compared to the rest of the area as the opportunity are more in these location</h3>