# Importing libraries

In [2]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup
import requests # library to handle requests
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Lift pandas' display limits: None disables truncation of columns and rows,
# so any DataFrame shown later in the notebook is rendered in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
print('Libraries are imported.')

Libraries are imported.


# Web Scraping of the Dataset

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Download the page with a timeout so a stalled connection cannot hang the notebook,
# and fail loudly on an HTTP error instead of parsing an error page as if it were data.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.text  # raw HTML, parsed by the scraping cell

In [4]:
# Scraping the table from the wikipedia webpage, and then re-organizing the data into a dataframe
soup = BeautifulSoup(response,'lxml')
Canada_Table = soup.find('table',{'class':'wikitable sortable'})
if Canada_Table is None:
    # soup.find returns None when nothing matches; stop with a clear message
    # rather than an AttributeError on None.find_all below.
    raise ValueError('No table with class "wikitable sortable" was found in the downloaded page.')
table_rows = Canada_Table.find_all('tr')
data = []
for row in table_rows:
    cells = [t.text.strip() for t in row.find_all('td')]
    # Rows without any <td> cell (e.g. a header row built from <th>) carry no data; skip them
    # so they do not become all-null records in the dataframe.
    if cells:
        data.append(cells)

df = pd.DataFrame(data, columns= ['Postcode', 'Borough', 'Neighbourhood'])

In [5]:
# Keep only the rows whose 'Postcode' cell is populated
df = df.dropna(subset=['Postcode'])

In [6]:
# Discard the rows whose Borough is the placeholder 'Not assigned'
df = df.loc[df['Borough'].ne('Not assigned')]

In [7]:
# Collapse the rows sharing the same ('Postcode', 'Borough') pair into a single row;
# the values of every remaining column are joined with ', '
df = df.groupby(['Postcode', 'Borough']).agg(', '.join)

In [8]:
# Turn the ('Postcode', 'Borough') group keys back into ordinary columns
df = df.reset_index(level=['Postcode', 'Borough'])

In [9]:
# Where the neighbourhood is the placeholder 'Not assigned', fall back to the borough name
df['Neighbourhood'] = df['Neighbourhood'].where(df['Neighbourhood'] != 'Not assigned', df['Borough'])

In [10]:
# Preview the first rows only: display.max_rows is None in this notebook,
# so a bare `df` would render every row of the frame.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [11]:
# (rows, columns) of the cleaned postcode table
df.shape

(103, 3)

In [12]:
# Reading the geospatial data into a dataframe.
# The merge below uses its 'Postal Code' column; 'Latitude' and 'Longitude' appear in the merged output.
Locations = pd.read_csv('https://cocl.us/Geospatial_data')

In [13]:
# Attach the coordinates to every postcode (inner join on the two postal-code columns)
df_toronto = pd.merge(df, Locations, how='inner', left_on='Postcode', right_on='Postal Code')

In [14]:
# 'Postal Code' duplicates 'Postcode' after the merge, so remove it
df_toronto = df_toronto.drop(columns=['Postal Code'])

In [15]:
# Preview the merged table (postcode, borough, neighbourhoods, coordinates)
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Map of Toronto City

In [16]:
import folium

# Coordinates the map is centred on
toronto_latitude = 43.6932
toronto_longitude = -79.3832
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10.7)

# Draw one circle marker per row of df_toronto; its popup reads "<neighbourhoods>, <borough>"
for lat, lng, borough, neighborhood in zip(
        df_toronto['Latitude'],
        df_toronto['Longitude'],
        df_toronto['Borough'],
        df_toronto['Neighbourhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    circle = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    circle.add_to(map_toronto)

# Last expression of the cell: renders the map
map_toronto

In [17]:
def foursquare_crawler (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    """Query the Foursquare ``venues/explore`` endpoint once per location.

    The four list arguments are parallel sequences that are walked together
    with ``zip`` (so iteration stops at the shortest one). The module-level
    names ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION`` are read at call
    time and must be defined before this function is called.

    Parameters
    ----------
    postal_code_list : iterable
        Postal code of each location.
    neighborhood_list : iterable
        Neighbourhood name(s) of each location.
    lat_list, lng_list : iterable
        Latitude and longitude of each location; sent as the ``ll`` parameter.
    LIMIT : int, default 500
        Value sent as the ``limit`` query parameter.
    radius : int, default 1000
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    list of dict
        One dict per location with the keys 'Postal Code', 'Neighborhood(s)',
        'Latitude', 'Longitude' and 'Crawling_result'; the last one holds
        ``response['groups'][0]['items']`` of the JSON reply.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.Timeout
        If the API does not answer within 30 seconds.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    result_ds = []
    locations = zip(postal_code_list, neighborhood_list, lat_list, lng_list)
    for counter, (postal_code, neighborhood, lat, lng) in enumerate(locations, start=1):

        # let requests build and escape the query string instead of formatting it by hand
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; a timeout keeps one stalled call from blocking the whole crawl,
        # and raise_for_status reports an API error as such rather than as a KeyError below
        reply = requests.get(endpoint, params=query, timeout=30)
        reply.raise_for_status()
        results = reply.json()["response"]['groups'][0]['items']

        result_ds.append({
            'Postal Code': postal_code,
            'Neighborhood(s)': neighborhood,
            'Latitude': lat,
            'Longitude': lng,
            'Crawling_result': results,
        })
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds

In [18]:
# Restrict the Toronto table to the rows of the 'Scarborough' borough
scarborough_data = df_toronto.loc[df_toronto['Borough'].eq('Scarborough')]

scarborough_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [19]:
import os
import warnings

# Credentials are read from the environment so that they are never stored in the notebook.
# Define FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): a key pair that has ever been committed in clear text should be revoked and re-issued.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    # Warn early: without credentials the Foursquare requests made later cannot succeed.
    warnings.warn('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
                  'Foursquare API calls will fail.')

# Venues in the Neighborhoods inside Scarborough

In [20]:
print('Crawling different neighborhoods inside "Scarborough"')
# One API call per Scarborough postcode; each column is handed over as a plain list
Scarborough_foursquare_dataset = foursquare_crawler(
    postal_code_list=list(scarborough_data['Postcode']),
    neighborhood_list=list(scarborough_data['Neighbourhood']),
    lat_list=list(scarborough_data['Latitude']),
    lng_list=list(scarborough_data['Longitude']),
)

Crawling different neighborhoods inside "Scarborough"
1.
Data is Obtained, for the Postal Code M1B (and Neighborhoods Rouge, Malvern) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M1C (and Neighborhoods Highland Creek, Rouge Hill, Port Union) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M1E (and Neighborhoods Guildwood, Morningside, West Hill) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M1G (and Neighborhoods Woburn) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M1H (and Neighborhoods Cedarbrae) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M1J (and Neighborhoods Scarborough Village) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M1K (and Neighborhoods East Birchmount Park, Ionview, Kennedy Park) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M1L (and Neighborhoods Clairlea, Golden Mile, Oakridge) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M1M (and Neighborhoods Cliffcrest, Cliffside, Scarborough Village West) 

# Saving the Results to Reduce the Number of Calls to the Foursquare API

In [21]:
import pickle
# Cache the raw crawl results on disk so that they can be reloaded without calling the API again.
# The file is opened in binary mode and holds pickle data despite its ".txt" extension.
with open("Scarborough_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(Scarborough_foursquare_dataset, fp)
print('Received Data from Internet is Saved to Computer.')

Received Data from Internet is Saved to Computer.


In [22]:
# Reload the cached crawl results written by the previous cell.
# NOTE: pickle.load can execute arbitrary code; only load a file that this notebook produced itself.
with open("Scarborough_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    Scarborough_foursquare_dataset = pickle.load(fp)

# Cleaning the Raw Data from the Foursquare Database

In [23]:
def get_venue_dataset(foursquare_dataset):
    """Flatten crawled Foursquare results into a DataFrame with one row per venue.

    Parameters
    ----------
    foursquare_dataset : iterable of dict
        One dict per location with the keys 'Postal Code', 'Neighborhood(s)',
        'Latitude', 'Longitude' and 'Crawling_result'. 'Crawling_result' is a
        list of items, each providing ``item['venue']['name']``,
        ``item['venue']['location']['distance']``,
        ``item['venue']['categories'][0]['name']`` and
        ``item['reasons']['items'][0]['summary']``.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Postal Code', 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Summary', 'Venue Category',
        'Distance'. An input without venues yields an empty frame that still
        has these columns.

    Notes
    -----
    A venue whose 'categories' or 'reasons' list is missing or empty gets
    ``None`` in the corresponding column instead of raising. The number of
    venues of every location is printed as a progress report.
    """
    columns = ['Postal Code', 'Neighborhood',
               'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Summary', 'Venue Category', 'Distance']

    # Collect plain dicts and build the frame once at the end: growing a DataFrame
    # row by row copies it on every step, and DataFrame.append no longer exists in pandas 2.x.
    records = []

    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']
        neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']
        lng = neigh_dict['Longitude']
        print('Number of Venuse in Coordination "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        print(len(neigh_dict['Crawling_result']))

        for venue_dict in neigh_dict['Crawling_result']:
            venue = venue_dict['venue']
            # Either list may be absent or empty for a venue; fall back to None in that case
            reasons = (venue_dict.get('reasons') or {}).get('items') or []
            categories = venue.get('categories') or []

            records.append({'Postal Code': postal_code, 'Neighborhood': neigh,
                            'Neighborhood Latitude': lat, 'Neighborhood Longitude': lng,
                            'Venue': venue['name'],
                            'Venue Summary': reasons[0]['summary'] if reasons else None,
                            'Venue Category': categories[0]['name'] if categories else None,
                            'Distance': venue['location']['distance']})

    # Passing the column list keeps the column order fixed and the schema present for empty input
    return pd.DataFrame(records, columns = columns)

In [24]:
# Flatten the cached crawl results into a table with one row per venue
scarborough_venues = get_venue_dataset(Scarborough_foursquare_dataset)

Number of Venuse in Coordination "M1B" Posal Code and "Rouge, Malvern" Negihborhood(s) is:
18
Number of Venuse in Coordination "M1C" Posal Code and "Highland Creek, Rouge Hill, Port Union" Negihborhood(s) is:
5
Number of Venuse in Coordination "M1E" Posal Code and "Guildwood, Morningside, West Hill" Negihborhood(s) is:
26
Number of Venuse in Coordination "M1G" Posal Code and "Woburn" Negihborhood(s) is:
8
Number of Venuse in Coordination "M1H" Posal Code and "Cedarbrae" Negihborhood(s) is:
27
Number of Venuse in Coordination "M1J" Posal Code and "Scarborough Village" Negihborhood(s) is:
12
Number of Venuse in Coordination "M1K" Posal Code and "East Birchmount Park, Ionview, Kennedy Park" Negihborhood(s) is:
23
Number of Venuse in Coordination "M1L" Posal Code and "Clairlea, Golden Mile, Oakridge" Negihborhood(s) is:
30
Number of Venuse in Coordination "M1M" Posal Code and "Cliffcrest, Cliffside, Scarborough Village West" Negihborhood(s) is:
12
Number of Venuse in Coordination "M1N" Pos

# Showing Venues for Each Neighborhood in Scarborough

In [25]:
# First rows of the venue table
scarborough_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M1B,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,This spot is popular,Spa,595
1,M1B,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,This spot is popular,Paper / Office Supplies Store,735
2,M1B,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,This spot is popular,Caribbean Restaurant,912
3,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,Fast Food Restaurant,600
4,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,Fast Food Restaurant,387


In [26]:
# Last rows of the venue table
scarborough_venues.tail()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
373,M1W,L'Amoreaux West,43.799525,-79.318389,Pharmacy Ave. & Finch Ave. E,This spot is popular,Intersection,830
374,M1W,L'Amoreaux West,43.799525,-79.318389,Divine Wok Restaurant,This spot is popular,Chinese Restaurant,957
375,M1W,L'Amoreaux West,43.799525,-79.318389,Buddy Cafe,This spot is popular,Chinese Restaurant,973
376,M1W,L'Amoreaux West,43.799525,-79.318389,Olympian Swimming,This spot is popular,Gym Pool,978
377,M1W,L'Amoreaux West,43.799525,-79.318389,Red Sail Boat Bakery 紅帆船西餅麵包,This spot is popular,Bakery,966


# Saving the Cleaned Foursquare Results Dataframe

In [27]:
# Persist the venue table. The row index is written as well (index=False is not passed),
# which is why the frame read back from this file carries an extra 'Unnamed: 0' column.
scarborough_venues.to_csv('scarborough_venues.csv')

In [28]:
# Loading the dataset back from the CSV written above.
# The saved index comes back as an 'Unnamed: 0' column (visible in the one-hot table further down).
scarborough_venues = pd.read_csv('scarborough_venues.csv')

In [29]:
# Distinct neighbourhood labels, in order of first appearance in the venue table
neigh_list = scarborough_venues['Neighborhood'].unique().tolist()
print('Number of Neighborhoods inside Scarborough:', len(neigh_list), sep='\n')
print('List of Neighborhoods inside Scarborough:')
neigh_list

Number of Neighborhoods inside Scarborough:
16
List of Neighborhoods inside Scarborough:


['Rouge, Malvern',
 'Highland Creek, Rouge Hill, Port Union',
 'Guildwood, Morningside, West Hill',
 'Woburn',
 'Cedarbrae',
 'Scarborough Village',
 'East Birchmount Park, Ionview, Kennedy Park',
 'Clairlea, Golden Mile, Oakridge',
 'Cliffcrest, Cliffside, Scarborough Village West',
 'Birch Cliff, Cliffside West',
 'Dorset Park, Scarborough Town Centre, Wexford Heights',
 'Maryvale, Wexford',
 'Agincourt',
 "Clarks Corners, Sullivan, Tam O'Shanter",
 "Agincourt North, L'Amoreaux East, Milliken, Steeles East",
 "L'Amoreaux West"]

In [30]:
# Number of non-null entries per column for every neighbourhood (i.e. venues per neighbourhood)
neigh_venue_summary = scarborough_venues.groupby('Neighborhood').count()
# 'Unnamed: 0' only exists because the CSV round trip stored the index; errors='ignore'
# keeps this cell working when the table is loaded without that artefact column.
neigh_venue_summary.drop(columns = ['Unnamed: 0'], errors = 'ignore').head()

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Agincourt,45,45,45,45,45,45,45
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",30,30,30,30,30,30,30
"Birch Cliff, Cliffside West",12,12,12,12,12,12,12
Cedarbrae,27,27,27,27,27,27,27
"Clairlea, Golden Mile, Oakridge",30,30,30,30,30,30,30


In [31]:
# Count the distinct venue categories, then list them in order of first appearance
print('There are {} uniques categories.'.format(
    scarborough_venues['Venue Category'].nunique(dropna=False)))

print('Here is the list of different categories:')
scarborough_venues['Venue Category'].unique().tolist()

There are 114 uniques categories.
Here is the list of different categories:


['Spa',
 'Paper / Office Supplies Store',
 'Caribbean Restaurant',
 'Fast Food Restaurant',
 'Coffee Shop',
 'Hobby Shop',
 'Martial Arts Dojo',
 'Trail',
 'Auto Workshop',
 'Chinese Restaurant',
 'Greek Restaurant',
 'Fruit & Vegetable Store',
 'Gym',
 'Bakery',
 'Sandwich Place',
 'Park',
 'Burger Joint',
 'Italian Restaurant',
 'Breakfast Spot',
 'Playground',
 'Fried Chicken Joint',
 'Liquor Store',
 'Food & Drink Shop',
 'Pizza Place',
 'Smoothie Shop',
 'Beer Store',
 'Sports Bar',
 'Pharmacy',
 'Bank',
 'Discount Store',
 'Medical Center',
 'Supermarket',
 'Grocery Store',
 'Salon / Barbershop',
 'Bus Station',
 'Video Game Store',
 'Indian Restaurant',
 'Electronics Store',
 'Hakka Restaurant',
 'Music Store',
 'Thai Restaurant',
 'Athletics & Sports',
 'Wings Joint',
 'Yoga Studio',
 'Board Shop',
 'IT Services',
 'Sporting Goods Shop',
 'Convenience Store',
 'Restaurant',
 'Train Station',
 'Japanese Restaurant',
 'Bowling Alley',
 'Department Store',
 'Metro Station',
 'Ligh

In [32]:
# One-hot encode 'Venue Category': every category becomes an indicator column
# named exactly after the category (empty prefix and separator)
scarborough_onehot = pd.get_dummies(
    scarborough_venues,
    columns=['Venue Category'],
    prefix="",
    prefix_sep="",
    drop_first=False,
)
scarborough_onehot.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,Badminton Court,Bakery,Bank,Bar,Beach,Beer Store,Board Shop,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store,Event Space,Fast Food Restaurant,Fish Market,Flea Market,Food & Drink Shop,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Gas Station,General Entertainment,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym Pool,Hakka Restaurant,Hobby Shop,Hong Kong Restaurant,Hotpot Restaurant,IT Services,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Malay Restaurant,Martial Arts Dojo,Medical Center,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motorcycle Shop,Music Store,Noodle House,Other Great Outdoors,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Print Shop,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Shop,Wings Joint,Yoga Studio
0,0,M1B,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,This spot is popular,595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,M1B,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,This spot is popular,735,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,M1B,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,This spot is popular,912,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,387,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Manually Selecting the Features for the Groceries

# Grouping the Data by Neighborhood

In [49]:
# Columns kept from the one-hot table: the neighbourhood name and its coordinates,
# followed by the venue-category indicator columns.
# NOTE(review): the category entries look like the full list of unique categories
# printed earlier rather than a hand-picked subset -- confirm that this is intended.
important_list_of_features = [

 'Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',
 'Spa',
 'Paper / Office Supplies Store',
 'Caribbean Restaurant',
 'Fast Food Restaurant',
 'Coffee Shop',
 'Hobby Shop',
 'Martial Arts Dojo',
 'Trail',
 'Auto Workshop',
 'Chinese Restaurant',
 'Greek Restaurant',
 'Fruit & Vegetable Store',
 'Gym',
 'Bakery',
 'Sandwich Place',
 'Park',
 'Burger Joint',
 'Italian Restaurant',
 'Breakfast Spot',
 'Playground',
 'Fried Chicken Joint',
 'Liquor Store',
 'Food & Drink Shop',
 'Pizza Place',
 'Smoothie Shop',
 'Beer Store',
 'Sports Bar',
 'Pharmacy',
 'Bank',
 'Discount Store',
 'Medical Center',
 'Supermarket',
 'Grocery Store',
 'Salon / Barbershop',
 'Bus Station',
 'Video Game Store',
 'Indian Restaurant',
 'Electronics Store',
 'Hakka Restaurant',
 'Music Store',
 'Thai Restaurant',
 'Athletics & Sports',
 'Wings Joint',
 'Yoga Studio',
 'Board Shop',
 'IT Services',
 'Sporting Goods Shop',
 'Convenience Store',
 'Restaurant',
 'Train Station',
 'Japanese Restaurant',
 'Bowling Alley',
 'Department Store',
 'Metro Station',
 'Light Rail Station',
 'Rental Car Location',
 'Asian Restaurant',
 'Mexican Restaurant',
 'Intersection',
 'Bus Line',
 'Soccer Field',
 'Diner',
 'Pub',
 'General Entertainment',
 'Beach',
 'Furniture / Home Store',
 'Cajun / Creole Restaurant',
 'Café',
 'Skating Rink',
 'College Stadium',
 'Gym Pool',
 'Latin American Restaurant',
 'Pet Store',
 'Vietnamese Restaurant',
 'Wine Shop',
 'Plaza',
 'Shoe Store',
 'Clothing Store',
 'Automotive Shop',
 'Dessert Shop',
 'Korean Restaurant',
 'Fish Market',
 'Middle Eastern Restaurant',
 'Seafood Restaurant',
 'Badminton Court',
 'Bar',
 'Gas Station',
 'Print Shop',
 'Flea Market',
 'Sri Lankan Restaurant',
 'Cantonese Restaurant',
 'Noodle House',
 'Malay Restaurant',
 'Lounge',
 'Pool Hall',
 'Sushi Restaurant',
 'Shopping Mall',
 'Mediterranean Restaurant',
 'Pool',
 'Shanghai Restaurant',
 'Motorcycle Shop',
 'Hong Kong Restaurant',
 'Bus Stop',
 'Golf Course',
 'Taiwanese Restaurant',
 'Deli / Bodega',
 'Vegetarian / Vegan Restaurant',
 'Event Space',
 'Shop & Service',
 'Bubble Tea Shop',
 'Hotpot Restaurant',
 'Cosmetics Shop',
 'Thrift / Vintage Store',
 'Other Great Outdoors'
]

In [51]:
# Keep the selected columns, leave the coordinates out of the totals, and
# add up the category indicators of every neighbourhood
scarborough_onehot = (
    scarborough_onehot[important_list_of_features]
    .drop(columns=['Neighborhood Latitude', 'Neighborhood Longitude'])
    .groupby('Neighborhood')
    .sum()
)

scarborough_onehot.head()

Unnamed: 0_level_0,Spa,Paper / Office Supplies Store,Caribbean Restaurant,Fast Food Restaurant,Coffee Shop,Hobby Shop,Martial Arts Dojo,Trail,Auto Workshop,Chinese Restaurant,Greek Restaurant,Fruit & Vegetable Store,Gym,Bakery,Sandwich Place,Park,Burger Joint,Italian Restaurant,Breakfast Spot,Playground,Fried Chicken Joint,Liquor Store,Food & Drink Shop,Pizza Place,Smoothie Shop,Beer Store,Sports Bar,Pharmacy,Bank,Discount Store,Medical Center,Supermarket,Grocery Store,Salon / Barbershop,Bus Station,Video Game Store,Indian Restaurant,Electronics Store,Hakka Restaurant,Music Store,Thai Restaurant,Athletics & Sports,Wings Joint,Yoga Studio,Board Shop,IT Services,Sporting Goods Shop,Convenience Store,Restaurant,Train Station,Japanese Restaurant,Bowling Alley,Department Store,Metro Station,Light Rail Station,Rental Car Location,Asian Restaurant,Mexican Restaurant,Intersection,Bus Line,Soccer Field,Diner,Pub,General Entertainment,Beach,Furniture / Home Store,Cajun / Creole Restaurant,Café,Skating Rink,College Stadium,Gym Pool,Latin American Restaurant,Pet Store,Vietnamese Restaurant,Wine Shop,Plaza,Shoe Store,Clothing Store,Automotive Shop,Dessert Shop,Korean Restaurant,Fish Market,Middle Eastern Restaurant,Seafood Restaurant,Badminton Court,Bar,Gas Station,Print Shop,Flea Market,Sri Lankan Restaurant,Cantonese Restaurant,Noodle House,Malay Restaurant,Lounge,Pool Hall,Sushi Restaurant,Shopping Mall,Mediterranean Restaurant,Pool,Shanghai Restaurant,Motorcycle Shop,Hong Kong Restaurant,Bus Stop,Golf Course,Taiwanese Restaurant,Deli / Bodega,Vegetarian / Vegan Restaurant,Event Space,Shop & Service,Bubble Tea Shop,Hotpot Restaurant,Cosmetics Shop,Thrift / Vintage Store,Other Great Outdoors
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1
Agincourt,0,0,2,0,2,0,0,0,0,5,0,0,0,2,2,1,0,0,1,0,0,0,0,2,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,1,1,1,1,1,1,1,4,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,0,1,2,1,1,0,0,0,7,0,0,1,2,0,2,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0
"Birch Cliff, Cliffside West",0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Cedarbrae,0,0,1,1,3,0,0,0,0,1,0,0,0,3,0,0,1,0,0,0,1,0,0,1,0,0,0,2,1,0,0,0,1,0,0,0,2,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Clairlea, Golden Mile, Oakridge",0,0,0,2,3,0,0,1,0,0,0,0,1,2,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,1,4,2,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Combining the Different Restaurant and Joint Categories

In [52]:
def _collapse_columns(frame, keyword, total_name):
    """Replace every column whose name contains `keyword` by a single total column.

    Returns a tuple ``(new_frame, matched)``: ``new_frame`` is `frame` without
    the matched columns plus a trailing `total_name` column holding their
    row-wise sum, and ``matched`` is the list of the column names removed.
    """
    matched = [name for name in frame.columns if keyword in name]
    totals = frame[matched].sum(axis = 1)
    collapsed = frame.drop(columns = matched)
    collapsed[total_name] = totals
    return collapsed, matched


# Merge all '...Restaurant...' categories into 'Total Restaurants'
scarborough_onehot, restaurant_list = _collapse_columns(
    scarborough_onehot, 'Restaurant', 'Total Restaurants')

# Column names after the restaurant merge (the state the joint merge starts from)
feat_name_list = list(scarborough_onehot.columns)

# Merge all '...Joint...' categories into 'Total Joints'
scarborough_onehot, joint_list = _collapse_columns(
    scarborough_onehot, 'Joint', 'Total Joints')

In [53]:
# Per-neighbourhood feature table that is passed to k-means below
scarborough_onehot

Unnamed: 0_level_0,Spa,Paper / Office Supplies Store,Coffee Shop,Hobby Shop,Martial Arts Dojo,Trail,Auto Workshop,Fruit & Vegetable Store,Gym,Bakery,Sandwich Place,Park,Breakfast Spot,Playground,Liquor Store,Food & Drink Shop,Pizza Place,Smoothie Shop,Beer Store,Sports Bar,Pharmacy,Bank,Discount Store,Medical Center,Supermarket,Grocery Store,Salon / Barbershop,Bus Station,Video Game Store,Electronics Store,Music Store,Athletics & Sports,Yoga Studio,Board Shop,IT Services,Sporting Goods Shop,Convenience Store,Train Station,Bowling Alley,Department Store,Metro Station,Light Rail Station,Rental Car Location,Intersection,Bus Line,Soccer Field,Diner,Pub,General Entertainment,Beach,Furniture / Home Store,Café,Skating Rink,College Stadium,Gym Pool,Pet Store,Wine Shop,Plaza,Shoe Store,Clothing Store,Automotive Shop,Dessert Shop,Fish Market,Badminton Court,Bar,Gas Station,Print Shop,Flea Market,Noodle House,Lounge,Pool Hall,Shopping Mall,Pool,Motorcycle Shop,Bus Stop,Golf Course,Deli / Bodega,Event Space,Shop & Service,Bubble Tea Shop,Cosmetics Shop,Thrift / Vintage Store,Other Great Outdoors,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1
Agincourt,0,0,2,0,0,0,0,0,0,2,2,1,1,0,0,0,2,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,4,1,1,0,0,0,0,0,0,0,0,0,18,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,0,1,1,0,0,0,0,1,2,0,2,0,0,0,0,2,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,1,0,0,0,13,0
"Birch Cliff, Cliffside West",0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0
Cedarbrae,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,2,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,3
"Clairlea, Golden Mile, Oakridge",0,0,3,0,0,1,0,0,1,2,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,4,2,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0
"Clarks Corners, Sullivan, Tam O'Shanter",0,0,2,1,0,0,0,0,0,1,2,2,0,0,0,0,1,0,0,0,2,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,12,1
"Cliffcrest, Cliffside, Scarborough Village West",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1
"Dorset Park, Scarborough Town Centre, Wexford Heights",0,0,3,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,1,0,2,1,0,0,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,4
"East Birchmount Park, Ionview, Kennedy Park",0,0,4,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,2,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1
"Guildwood, Morningside, West Hill",0,0,2,0,0,0,0,0,0,0,1,0,0,0,1,1,3,1,1,1,1,1,1,1,1,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2


# K-means Clustering of the Neighborhoods

In [55]:
# import the k-means estimator from scikit-learn's clustering module
from sklearn.cluster import KMeans

# run k-means clustering on the per-neighbourhood counts:
# 5 clusters, with random_state fixed so that re-running the cell gives the same result
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(scarborough_onehot)

# Showing the centers of each cluster

In [56]:
# One row per cluster center, one column per feature of the table k-means was fitted on.
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = scarborough_onehot.columns
# Label the centers G1..Gk from the actual number of clusters rather than a fixed
# five-item list, so the cell keeps working if n_clusters is changed.
# Gi corresponds to k-means label i - 1.
means_df.index = ['G{}'.format(i) for i in range(1, len(means_df) + 1)]
# Row-wise total over every feature column, used only to rank the clusters.
means_df['Total Sum'] = means_df.sum(axis=1)
# Display the clusters from the largest total to the smallest (not stored).
means_df.sort_values(by='Total Sum', ascending=False)

Unnamed: 0,Spa,Paper / Office Supplies Store,Coffee Shop,Hobby Shop,Martial Arts Dojo,Trail,Auto Workshop,Fruit & Vegetable Store,Gym,Bakery,Sandwich Place,Park,Breakfast Spot,Playground,Liquor Store,Food & Drink Shop,Pizza Place,Smoothie Shop,Beer Store,Sports Bar,Pharmacy,Bank,Discount Store,Medical Center,Supermarket,Grocery Store,Salon / Barbershop,Bus Station,Video Game Store,Electronics Store,Music Store,Athletics & Sports,Yoga Studio,Board Shop,IT Services,Sporting Goods Shop,Convenience Store,Train Station,Bowling Alley,Department Store,Metro Station,Light Rail Station,Rental Car Location,Intersection,Bus Line,Soccer Field,Diner,Pub,General Entertainment,Beach,Furniture / Home Store,Café,Skating Rink,College Stadium,Gym Pool,Pet Store,Wine Shop,Plaza,Shoe Store,Clothing Store,Automotive Shop,Dessert Shop,Fish Market,Badminton Court,Bar,Gas Station,Print Shop,Flea Market,Noodle House,Lounge,Pool Hall,Shopping Mall,Pool,Motorcycle Shop,Bus Stop,Golf Course,Deli / Bodega,Event Space,Shop & Service,Bubble Tea Shop,Cosmetics Shop,Thrift / Vintage Store,Other Great Outdoors,Total Restaurants,Total Joints,Total Sum
G3,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,45.0
G1,0.0,0.0,2.0,0.666667,0.0,0.0,0.0,0.0,0.666667,1.333333,1.0,1.333333,0.0,0.0,0.0,0.0,1.333333,0.0,0.333333,0.0,1.666667,0.666667,0.0,0.0,0.0,0.666667,0.0,0.333333,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.333333,0.333333,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.666667,0.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,0.0,0.0,12.666667,1.666667,36.0
G4,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,30.0
G5,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.666667,0.333333,0.0,0.666667,0.0,0.0,0.0,1.666667,0.0,0.0,0.0,1.333333,0.666667,0.0,0.0,0.333333,1.666667,0.0,0.0,0.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.333333,0.333333,7.666667,1.666667,26.333333
G2,0.125,0.125,1.25,0.125,0.125,0.125,0.125,0.125,0.25,0.125,0.5,0.75,0.125,0.125,0.125,0.125,1.0,0.125,0.125,0.25,0.25,0.375,0.375,0.125,0.125,0.625,0.125,0.25,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.125,0.125,0.125,0.125,0.125,0.0,0.0,0.0,0.125,0.0,0.125,0.25,0.125,0.125,0.125,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.625,0.625,14.5


# Results

In [59]:
# Pair every neighbourhood (the index of the clustered table) with its cluster.
# Labels are shifted by one so Group i lines up with center Gi.
# Building from a dict keeps 'Group' as an integer column; stacking the two arrays
# and transposing would turn both columns into generic objects.
neigh_summary = pd.DataFrame({
    'Neighborhood': scarborough_onehot.index,
    'Group': kmeans.labels_ + 1,
})
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Agincourt,3
1,"Agincourt North, L'Amoreaux East, Milliken, St...",1
2,"Birch Cliff, Cliffside West",2
3,Cedarbrae,5
4,"Clairlea, Golden Mile, Oakridge",4
5,"Clarks Corners, Sullivan, Tam O'Shanter",1
6,"Cliffcrest, Cliffside, Scarborough Village West",2
7,"Dorset Park, Scarborough Town Centre, Wexford ...",1
8,"East Birchmount Park, Ionview, Kennedy Park",2
9,"Guildwood, Morningside, West Hill",2


# Examining the Results

# First Neighborhood is ....

In [62]:
# List the neighbourhoods that k-means placed in Group 4.
neigh_summary.loc[neigh_summary['Group'].eq(4)]

Unnamed: 0,Neighborhood,Group
4,"Clairlea, Golden Mile, Oakridge",4


In [63]:
# Location details for the first neighbourhood of Group 4 -- the same group that is
# listed in the table directly above, so the details describe the cluster being shown.
name_of_neigh = neigh_summary.loc[neigh_summary['Group'] == 4, 'Neighborhood'].iloc[0]
# Take the first venue row of that neighbourhood and keep columns 1-4
# (postal code, neighbourhood name, latitude, longitude).
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0, 1:5].to_dict()

{'Postal Code': 'M1L',
 'Neighborhood': 'Clairlea, Golden Mile, Oakridge',
 'Neighborhood Latitude': 43.7111117,
 'Neighborhood Longitude': -79.2845772}

# Second Neighborhood is ....

In [64]:
# List the neighbourhoods that k-means placed in Group 1.
neigh_summary.loc[neigh_summary['Group'].eq(1)]

Unnamed: 0,Neighborhood,Group
1,"Agincourt North, L'Amoreaux East, Milliken, St...",1
5,"Clarks Corners, Sullivan, Tam O'Shanter",1
7,"Dorset Park, Scarborough Town Centre, Wexford ...",1


# Third Neighborhood is... 

In [66]:
# List the neighbourhoods that k-means placed in Group 5.
neigh_summary.loc[neigh_summary['Group'].eq(5)]

Unnamed: 0,Neighborhood,Group
3,Cedarbrae,5
11,L'Amoreaux West,5
12,"Maryvale, Wexford",5


In [67]:
# Location details for the first neighbourhood of Group 5 -- the same group that is
# listed in the table directly above, so the details describe the cluster being shown.
name_of_neigh = neigh_summary.loc[neigh_summary['Group'] == 5, 'Neighborhood'].iloc[0]
# Take the first venue row of that neighbourhood and keep columns 1-4
# (postal code, neighbourhood name, latitude, longitude).
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0, 1:5].to_dict()

{'Postal Code': 'M1H',
 'Neighborhood': 'Cedarbrae',
 'Neighborhood Latitude': 43.773136,
 'Neighborhood Longitude': -79.23947609999998}

Thank you for coming this far.
I have a great passion for the field of data science.
Srinivas Bogireddy - Srinivas.Bogireddy@gmail.com