## Recommender System for Dairy Products Contractor

In [1]:
# importing libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup
import requests # library to handle requests
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
import geopy.geocoders # convert an address into latitude and longitude values

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries are imported.')

Libraries are imported.


### Postal Codes in Toronto

In [2]:
# Load the Toronto postal-code table (postal code, borough, neighbourhood,
# latitude, longitude) that was created in week 3 and saved as a CSV file.
df_toronto = pd.read_csv('toronto_base.csv')
df_toronto.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,1,M4H,East York,Thorncliffe Park,43.705369,-79.349372
2,2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
3,3,M9W,Etobicoke,Northwest,43.706748,-79.594054
4,4,M9L,North York,Humber Summit,43.756303,-79.565963


### Create a Map of Toronto City with Postal Code Regions

In [3]:
# Centre of Toronto; the coordinates were looked up by hand with a web search.
toronto_latitude, toronto_longitude = 43.6932, -79.3832
map_toronto = folium.Map(
    location=[toronto_latitude, toronto_longitude],
    zoom_start=10.7,
)

# Circle style shared by every postal-code marker.
toronto_marker_style = dict(radius=5, color='blue', fill=True,
                            fill_color='#3186cc', fill_opacity=0.7)

# One marker per row of df_toronto; its popup reads "<neighbourhood>, <borough>".
for lat, lng, borough, neighborhood in zip(
        df_toronto['Latitude'], df_toronto['Longitude'],
        df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(location=[lat, lng], popup=label, **toronto_marker_style)
    marker.add_to(map_toronto)

map_toronto

### Focusing Only on the Neighborhoods of the "Scarborough" Borough

In [4]:
# Keep only the Scarborough rows, renumber them from 0 and discard the
# leftover 'Unnamed: 0' column that came in with the CSV file.
scarborough_data = (
    df_toronto
    .loc[df_toronto['Borough'] == 'Scarborough']
    .reset_index(drop=True)
    .drop(columns='Unnamed: 0')
)
scarborough_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1T,Scarborough,Tam O'Shanter,43.781638,-79.304302
2,M1W,Scarborough,Steeles West,43.799525,-79.318389
3,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
4,M1M,Scarborough,"Cliffcrest, Cliffside",43.716316,-79.239476


### Create a Map of Scarborough and Its Neighbourhoods

In [5]:
# Map centre for Scarborough (coordinates are fixed values, not geocoded here).
address_scar = 'Scarborough, Toronto'
latitude_scar, longitude_scar = 43.773077, -79.257774
print('The geograpical coordinate of "Scarborough" are: {}, {}.'.format(latitude_scar, longitude_scar))

map_Scarborough = folium.Map(
    location=[latitude_scar, longitude_scar],
    zoom_start=11.5,
)

# Circle style shared by every neighbourhood marker.
scarborough_marker_style = dict(radius=10, color='blue', fill=True,
                                fill_color='#3186cc', fill_opacity=0.7)

# One marker per Scarborough postal-code area; the popup shows its neighbourhood name(s).
for lat, lng, label in zip(
        scarborough_data['Latitude'],
        scarborough_data['Longitude'],
        scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(location=[lat, lng], popup=label, **scarborough_marker_style)
    marker.add_to(map_Scarborough)

map_Scarborough

The geograpical coordinate of "Scarborough" are: 43.773077, -79.257774.


In [6]:
def foursquare_crawler(postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    """Query the Foursquare "venues/explore" endpoint once per postal-code area.

    The four input sequences are walked in parallel, so entry ``i`` of each
    one must describe the same area. The credentials and API version are
    read from the module-level names ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` at call time.

    Parameters
    ----------
    postal_code_list, neighborhood_list : sequence
        Labels stored with each result and echoed in the progress messages.
    lat_list, lng_list : sequence of numbers
        Coordinates sent to Foursquare as the ``ll`` search centre.
    LIMIT : int, default 500
        Value of the ``limit`` query parameter.
    radius : int, default 1000
        Value of the ``radius`` query parameter
        (presumably metres - confirm against the Foursquare documentation).

    Returns
    -------
    list of dict
        One dict per area with the keys 'Postal Code', 'Neighborhood(s)',
        'Latitude', 'Longitude' and 'Crawling_result' (the ``items`` list of
        the first group in the response). Empty input gives an empty list.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a 4xx/5xx status (bad credentials, quota
        exceeded, ...), instead of failing later with an opaque ``KeyError``.
    requests.Timeout
        If a request does not complete within 30 seconds.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    result_ds = []

    areas = zip(postal_code_list, neighborhood_list, lat_list, lng_list)
    for counter, (postal_code, neighborhood, lat, lng) in enumerate(areas, start=1):
        # Let requests build and URL-encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        result_ds.append({
            'Postal Code': postal_code,
            'Neighborhood(s)': neighborhood,
            'Latitude': lat,
            'Longitude': lng,
            'Crawling_result': results,
        })
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds

In [7]:
# @hidden_cell
# Foursquare credentials are read from the environment so that the secret
# values never live in the notebook or in its saved outputs.
# NOTE(review): the key pair that used to be hard-coded in this cell should be
# treated as leaked and rotated in the Foursquare developer console.
# NOTE(review): the marker above was spelled "@hiddel_cell"; it is presumably
# meant to be the "@hidden_cell" tag some hosted notebook services use - confirm.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Only warn here: the cached crawl result can still be loaded without credentials.
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'crawling Foursquare will fail until they are defined.')

### Crawling Foursquare for Venues in the Neighborhoods of "Scarborough"

In [9]:
print('Crawling different neighborhoods inside "Scarborough"')

# One list per crawler argument, in the order the crawler expects them:
# postal codes, neighbourhood names, latitudes, longitudes.
crawler_inputs = [
    list(scarborough_data[column])
    for column in ('PostalCode', 'Neighbourhood', 'Latitude', 'Longitude')
]
Scarborough_foursquare_dataset = foursquare_crawler(*crawler_inputs)

Crawling different neighborhoods inside "Scarborough"
1.
Data is Obtained, for the Postal Code M1B (and Neighborhoods Rouge, Malvern) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M1T (and Neighborhoods Tam O'Shanter) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M1W (and Neighborhoods Steeles West) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M1X (and Neighborhoods Upper Rouge) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M1M (and Neighborhoods Cliffcrest, Cliffside) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M1C (and Neighborhoods Highland Creek, Rouge Hill, Port Union) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M1R (and Neighborhoods Maryvale, Wexford) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M1J (and Neighborhoods Scarborough Village) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M1V (and Neighborhoods Agincourt North, Milliken) SUCCESSFULLY.
10.
Data is Obtained, for the Postal Code M1L (and Neigh

### Breakpoint: Saving results of Foursquare, so that we don't have to connect every time to Foursquare

In [10]:

import pickle

# Cache the raw crawl on disk so that later runs can skip the Foursquare calls.
# The file holds binary pickle data despite its ".txt" extension.
with open("Scarborough_foursquare_dataset.txt", "wb") as cache_file:
    cache_file.write(pickle.dumps(Scarborough_foursquare_dataset))
print('Received Data from Internet is Saved to Computer.')

Received Data from Internet is Saved to Computer.


In [11]:
# Restore the crawl result from the on-disk cache written by the previous cell.
# Only unpickle a file you produced yourself: loading a pickle can execute code.
with open("Scarborough_foursquare_dataset.txt", "rb") as cache_file:
    Scarborough_foursquare_dataset = pickle.loads(cache_file.read())

### Cleaning the RAW Data Received from Foursquare Database

In [12]:
def get_venue_dataset(foursquare_dataset):
    """Flatten the saved Foursquare crawl into one DataFrame row per venue.

    Parameters
    ----------
    foursquare_dataset : iterable of dict
        One dict per postal-code area with the keys 'Postal Code',
        'Neighborhood(s)', 'Latitude', 'Longitude' and 'Crawling_result'
        (a list of venue items as returned by the crawler).

    Returns
    -------
    pandas.DataFrame
        Columns 'Postal Code', 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Summary', 'Venue Category'
        and 'Distance'. Areas without venues contribute no rows; an empty
        input yields an empty frame that still has these columns.

    Notes
    -----
    The number of venues found for each area is printed as a progress report.
    A venue with no category or no "reasons" entry gets a missing value in
    the corresponding column instead of raising ``IndexError``.
    """
    columns = ['Postal Code', 'Neighborhood',
               'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Summary', 'Venue Category', 'Distance']

    # Collect plain dicts and build the frame once at the end: appending to a
    # DataFrame row by row is quadratic, and DataFrame.append no longer exists
    # in pandas 2.x.
    records = []

    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']
        neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']
        lng = neigh_dict['Longitude']
        venues = neigh_dict['Crawling_result']

        print('Number of Venuse in Coordination "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        print(len(venues))

        for venue_dict in venues:
            venue = venue_dict['venue']
            # Either list may be missing or empty for a given venue.
            reasons = (venue_dict.get('reasons') or {}).get('items') or []
            categories = venue.get('categories') or []

            records.append({
                'Postal Code': postal_code,
                'Neighborhood': neigh,
                'Neighborhood Latitude': lat,
                'Neighborhood Longitude': lng,
                'Venue': venue['name'],
                'Venue Summary': reasons[0]['summary'] if reasons else None,
                'Venue Category': categories[0]['name'] if categories else None,
                'Distance': venue['location']['distance'],
            })

    return pd.DataFrame(records, columns=columns)
                                          

In [13]:
# Flatten the cached Foursquare crawl into a table with one row per venue.
scarborough_venues = get_venue_dataset(Scarborough_foursquare_dataset)

Number of Venuse in Coordination "M1B" Posal Code and "Rouge, Malvern" Negihborhood(s) is:
17
Number of Venuse in Coordination "M1T" Posal Code and "Tam O'Shanter" Negihborhood(s) is:
34
Number of Venuse in Coordination "M1W" Posal Code and "Steeles West" Negihborhood(s) is:
26
Number of Venuse in Coordination "M1X" Posal Code and "Upper Rouge" Negihborhood(s) is:
0
Number of Venuse in Coordination "M1M" Posal Code and "Cliffcrest, Cliffside" Negihborhood(s) is:
14
Number of Venuse in Coordination "M1C" Posal Code and "Highland Creek, Rouge Hill, Port Union" Negihborhood(s) is:
4
Number of Venuse in Coordination "M1R" Posal Code and "Maryvale, Wexford" Negihborhood(s) is:
27
Number of Venuse in Coordination "M1J" Posal Code and "Scarborough Village" Negihborhood(s) is:
11
Number of Venuse in Coordination "M1V" Posal Code and "Agincourt North, Milliken" Negihborhood(s) is:
28
Number of Venuse in Coordination "M1L" Posal Code and "Clairlea, Golden Mile, Oakridge" Negihborhood(s) is:
28
N

### Showing Venues for Each Neighborhood in Scarborough

In [14]:
# Preview the first rows of the per-venue table.
scarborough_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M1B,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,This spot is popular,Spa,595
1,M1B,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,This spot is popular,Caribbean Restaurant,912
2,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,Fast Food Restaurant,600
3,M1B,"Rouge, Malvern",43.806686,-79.194353,Harvey's,This spot is popular,Fast Food Restaurant,796
4,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,Fast Food Restaurant,387


In [15]:
# Preview the last rows of the per-venue table.
scarborough_venues.tail()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
379,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,FactoryDirect.ca,This spot is popular,Electronics Store,918
380,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Factory Direct,This spot is popular,Electronics Store,932
381,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Jesse Jr. Filipino Foods,This spot is popular,Asian Restaurant,932
382,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,2001 Audio Video,This spot is popular,Automotive Shop,963
383,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Patisserie Royale,This spot is popular,Dessert Shop,989


### Saving a Cleaned Version of DataFrame as the Results from Foursquare

In [16]:
# Save the venue table as CSV. The row index is written as well (no index=False),
# so reading the file back yields an extra 'Unnamed: 0' column; later cells
# drop that column or slice around it by position.
scarborough_venues.to_csv('scarborough_venues.csv')

### Loading Data from File (Saved "Foursquare" DataFrame for Venues)

In [17]:
# Reload the venue table from the saved CSV file; from here on the notebook
# works with this copy.
scarborough_venues = pd.read_csv('scarborough_venues.csv')

### Summary Information about Neighborhoods inside "Scarborough"

In [18]:
# Distinct neighbourhood names that have at least one row in the venue table.
neigh_list = scarborough_venues['Neighborhood'].unique().tolist()
print('Number of Neighborhoods inside Scarborough:', len(neigh_list), sep='\n')
print('List of Neighborhoods inside Scarborough:')
neigh_list

Number of Neighborhoods inside Scarborough:
16
List of Neighborhoods inside Scarborough:


['Rouge, Malvern',
 "Tam O'Shanter",
 'Steeles West',
 'Cliffcrest, Cliffside',
 'Highland Creek, Rouge Hill, Port Union',
 'Maryvale, Wexford',
 'Scarborough Village',
 'Agincourt North, Milliken',
 'Clairlea, Golden Mile, Oakridge',
 'Woburn',
 'Ionview, Kennedy Park',
 'Birch Cliff',
 'Morningside, West Hill',
 'Cedarbrae',
 'Agincourt',
 'Dorset Park, Scarborough Town Centre, Wexford Heights']

In [19]:
# Count the non-null entries of every column, per neighbourhood.
venues_by_neighborhood = scarborough_venues.groupby('Neighborhood')
neigh_venue_summary = venues_by_neighborhood.count()

# 'Unnamed: 0' is only the saved row index, so leave it out of the preview.
neigh_venue_summary.drop('Unnamed: 0', axis=1).head()

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Agincourt,45,45,45,45,45,45,45
"Agincourt North, Milliken",28,28,28,28,28,28,28
Birch Cliff,16,16,16,16,16,16,16
Cedarbrae,30,30,30,30,30,30,30
"Clairlea, Golden Mile, Oakridge",28,28,28,28,28,28,28


In [20]:
# Distinct venue categories, in the order they first appear in the table.
unique_categories = scarborough_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

print('Here is the list of different categories:')
list(unique_categories)

There are 111 uniques categories.
Here is the list of different categories:


['Spa',
 'Caribbean Restaurant',
 'Fast Food Restaurant',
 'Coffee Shop',
 'Paper / Office Supplies Store',
 'Hobby Shop',
 'Martial Arts Dojo',
 'Chinese Restaurant',
 'Greek Restaurant',
 'Fruit & Vegetable Store',
 'Gym',
 'Bakery',
 'Sandwich Place',
 'Park',
 'Italian Restaurant',
 'Noodle House',
 'Pharmacy',
 'Seafood Restaurant',
 'Cantonese Restaurant',
 'Mexican Restaurant',
 'Thai Restaurant',
 'Vietnamese Restaurant',
 'Fried Chicken Joint',
 'Pizza Place',
 'Rental Car Location',
 'Bus Stop',
 'Intersection',
 'Shopping Mall',
 'Golf Course',
 'Taiwanese Restaurant',
 'Discount Store',
 'Deli / Bodega',
 'Video Game Store',
 'Grocery Store',
 'Hotpot Restaurant',
 'Japanese Restaurant',
 'Breakfast Spot',
 'Thrift / Vintage Store',
 'Bank',
 'Nail Salon',
 'Other Great Outdoors',
 'Tennis Court',
 'Gym Pool',
 'Beach',
 'Furniture / Home Store',
 'Cajun / Creole Restaurant',
 'Sports Bar',
 'Gym / Fitness Center',
 'Wings Joint',
 'Burger Joint',
 'Playground',
 'Korean Re

### One-hot Encoding the "Venue Category" Column into One Feature per Unique Category

In [22]:
# One-hot encode 'Venue Category': each category becomes its own indicator
# column, named exactly after the category (empty prefix and separator).
onehot_options = dict(columns=['Venue Category'], prefix="", prefix_sep="", drop_first=False)
scarborough_onehot = pd.get_dummies(scarborough_venues, **onehot_options)
scarborough_onehot.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,Bar,Beach,Beer Store,Bowling Alley,Breakfast Spot,Burger Joint,Bus Line,Bus Station,Bus Stop,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store,Event Service,Event Space,Fast Food Restaurant,Fish Market,Flea Market,Food & Drink Shop,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,General Entertainment,German Restaurant,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Hobby Shop,Hong Kong Restaurant,Hotpot Restaurant,IT Services,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Malay Restaurant,Martial Arts Dojo,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motorcycle Shop,Music Store,Nail Salon,Noodle House,Other Great Outdoors,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Print Shop,Pub,Rental Car Location,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Shop & Service,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tennis Court,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,0,M1B,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,This spot is popular,595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,M1B,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,This spot is popular,912,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,M1B,"Rouge, Malvern",43.806686,-79.194353,Harvey's,This spot is popular,796,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,This spot is popular,387,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Manually Selecting Related Features for Dairy Product Contractor

In [25]:
# Columns that identify a neighbourhood.
neighborhood_columns = [
    'Neighborhood',
    'Neighborhood Latitude',
    'Neighborhood Longitude',
]

# Hand-picked food-related venue categories considered relevant for a
# dairy-products contractor.
food_venue_categories = [
    'Asian Restaurant', 'BBQ Joint', 'Bakery', 'Breakfast Spot', 'Burger Joint',
    'Cajun / Creole Restaurant', 'Cantonese Restaurant', 'Caribbean Restaurant',
    'Chinese Restaurant', 'Diner', 'Fast Food Restaurant', 'Fish Market',
    'Food & Drink Shop', 'Fried Chicken Joint', 'Fruit & Vegetable Store',
    'Greek Restaurant', 'Grocery Store', 'Hakka Restaurant', 'Indian Restaurant',
    'Italian Restaurant', 'Japanese Restaurant', 'Korean Restaurant',
    'Latin American Restaurant', 'Malay Restaurant', 'Mediterranean Restaurant',
    'Mexican Restaurant', 'Middle Eastern Restaurant', 'Noodle House',
    'Pizza Place', 'Restaurant', 'Sandwich Place', 'Seafood Restaurant',
    'Shanghai Restaurant', 'Sushi Restaurant', 'Taiwanese Restaurant',
    'Thai Restaurant', 'Vegetarian / Vegan Restaurant', 'Vietnamese Restaurant',
    'Wings Joint',
]

important_list_of_features = neighborhood_columns + food_venue_categories

### Grouping the Data by Neighborhoods

In [26]:
# Keep the selected features, drop the coordinates and add up the indicator
# columns so that each neighbourhood gets one row of venue counts.
# NOTE: this overwrites scarborough_onehot, so the cell cannot simply be re-run
# without first re-running the one-hot encoding cell.
scarborough_onehot = (
    scarborough_onehot[important_list_of_features]
    .drop(columns=['Neighborhood Latitude', 'Neighborhood Longitude'])
    .groupby('Neighborhood')
    .sum()
)

scarborough_onehot.head()

Unnamed: 0_level_0,Asian Restaurant,BBQ Joint,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Diner,Fast Food Restaurant,Fish Market,Food & Drink Shop,Fried Chicken Joint,Fruit & Vegetable Store,Greek Restaurant,Grocery Store,Hakka Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Noodle House,Pizza Place,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
Agincourt,0,0,2,1,0,0,1,2,6,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,2,1,2,1,1,1,0,0,0,1,0
"Agincourt North, Milliken",0,1,2,0,0,0,0,1,5,0,2,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0
Birch Cliff,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0
Cedarbrae,0,0,3,0,1,0,0,1,1,0,1,0,0,1,0,0,1,1,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1
"Clairlea, Golden Mile, Oakridge",0,0,2,0,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0


### Integrating Different Restaurants and Different Joints

In [27]:
# Fold every column whose name contains 'Restaurant' into a single total.
feat_name_list = list(scarborough_onehot.columns)
restaurant_list = [name for name in feat_name_list if 'Restaurant' in name]
scarborough_onehot['Total Restaurants'] = scarborough_onehot[restaurant_list].sum(axis=1)
scarborough_onehot = scarborough_onehot.drop(columns=restaurant_list)

# Same treatment for the remaining columns whose name contains 'Joint'.
feat_name_list = list(scarborough_onehot.columns)
joint_list = [name for name in feat_name_list if 'Joint' in name]
scarborough_onehot['Total Joints'] = scarborough_onehot[joint_list].sum(axis=1)
scarborough_onehot = scarborough_onehot.drop(columns=joint_list)

### Showing the Fully-Processed DataFrame about Neighborhoods inside Scarborough

In [28]:
# Display the per-neighbourhood feature table that is passed to k-means below.
scarborough_onehot

Unnamed: 0_level_0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Agincourt,2,1,0,0,0,0,0,1,2,2,18,0
"Agincourt North, Milliken",2,0,0,0,0,0,0,2,2,0,11,1
Birch Cliff,0,0,1,0,0,0,0,0,0,0,3,0
Cedarbrae,3,0,0,0,0,0,1,0,1,0,7,3
"Clairlea, Golden Mile, Oakridge",2,0,1,0,0,0,1,0,1,1,3,0
"Cliffcrest, Cliffside",0,0,0,0,0,0,0,0,3,0,3,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",1,0,0,0,0,0,1,0,1,1,13,4
"Highland Creek, Rouge Hill, Port Union",0,1,0,0,0,0,0,0,0,0,1,1
"Ionview, Kennedy Park",0,0,0,0,0,0,2,0,1,1,5,1
"Maryvale, Wexford",0,1,0,1,0,0,2,0,2,0,9,1


### Run k-means to Cluster Neighborhoods into 5 Clusters

In [29]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
# random_state fixes the centroid seeding. n_init is pinned to 10 (the former
# scikit-learn default) because newer releases default to 'auto', which may
# yield different clusters, and the group numbers used later are hard-coded.
kmeans = KMeans(n_clusters = 5, random_state = 0, n_init = 10).fit(scarborough_onehot)

### Showing Centers of Each Cluster

In [30]:
# Cluster centres as a table: one row per cluster (G1..G5 correspond to
# k-means labels 0..4), one column per feature.
means_df = pd.DataFrame(
    kmeans.cluster_centers_,
    index=['G1','G2','G3','G4','G5'],
    columns=scarborough_onehot.columns,
)

# Rank the clusters by the sum of their centre's feature values.
means_df['Total Sum'] = means_df.sum(axis=1)
means_df.sort_values(by='Total Sum', ascending=False)

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
G5,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,18.0,0.0,26.0
G2,1.0,0.0,0.0,0.0,0.0,0.0,0.666667,1.0,1.666667,1.0,12.333333,2.0,19.666667
G3,1.0,0.333333,0.0,0.166667,0.0,0.166667,1.0,0.0,1.0,0.666667,6.5,0.833333,11.666667
G1,0.0,0.5,0.0,0.0,0.5,0.0,0.5,0.0,3.5,0.5,4.0,2.0,11.5
G4,0.5,0.25,0.5,0.0,0.0,0.0,0.25,0.0,0.25,0.25,2.5,0.25,4.75


## Inference

### Best Group is G5;

### Second Best Group is G2;

### Third Best Group is G3;

### Inserting "kmeans.labels_" into the Original Scarborough DataFrame

#### Finding the Corresponding Group for Each Neighborhood.

In [33]:
# Shift the 0-based k-means labels to 1..5 so they line up with G1..G5.
group_numbers = kmeans.labels_ + 1

# Two rows (names, groups) transposed into two columns.
neigh_summary = pd.DataFrame([scarborough_onehot.index, group_numbers]).transpose()
neigh_summary.columns = ['Neighborhood', 'Group']
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Agincourt,5
1,"Agincourt North, Milliken",2
2,Birch Cliff,4
3,Cedarbrae,3
4,"Clairlea, Golden Mile, Oakridge",4
5,"Cliffcrest, Cliffside",1
6,"Dorset Park, Scarborough Town Centre, Wexford ...",2
7,"Highland Creek, Rouge Hill, Port Union",4
8,"Ionview, Kennedy Park",3
9,"Maryvale, Wexford",3


## Summarizing Results

### Best Neighborhood

In [34]:

# Neighbourhoods in group 5, the group with the largest 'Total Sum' in the
# recorded cluster-centre table.
# NOTE(review): the group number is hard-coded; re-check it whenever the
# clustering is re-run, because the label numbering may change.
neigh_summary[neigh_summary['Group'] == 5]

Unnamed: 0,Neighborhood,Group
0,Agincourt,5


In [35]:
# First neighbourhood listed in group 5.
# NOTE(review): the group number is hard-coded from the recorded clustering.
name_of_neigh = neigh_summary.loc[neigh_summary['Group'] == 5, 'Neighborhood'].iloc[0]

# Pick the location columns by name instead of by position (iloc[0, 1:5]), so
# the lookup does not depend on where the saved-index column sits in the table.
location_columns = ['Postal Code', 'Neighborhood',
                    'Neighborhood Latitude', 'Neighborhood Longitude']
scarborough_venues.loc[scarborough_venues['Neighborhood'] == name_of_neigh, location_columns].iloc[0].to_dict()

{'Postal Code': 'M1S',
 'Neighborhood': 'Agincourt',
 'Neighborhood Latitude': 43.7942003,
 'Neighborhood Longitude': -79.26202940000002}

### Second Best Neighborhood

In [36]:
# Neighbourhoods in group 2, the group with the second-largest 'Total Sum' in
# the recorded cluster-centre table.
# NOTE(review): hard-coded group number; re-check after re-running k-means.
neigh_summary[neigh_summary['Group'] == 2]

Unnamed: 0,Neighborhood,Group
1,"Agincourt North, Milliken",2
6,"Dorset Park, Scarborough Town Centre, Wexford ...",2
14,Tam O'Shanter,2


### Third Best Neighborhood

In [37]:
# Neighbourhoods in group 3, the group with the third-largest 'Total Sum' in
# the recorded cluster-centre table.
# NOTE(review): hard-coded group number; re-check after re-running k-means.
neigh_summary[neigh_summary['Group'] == 3]

Unnamed: 0,Neighborhood,Group
3,Cedarbrae,3
8,"Ionview, Kennedy Park",3
9,"Maryvale, Wexford",3
11,"Rouge, Malvern",3
12,Scarborough Village,3
13,Steeles West,3


In [38]:
# First neighbourhood listed in group 3.
# NOTE(review): the group number is hard-coded from the recorded clustering.
name_of_neigh = neigh_summary.loc[neigh_summary['Group'] == 3, 'Neighborhood'].iloc[0]

# Pick the location columns by name instead of by position (iloc[0, 1:5]), so
# the lookup does not depend on where the saved-index column sits in the table.
location_columns = ['Postal Code', 'Neighborhood',
                    'Neighborhood Latitude', 'Neighborhood Longitude']
scarborough_venues.loc[scarborough_venues['Neighborhood'] == name_of_neigh, location_columns].iloc[0].to_dict()

{'Postal Code': 'M1H',
 'Neighborhood': 'Cedarbrae',
 'Neighborhood Latitude': 43.773136,
 'Neighborhood Longitude': -79.23947609999998}