In [1]:
import os
import warnings

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
warnings.filterwarnings('ignore')

### Part 1. 


##### Gather all Relevant Borough  Data with Latitude & Longitude

In [2]:
### Now we are going to get and parse the html and convert the table into dataframe:

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Fail fast on network/HTTP problems instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
website_text = response.text
soup  = BeautifulSoup(website_text, "lxml")

matrix = []
table = soup.table  # first <table> element in the document
if table is None:
    raise ValueError("No <table> element found at {}".format(url))
headings = [heading.text.strip() for heading in table.find_all('th')]

for row in table.find_all('tr'):
    redefined_row = [col.text.strip() for col in row.find_all('td')]
    # Rows without <td> cells (e.g. the header row) produce an empty list; rows
    # with fewer than three cells would otherwise raise IndexError below.
    if len(redefined_row) >= 3 and redefined_row[2]:
        matrix.append(redefined_row[:3])

df_data = pd.DataFrame(matrix,columns=["PostalCode", "Borough", "Neighborhood"])




### Below we will replace "/" in neighbourhood cells with ",":

def preprocess_dataframe(df_row):
    """Swap every "/" in the row's 'Neighborhood' value for ", ".

    The row is modified in place and the same object is returned, which lets
    the function be passed to ``DataFrame.apply(..., axis=1)``. Whitespace
    around the original "/" is left untouched.
    """
    neighborhood = df_row['Neighborhood']
    if '/' not in neighborhood:
        return df_row
    df_row['Neighborhood'] = neighborhood.replace('/', ', ')
    return df_row

df_data = df_data.apply(preprocess_dataframe, axis=1)



### Now we will get the latitude & longitude of all the Boroughs/Rows in the Dataframe:

df_coordinates = pd.read_csv("Geospatial_Coordinates.csv")
Latitude, Longitude = [], []
for postal_code in df_data["PostalCode"]:
    matches = df_coordinates[df_coordinates["Postal Code"] == postal_code]
    if matches.empty:
        # Keep both lists exactly as long as df_data: a skipped entry would make
        # the insert() calls below fail with a length mismatch.
        print("No Latitude or Longitude for Code : " , postal_code)
        Latitude.append(np.nan)
        Longitude.append(np.nan)
        continue
    # Columns 1 and 2 of the CSV are read positionally as latitude and longitude.
    Latitude.append(matches.iloc[0, 1])
    Longitude.append(matches.iloc[0, 2])

df_data.insert(3, "Latitude", Latitude)
df_data.insert(4, "Longitude",Longitude)


### To visualize Locations in Dataframe:

def View_Locations(df_data):
    """Plot the rows of ``df_data`` as circle markers on a folium map.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Must provide 'Latitude', 'Longitude' and 'Neighborhood' columns.

    Returns
    -------
    folium.Map
        Map centered on the mean coordinate of the plotted rows, with one
        popup-labelled marker per row. Rows lacking a coordinate are skipped.
    """
    # Rows without coordinates cannot be drawn and would poison the mean below.
    located = df_data.dropna(subset=['Latitude', 'Longitude'])

    # Initialize Map. Without an explicit location folium falls back to a
    # zoomed-out world view, so the zoom level only matters once a center is given.
    if len(located) > 0:
        center = [located['Latitude'].mean(), located['Longitude'].mean()]
        visual_map = folium.Map(location=center, zoom_start=11)
    else:
        visual_map = folium.Map(zoom_start=11)

    # Add Markers/Points to Map
    for lat, lng, label in zip(located['Latitude'], located['Longitude'], located['Neighborhood']):
        # parse_html=True escapes the neighborhood name inside the popup.
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7).add_to(visual_map)
    return visual_map
# View_Locations(df_data)


def Search_Scarborough_Boroughs(row):
    """Return ``row`` when its 'Borough' contains the word "Scarborough".

    The borough text is stripped and split on single spaces; when none of the
    resulting words equals "Scarborough" an empty Series is returned instead.
    """
    borough_words = row["Borough"].strip().split(" ")
    if "Scarborough" not in borough_words:
        return pd.Series()
    return row

# Keep only the rows Search_Scarborough_Boroughs returns unchanged; the empty
# Series it returns for every other row turns into an all-NaN row that dropna() removes.
df_Scarborough = (
    df_data
    .apply(Search_Scarborough_Boroughs, axis=1)
    .dropna()
    .reset_index(drop=True)
)
# View_Locations(df_data)
df_Scarborough.head()

Unnamed: 0,Borough,Latitude,Longitude,Neighborhood,PostalCode
0,Scarborough,43.806686,-79.194353,"Malvern , Rouge",M1B
1,Scarborough,43.784535,-79.160497,"Rouge Hill , Port Union , Highland Creek",M1C
2,Scarborough,43.763573,-79.188711,"Guildwood , Morningside , West Hill",M1E
3,Scarborough,43.770992,-79.216917,Woburn,M1G
4,Scarborough,43.773136,-79.239476,Cedarbrae,M1H


### Part 2.

##### Now get all the Relevant Foursquare Data

In [3]:
### Describing Foursquare Credentials:

# Credentials are read from the environment so they are never stored in the
# notebook. Export FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET before
# starting the kernel; an unset variable yields an empty string here.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')          # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret Key
VERSION = '20181018'  # value sent as the API's `v` parameter
LIMIT = 100           # value sent as the `limit` parameter
RADIUS = 1000         # value sent as the `radius` parameter

##### Let's look up the latitude & longitude of one location and visualize it on a map:

In [4]:
### We can get address of any location from the below mentioned website:
### https://www.mapquest.com/search/result?query=Scarborough%20Canada&boundingBox=67.33986082559095,-208.828125,-53.748710796898976,158.90625&page=0&mqId=282177680&index=0

address = 'Scarborough, ON'      # Scarborough
# Nominatim requires an application-specific User-Agent; recent geopy releases
# refuse to construct the geocoder without one.
geolocator = Nominatim(user_agent="scarborough_neighborhood_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail here with a clear message
    # rather than with an AttributeError on the next line.
    raise ValueError("Could not geocode address: {}".format(address))
LATITUDE_SCARBOROUGH = location.latitude
LONGITUDE_SCARBOROUGH = location.longitude
ADDRESS_SCARBOROUGH = location.address
print("Address : ", ADDRESS_SCARBOROUGH)
print("LATITUDE : ", LATITUDE_SCARBOROUGH)
print("LONGITUDE : ", LONGITUDE_SCARBOROUGH)


### If you want to visualize all the Neighborhoods of Scarborough:

# print("\nBelow is the Map of all Neighborhood of Scarborough !")
# visual_map = folium.Map(zoom_start=11)
# for lat, lng, label in zip(df_Scarborough["Latitude"], df_Scarborough['Longitude'], "Scarborough"):
#         label = folium.Popup(label, parse_html=True)
#         folium.CircleMarker(
#             [lat, lng],
#             radius=5,
#             popup=label,
#             color='blue',
#             fill=True,
#             fill_color='#3186cc',
#             fill_opacity=0.7,
#             parse_html=False).add_to(visual_map)      
# visual_map

Address :  Scarborough, Scarborough Centre, Scarborough, Toronto, Golden Horseshoe, Ontario, M1P 4N7, Canada
LATITUDE :  43.773077
LONGITUDE :  -79.257774


In [5]:
# `ll` must be "<latitude>,<longitude>"; the literal is split with implicit string
# concatenation so that no indentation whitespace ends up inside the URL.
url = ('https://api.foursquare.com/v2/venues/explore'
       '?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}').format(
           CLIENT_ID, CLIENT_SECRET, VERSION,
           LATITUDE_SCARBOROUGH, LONGITUDE_SCARBOROUGH,
           RADIUS, LIMIT)
results = requests.get(url, timeout=30).json()

#####  Some testing:

>> type(results)

dict

>> results.keys()

dict_keys(['meta', 'response'])

>> results['response'].keys()

dict_keys(['suggestedFilters', 'headerLocation', 'headerFullLocation', 'headerLocationGranularity', 'totalResults', 'suggestedBounds', 'groups'])

>> type(results['response']['groups'])

list

>> len(results['response']['groups'])

1

>> results['response']['groups'][0].keys()

dict_keys(['type', 'name', 'items'])

>> len(results['response']['groups'][0]['items'])

67

>> results['response']['groups'][0]['items'][0]

{'reasons': {'count': 0,
  'items': [{'summary': 'This spot is popular',
    'type': 'general',
    'reasonName': 'globalInteractionReason'}]},
 'venue': {'id': '5085ec39e4b0b1ead2eb0818',
  'name': 'Disney Store',
  'location': {'address': '300 Borough Drive',
   'crossStreet': 'in Scarborough Town Centre',
   'lat': 43.775537,
   'lng': -79.256833,
   'labeledLatLngs': [{'label': 'display',
     'lat': 43.775537,
     'lng': -79.256833}],
   'distance': 284,
   'postalCode': 'M1P 4P5',
   'cc': 'CA',
   'city': 'Scarborough',
   'state': 'ON',
   'country': 'Canada',
   'formattedAddress': ['300 Borough Drive (in Scarborough Town Centre)',
    'Scarborough ON M1P 4P5',
    'Canada']},
  'categories': [{'id': '4bf58dd8d48988d1f3941735',
    'name': 'Toy / Game Store',
    'pluralName': 'Toy / Game Stores',
    'shortName': 'Toys & Games',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/toys_',
     'suffix': '.png'},
    'primary': True}],
  'photos': {'count': 0, 'groups': []}},
 'referralId': 'e-0-5085ec39e4b0b1ead2eb0818-0'}

In [6]:
# ##  Here we are going to do a VERY IMPORTANT job: sort the pile of data in 'result' json.

# def get_category_type(row):
#     try:
#         categories_list = row['categories']
#     except:
#         categories_list = row['venue.categories']
        
#     if len(categories_list) == 0:
#         return None
#     else:
#         return categories_list[0]['name']

# venues = results['response']['groups'][0]['items']
    
# Scarborough_nearby_venues = json_normalize(venues) # flatten JSON

# # filter columns
# filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# Scarborough_nearby_venues =Scarborough_nearby_venues.loc[:, filtered_columns]

# # filter the category for each row
# Scarborough_nearby_venues['venue.categories'] = Scarborough_nearby_venues.apply(get_category_type, axis=1)

# # clean columns
# Scarborough_nearby_venues.columns = [col.split(".")[-1] for col in Scarborough_nearby_venues.columns]
# Scarborough_nearby_venues.head()

In [7]:
def get_nearby_venues_data(df_Scarborough):
    """Query the Foursquare ``venues/explore`` endpoint once per row of ``df_Scarborough``.

    Parameters
    ----------
    df_Scarborough : pandas.DataFrame
        Must provide 'Neighborhood', 'Latitude' and 'Longitude' columns.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Category', 'Venue Latitude' and 'Venue Longitude', on a fresh
        0..n-1 index. 'Venue Category' is the name of the venue's first
        category, or None when the venue has no category. A neighborhood whose
        request fails is reported on stdout and skipped.

    Raises
    ------
    RuntimeError
        If FOURSQUARE_CLIENT_ID or FOURSQUARE_CLIENT_SECRET is not set in the
        environment.
    """
    # Secrets are taken from the environment and never written into the notebook.
    CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
    CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
    if not CLIENT_ID or not CLIENT_SECRET:
        raise RuntimeError(
            "Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET "
            "environment variables before requesting venue data.")
    VERSION = '20181018'
    LIMIT = 200
    radius = 1000

    columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Category', 'Venue Latitude', 'Venue Longitude']
    # Collect plain dicts and build the frame once instead of growing it row by row.
    records = []

    for _, row in df_Scarborough.iterrows():
        try:
            neighborhood_name = row['Neighborhood']
            neighbor_latitude = row["Latitude"]
            neighbor_longitude = row["Longitude"]

            # Passing `params` lets requests assemble and encode the query string.
            response = requests.get(
                'https://api.foursquare.com/v2/venues/explore',
                params={
                    'client_id': CLIENT_ID,
                    'client_secret': CLIENT_SECRET,
                    'v': VERSION,
                    'll': '{},{}'.format(neighbor_latitude, neighbor_longitude),
                    'radius': radius,
                    'limit': LIMIT,
                },
                timeout=30)
            venues = response.json()['response']['groups'][0]['items']
        except Exception as e:
            # Best effort: report the failing neighborhood's error and move on.
            print("Exception :  ", e)
            continue

        for item in venues:
            venue = item.get('venue') or {}
            venue_location = venue.get('location') or {}
            categories = venue.get('categories') or []
            records.append({
                'Neighborhood': neighborhood_name,
                'Neighborhood Latitude': neighbor_latitude,
                'Neighborhood Longitude': neighbor_longitude,
                'Venue': venue.get('name'),
                # A venue without categories no longer discards its whole neighborhood.
                'Venue Category': categories[0].get('name') if categories else None,
                'Venue Latitude': venue_location.get('lat'),
                'Venue Longitude': venue_location.get('lng'),
            })

    return pd.DataFrame(records, columns=columns)

In [None]:
# Issues one Foursquare request per row of df_Scarborough, so this cell needs network access.
Scarborough_nearby_venues = get_nearby_venues_data(df_Scarborough)

In [78]:
Scarborough_nearby_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Category,Venue Latitude,Venue Longitude
0,"Malvern , Rouge",43.806686,-79.194353,Wendy's,Fast Food Restaurant,43.802008,-79.19808
1,"Malvern , Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant,43.807448,-79.199056
2,"Malvern , Rouge",43.806686,-79.194353,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
3,"Malvern , Rouge",43.806686,-79.194353,Harvey's,Restaurant,43.80002,-79.198307
4,"Malvern , Rouge",43.806686,-79.194353,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607


In [129]:
list(Scarborough_nearby_venues["Venue Category"].unique())

['Fast Food Restaurant',
 'Caribbean Restaurant',
 'Restaurant',
 'Paper / Office Supplies Store',
 'Coffee Shop',
 'Hobby Shop',
 'Bus Station',
 'Trail',
 'Chinese Restaurant',
 'Gym',
 'Greek Restaurant',
 'Supermarket',
 'Bakery',
 'Sandwich Place',
 'Bank',
 'Burger Joint',
 'Italian Restaurant',
 'Breakfast Spot',
 'Playground',
 'Park',
 'Fried Chicken Joint',
 'Food & Drink Shop',
 'Pizza Place',
 'Liquor Store',
 'Juice Bar',
 'Pharmacy',
 'Beer Store',
 'Discount Store',
 'Sports Bar',
 'Electronics Store',
 'Rental Car Location',
 'Grocery Store',
 'Indian Restaurant',
 'Mobile Phone Shop',
 'Hakka Restaurant',
 'Thai Restaurant',
 'Music Store',
 'Athletics & Sports',
 'Gas Station',
 'Wings Joint',
 'Yoga Studio',
 'Board Shop',
 'Bus Line',
 'German Restaurant',
 'Pet Store',
 'Intersection',
 'Ice Cream Shop',
 "Women's Store",
 'Convenience Store',
 'Train Station',
 'Japanese Restaurant',
 'Bowling Alley',
 'Department Store',
 'Hockey Arena',
 'Metro Station',
 'Light

In [21]:
# Written relative to the notebook's working directory: the previous absolute
# /home/... path only exists on the original author's machine.
path = "Final_Project_Scarborough_DF"
Scarborough_nearby_venues.to_csv(path)

### Part 3.

##### Methodology Section !

In [83]:
# Derive every figure from the data so the text cannot drift from the dataset.
n_neighborhoods = Scarborough_nearby_venues['Neighborhood'].nunique()
n_rows = Scarborough_nearby_venues.shape[0]
n_categories = Scarborough_nearby_venues['Venue Category'].nunique()

print("Total Unique Neighborhood     : ", n_neighborhoods)
print("Total Rows in Scarborough_Data: ", n_rows)
print("Total Unique Venue Category   : ", end="")
print(n_categories)

print("\n\n", "Thus the one-hot-encode matrix must have {} rows and {} columns because we are doing "
      "one-hot encoding on all Neighborhoods with respect to Venue Category.".format(n_rows, n_categories))

Total Unique Neighborhood     :  16
Total Rows in Scarborough_Data:  412
Total Unique Venue Category   : 112


 Thus the one-hot-encode matrix must have 412 rows and 112 columns because we are doing one-hot encoding on all        Neighborhoods with respect to Venue Category.


In [103]:
### One-hot encode the 'Venue Category' column of Scarborough_nearby_venues.
Scarborough_One_Hot_Encoded = pd.get_dummies(
    Scarborough_nearby_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)
### Tag every encoded row with the neighborhood it belongs to.
Scarborough_One_Hot_Encoded["Neighborhood"] = Scarborough_nearby_venues["Neighborhood"]

### Rotate the column list so that the trailing column comes first.
cols = Scarborough_One_Hot_Encoded.columns.tolist()
cols.insert(0, cols.pop())
Scarborough_One_Hot_Encoded = Scarborough_One_Hot_Encoded[cols]

print("Final Scarborough One Hot Encoded Matrix Shape : ", Scarborough_One_Hot_Encoded.shape)
print()
print("Scarborough One Hot Encoded DataFrame:")
Scarborough_One_Hot_Encoded.head()

Final Scarborough One Hot Encoded Matrix Shape :  (412, 113)

Scarborough One Hot Encoded DataFrame:


Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,"Malvern , Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern , Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern , Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern , Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern , Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [138]:
### Averaging the one-hot rows per neighborhood yields each category's share of that
### neighborhood's venues; a sum would give raw counts, the mean normalizes them.

Scarborough_One_Hot_Encoded_Normalized = (
    Scarborough_One_Hot_Encoded
    .groupby("Neighborhood", as_index=False)
    .mean()
)

In [140]:
Scarborough_One_Hot_Encoded_Normalized.shape

(16, 113)

#### Now let's do feature selection by choosing the relevant columns:

In [148]:
# Drink-related venue categories (plus the 'Neighborhood' key column).
# NOTE(review): no visible cell consumes this list — the k-means cell drops only
# 'Neighborhood' and clusters on every category column. Confirm whether the
# clustering was meant to be restricted to these features.
feature_or_columns = [
    'Neighborhood',
    'Bar',
    'Beer Store',
    'Bubble Tea Shop',
    'Café',
    'Coffee Shop',
    'Hookah Bar',
    'Juice Bar',
    'Pub',
    'Sports Bar',
]

print("Selected Feature : ", len(feature_or_columns))
print("Total Features : ", len(Scarborough_One_Hot_Encoded_Normalized.columns))

Selected Feature :  10
Total Features :  113


In [142]:
# set number of clusters
kclusters = 3

# Cluster on the numeric category columns only. `columns=` replaces the positional
# axis argument, which pandas 2.0 no longer accepts. 'Cluster Labels' is dropped as
# well (when present) so that re-running this cell after the labels were attached
# does not feed earlier labels back in as a feature.
# NOTE(review): every venue category is used here, not the `feature_or_columns` subset.
Scarborough_Clustering_df = (
    Scarborough_One_Hot_Encoded_Normalized
    .drop(columns='Neighborhood')
    .drop(columns='Cluster Labels', errors='ignore')
)

# run k-means clustering; n_init is pinned because its default differs between
# scikit-learn releases, and random_state keeps the labels reproducible.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(Scarborough_Clustering_df)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0], dtype=int32)

In [151]:
# Work on a copy: a plain assignment would alias the normalized frame, so adding
# the label column below would also modify Scarborough_One_Hot_Encoded_Normalized.
Scarborough_final_df = Scarborough_One_Hot_Encoded_Normalized.copy()

# add clustering labels
Scarborough_final_df['Cluster Labels'] = kmeans.labels_

Scarborough_final_df.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio,Cluster Labels
0,Agincourt,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.04,0.02,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0
1,"Birch Cliff , Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,Cedarbrae,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.096774,0.064516,...,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.032258,0
3,"Clarks Corners , Tam O'Shanter , Sullivan",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.05,...,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0
4,"Cliffside , Cliffcrest , Scarborough Village...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [None]:
# Attach the remaining df_Scarborough columns to each clustered neighborhood,
# matching rows on the neighborhood name.
scarborough_by_neighborhood = df_Scarborough.set_index("Neighborhood")
Scarborough_final_df = Scarborough_final_df.join(scarborough_by_neighborhood, on="Neighborhood")

In [154]:
Scarborough_final_df

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio,Cluster Labels,Borough,Latitude,Longitude,PostalCode
0,Agincourt,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.04,0.02,...,0.0,0.02,0.0,0.0,0.0,0,Scarborough,43.7942,-79.262029,M1S
1,"Birch Cliff , Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,Scarborough,43.692657,-79.264848,M1N
2,Cedarbrae,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.096774,0.064516,...,0.0,0.0,0.032258,0.0,0.032258,0,Scarborough,43.773136,-79.239476,M1H
3,"Clarks Corners , Tam O'Shanter , Sullivan",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.05,...,0.025,0.025,0.0,0.0,0.0,0,Scarborough,43.781638,-79.304302,M1T
4,"Cliffside , Cliffcrest , Scarborough Village...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,Scarborough,43.716316,-79.239476,M1M
5,"Dorset Park , Wexford Heights , Scarborough ...",0.022222,0.0,0.044444,0.0,0.022222,0.0,0.0,0.044444,0.022222,...,0.0,0.022222,0.0,0.0,0.0,0,Scarborough,43.75741,-79.273304,M1P
6,"Golden Mile , Clairlea , Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.034483,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.711112,-79.284577,M1L
7,"Guildwood , Morningside , West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.763573,-79.188711,M1E
8,"Kennedy Park , Ionview , East Birchmount Park",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.727929,-79.262029,M1K
9,"Malvern , Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.806686,-79.194353,M1B


In [153]:
# create map
map_clusters = folium.Map(location=[43.706204, -79.398752], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(Scarborough_final_df['Latitude'], Scarborough_final_df['Longitude'],
                                  Scarborough_final_df['Neighborhood'], Scarborough_final_df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels are 0-based, so they index the palette directly; the former
    # `cluster-1` only worked for label 0 through negative-index wraparound.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [157]:
# save the map as HTML file (path is relative to the kernel's working directory)
map_clusters.save('final_project_map_clusters.html')

### Part 4.

##### Result & Conclusion Section:

<!-- afsdgd
sdfvsdfb
sdfbsdfbs
sdf
sdfbsfsbdfbd -->

##### Cluster : 1

In [158]:
Scarborough_final_df.loc[Scarborough_final_df['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio,Cluster Labels,Borough,Latitude,Longitude,PostalCode
0,Agincourt,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.04,0.02,...,0.0,0.02,0.0,0.0,0.0,0,Scarborough,43.7942,-79.262029,M1S
2,Cedarbrae,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.096774,0.064516,...,0.0,0.0,0.032258,0.0,0.032258,0,Scarborough,43.773136,-79.239476,M1H
3,"Clarks Corners , Tam O'Shanter , Sullivan",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.05,...,0.025,0.025,0.0,0.0,0.0,0,Scarborough,43.781638,-79.304302,M1T
5,"Dorset Park , Wexford Heights , Scarborough ...",0.022222,0.0,0.044444,0.0,0.022222,0.0,0.0,0.044444,0.022222,...,0.0,0.022222,0.0,0.0,0.0,0,Scarborough,43.75741,-79.273304,M1P
6,"Golden Mile , Clairlea , Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.034483,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.711112,-79.284577,M1L
7,"Guildwood , Morningside , West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.763573,-79.188711,M1E
8,"Kennedy Park , Ionview , East Birchmount Park",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.727929,-79.262029,M1K
9,"Malvern , Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.806686,-79.194353,M1B
10,"Milliken , Agincourt North , Steeles East , ...",0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.064516,0.0,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.815252,-79.284577,M1V
13,"Steeles West , L'Amoreaux West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,...,0.0,0.0,0.0,0.0,0.0,0,Scarborough,43.799525,-79.318389,M1W


##### Cluster : 2

In [159]:
Scarborough_final_df.loc[Scarborough_final_df['Cluster Labels']==1]

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio,Cluster Labels,Borough,Latitude,Longitude,PostalCode
11,"Rouge Hill , Port Union , Highland Creek",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1,Scarborough,43.784535,-79.160497,M1C


##### Cluster : 3

In [161]:
Scarborough_final_df.loc[Scarborough_final_df['Cluster Labels']==2]

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Video Game Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio,Cluster Labels,Borough,Latitude,Longitude,PostalCode
1,"Birch Cliff , Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,Scarborough,43.692657,-79.264848,M1N
4,"Cliffside , Cliffcrest , Scarborough Village...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,Scarborough,43.716316,-79.239476,M1M
12,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.076923,0.0,2,Scarborough,43.744734,-79.239476,M1J


#### Conclusion:

From the three dataframes (clusters) above, it can be seen that most of the bars are located around central Scarborough, and the second-largest cluster lies to the north of Scarborough. It can therefore be inferred from the map and the clusters that opening a bar on the outskirts of Scarborough — Rouge Hill, Port Union, Highland Creek (cluster label 1, shown above under "Cluster : 2") — would face less competition and have a greater chance of success.

https://rawgit.com/malodhi/Coursera_Capstone/Branch_1/final_project_map_clusters.html