In [1]:
#Import libraries
import numpy as np
import pandas as pd

In [2]:
#Import table data from Wikipedia
link="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df=pd.read_html(link, header=0)
#Put data into a table
df1=df[0]

In [3]:
#Drop rows where the Borough is 'Not assigned'
NotAssign = df1[df1['Borough'] == 'Not assigned' ].index
df1.drop(NotAssign,inplace=True)
#Replace 'Not assigned' Neighborhoods with their Borough
df1.Neighborhood.replace('Not assigned',df1.Borough,inplace=True)
#Combine duplicate Postcodes into one row with the Neighborhoods separated by commas
df1 = df1.groupby(['Postcode','Borough'])['Neighborhood'].apply(', '.join).reset_index()
#Rename Postcode to Postal Code
df1.rename(columns = {'Postcode':'Postal Code'}, inplace = True)

In [4]:
#Output the number of rows, columns in the DataFrame.
df1.shape

(103, 3)

In [5]:
#Import the Latitude and Longitude Data
df2=pd.read_csv('http://cocl.us/Geospatial_data')

In [6]:
#Merge the two DataFrames based on their common column, Postal Code
df3 = pd.merge(df1, df2, on="Postal Code")
df3

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [7]:
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



In [8]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Foursquare Data

Presence of Vegan Donuts

In [10]:
search_query =  'Vegan'

In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            search_query,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [14]:
# Get Venues that are defined as Vegan
vegan_venues = getNearbyVenues(names=df3['Neighborhood'],
                                   latitudes=df3['Latitude'],
                                   longitudes=df3['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [35]:
#Select only the Vegan Venues that sell donuts
vegan_donuts = vegan_venues[vegan_venues['Venue Category'].isin(['Bakery', 'Café', 'Coffee Shop', 'Dessert Shop']) ]
#Group these venues by Neighborhood
vegan_donuts.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",2,2,2,2,2,2
Central Bay Street,2,2,2,2,2,2
"Chinatown, Grange Park, Kensington Market",2,2,2,2,2,2
"Commerce Court, Victoria Hotel",1,1,1,1,1,1
"Design Exchange, Toronto Dominion Centre",2,2,2,2,2,2
"First Canadian Place, Underground city",2,2,2,2,2,2
"Harbord, University of Toronto",1,1,1,1,1,1
Harbourfront,2,2,2,2,2,2
"High Park, The Junction South",1,1,1,1,1,1
"Parkdale, Roncesvalles",1,1,1,1,1,1


Presence of Gluten Free Shops

In [36]:
search_query =  'Gluten Free'

In [38]:
# Get Venues that are defined as Gluten Free
gf_venues = getNearbyVenues(names=df3['Neighborhood'],
                                   latitudes=df3['Latitude'],
                                   longitudes=df3['Longitude']
                                  ) 

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [40]:
#Select only the Vegan Venues that sell donuts
gf_donuts = gf_venues[gf_venues['Venue Category'].isin(['Bakery', 'Café', 'Coffee Shop', 'Dessert Shop']) ]
#Group these venues by Neighborhood
gf_donuts.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Brockton, Exhibition Place, Parkdale Village",1,1,1,1,1,1
Central Bay Street,1,1,1,1,1,1
"Chinatown, Grange Park, Kensington Market",4,4,4,4,4,4
Christie,1,1,1,1,1,1
"Harbord, University of Toronto",1,1,1,1,1,1
Harbourfront,2,2,2,2,2,2
"High Park, The Junction South",1,1,1,1,1,1
North Toronto West,1,1,1,1,1,1
"Ryerson, Garden District",3,3,3,3,3,3
St. James Town,2,2,2,2,2,2


In [128]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# add Vegan venue markers to map
for lat, lng, venue, neighborhood in zip(vegan_donuts['Venue Latitude'], vegan_donuts['Venue Longitude'], vegan_donuts['Venue'], vegan_donuts['Neighborhood']):
    label = '{}, {}'.format('Venue', 'Neighborhood')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
# add Gluten Free venue markers to map
for lat, lng, venue, neighborhood in zip(gf_donuts['Venue Latitude'], gf_donuts['Venue Longitude'], gf_donuts['Venue'], gf_donuts['Neighborhood']):
    label = '{}, {}'.format('Venue', 'Neighborhood')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='purple',
        fill=True,
        fill_color='purple',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [57]:
#Add Gluten Free data to dataset
df4=pd.merge(df3, gf_donuts, on='Neighborhood', how='left')
df6=df4.drop(columns=['Venue', 'Neighborhood Latitude','Neighborhood Longitude','Venue Latitude','Venue Longitude'])
df6.rename(columns={'Venue Category':'Gluten Free'}, inplace=True)
#Add Vegan data to dataset
df7=pd.merge(df6, vegan_donuts, on='Neighborhood', how='left')
df8=df7.drop(columns=['Venue', 'Neighborhood Latitude','Neighborhood Longitude','Venue Latitude','Venue Longitude'])
df8.rename(columns={'Venue Category':'Vegan'}, inplace=True)
#Replace unassigned values with 0
df9=df8.fillna(0)
df9

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Gluten Free,Vegan
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,0
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,0
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0,0
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,0,0
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,0,0
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,0,0
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,0,0


Population Data

In [122]:
#Download population dataset
df_pop = pd.read_csv (r'https://ckan0.cf.opendata.inter.prod-toronto.ca/download_resource/ef0239b1-832b-4d0b-a1f3-4153e53b189e?format=csv')
#Drop unnecessary columns and select Millenial population data
df_pop1=df_pop.drop(columns=['_id','Category','Data Source','City of Toronto'])
df_agech = df_pop1[df_pop1.Topic == 'Age characteristics']
array=['Male: 20 to 24 years', 'Male: 25 to 29 years', 'Male: 30 to 34 years', 'Female: 20 to 24 years', 'Female: 25 to 29 years', 'Female: 30 to 34 years']
df_mil=df_agech.loc[df_agech['Characteristic'].isin(array)]
df_mil1=df_mil.drop(columns=['Topic','Characteristic'])
#Transpose data and convert to integer format
df_mil1.index = pd.Index(['1','2','3','4','5','6'],name='Neighborhood')
df_mils=df_mil1.T
df_mils['7']=df_mils[['1']].astype('int64')
df_mils['8']=df_mils[['2']].astype('int64')
df_mils['9']=df_mils[['3']].astype('int64')
df_mils['10']=df_mils[['4']].astype('int64')
df_mils['11']=df_mils[['5']].astype('int64')
df_mils['12']=df_mils[['6']].astype('int64')
#Sum up all Millenial population data
df_mils['Millenial Population']=df_mils[['7','8','9','10','11','12']].sum(axis=1)
df_millen=df_mils.drop(columns=['1','2','3','4','5','6','7','8','9','10','11','12'])
df_millens = df_millen.sort_values(by=['Millenial Population'], ascending=False)
df_millens

Neighborhood,Millenial Population
Waterfront Communities-The Island,34850
Niagara,15690
Willowdale East,15365
Church-Yonge Corridor,14045
Bay Street Corridor,13385
Woburn,12075
Dovercourt-Wallace Emerson-Junction,11235
Islington-City Centre West,10845
Annex,10365
Mount Pleasant West,10070


In [135]:
#Select Postal Codes with the highest Millenial population
neigh=['M5V','M5J','M6K','M2M','M4Y','M5B','M5C','M5S','M1H','M1G','M6G','M6H']
df_maxpopul=df9.loc[df9['Postal Code'].isin(neigh)]
df_maxpopul

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Gluten Free,Vegan
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,0
21,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493,0,0
52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,0
57,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Coffee Shop,Coffee Shop
58,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Coffee Shop,Café
59,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Coffee Shop,Coffee Shop
60,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Café,Coffee Shop
61,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Café,Café
62,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,Café,Coffee Shop


Recommended Cartems Franchise Locations

In [137]:
#Select winning postal codes based on population, venues, and Subway access
neighwin=['M5V','M5J','M2M','M4Y','M1H','M1G','M6H']
df_cartems=df_maxpopul.loc[df_maxpopul['Postal Code'].isin(neighwin)]
df_cartems

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Gluten Free,Vegan
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,0
21,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493,0,0
52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,0
73,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,0,0
90,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0,0
99,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,0,0


In [138]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# add Cartem's Locations markers to map
for lat, lng, venue, neighborhood in zip(df_cartems['Latitude'], df_cartems['Longitude'], df_cartems['Borough'], df_cartems['Neighborhood']):
    label = '{}, {}'.format('Borough', 'Neighborhood')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='green',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)    
    
map_toronto